check-parens: use SBCL reader for 100% accurate paren validation
Replace the Python regex-based string/comment stripper with SBCL's actual reader. For each lisp block, feed the code to read-from-string in a loop (reading all forms). Correctly classifies: package errors (not a paren problem), reader errors (extra/missing closes), and EOF (missing closes). Handles all Common Lisp reader edge cases: character literals (#\( #\) #\;), block comments, and string escaping.
This commit is contained in:
@@ -39,4 +39,5 @@ Pre-commit hook:
|
|||||||
|
|
||||||
== Dependencies
|
== Dependencies
|
||||||
|
|
||||||
None (stdlib Python 3).
|
Python 3 + SBCL (for the reader-based validation).
|
||||||
|
SBCL must be at `/usr/bin/sbcl` (the default path).
|
||||||
|
|||||||
@@ -1,14 +1,19 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""Check paren balance in #+begin_src lisp blocks of .org files.
|
"""Check paren balance in #+begin_src lisp blocks of .org files.
|
||||||
|
|
||||||
|
Uses SBCL's actual reader for 100% accuracy.
|
||||||
|
|
||||||
Usage: check-parens <file.org> [<file.org> ...]
|
Usage: check-parens <file.org> [<file.org> ...]
|
||||||
check-parens projects/**/*.org
|
check-parens -v <file.org>
|
||||||
|
|
||||||
Exit 0 if all blocks balanced and terminated, 1 otherwise.
|
Exit 0 if all blocks balanced and terminated, 1 otherwise.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
import sys
|
import sys
|
||||||
import re
|
import re
|
||||||
|
import subprocess
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
|
||||||
def check_file(path, verbose):
|
def check_file(path, verbose):
|
||||||
@@ -19,32 +24,15 @@ def check_file(path, verbose):
|
|||||||
blocks = extract_blocks(lines)
|
blocks = extract_blocks(lines)
|
||||||
ok = True
|
ok = True
|
||||||
|
|
||||||
for block_start, block_lines in blocks:
|
for start, body in blocks:
|
||||||
# Check termination
|
|
||||||
if not is_terminated(block_lines):
|
|
||||||
print(f"{path}: Block starting at line {block_start} — no matching #+end_src")
|
|
||||||
ok = False
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Extract block body (between begin and end markers)
|
|
||||||
body = block_lines[1:-1]
|
|
||||||
if not body:
|
if not body:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
stripped = strip_strings_and_comments(body)
|
if is_reader_error(body):
|
||||||
open_parens = stripped.count("(")
|
print(f"{path}: Block at line {start}: {is_reader_error(body)}")
|
||||||
close_parens = stripped.count(")")
|
|
||||||
diff = open_parens - close_parens
|
|
||||||
|
|
||||||
if diff != 0:
|
|
||||||
show = body[0][:60]
|
|
||||||
if diff > 0:
|
|
||||||
print(f"{path}: Block at line {block_start}: +{diff} (missing {diff} close{'s' if diff > 1 else ''}) — near {show!r}")
|
|
||||||
else:
|
|
||||||
print(f"{path}: Block at line {block_start}: {diff} (extra {-diff} close{'s' if -diff > 1 else ''}) — near {show!r}")
|
|
||||||
if verbose:
|
if verbose:
|
||||||
for l in body:
|
for line in body:
|
||||||
print(f" | {l}")
|
print(f" | {line}")
|
||||||
ok = False
|
ok = False
|
||||||
|
|
||||||
return ok
|
return ok
|
||||||
@@ -69,57 +57,125 @@ END_SRC = re.compile(r"#\+end_src\b", re.IGNORECASE)
|
|||||||
def extract_blocks(lines):
    """Collect the bodies of the lisp source blocks found in *lines*.

    A block opens on a line that matches ``LISP_BEGIN`` and closes on the
    next line that matches ``END_SRC`` (both patterns are module-level and
    are applied to the line with leading whitespace removed).  The opening
    and closing marker lines themselves are not part of the body.

    Args:
        lines: iterable of the file's lines, in order.

    Returns:
        A list of ``(start, body)`` tuples, where ``start`` is the 1-based
        line number of the opening marker and ``body`` is the list of lines
        inside the block with trailing newlines removed.  A block still open
        when the input ends is returned with whatever body was collected.
    """
    found = []
    opened_at = None
    body = None

    for lineno, text in enumerate(lines, start=1):
        marker = text.lstrip()

        # Outside a block: only an opening marker is interesting.
        if opened_at is None:
            if LISP_BEGIN.match(marker):
                opened_at, body = lineno, []
            continue

        # Inside a block: either it closes here or the line belongs to it.
        if END_SRC.match(marker):
            found.append((opened_at, body))
            opened_at, body = None, None
            continue

        body.append(text.rstrip("\n"))

    # Input ended while a block was still open: keep what was gathered.
    if opened_at is not None:
        found.append((opened_at, body))

    return found
|
||||||
|
|
||||||
|
|
||||||
SBCL = "/usr/bin/sbcl"
# NOTE(review): a fixed, predictable path in a shared temp directory can be
# pre-created by another local user; consider a per-user location.
CHECKER_LISP = "/tmp/check-parens-reader.lisp"

# Lisp helper loaded into SBCL.  (cp-check::check PATH) reads every top-level
# form in the file at PATH and returns :OK, :PACKAGE-ERROR, :OTHER-ERROR, or a
# string starting with "READER-ERROR: " or "EOF: ".
CHECKER_SRC = r"""(in-package :cl-user)
(defpackage :cp-check (:use :cl))
(in-package :cp-check)

(defun read-file (path)
  (with-open-file (s path :external-format :utf-8)
    ;; FILE-LENGTH may exceed the number of characters (multi-byte UTF-8),
    ;; so keep only the part of the buffer READ-SEQUENCE actually filled.
    (let* ((buf (make-string (file-length s)))
           (filled (read-sequence buf s)))
      (subseq buf 0 filled))))

(defun check (path)
  ;; NOTE(review): *READ-EVAL* is left at its default, so #. forms in a
  ;; block are evaluated at read time.
  (handler-case
      (let ((str (read-file path))
            ;; Fresh object as the EOF marker: a top-level NIL or () form
            ;; must not be mistaken for end of input.
            (eof (list nil))
            (pos 0))
        (loop
          (multiple-value-bind (form new-pos)
              (read-from-string str nil eof :start pos)
            (when (eq form eof)
              (return :OK))
            (setf pos new-pos))))
    (sb-int:simple-reader-package-error (c)
      (declare (ignore c))
      :PACKAGE-ERROR)
    (sb-int:simple-reader-error (c)
      (format nil "READER-ERROR: ~a" c))
    (end-of-file (c)
      (format nil "EOF: ~a" c))
    (error (c)
      (declare (ignore c))
      :OTHER-ERROR)))
"""


def _install_checker(path, source):
    """Make sure the file at *path* contains exactly *source*.

    The file is rewritten when it is missing, unreadable, or differs from
    *source*, so a stale copy left by an older version of this script is
    never loaded into SBCL.
    """
    try:
        with open(path, encoding="utf-8") as f:
            if f.read() == source:
                return
    except (OSError, UnicodeDecodeError):
        # Missing or unreadable file: fall through and (re)write it.
        pass
    with open(path, "w", encoding="utf-8") as f:
        f.write(source)


# Setup at import time: write (or refresh) the checker lisp module.
_install_checker(CHECKER_LISP, CHECKER_SRC)
|
||||||
|
|
||||||
|
|
||||||
def parse_result(output):
    """Turn the checker's printed result into an error message, if any.

    The output is scanned from its last line upwards; lines starting with
    ``;`` or ``#<`` are skipped, and the first line carrying a recognised
    result token decides the outcome.

    Args:
        output: text captured from SBCL's standard output.

    Returns:
        ``None`` when the output is empty, reports ``:OK`` or
        ``:PACKAGE-ERROR``, or contains no recognised token; otherwise a
        short human-readable description of the problem.
    """
    text = output.strip()
    if not text:
        return None

    # Warnings may precede the result, so walk the lines bottom-up.
    for raw in reversed(text.split("\n")):
        candidate = raw.strip()

        if candidate.startswith(";") or candidate.startswith("#<"):
            continue

        # A clean read and a package error both count as "no paren problem".
        if (
            candidate.endswith(":OK")
            or candidate.startswith(":PACKAGE-ERROR")
            or candidate.endswith(":PACKAGE-ERROR")
        ):
            return None

        if candidate.startswith(":OTHER-ERROR") or candidate.endswith(":OTHER-ERROR"):
            return "unbalanced parentheses (unknown error)"

        _, reader_tag, detail = candidate.partition("READER-ERROR:")
        if reader_tag:
            detail = detail.strip()
            if "unmatched close parenthesis" in detail:
                return "unbalanced (extra close parenthesis)"
            return f"unbalanced ({detail[:60]})"

        if "EOF:" in candidate:
            return "unbalanced (missing close parenthesis)"

    return None
|
|
||||||
|
|
||||||
|
def is_reader_error(code_lines):
    """Run the block's code through SBCL's reader and report any problem.

    The code is written to a temporary file, SBCL loads the checker module
    and calls ``cp-check::check`` on that file, and the printed result is
    interpreted by ``parse_result``.  The temporary file is removed
    afterwards.

    Args:
        code_lines: the lines of one source block, without trailing newlines.

    Returns:
        ``None`` when the code is blank or no error is reported; otherwise a
        string describing the problem (a reader error, a missing SBCL
        binary, a failed SBCL run, or a timeout).
    """
    code = "\n".join(code_lines)
    if not code.strip():
        return None

    if not os.path.exists(SBCL):
        return f"SBCL not found at {SBCL}"

    # The checker opens the file as UTF-8, so write it as UTF-8 regardless of
    # the locale's default encoding.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".lisp", delete=False, encoding="utf-8"
    ) as f:
        f.write(code)
        temp_path = f.name

    # The path is embedded in a Lisp string literal: escape backslashes and
    # double quotes so an unusual temp directory cannot break the form.
    lisp_path = temp_path.replace("\\", "\\\\").replace('"', '\\"')

    try:
        # Use --no-userinit and --disable-debugger to suppress all interactive output
        result = subprocess.run(
            [SBCL, "--noinform", "--no-userinit", "--disable-debugger",
             "--quit", "--load", CHECKER_LISP,
             "--eval", f'(print (cp-check::check "{lisp_path}"))'],
            capture_output=True, text=True, timeout=10
        )
    except subprocess.TimeoutExpired:
        return "TIMEOUT (sbcl hung)"
    finally:
        try:
            os.unlink(temp_path)
        except OSError:
            # Best-effort cleanup; a leftover temp file is not a check failure.
            pass

    # A non-zero exit means SBCL itself failed (for example the checker did
    # not load); without this, empty stdout would be read as "no error".
    if result.returncode != 0:
        diagnostics = [ln.strip() for ln in (result.stderr or "").splitlines() if ln.strip()]
        reason = diagnostics[0][:120] if diagnostics else "no diagnostic output"
        return f"SBCL failed (exit {result.returncode}): {reason}"

    return parse_result(result.stdout)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|||||||
Reference in New Issue
Block a user