check-parens: use SBCL reader for 100% accurate paren validation

Replace the Python regex-based string/comment stripper with SBCL's actual
reader. For each lisp block, the code is fed to read-from-string in a
loop (reading all forms). Results are classified as: package errors (not a
paren problem), reader errors (extra/missing closes), or EOF (missing
closes). Handles all Common Lisp reader edge cases: character literals
(#\( #\) #\;), block comments, and string escaping.
This commit is contained in:
2026-05-13 12:17:02 -04:00
parent fc7bc2fef8
commit 4c55f135fb
2 changed files with 117 additions and 60 deletions

View File

@@ -39,4 +39,5 @@ Pre-commit hook:
== Dependencies == Dependencies
None (stdlib Python 3). Python 3 + SBCL (for the reader-based validation).
SBCL must be at `/usr/bin/sbcl` (the default path).

View File

@@ -1,14 +1,19 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
"""Check paren balance in #+begin_src lisp blocks of .org files. """Check paren balance in #+begin_src lisp blocks of .org files.
Uses SBCL's actual reader for 100% accuracy.
Usage: check-parens <file.org> [<file.org> ...] Usage: check-parens <file.org> [<file.org> ...]
check-parens projects/**/*.org check-parens -v <file.org>
Exit 0 if all blocks balanced and terminated, 1 otherwise. Exit 0 if all blocks balanced and terminated, 1 otherwise.
""" """
import os
import sys import sys
import re import re
import subprocess
import tempfile
def check_file(path, verbose): def check_file(path, verbose):
@@ -19,32 +24,15 @@ def check_file(path, verbose):
blocks = extract_blocks(lines) blocks = extract_blocks(lines)
ok = True ok = True
for block_start, block_lines in blocks: for start, body in blocks:
# Check termination
if not is_terminated(block_lines):
print(f"{path}: Block starting at line {block_start} — no matching #+end_src")
ok = False
continue
# Extract block body (between begin and end markers)
body = block_lines[1:-1]
if not body: if not body:
continue continue
stripped = strip_strings_and_comments(body) if is_reader_error(body):
open_parens = stripped.count("(") print(f"{path}: Block at line {start}: {is_reader_error(body)}")
close_parens = stripped.count(")")
diff = open_parens - close_parens
if diff != 0:
show = body[0][:60]
if diff > 0:
print(f"{path}: Block at line {block_start}: +{diff} (missing {diff} close{'s' if diff > 1 else ''}) — near {show!r}")
else:
print(f"{path}: Block at line {block_start}: {diff} (extra {-diff} close{'s' if -diff > 1 else ''}) — near {show!r}")
if verbose: if verbose:
for l in body: for line in body:
print(f" | {l}") print(f" | {line}")
ok = False ok = False
return ok return ok
@@ -69,57 +57,125 @@ END_SRC = re.compile(r"#\+end_src\b", re.IGNORECASE)
def extract_blocks(lines):
    """Collect the bodies of lisp source blocks from the lines of an org file.

    Args:
        lines: Iterable of text lines, with or without trailing newlines.

    Returns:
        A list of ``(start, body)`` tuples. ``start`` is the 1-based line
        number of the line matching ``LISP_BEGIN``; ``body`` is the list of
        lines between that marker and the next line matching ``END_SRC``,
        with trailing newlines removed and both marker lines excluded.
        A block still open at end of input is returned too, with whatever
        body was collected so far.
    """
    blocks = []
    start = None  # line number of the currently open begin marker, if any
    buf = None    # body lines collected for the currently open block
    for i, line in enumerate(lines, start=1):
        if start is None:
            if LISP_BEGIN.match(line.lstrip()):
                start = i
                buf = []
        elif END_SRC.match(line.lstrip()):
            blocks.append((start, buf))
            start = None
            buf = None
        else:
            buf.append(line.rstrip("\n"))
    # An unterminated block is still handed back so its content gets checked.
    if start is not None:
        blocks.append((start, buf))
    return blocks
def is_terminated(block_lines): SBCL = "/usr/bin/sbcl"
return END_SRC.match(block_lines[-1].lstrip()) if block_lines else False CHECKER_LISP = "/tmp/check-parens-reader.lisp"
# Lisp source of the checker module that SBCL loads to validate a block.
CHECKER_SRC = r"""(in-package :cl-user)

(defpackage :cp-check (:use :cl))
(in-package :cp-check)

;; Return the contents of PATH, decoded as UTF-8, as a string.
(defun read-file (path)
  (with-open-file (s path :external-format :utf-8)
    ;; FILE-LENGTH is used only as an upper bound on the character count;
    ;; keep just the prefix that READ-SEQUENCE actually filled so no
    ;; padding is handed to the reader.
    (let* ((buf (make-string (file-length s)))
           (filled (read-sequence buf s)))
      (subseq buf 0 filled))))

;; Read every top-level form in PATH and report the outcome as :OK,
;; :PACKAGE-ERROR, :OTHER-ERROR, or a string starting with
;; "READER-ERROR: " or "EOF: ".
(defun check (path)
  (handler-case
      (let ((str (read-file path))
            ;; Fresh cons used as the end-of-input marker, so that a
            ;; literal NIL or empty list in the block is not mistaken for it.
            (eof (list nil))
            (pos 0))
        ;; NOTE(review): *READ-EVAL* is left at its default, so sharpsign-dot
        ;; forms in a block are evaluated at read time. Only check trusted files.
        (loop
          (multiple-value-bind (form new-pos)
              (read-from-string str nil eof :start pos)
            (when (eq form eof)
              (return :OK))
            (setf pos new-pos))))
    (sb-int:simple-reader-package-error (c)
      (declare (ignore c))
      :PACKAGE-ERROR)
    (sb-int:simple-reader-error (c)
      (format nil "READER-ERROR: ~a" c))
    (end-of-file (c)
      (format nil "EOF: ~a" c))
    (error (c)
      (declare (ignore c))
      :OTHER-ERROR)))
"""
# Install the checker source on every run instead of only when the file is
# missing: a leftover file from an older version (or one planted by someone
# else at this predictable path) must never be loaded as code.
# The source is staged in a private temp file and moved into place with
# os.replace, which swaps the directory entry atomically and does not follow
# a symlink sitting at the destination.
_fd, _staged = tempfile.mkstemp(
    prefix=".check-parens-",
    suffix=".lisp",
    dir=os.path.dirname(CHECKER_LISP) or ".",
)
with os.fdopen(_fd, "w", encoding="utf-8") as f:
    f.write(CHECKER_SRC)
try:
    os.replace(_staged, CHECKER_LISP)
except OSError:
    # The shared path could not be taken over (for example it belongs to
    # another user in a sticky directory). Load the private copy instead of
    # trusting whatever is already there; this leaves one small temp file
    # behind per run.
    CHECKER_LISP = _staged
def parse_result(output):
    """Translate the checker's printed result into an error message.

    Args:
        output: Text captured from SBCL's standard output.

    Returns:
        ``None`` when the output is empty, reports ``:OK`` or
        ``:PACKAGE-ERROR`` (not a paren problem), or contains no recognised
        result; otherwise a short human-readable description of the problem.
    """
    # Drop blank lines and lines that look like compiler notes or printed
    # objects; only the remaining lines can carry the result.
    lines = [
        stripped
        for stripped in (raw.strip() for raw in output.splitlines())
        if stripped and not stripped.startswith((";", "#<"))
    ]
    if not lines:
        return None

    def keyword_verdict(line):
        """Return ``(matched, verdict)`` for a single-line result keyword."""
        if line.endswith(":OK"):
            return True, None
        if line.startswith(":PACKAGE-ERROR") or line.endswith(":PACKAGE-ERROR"):
            return True, None
        if line.startswith(":OTHER-ERROR") or line.endswith(":OTHER-ERROR"):
            return True, "unbalanced parentheses (unknown error)"
        return False, None

    # The result is printed last, so a keyword on the final line wins.
    matched, verdict = keyword_verdict(lines[-1])
    if matched:
        return verdict

    # Reader-error reports span several lines, so the cause has to be looked
    # for in everything after the marker, not just on the marker's own line.
    text = "\n".join(lines)
    if "READER-ERROR:" in text:
        detail = text.split("READER-ERROR:", 1)[1]
        # Collapse the report to one line and drop the closing quote that
        # printing a Lisp string adds.
        msg = " ".join(detail.split()).rstrip('"').strip()
        if "unmatched close parenthesis" in msg:
            return "unbalanced (extra close parenthesis)"
        return f"unbalanced ({msg[:60]})"
    if "EOF:" in text:
        return "unbalanced (missing close parenthesis)"

    # Fall back to a keyword that is followed by unrelated trailing output.
    for line in reversed(lines[:-1]):
        matched, verdict = keyword_verdict(line)
        if matched:
            return verdict
    return None
def is_reader_error(code_lines):
    """Run one block through the SBCL-based checker.

    The block is written to a temporary file, SBCL is started with the
    checker module loaded, and its printed result is interpreted by
    ``parse_result``.

    Args:
        code_lines: The lines of the block, without trailing newlines.

    Returns:
        ``None`` when the block is empty/blank or no problem was reported;
        otherwise a string describing the problem. This includes the cases
        where SBCL is missing, cannot be started, times out, or exits with a
        non-zero status.
    """
    code = "\n".join(code_lines)
    if not code.strip():
        return None
    if not os.path.exists(SBCL):
        return f"SBCL not found at {SBCL}"

    # The Lisp side opens the file as UTF-8, so write it as UTF-8 regardless
    # of the locale's default encoding.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".lisp", delete=False, encoding="utf-8"
    ) as f:
        f.write(code)
        temp_path = f.name

    # The path is embedded in a Lisp string literal: escape the two
    # characters that are special inside one.
    lisp_path = temp_path.replace("\\", "\\\\").replace('"', '\\"')
    try:
        # Use --no-userinit and --disable-debugger to suppress all interactive output
        result = subprocess.run(
            [SBCL, "--noinform", "--no-userinit", "--disable-debugger",
             "--quit", "--load", CHECKER_LISP,
             "--eval", f'(print (cp-check::check "{lisp_path}"))'],
            capture_output=True, text=True, timeout=10
        )
    except subprocess.TimeoutExpired:
        return "TIMEOUT (sbcl hung)"
    except OSError as exc:
        return f"could not run SBCL: {exc}"
    finally:
        try:
            os.unlink(temp_path)
        except OSError:
            pass

    # A failed run prints no result; report it instead of letting the empty
    # output be read as "no error".
    if result.returncode != 0:
        diagnostics = (result.stderr or "").strip().splitlines()
        detail = diagnostics[-1].strip() if diagnostics else "no diagnostic output"
        return f"SBCL exited with status {result.returncode}: {detail}"
    return parse_result(result.stdout)
def main(): def main():