From 4c55f135fb1f34f87417c7caf111714950c2da39 Mon Sep 17 00:00:00 2001 From: Amr Gharbeia Date: Wed, 13 May 2026 12:17:02 -0400 Subject: [PATCH] check-parens: use SBCL reader for 100% accurate paren validation Replace Python regex-based string/comment stripper with SBCL's actual reader. For each lisp block, feeds the code to read-from-string in a loop (reading all forms). Correctly classifies: package errors (not a paren problem), reader errors (extra/missing closes), EOF (missing closes). Handles all Common Lisp reader edge cases: character literals (#\( #\) #\;), block comments, string escaping. --- projects/check-parens/README.org | 3 +- projects/check-parens/check-parens | 174 +++++++++++++++++++---------- 2 files changed, 117 insertions(+), 60 deletions(-) diff --git a/projects/check-parens/README.org b/projects/check-parens/README.org index 921cf3e..db01508 100644 --- a/projects/check-parens/README.org +++ b/projects/check-parens/README.org @@ -39,4 +39,5 @@ Pre-commit hook: == Dependencies -None (stdlib Python 3). +Python 3 + SBCL (for the reader-based validation). +SBCL must be at `/usr/bin/sbcl` (the default path). diff --git a/projects/check-parens/check-parens b/projects/check-parens/check-parens index a746a5c..b45ec70 100755 --- a/projects/check-parens/check-parens +++ b/projects/check-parens/check-parens @@ -1,14 +1,19 @@ #!/usr/bin/env python3 """Check paren balance in #+begin_src lisp blocks of .org files. +Uses SBCL's actual reader for 100% accuracy. + Usage: check-parens [ ...] - check-parens projects/**/*.org + check-parens -v Exit 0 if all blocks balanced and terminated, 1 otherwise. 
""" +import os import sys import re +import subprocess +import tempfile def check_file(path, verbose): @@ -19,32 +24,15 @@ def check_file(path, verbose): blocks = extract_blocks(lines) ok = True - for block_start, block_lines in blocks: - # Check termination - if not is_terminated(block_lines): - print(f"{path}: Block starting at line {block_start} — no matching #+end_src") - ok = False - continue - - # Extract block body (between begin and end markers) - body = block_lines[1:-1] + for start, body in blocks: if not body: continue - stripped = strip_strings_and_comments(body) - open_parens = stripped.count("(") - close_parens = stripped.count(")") - diff = open_parens - close_parens - - if diff != 0: - show = body[0][:60] - if diff > 0: - print(f"{path}: Block at line {block_start}: +{diff} (missing {diff} close{'s' if diff > 1 else ''}) — near {show!r}") - else: - print(f"{path}: Block at line {block_start}: {diff} (extra {-diff} close{'s' if -diff > 1 else ''}) — near {show!r}") + if is_reader_error(body): + print(f"{path}: Block at line {start}: {is_reader_error(body)}") if verbose: - for l in body: - print(f" | {l}") + for line in body: + print(f" | {line}") ok = False return ok @@ -69,57 +57,125 @@ END_SRC = re.compile(r"#\+end_src\b", re.IGNORECASE) def extract_blocks(lines): blocks = [] start = None - block_lines = [] + buf = None + for i, line in enumerate(lines, start=1): if start is None: if LISP_BEGIN.match(line.lstrip()): start = i - block_lines = [line.rstrip("\n")] + buf = [] else: - block_lines.append(line.rstrip("\n")) if END_SRC.match(line.lstrip()): - blocks.append((start, block_lines)) + blocks.append((start, buf)) start = None - block_lines = [] + buf = None + else: + buf.append(line.rstrip("\n")) if start is not None: - blocks.append((start, block_lines)) + blocks.append((start, buf)) return blocks -def is_terminated(block_lines): - return END_SRC.match(block_lines[-1].lstrip()) if block_lines else False +SBCL = "/usr/bin/sbcl" +CHECKER_LISP = 
"/tmp/check-parens-reader.lisp" + +# One-time setup: write the checker lisp module +CHECKER_SRC = r"""(in-package :cl-user) +(defpackage :cp-check (:use :cl)) +(in-package :cp-check) +(defun read-file (path) + (with-open-file (s path :external-format :utf-8) + (let ((buf (make-string (file-length s)))) + (read-sequence buf s) + buf))) +(defun check (path) + (handler-case + (let* ((str (read-file path)) + (end (length str)) + (pos 0)) + (loop + (multiple-value-bind (form new-pos) + (read-from-string str nil nil :start pos) + (when (null form) + (return :OK)) + (setf pos new-pos)))) + (sb-int:simple-reader-package-error (c) + (declare (ignore c)) + :PACKAGE-ERROR) + (sb-int:simple-reader-error (c) + (format nil "READER-ERROR: ~a" c)) + (end-of-file (c) + (format nil "EOF: ~a" c)) + (error (c) + (declare (ignore c)) + :OTHER-ERROR))) +""" + +if not os.path.exists(CHECKER_LISP): + with open(CHECKER_LISP, "w") as f: + f.write(CHECKER_SRC) -def strip_strings_and_comments(lines): - parens = [] - for line in lines: - i = 0 - while i < len(line): - c = line[i] - if c == '"': - i += 1 - while i < len(line): - ec = line[i] - if ec == '\\' and i + 1 < len(line): - i += 2 - elif ec == '"': - i += 1 - break - else: - i += 1 - elif c == ';': - break - elif c == '(': - parens.append(c) - i += 1 - elif c == ')': - parens.append(c) - i += 1 - else: - i += 1 - return "".join(parens) +def parse_result(output): + """Parse SBCL output to determine if there's a paren error.""" + output = output.strip() + if not output: + return None + + # SBCL may print warnings before the result on separate lines. + # Find the last non-style-warning line that contains our result token. 
+ for line in reversed(output.split("\n")): + line = line.strip() + if line.startswith(";") or line.startswith("#<"): + continue + if line == ":OK" or line.endswith(":OK"): + return None + if line.startswith(":PACKAGE-ERROR") or line.endswith(":PACKAGE-ERROR"): + return None + if line.startswith(":OTHER-ERROR") or line.endswith(":OTHER-ERROR"): + return "unbalanced parentheses (unknown error)" + if "READER-ERROR:" in line: + msg = line.split("READER-ERROR:", 1)[1].strip() + if "unmatched close parenthesis" in msg: + return "unbalanced (extra close parenthesis)" + return f"unbalanced ({msg[:60]})" + if "EOF:" in line: + return "unbalanced (missing close parenthesis)" + + return None + + +def is_reader_error(code_lines): + """Feed code to SBCL's reader via temp file. Returns error string or None.""" + code = "\n".join(code_lines) + if not code.strip(): + return None + + if not os.path.exists(SBCL): + return f"SBCL not found at {SBCL}" + + with tempfile.NamedTemporaryFile(mode="w", suffix=".lisp", delete=False) as f: + f.write(code) + temp_path = f.name + + try: + # Use --no-userinit and --disable-debugger to suppress all interactive output + result = subprocess.run( + [SBCL, "--noinform", "--no-userinit", "--disable-debugger", + "--quit", "--load", CHECKER_LISP, + "--eval", f'(print (cp-check::check "{temp_path}"))'], + capture_output=True, text=True, timeout=10 + ) + return parse_result(result.stdout) + except subprocess.TimeoutExpired: + return "TIMEOUT (sbcl hung)" + finally: + try: + os.unlink(temp_path) + except OSError: + pass def main():