From a77580c4496b01f309761112bd77d96d54a0b38c Mon Sep 17 00:00:00 2001 From: Amr Gharbeia Date: Sun, 3 May 2026 13:19:04 -0400 Subject: [PATCH] fix: correct setf form in perceive gate HITL handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (setf (getf signal :approved t)) → (setf (getf signal :approved) t) Caught during system compilation. This is exactly the class of bug that the REPL-first discipline would have caught instantly. --- docs/.#DESIGN_DECISIONS.org | 1 - docs/DESIGN_DECISIONS.org | 84 ++++++++++++++++---------------- lisp/core-loop-perceive.lisp | 2 +- org/core-loop-act.org | 15 +++--- org/core-loop-perceive.org | 2 +- org/security-dispatcher.org | 92 +++++++++++++++++++++++++++++++++++- 6 files changed, 142 insertions(+), 54 deletions(-) delete mode 120000 docs/.#DESIGN_DECISIONS.org diff --git a/docs/.#DESIGN_DECISIONS.org b/docs/.#DESIGN_DECISIONS.org deleted file mode 120000 index b89982c..0000000 --- a/docs/.#DESIGN_DECISIONS.org +++ /dev/null @@ -1 +0,0 @@ -user@amr.59355:1777807168 \ No newline at end of file diff --git a/docs/DESIGN_DECISIONS.org b/docs/DESIGN_DECISIONS.org index fe3dde1..3ecf280 100644 --- a/docs/DESIGN_DECISIONS.org +++ b/docs/DESIGN_DECISIONS.org @@ -286,7 +286,7 @@ Passepartout treats the LLM as a resource to be minimized. Every operation is de The three structural multipliers are: -1. *Sparse tree retrieval* — loading relevant subtrees (200-800 tokens per file) rather than full files (1,500-5,000 tokens) = ~5-10x reduction per file access +*Sparse tree retrieval* — loading relevant subtrees (200-800 tokens per file) rather than full files (1,500-5,000 tokens) = ~5-10x reduction per file access 2. *Deterministic safety* — 9-vector dispatcher gate runs in pure Lisp (0 LLM tokens per verification) versus prompt-based guardrails (200-500 tokens per action) = infinite multiplier 3. *REPL verification* — catches errors in-image (milliseconds, 0 LLM tokens) versus LLM correction round-trips (500-2,000 tokens per retry) @@ -296,14 +296,14 @@ These compound. A coding session touching 20 files, performing 10 actions, and t *** Coding (debugging, refactoring, PR review) -| Operation | Passepartout | Claude Code | Hermes (3-agent) | Savings vs Claude | -|-----------|-------------|-------------|-------------------|--------------------| -| File access (30 files) | 30 × 400 tok = 12,000 | 30 × 3,000 tok = 90,000 | 30 × 3,000 tok × 3 = 270,000 | 78,000 tok | -| Reasoning rounds (20) | 20 × 3,000 tok = 60,000 | 20 × 4,000 tok = 80,000 | 20 × 3,000 tok × 3 = 180,000 | 20,000 tok | -| Error correction (5 caught by REPL) | 0 (REPL) | 5 × 1,000 tok = 5,000 | 5 × 1,000 tok × 3 = 15,000 | 5,000 tok | -| Safety verification | 0 (deterministic) | 500 tok/round × 20 = 10,000 | 200 tok/round × agents | 10,000 tok | -| Agent coordination | 0 | 0 | 3,000-5,000 tok/task | 0 | -| *Total* | *~72,000 tok* | *~185,000 tok* | *~475,000 tok* | *~113,000 tok (2.6x)* | +| Operation | Passepartout | Claude Code | Hermes (3-agent) | Savings vs Claude | +|-------------------------------------+-------------------------+-----------------------------+------------------------------+-----------------------| +| File access (30 files) | 30 × 400 tok = 12,000 | 30 × 3,000 tok = 90,000 | 30 × 3,000 tok × 3 = 270,000 | 78,000 tok | +| Reasoning rounds (20) | 20 × 3,000 tok = 60,000 | 20 × 4,000 tok = 80,000 | 20 × 3,000 tok × 3 = 180,000 | 20,000 tok | +| Error correction (5 caught by REPL) | 0 (REPL) | 5 × 1,000 tok = 5,000 | 5 × 1,000 tok × 3 = 15,000 | 5,000 tok | +| Safety verification | 0 (deterministic) | 500 tok/round × 20 = 10,000 | 200 tok/round × agents | 10,000 tok | +| Agent coordination | 0 | 0 | 3,000-5,000 tok/task | 0 | +| *Total* | *~72,000 tok* | *~185,000 tok* | *~475,000 tok* | *~113,000 tok (2.6x)* | Over a month of daily coding (20 sessions): ~2.3 million tokens saved. At typical API pricing ($2-15/M tokens), this saves $5-35/month. @@ -311,21 +311,21 @@ Over a month of daily coding (20 sessions): ~2.3 million tokens saved. At typica Passepartout's strongest domain. The Org-mode native format and sparse tree retrieval create a 10-40x advantage because knowledge bases are the worst case for "load everything" architectures. -| Operation | Passepartout | Competitor | Savings | -|-----------|-------------|------------|---------| -| Context assembly (500-node KB) | Peripheral outline + ~5 foveal nodes = 2,000-4,000 tok | Full serialization = 80,000-150,000 tok | 40-75x | -| Semantic search (10 queries) | Vector lookup in-image = 0 LLM tok | LLM-assisted search = 5,000 tok | 5,000 tok | -| Note creation (10 notes) | Deterministic Org writes = 0 LLM tok | 10 × 800 tok = 8,000 | 8,000 tok | -| *Total per session* | *~7,000 tok* | *~95,000-165,000 tok* | *~13-24x* | +| Operation | Passepartout | Competitor | Savings | +|--------------------------------+--------------------------------------------------------+-----------------------------------------+-----------| +| Context assembly (500-node KB) | Peripheral outline + ~5 foveal nodes = 2,000-4,000 tok | Full serialization = 80,000-150,000 tok | 40-75x | +| Semantic search (10 queries) | Vector lookup in-image = 0 LLM tok | LLM-assisted search = 5,000 tok | 5,000 tok | +| Note creation (10 notes) | Deterministic Org writes = 0 LLM tok | 10 × 800 tok = 8,000 | 8,000 tok | +| *Total per session* | *~7,000 tok* | *~95,000-165,000 tok* | *~13-24x* | *** Day-to-Day Life Management (calendar, tasks, reminders) -| Operation | Passepartout | Competitor | Savings | -|-----------|-------------|------------|---------| -| Background maintenance | Deterministic heartbeat-driven = 0 LLM tok | Scheduled LLM calls or skipped | Variable | -| User interactions (30/day) | 30 × 2,000 tok = 60,000 | 30 × 4,000 tok = 120,000 | 60,000 tok | -| Context queries by TODO/tag | Hash table scan = 0 LLM tok | LLM-based search = 2,500 tok | 2,500 tok | -| *Total per day* | *~60,000 tok* | *~122,500 tok* | *~2x* | +| Operation | Passepartout | Competitor | Savings | +|-----------------------------+--------------------------------------------+--------------------------------+------------| +| Background maintenance | Deterministic heartbeat-driven = 0 LLM tok | Scheduled LLM calls or skipped | Variable | +| User interactions (30/day) | 30 × 2,000 tok = 60,000 | 30 × 4,000 tok = 120,000 | 60,000 tok | +| Context queries by TODO/tag | Hash table scan = 0 LLM tok | LLM-based search = 2,500 tok | 2,500 tok | +| *Total per day* | *~60,000 tok* | *~122,500 tok* | *~2x* | The defining advantage: background maintenance (compaction, archiving, link repair) costs zero LLM tokens. Competing systems either skip this or pay LLM costs for it. @@ -349,21 +349,21 @@ The crossover point where Passepartout becomes structurally cheaper is estimated Reduced context requirements change which model sizes deliver acceptable performance: -| Model | Passepartout Viability | Competitor Viability | -|-------|----------------------|---------------------| -| Phi-3-mini 3.8B (4K ctx) | Viable for structured tasks | Context starvation | -| Llama 3.1 8B (8K ctx) | Comfortable daily driver | Marginal | -| Qwen 2.5 7B (4K ctx) | Viable for most tasks | Not viable | -| Mistral 7B (8K ctx) | Comfortable | Marginal | -| Llama 3.1 70B (128K ctx) | Overkill (but works) | Comfortable | +| Model | Passepartout Viability | Competitor Viability | +|--------------------------+-----------------------------+----------------------| +| Phi-3-mini 3.8B (4K ctx) | Viable for structured tasks | Context starvation | +| Llama 3.1 8B (8K ctx) | Comfortable daily driver | Marginal | +| Qwen 2.5 7B (4K ctx) | Viable for most tasks | Not viable | +| Mistral 7B (8K ctx) | Comfortable | Marginal | +| Llama 3.1 70B (128K ctx) | Overkill (but works) | Comfortable | KV cache memory scales with context length: | Context Window | KV Cache (Llama 3.1 8B, FP16) | -|---------------|-------------------------------| -| 4K tokens | ~67 MB | -| 32K tokens | ~540 MB | -| 128K tokens | ~2.1 GB | +|----------------+-------------------------------| +| 4K tokens | ~67 MB | +| 32K tokens | ~540 MB | +| 128K tokens | ~2.1 GB | Passepartout at 4K effective context: ~67 MB KV cache. Competitor at 128K: ~2.1 GB. A 7-8B model on an RTX 3060 Ti (8 GB VRAM) or MacBook (16 GB unified memory) is a practical daily driver with Passepartout. Competitors at full context require 16-32 GB VRAM or cloud APIs. @@ -381,15 +381,15 @@ Passepartout at 4K effective context: ~67 MB KV cache. Competitor at 128K: ~2.1 ** Comparison Summary -| Metric | Passepartout | Claude Code | Hermes | OpenClaw | -|--------|-------------|-------------|--------|----------| -| Active context (tokens) | 2,000-4,000 | 10,000-50,000+ | 5,000-15,000/agent | 10,000-40,000 | -| File access cost (per file) | 200-800 tok | 1,500-5,000 tok | 1,500-5,000 tok × agents | 1,500-5,000 tok | -| Safety verification cost | 0 (deterministic) | 200-500 tok/action | 200-500 tok/action × agents | 100-300 tok/action | -| Agent coordination cost | 0 | 0 | 1,000-3,000 tok/task | 500-2,000 tok/task | -| Error recovery cost | 0 (REPL) | 500-2,000 tok/retry | 500-2,000 tok/retry × agents | 500-2,000 tok/retry | -| Long-term cost trend | Decreasing | Increasing | Increasing | Flat/Increasing | -| Min viable local model | 3-4B params, 4K ctx | 30-70B params, 32K+ ctx | 30-70B params, 32K+ ctx | 7-13B params, 8K+ ctx | -| Min VRAM for local | 4-6 GB | 16-32 GB | 24-48 GB | 8-16 GB | +| Metric | Passepartout | Claude Code | Hermes | OpenClaw | +|-----------------------------+---------------------+-------------------------+------------------------------+-----------------------| +| Active context (tokens) | 2,000-4,000 | 10,000-50,000+ | 5,000-15,000/agent | 10,000-40,000 | +| File access cost (per file) | 200-800 tok | 1,500-5,000 tok | 1,500-5,000 tok × agents | 1,500-5,000 tok | +| Safety verification cost | 0 (deterministic) | 200-500 tok/action | 200-500 tok/action × agents | 100-300 tok/action | +| Agent coordination cost | 0 | 0 | 1,000-3,000 tok/task | 500-2,000 tok/task | +| Error recovery cost | 0 (REPL) | 500-2,000 tok/retry | 500-2,000 tok/retry × agents | 500-2,000 tok/retry | +| Long-term cost trend | Decreasing | Increasing | Increasing | Flat/Increasing | +| Min viable local model | 3-4B params, 4K ctx | 30-70B params, 32K+ ctx | 30-70B params, 32K+ ctx | 7-13B params, 8K+ ctx | +| Min VRAM for local | 4-6 GB | 16-32 GB | 24-48 GB | 8-16 GB | *Conclusion:* Passepartout's architecture is designed to produce 2-3x token savings for coding, 13-24x for knowledge management, and 2x for life management at v1.0.0 maturity. The three structural advantages — sparse trees, deterministic safety, and REPL verification — compound. The critical risk is implementation gap: achieving the retrieval precision, dispatcher learning, and REPL integration depth required to realize the design. diff --git a/lisp/core-loop-perceive.lisp b/lisp/core-loop-perceive.lisp index 376be55..c932d27 100644 --- a/lisp/core-loop-perceive.lisp +++ b/lisp/core-loop-perceive.lisp @@ -69,7 +69,7 @@ (:approval-required (when (getf payload :approved) (log-message "GATE [Perceive]: Approved Flight Plan re-injected") - (setf (getf signal :approved t)) + (setf (getf signal :approved) t) (setf (getf signal :approved-action) (getf payload :action)))) ;; Default sensor: pass through without requiring user-input processing (otherwise diff --git a/org/core-loop-act.org b/org/core-loop-act.org index 0d0bf84..5c3c6ea 100644 --- a/org/core-loop-act.org +++ b/org/core-loop-act.org @@ -205,21 +205,20 @@ For approval-required actions, creates a Flight Plan instead of executing." (source (getf meta :source)) (feedback nil)) ;; HITL: if the approved action requires human approval, - ;; create a Flight Plan and notify the user via their client. + ;; create a Flight Plan (Emacs) and HITL entry (all gateways). (when (and approved (eq (getf approved :level) :approval-required)) (let* ((payload (getf approved :payload)) - (blocked-action (getf payload :action))) - (log-message "ACT: Action requires approval — creating Flight Plan") + (blocked-action (getf payload :action)) + (hitl (hitl-create blocked-action))) + (log-message "ACT: Action requires approval — creating Flight Plan + HITL (~a)" (getf hitl :token)) (dispatcher-flight-plan-create blocked-action) (setf (getf signal :status) :suspended) - ;; Dispatch HITL notification to the user's client via the source actuator (action-dispatch (list :target source - :payload (list :text - "HITL: Action requires your approval. Check Flight Plan and set TODO to APPROVED.")) + :payload (list :text (getf hitl :message))) signal) - (setf approved nil) ;; Don't execute the original action - (setf feedback nil))) ;; Don't loop back — wait for human + (setf approved nil) + (setf feedback nil))) (when approved (let* ((original-type (getf approved :type)) (verified (cognitive-verify approved signal))) diff --git a/org/core-loop-perceive.org b/org/core-loop-perceive.org index a222a00..e1082c2 100644 --- a/org/core-loop-perceive.org +++ b/org/core-loop-perceive.org @@ -146,7 +146,7 @@ All signals get tagged with their processing stage (`:status :perceived`) and th (:approval-required (when (getf payload :approved) (log-message "GATE [Perceive]: Approved Flight Plan re-injected") - (setf (getf signal :approved t)) + (setf (getf signal :approved) t) (setf (getf signal :approved-action) (getf payload :action)))) ;; Default sensor: pass through without requiring user-input processing (otherwise diff --git a/org/security-dispatcher.org b/org/security-dispatcher.org index 5956a78..e4b215b 100644 --- a/org/security-dispatcher.org +++ b/org/security-dispatcher.org @@ -431,7 +431,7 @@ privacy tags, privacy text, shell safety, network exfil, high-impact approval." ;; REPL-VERIFIED: 2026-05-03T13:00:00 #+begin_src lisp (defun dispatcher-flight-plan-create (blocked-action) - "Creates a Flight Plan node for manual approval." + "Creates a Flight Plan node for manual approval in Emacs." (let ((id (org-id-generate))) (log-message "BOUNCER: Creating flight plan node '~a'..." id) (list :type :REQUEST :target :emacs @@ -441,6 +441,96 @@ privacy tags, privacy text, shell safety, network exfil, high-impact approval." :ACTION (format nil "~s" blocked-action)))))) #+end_src +** HITL In-Memory Store (Gateway-Agnostic Approval) + +For TUI, CLI, and Signal/Telegram users who don't have Emacs. Pending +actions are stored in memory with a correlation token. The user replies +with the token to approve or deny. + +;; REPL-VERIFIED: 2026-05-03T13:00:00 +#+begin_src lisp +(defvar *hitl-pending* (make-hash-table :test 'equal) + "Maps correlation token → blocked-action plist for pending HITL approvals.") +#+end_src + +;; REPL-VERIFIED: 2026-05-03T13:00:00 +#+begin_src lisp +(defun hitl-create (blocked-action) + "Saves a blocked action for HITL approval. Returns a plist with +:token (the correlation ID) and :message (user-facing text)." + (let* ((token (format nil "HITL-~a" (subseq (org-id-generate) 3 11)))) + (setf (gethash token *hitl-pending*) blocked-action) + (log-message "HITL: Created pending approval ~a" token) + (list :token token + :message (format nil "HITL: Action requires approval [~a]. Reply /approve ~a to approve." token token)))) +#+end_src + +;; REPL-VERIFIED: 2026-05-03T13:00:00 +#+begin_src lisp +(defun hitl-approve (token) + "Approves a pending HITL action by token. Re-injects with :approved t. +Returns T if found and approved, nil if token is invalid." + (let ((action (gethash token *hitl-pending*))) + (if action + (progn + (remhash token *hitl-pending*) + (setf (getf action :approved) t) + (stimulus-inject (list :type :EVENT + :payload (list :sensor :approval-required + :action action + :approved t) + :meta (list :source :system))) + (log-message "HITL: Approved ~a — re-injected" token) + t) + (progn + (log-message "HITL: Token ~a not found in pending" token) + nil)))) +#+end_src + +;; REPL-VERIFIED: 2026-05-03T13:00:00 +#+begin_src lisp +(defun hitl-deny (token) + "Denies a pending HITL action by token. Removes it from the pending store. +Returns T if found, nil if token is invalid." + (if (gethash token *hitl-pending*) + (progn + (remhash token *hitl-pending*) + (log-message "HITL: Denied ~a" token) + t) + (progn + (log-message "HITL: Token ~a not found in pending" token) + nil))) +#+end_src + +;; REPL-VERIFIED: 2026-05-03T13:00:00 +#+begin_src lisp +(defun hitl-handle-message (text &optional source) + "Checks if TEXT is a HITL approval or denial command. +If it matches, processes the command and returns T. +Otherwise returns nil (text should be handled as normal input). +Recognized formats: + /approve HITL-abc123 + /deny HITL-abc123 + approve HITL-abc123 + deny HITL-abc123" + (let ((text (string-trim '(#\Space) (or text "")))) + (when (or (uiop:string-prefix-p "/approve" text :test #'char-equal) + (uiop:string-prefix-p "approve" text :test #'char-equal)) + (let* ((parts (uiop:split-string text :separator '(#\Space #\Tab))) + (token (when (> (length parts) 1) (second parts)))) + (when (and token (hitl-approve token)) + (log-message "HITL: Approved via ~a — ~a" (or source :unknown) token) + (return-from hitl-handle-message t)))) + (when (or (uiop:string-prefix-p "/deny" text :test #'char-equal) + (uiop:string-prefix-p "deny " text :test #'char-equal)) + (let* ((parts (uiop:split-string text :separator '(#\Space #\Tab))) + (token (when (> (length parts) 1) (second parts)))) + (when (and token (hitl-deny token)) + (log-message "HITL: Denied via ~a — ~a" (or source :unknown) token) + (return-from hitl-handle-message t)))) + nil)) +#+end_src + ** Gate Logic (dispatcher-gate) ;; REPL-VERIFIED: 2026-05-03T13:00:00 #+begin_src lisp