v0.8.2: cleanup + prose + structure + decomposition + budget + errors

Phase 1 — dedup + hardening (~9 items):
- Remove duplicate *skill-registry* defvar from core-skills
- Merge *backend-registry* into *probabilistic-backends*, delete backend-register
- Remove inject-stimulus alias, standardize on stimulus-inject
- Add pre-eval sandbox (skill-source-scan) blocks restricted symbols before eval
- Remove dead plist-get function; remove duplicate json-alist-to-plist export
- Fix read-framed-message whitespace DoS (4096-iteration max)
- Add *read-eval* nil to dispatcher-approvals-process read-from-string (RCE)
- Add test-op to ASDF; update .asd version 0.4.3→0.7.2

Phase 2 — prose + contracts + reorder:
- Split ROADMAP: 2623→1089 lines (TODO only), CHANGELOG: 260→1528 lines (full DONE history, 14 versions reverse chron)
- Add Contracts + Overview to 6 channel files + embedding-native + programming-standards + symbolic-scope
- Reorder 28 .org files: Contract → Test Suite → Implementation (TDD order)
- Add 7-phase inline prose to think() in core-reason
- Expand USER_MANUAL: 183→461 lines (10 new sections)

Phase 3 — decomposition + export organization:
- Decompose think() into think-assemble-prompt, think-call-llm, think-parse-response orchestrator
- Organize 188 exports into 16 grouped sections by module

Phase 4 — budget enforcement + error protocol:
- Per-session budget enforcement (SESSION_BUDGET_USD env var, budget-exhausted-p, guard in think-call-llm)
- Error condition hierarchy (6 conditions: pipeline-error, llm-error, gate-error, budget-error, protocol-error)
- Restarts in loop-process: skip-signal, use-fallback, abort-pipeline
This commit is contained in:
2026-05-10 09:07:44 -04:00
parent 27d203ad67
commit 8fd56dece3
68 changed files with 7014 additions and 6521 deletions

View File

@@ -1,3 +1,76 @@
(eval-when (:compile-toplevel :load-toplevel :execute)
(ql:quickload :fiveam :silent t))
(defpackage :passepartout-cost-tests
(:use :cl :fiveam :passepartout)
(:export #:cost-suite))
(in-package :passepartout-cost-tests)
(def-suite cost-suite :description "Cost tracking and budget management")
(in-suite cost-suite)
(test test-cost-track-call
"Contract 1: cost-track-call returns a positive number."
(cost-session-reset)
(let ((cost (cost-track-call :deepseek "hello world")))
(is (numberp cost))
(is (> cost 0.0))))
(test test-cost-session-total-accumulates
"Contract 2: session total grows with multiple calls."
(cost-session-reset)
(cost-track-call :deepseek "hello")
(cost-track-call :deepseek "world")
(let ((total (cost-session-total)))
(is (> total 0.0))
(is (= 2 (cost-session-calls)))))
(test test-cost-session-reset
"Contract 3: cost-session-reset zeroes the accumulator."
(cost-session-reset)
(cost-track-call :deepseek "hello")
(is (> (cost-session-total) 0.0))
(cost-session-reset)
(is (= 0.0 (cost-session-total)))
(is (= 0 (cost-session-calls))))
(test test-cost-format-budget-status
"Contract 4: format-budget-status returns a string."
(cost-session-reset)
(cost-track-call :deepseek "hello world")
(let ((status (cost-format-budget-status 100)))
(is (stringp status))
(is (search "$" status))))
(test test-cost-by-provider
"Contract: cost-by-provider returns per-provider breakdown."
(cost-session-reset)
(cost-track-call :deepseek "a")
(cost-track-call :groq "b")
(let ((by (cost-by-provider)))
(is (listp by))
(is (assoc :deepseek by))
(is (assoc :groq by))))
(test test-cost-track-no-response
"Contract 1: cost-track-call works without response-text."
(cost-session-reset)
(let ((cost (cost-track-call :deepseek "test")))
(is (> cost 0.0))))
(test test-cost-session-summary
"Contract 5: cost-session-summary returns plist with total, calls, by-provider."
(cost-session-reset)
(cost-track-call :deepseek "hello")
(cost-track-call :groq "world")
(let ((s (cost-session-summary)))
(is (> (getf s :total) 0.0))
(is (= 2 (getf s :calls)))
(let ((by (getf s :by-provider)))
(is (assoc :deepseek by))
(is (assoc :groq by)))))
(in-package :passepartout)
(defvar *session-cost* (list :total 0.0 :calls 0 :by-provider nil)
@@ -82,75 +155,36 @@ If DAILY-BUDGET is provided, includes percentage of budget used."
"Track cost of a backend cascade call."
(cost-track-call backend prompt-text response-text))
(eval-when (:compile-toplevel :load-toplevel :execute)
(ql:quickload :fiveam :silent t))
(defvar *session-budget*
(ignore-errors (read-from-string (uiop:getenv "SESSION_BUDGET_USD")))
"Maximum USD to spend in this session. NIL means no limit.")
(defpackage :passepartout-cost-tests
(:use :cl :fiveam :passepartout)
(:export #:cost-suite))
(defun budget-remaining-usd ()
"Returns remaining budget in USD, or a large sentinel if unlimited."
(if *session-budget*
(let ((remaining (- *session-budget* (cost-session-total))))
(if (< remaining 0) 0.0 remaining))
most-positive-double-float))
(in-package :passepartout-cost-tests)
(defun budget-exhausted-p ()
"T if the session budget is set and fully consumed."
(and *session-budget* (<= (budget-remaining-usd) 0.0)))
(def-suite cost-suite :description "Cost tracking and budget management")
(in-suite cost-suite)
(defun budget-estimate-call (prompt-text)
"Estimate the dollar cost of a pending LLM call from its prompt text.
Returns 0.0 if the tokenizer is not loaded (allows call through)."
(if (fboundp 'count-tokens)
(let* ((tokens (funcall (symbol-function 'count-tokens) (or prompt-text "")))
(cost (provider-token-cost (first *provider-cascade*) tokens)))
cost)
0.0))
(test test-cost-track-call
"Contract 1: cost-track-call returns a positive number."
(cost-session-reset)
(let ((cost (cost-track-call :deepseek "hello world")))
(is (numberp cost))
(is (> cost 0.0))))
(test test-cost-session-total-accumulates
"Contract 2: session total grows with multiple calls."
(cost-session-reset)
(cost-track-call :deepseek "hello")
(cost-track-call :deepseek "world")
(let ((total (cost-session-total)))
(is (> total 0.0))
(is (= 2 (cost-session-calls)))))
(test test-cost-session-reset
"Contract 3: cost-session-reset zeroes the accumulator."
(cost-session-reset)
(cost-track-call :deepseek "hello")
(is (> (cost-session-total) 0.0))
(cost-session-reset)
(is (= 0.0 (cost-session-total)))
(is (= 0 (cost-session-calls))))
(test test-cost-format-budget-status
"Contract 4: format-budget-status returns a string."
(cost-session-reset)
(cost-track-call :deepseek "hello world")
(let ((status (cost-format-budget-status 100)))
(is (stringp status))
(is (search "$" status))))
(test test-cost-by-provider
"Contract: cost-by-provider returns per-provider breakdown."
(cost-session-reset)
(cost-track-call :deepseek "a")
(cost-track-call :groq "b")
(let ((by (cost-by-provider)))
(is (listp by))
(is (assoc :deepseek by))
(is (assoc :groq by))))
(test test-cost-track-no-response
"Contract 1: cost-track-call works without response-text."
(cost-session-reset)
(let ((cost (cost-track-call :deepseek "test")))
(is (> cost 0.0))))
(test test-cost-session-summary
"Contract 5: cost-session-summary returns plist with total, calls, by-provider."
(cost-session-reset)
(cost-track-call :deepseek "hello")
(cost-track-call :groq "world")
(let ((s (cost-session-summary)))
(is (> (getf s :total) 0.0))
(is (= 2 (getf s :calls)))
(let ((by (getf s :by-provider)))
(is (assoc :deepseek by))
(is (assoc :groq by)))))
(defun budget-exhaustion-message ()
"Returns a user-facing plist explaining that the budget is spent."
(let ((total (cost-session-total))
(cap *session-budget*))
(list :TYPE :REQUEST
:PAYLOAD (list :ACTION :MESSAGE
:TEXT (format nil "Session budget exhausted: $~,4f of $~,2f spent. Raise SESSION_BUDGET_USD or reset with /cost-reset to continue."
total cap)
:EXPLANATION "Budget cap reached. No LLM calls will be made until the limit is raised."))))