v0.8.2: cleanup + prose + structure + decomposition + budget + errors

Phase 1 — dedup + hardening (~9 items):
- Remove duplicate *skill-registry* defvar from core-skills
- Merge *backend-registry* into *probabilistic-backends*, delete backend-register
- Remove inject-stimulus alias, standardize on stimulus-inject
- Add pre-eval sandbox (skill-source-scan) blocks restricted symbols before eval
- Remove dead plist-get function; remove duplicate json-alist-to-plist export
- Fix read-framed-message whitespace DoS (4096-iteration max)
- Add *read-eval* nil to dispatcher-approvals-process read-from-string (RCE)
- Add test-op to ASDF; update .asd version 0.4.3→0.7.2

Phase 2 — prose + contracts + reorder:
- Split ROADMAP: 2623→1089 lines (TODO only), CHANGELOG: 260→1528 lines (full DONE history, 14 versions reverse chron)
- Add Contracts + Overview to 6 channel files + embedding-native + programming-standards + symbolic-scope
- Reorder 28 .org files: Contract → Test Suite → Implementation (TDD order)
- Add 7-phase inline prose to think() in core-reason
- Expand USER_MANUAL: 183→461 lines (10 new sections)

Phase 3 — decomposition + export organization:
- Decompose think() into think-assemble-prompt, think-call-llm, think-parse-response orchestrator
- Organize 188 exports into 16 grouped sections by module

Phase 4 — budget enforcement + error protocol:
- Per-session budget enforcement (SESSION_BUDGET_USD env var, budget-exhausted-p, guard in think-call-llm)
- Error condition hierarchy (6 conditions: pipeline-error, llm-error, gate-error, budget-error, protocol-error)
- Restarts in loop-process: skip-signal, use-fallback, abort-pipeline
2026-05-10 09:07:44 -04:00
parent 27d203ad67
commit 8fd56dece3
68 changed files with 7014 additions and 6521 deletions
--- a/lisp/tokenizer.lisp
+++ b/lisp/tokenizer.lisp
@@ -1,3 +1,75 @@
+(eval-when (:compile-toplevel :load-toplevel :execute)
+  (ql:quickload :fiveam :silent t))  ; NOTE(review): the QL package must exist when this form is read — confirm Quicklisp is always loaded first
+
+(defpackage :passepartout-tokenizer-tests
+  (:use :cl :fiveam :passepartout)  ; NOTE(review): :passepartout must already be defined here; this form precedes the (in-package :passepartout) further down
+  (:export #:tokenizer-suite))
+
+(in-package :passepartout-tokenizer-tests)
+
+(def-suite tokenizer-suite :description "Token counting and cost estimation")
+(in-suite tokenizer-suite)  ; tests defined after this point are added to tokenizer-suite
+
+(test test-count-tokens-default
+  "Contract 1: count-tokens returns a positive integer for a non-empty string."
+  (let ((count (count-tokens "hello world")))  ; no :model argument is passed
+    (is (> count 0))
+    (is (integerp count))))
+
+(test test-count-tokens-known-model
+  "Contract 1: count-tokens with a known model returns a positive integer."
+  (let ((count (count-tokens "hello world" :model :gpt-4o-mini)))  ; model passed via the :model keyword
+    (is (> count 0))
+    (is (integerp count))))
+
+(test test-count-tokens-unknown-model
+  "Contract 1: count-tokens with an unknown model falls back to default."
+  (let ((count (count-tokens "hello world" :model :unknown-model-xyz)))
+    (is (> count 0))  ; only a positive integer result is checked, not which fallback value was used
+    (is (integerp count))))
+
+(test test-count-tokens-empty
+  "Contract 1: count-tokens on empty string returns 0."
+  (let ((count (count-tokens "")))  ; zero-length input, no :model argument
+    (is (= 0 count))))
+
+(test test-model-token-ratio-known
+  "Contract 2: each known model returns its expected ratio."
+  (is (= 4.0 (model-token-ratio :gpt-4o-mini)))  ; = compares numerically, so the returned value must equal the literal exactly
+  (is (= 4.5 (model-token-ratio :claude-3-5-sonnet)))
+  (is (= 3.5 (model-token-ratio :llama-3.1-70b))))
+
+(test test-model-token-ratio-unknown
+  "Contract 2: unknown model returns the default ratio (4.0)."
+  (is (= 4.0 (model-token-ratio :unknown-model-abc))))  ; same value the known-model test expects for :gpt-4o-mini
+
+(test test-token-cost-known
+  "Contract 3: token-cost returns a positive number for a known model."
+  (let ((cost (token-cost :gpt-4o-mini 1000)))  ; arguments: model keyword, then token count
+    (is (numberp cost))
+    (is (> cost 0.0))))  ; only the sign is checked, not the exact price
+
+(test test-token-cost-unknown
+  "Contract 3: token-cost returns 0.0 for unknown model."
+  (is (= 0.0 (token-cost :no-such-model 1000))))  ; = is numeric equality, so an integer 0 would also satisfy this
+
+(test test-provider-token-cost
+  "Contract: provider-token-cost returns a positive number for the :deepseek provider."
+  (let ((cost (provider-token-cost :deepseek 1000)))  ; arguments: provider keyword, then token count
+    (is (numberp cost))
+    (is (> cost 0.0))))  ; only positivity is asserted, not the exact price
+
+(test test-count-tokens-ratio-sensitivity
+  "Contract 1: longer text produces strictly more tokens than shorter text."
+  (let ((short (count-tokens "hi" :model :gpt-4o-mini))
+        (long  (count-tokens "this is a much longer piece of text with many words in it" :model :gpt-4o-mini)))
+    (is (> long short))))  ; ordering only; no proportionality between length and count is asserted
+
+(test test-count-tokens-non-string
+  "Contract 1: a non-string value is accepted and yields a positive count."
+  (let ((count (count-tokens 12345)))  ; integer argument instead of a string
+    (is (> count 0))))
+
 (in-package :passepartout)

 (defparameter *model-token-ratios*
@@ -72,75 +144,3 @@ Uses the provider's default model for pricing."
    (if model
        (token-cost model token-count)
        0.0)))
-
-(eval-when (:compile-toplevel :load-toplevel :execute)
-  (ql:quickload :fiveam :silent t))
-
-(defpackage :passepartout-tokenizer-tests
-  (:use :cl :fiveam :passepartout)
-  (:export #:tokenizer-suite))
-
-(in-package :passepartout-tokenizer-tests)
-
-(def-suite tokenizer-suite :description "Token counting and cost estimation")
-(in-suite tokenizer-suite)
-
-(test test-count-tokens-default
-  "Contract 1: count-tokens returns non-zero for a non-empty string."
-  (let ((count (count-tokens "hello world")))
-    (is (> count 0))
-    (is (integerp count))))
-
-(test test-count-tokens-known-model
-  "Contract 1: count-tokens with a known model returns a count."
-  (let ((count (count-tokens "hello world" :model :gpt-4o-mini)))
-    (is (> count 0))
-    (is (integerp count))))
-
-(test test-count-tokens-unknown-model
-  "Contract 1: count-tokens with an unknown model falls back to default."
-  (let ((count (count-tokens "hello world" :model :unknown-model-xyz)))
-    (is (> count 0))
-    (is (integerp count))))
-
-(test test-count-tokens-empty
-  "Contract 1: count-tokens on empty string returns 0."
-  (let ((count (count-tokens "")))
-    (is (= 0 count))))
-
-(test test-model-token-ratio-known
-  "Contract 2: known model returns correct ratio."
-  (is (= 4.0 (model-token-ratio :gpt-4o-mini)))
-  (is (= 4.5 (model-token-ratio :claude-3-5-sonnet)))
-  (is (= 3.5 (model-token-ratio :llama-3.1-70b))))
-
-(test test-model-token-ratio-unknown
-  "Contract 2: unknown model returns default ratio."
-  (is (= 4.0 (model-token-ratio :unknown-model-abc))))
-
-(test test-token-cost-known
-  "Contract 3: token-cost returns a number for known model."
-  (let ((cost (token-cost :gpt-4o-mini 1000)))
-    (is (numberp cost))
-    (is (> cost 0.0))))
-
-(test test-token-cost-unknown
-  "Contract 3: token-cost returns 0.0 for unknown model."
-  (is (= 0.0 (token-cost :no-such-model 1000))))
-
-(test test-provider-token-cost
-  "Contract: provider-token-cost maps provider to model price."
-  (let ((cost (provider-token-cost :deepseek 1000)))
-    (is (numberp cost))
-    (is (> cost 0.0))))
-
-(test test-count-tokens-ratio-sensitivity
-  "Contract 1: longer text produces proportionally more tokens."
-  (let ((short (count-tokens "hi" :model :gpt-4o-mini))
-        (long  (count-tokens "this is a much longer piece of text with many words in it" :model :gpt-4o-mini)))
-    (is (> long short))))
-
-(test test-count-tokens-non-string
-  "Contract 1: non-string values are coerced and counted."
-  (let ((count (count-tokens 12345)))
-    (is (> count 0))))