v0.8.2: cleanup + prose + structure + decomposition + budget + errors
Phase 1 — dedup + hardening (~9 items):
- Remove duplicate *skill-registry* defvar from core-skills
- Merge *backend-registry* into *probabilistic-backends*, delete backend-register
- Remove inject-stimulus alias, standardize on stimulus-inject
- Add pre-eval sandbox (skill-source-scan) blocks restricted symbols before eval
- Remove dead plist-get function; remove duplicate json-alist-to-plist export
- Fix read-framed-message whitespace DoS (4096-iteration max)
- Add *read-eval* nil to dispatcher-approvals-process read-from-string (RCE)
- Add test-op to ASDF; update .asd version 0.4.3→0.7.2

Phase 2 — prose + contracts + reorder:
- Split ROADMAP: 2623→1089 lines (TODO only), CHANGELOG: 260→1528 lines (full DONE history, 14 versions reverse chron)
- Add Contracts + Overview to 6 channel files + embedding-native + programming-standards + symbolic-scope
- Reorder 28 .org files: Contract → Test Suite → Implementation (TDD order)
- Add 7-phase inline prose to think() in core-reason
- Expand USER_MANUAL: 183→461 lines (10 new sections)

Phase 3 — decomposition + export organization:
- Decompose think() into think-assemble-prompt, think-call-llm, think-parse-response orchestrator
- Organize 188 exports into 16 grouped sections by module

Phase 4 — budget enforcement + error protocol:
- Per-session budget enforcement (SESSION_BUDGET_USD env var, budget-exhausted-p, guard in think-call-llm)
- Error condition hierarchy (6 conditions: pipeline-error, llm-error, gate-error, budget-error, protocol-error)
- Restarts in loop-process: skip-signal, use-fallback, abort-pipeline
This commit is contained in:
@@ -1,3 +1,75 @@
|
||||
;; FiveAM has to be loaded at compile, load and execute time, because the
;; package definition below lists it in its :use clause.
(eval-when (:compile-toplevel :load-toplevel :execute)
  (ql:quickload :fiveam :silent t))

;; Test package for the tokenizer; the suite name is its only export.
(defpackage #:passepartout-tokenizer-tests
  (:use #:cl #:fiveam #:passepartout)
  (:export #:tokenizer-suite))

(in-package #:passepartout-tokenizer-tests)

;; Declare the suite and make it the current one for the tests that follow.
(def-suite tokenizer-suite
  :description "Token counting and cost estimation")

(in-suite tokenizer-suite)
|
||||
|
||||
(test test-count-tokens-default
  "Contract 1: with no :model given, a non-empty string yields a positive integer count."
  (let ((n-tokens (count-tokens "hello world")))
    ;; Positivity is checked before integer-ness, as in the other Contract 1 tests.
    (is (> n-tokens 0))
    (is (integerp n-tokens))))
|
||||
|
||||
(test test-count-tokens-known-model
  "Contract 1: passing :model :gpt-4o-mini yields a positive integer count."
  (let ((n-tokens (count-tokens "hello world" :model :gpt-4o-mini)))
    (is (> n-tokens 0))
    (is (integerp n-tokens))))
|
||||
|
||||
(test test-count-tokens-unknown-model
  "Contract 1: a model keyword such as :unknown-model-xyz still yields a positive integer count."
  (let ((n-tokens (count-tokens "hello world" :model :unknown-model-xyz)))
    (is (> n-tokens 0))
    (is (integerp n-tokens))))
|
||||
|
||||
(test test-count-tokens-empty
  "Contract 1: the empty string counts as zero tokens."
  (is (= 0 (count-tokens ""))))
|
||||
|
||||
(test test-model-token-ratio-known
  "Contract 2: each listed model reports the ratio this test expects for it."
  ;; Table of (model expected-ratio) pairs; one IS check per row, in order.
  (loop for (model expected-ratio) in '((:gpt-4o-mini 4.0)
                                        (:claude-3-5-sonnet 4.5)
                                        (:llama-3.1-70b 3.5))
        do (is (= expected-ratio (model-token-ratio model)))))
|
||||
|
||||
(test test-model-token-ratio-unknown
  "Contract 2: :unknown-model-abc reports a ratio of 4.0."
  (let ((ratio (model-token-ratio :unknown-model-abc)))
    (is (= 4.0 ratio))))
|
||||
|
||||
(test test-token-cost-known
  "Contract 3: pricing 1000 tokens on :gpt-4o-mini gives a positive number."
  (let ((price (token-cost :gpt-4o-mini 1000)))
    (is (numberp price))
    (is (> price 0.0))))
|
||||
|
||||
(test test-token-cost-unknown
  "Contract 3: pricing 1000 tokens on :no-such-model gives 0.0."
  (let ((price (token-cost :no-such-model 1000)))
    (is (= 0.0 price))))
|
||||
|
||||
(test test-provider-token-cost
  "Contract: pricing 1000 tokens for provider :deepseek gives a positive number."
  (let ((price (provider-token-cost :deepseek 1000)))
    (is (numberp price))
    (is (> price 0.0))))
|
||||
|
||||
(test test-count-tokens-ratio-sensitivity
  "Contract 1: a longer text yields strictly more tokens than a shorter one."
  ;; Only the ordering is asserted; no particular proportion is checked.
  (let ((brief-count (count-tokens "hi" :model :gpt-4o-mini))
        (lengthy-count
          (count-tokens "this is a much longer piece of text with many words in it"
                        :model :gpt-4o-mini)))
    (is (> lengthy-count brief-count))))
|
||||
|
||||
(test test-count-tokens-non-string
  "Contract 1: a non-string argument (the integer 12345) yields a positive integer count."
  (let ((count (count-tokens 12345)))
    (is (> count 0))
    ;; The default, known-model and unknown-model Contract 1 tests also
    ;; require an integer result; check it for non-string input too.
    (is (integerp count))))
|
||||
|
||||
(in-package :passepartout)
|
||||
|
||||
(defparameter *model-token-ratios*
|
||||
@@ -72,75 +144,3 @@ Uses the provider's default model for pricing."
|
||||
(if model
|
||||
(token-cost model token-count)
|
||||
0.0)))
|
||||
|
||||
;; FiveAM has to be loaded at compile, load and execute time, because the
;; package definition below lists it in its :use clause.
(eval-when (:compile-toplevel :load-toplevel :execute)
  (ql:quickload :fiveam :silent t))

;; Test package for the tokenizer; the suite name is its only export.
(defpackage #:passepartout-tokenizer-tests
  (:use #:cl #:fiveam #:passepartout)
  (:export #:tokenizer-suite))

(in-package #:passepartout-tokenizer-tests)

;; Declare the suite and make it the current one for the tests that follow.
(def-suite tokenizer-suite
  :description "Token counting and cost estimation")

(in-suite tokenizer-suite)
|
||||
|
||||
(test test-count-tokens-default
  "Contract 1: with no :model given, a non-empty string yields a positive integer count."
  (let ((n-tokens (count-tokens "hello world")))
    ;; Positivity is checked before integer-ness, as in the other Contract 1 tests.
    (is (> n-tokens 0))
    (is (integerp n-tokens))))
|
||||
|
||||
(test test-count-tokens-known-model
  "Contract 1: passing :model :gpt-4o-mini yields a positive integer count."
  (let ((n-tokens (count-tokens "hello world" :model :gpt-4o-mini)))
    (is (> n-tokens 0))
    (is (integerp n-tokens))))
|
||||
|
||||
(test test-count-tokens-unknown-model
  "Contract 1: a model keyword such as :unknown-model-xyz still yields a positive integer count."
  (let ((n-tokens (count-tokens "hello world" :model :unknown-model-xyz)))
    (is (> n-tokens 0))
    (is (integerp n-tokens))))
|
||||
|
||||
(test test-count-tokens-empty
  "Contract 1: the empty string counts as zero tokens."
  (is (= 0 (count-tokens ""))))
|
||||
|
||||
(test test-model-token-ratio-known
  "Contract 2: each listed model reports the ratio this test expects for it."
  ;; Table of (model expected-ratio) pairs; one IS check per row, in order.
  (loop for (model expected-ratio) in '((:gpt-4o-mini 4.0)
                                        (:claude-3-5-sonnet 4.5)
                                        (:llama-3.1-70b 3.5))
        do (is (= expected-ratio (model-token-ratio model)))))
|
||||
|
||||
(test test-model-token-ratio-unknown
  "Contract 2: :unknown-model-abc reports a ratio of 4.0."
  (let ((ratio (model-token-ratio :unknown-model-abc)))
    (is (= 4.0 ratio))))
|
||||
|
||||
(test test-token-cost-known
  "Contract 3: pricing 1000 tokens on :gpt-4o-mini gives a positive number."
  (let ((price (token-cost :gpt-4o-mini 1000)))
    (is (numberp price))
    (is (> price 0.0))))
|
||||
|
||||
(test test-token-cost-unknown
  "Contract 3: pricing 1000 tokens on :no-such-model gives 0.0."
  (let ((price (token-cost :no-such-model 1000)))
    (is (= 0.0 price))))
|
||||
|
||||
(test test-provider-token-cost
  "Contract: pricing 1000 tokens for provider :deepseek gives a positive number."
  (let ((price (provider-token-cost :deepseek 1000)))
    (is (numberp price))
    (is (> price 0.0))))
|
||||
|
||||
(test test-count-tokens-ratio-sensitivity
  "Contract 1: a longer text yields strictly more tokens than a shorter one."
  ;; Only the ordering is asserted; no particular proportion is checked.
  (let ((brief-count (count-tokens "hi" :model :gpt-4o-mini))
        (lengthy-count
          (count-tokens "this is a much longer piece of text with many words in it"
                        :model :gpt-4o-mini)))
    (is (> lengthy-count brief-count))))
|
||||
|
||||
(test test-count-tokens-non-string
  "Contract 1: a non-string argument (the integer 12345) yields a positive integer count."
  (let ((count (count-tokens 12345)))
    (is (> count 0))
    ;; The default, known-model and unknown-model Contract 1 tests also
    ;; require an integer result; check it for non-string input too.
    (is (integerp count))))
|
||||
|
||||
Reference in New Issue
Block a user