v0.8.2: cleanup + prose + structure + decomposition + budget + errors
Phase 1 — dedup + hardening (~9 items): - Remove duplicate *skill-registry* defvar from core-skills - Merge *backend-registry* into *probabilistic-backends*, delete backend-register - Remove inject-stimulus alias, standardize on stimulus-inject - Add pre-eval sandbox (skill-source-scan) blocks restricted symbols before eval - Remove dead plist-get function; remove duplicate json-alist-to-plist export - Fix read-framed-message whitespace DoS (4096-iteration max) - Add *read-eval* nil to dispatcher-approvals-process read-from-string (RCE) - Add test-op to ASDF; update .asd version 0.4.3→0.7.2 Phase 2 — prose + contracts + reorder: - Split ROADMAP: 2623→1089 lines (TODO only), CHANGELOG: 260→1528 lines (full DONE history, 14 versions reverse chron) - Add Contracts + Overview to 6 channel files + embedding-native + programming-standards + symbolic-scope - Reorder 28 .org files: Contract → Test Suite → Implementation (TDD order) - Add 7-phase inline prose to think() in core-reason - Expand USER_MANUAL: 183→461 lines (10 new sections) Phase 3 — decomposition + export organization: - Decompose think() into think-assemble-prompt, think-call-llm, think-parse-response orchestrator - Organize 188 exports into 16 grouped sections by module Phase 4 — budget enforcement + error protocol: - Per-session budget enforcement (SESSION_BUDGET_USD env var, budget-exhausted-p, guard in think-call-llm) - Error condition hierarchy (6 conditions: pipeline-error, llm-error, gate-error, budget-error, protocol-error) - Restarts in loop-process: skip-signal, use-fallback, abort-pipeline
This commit is contained in:
@@ -1,3 +1,76 @@
|
||||
(eval-when (:compile-toplevel :load-toplevel :execute)
|
||||
(ql:quickload :fiveam :silent t))
|
||||
|
||||
(defpackage :passepartout-cost-tests
|
||||
(:use :cl :fiveam :passepartout)
|
||||
(:export #:cost-suite))
|
||||
|
||||
(in-package :passepartout-cost-tests)
|
||||
|
||||
(def-suite cost-suite :description "Cost tracking and budget management")
|
||||
(in-suite cost-suite)
|
||||
|
||||
(test test-cost-track-call
|
||||
"Contract 1: cost-track-call returns a positive number."
|
||||
(cost-session-reset)
|
||||
(let ((cost (cost-track-call :deepseek "hello world")))
|
||||
(is (numberp cost))
|
||||
(is (> cost 0.0))))
|
||||
|
||||
(test test-cost-session-total-accumulates
|
||||
"Contract 2: session total grows with multiple calls."
|
||||
(cost-session-reset)
|
||||
(cost-track-call :deepseek "hello")
|
||||
(cost-track-call :deepseek "world")
|
||||
(let ((total (cost-session-total)))
|
||||
(is (> total 0.0))
|
||||
(is (= 2 (cost-session-calls)))))
|
||||
|
||||
(test test-cost-session-reset
|
||||
"Contract 3: cost-session-reset zeroes the accumulator."
|
||||
(cost-session-reset)
|
||||
(cost-track-call :deepseek "hello")
|
||||
(is (> (cost-session-total) 0.0))
|
||||
(cost-session-reset)
|
||||
(is (= 0.0 (cost-session-total)))
|
||||
(is (= 0 (cost-session-calls))))
|
||||
|
||||
(test test-cost-format-budget-status
|
||||
"Contract 4: format-budget-status returns a string."
|
||||
(cost-session-reset)
|
||||
(cost-track-call :deepseek "hello world")
|
||||
(let ((status (cost-format-budget-status 100)))
|
||||
(is (stringp status))
|
||||
(is (search "$" status))))
|
||||
|
||||
(test test-cost-by-provider
|
||||
"Contract: cost-by-provider returns per-provider breakdown."
|
||||
(cost-session-reset)
|
||||
(cost-track-call :deepseek "a")
|
||||
(cost-track-call :groq "b")
|
||||
(let ((by (cost-by-provider)))
|
||||
(is (listp by))
|
||||
(is (assoc :deepseek by))
|
||||
(is (assoc :groq by))))
|
||||
|
||||
(test test-cost-track-no-response
|
||||
"Contract 1: cost-track-call works without response-text."
|
||||
(cost-session-reset)
|
||||
(let ((cost (cost-track-call :deepseek "test")))
|
||||
(is (> cost 0.0))))
|
||||
|
||||
(test test-cost-session-summary
|
||||
"Contract 5: cost-session-summary returns plist with total, calls, by-provider."
|
||||
(cost-session-reset)
|
||||
(cost-track-call :deepseek "hello")
|
||||
(cost-track-call :groq "world")
|
||||
(let ((s (cost-session-summary)))
|
||||
(is (> (getf s :total) 0.0))
|
||||
(is (= 2 (getf s :calls)))
|
||||
(let ((by (getf s :by-provider)))
|
||||
(is (assoc :deepseek by))
|
||||
(is (assoc :groq by)))))
|
||||
|
||||
(in-package :passepartout)
|
||||
|
||||
(defvar *session-cost* (list :total 0.0 :calls 0 :by-provider nil)
|
||||
@@ -82,75 +155,36 @@ If DAILY-BUDGET is provided, includes percentage of budget used."
|
||||
"Track cost of a backend cascade call."
|
||||
(cost-track-call backend prompt-text response-text))
|
||||
|
||||
(eval-when (:compile-toplevel :load-toplevel :execute)
|
||||
(ql:quickload :fiveam :silent t))
|
||||
(defvar *session-budget*
|
||||
(ignore-errors (read-from-string (uiop:getenv "SESSION_BUDGET_USD")))
|
||||
"Maximum USD to spend in this session. NIL means no limit.")
|
||||
|
||||
(defpackage :passepartout-cost-tests
|
||||
(:use :cl :fiveam :passepartout)
|
||||
(:export #:cost-suite))
|
||||
(defun budget-remaining-usd ()
|
||||
"Returns remaining budget in USD, or a large sentinel if unlimited."
|
||||
(if *session-budget*
|
||||
(let ((remaining (- *session-budget* (cost-session-total))))
|
||||
(if (< remaining 0) 0.0 remaining))
|
||||
most-positive-double-float))
|
||||
|
||||
(in-package :passepartout-cost-tests)
|
||||
(defun budget-exhausted-p ()
|
||||
"T if the session budget is set and fully consumed."
|
||||
(and *session-budget* (<= (budget-remaining-usd) 0.0)))
|
||||
|
||||
(def-suite cost-suite :description "Cost tracking and budget management")
|
||||
(in-suite cost-suite)
|
||||
(defun budget-estimate-call (prompt-text)
|
||||
"Estimate the dollar cost of a pending LLM call from its prompt text.
|
||||
Returns 0.0 if the tokenizer is not loaded (allows call through)."
|
||||
(if (fboundp 'count-tokens)
|
||||
(let* ((tokens (funcall (symbol-function 'count-tokens) (or prompt-text "")))
|
||||
(cost (provider-token-cost (first *provider-cascade*) tokens)))
|
||||
cost)
|
||||
0.0))
|
||||
|
||||
(test test-cost-track-call
|
||||
"Contract 1: cost-track-call returns a positive number."
|
||||
(cost-session-reset)
|
||||
(let ((cost (cost-track-call :deepseek "hello world")))
|
||||
(is (numberp cost))
|
||||
(is (> cost 0.0))))
|
||||
|
||||
(test test-cost-session-total-accumulates
|
||||
"Contract 2: session total grows with multiple calls."
|
||||
(cost-session-reset)
|
||||
(cost-track-call :deepseek "hello")
|
||||
(cost-track-call :deepseek "world")
|
||||
(let ((total (cost-session-total)))
|
||||
(is (> total 0.0))
|
||||
(is (= 2 (cost-session-calls)))))
|
||||
|
||||
(test test-cost-session-reset
|
||||
"Contract 3: cost-session-reset zeroes the accumulator."
|
||||
(cost-session-reset)
|
||||
(cost-track-call :deepseek "hello")
|
||||
(is (> (cost-session-total) 0.0))
|
||||
(cost-session-reset)
|
||||
(is (= 0.0 (cost-session-total)))
|
||||
(is (= 0 (cost-session-calls))))
|
||||
|
||||
(test test-cost-format-budget-status
|
||||
"Contract 4: format-budget-status returns a string."
|
||||
(cost-session-reset)
|
||||
(cost-track-call :deepseek "hello world")
|
||||
(let ((status (cost-format-budget-status 100)))
|
||||
(is (stringp status))
|
||||
(is (search "$" status))))
|
||||
|
||||
(test test-cost-by-provider
|
||||
"Contract: cost-by-provider returns per-provider breakdown."
|
||||
(cost-session-reset)
|
||||
(cost-track-call :deepseek "a")
|
||||
(cost-track-call :groq "b")
|
||||
(let ((by (cost-by-provider)))
|
||||
(is (listp by))
|
||||
(is (assoc :deepseek by))
|
||||
(is (assoc :groq by))))
|
||||
|
||||
(test test-cost-track-no-response
|
||||
"Contract 1: cost-track-call works without response-text."
|
||||
(cost-session-reset)
|
||||
(let ((cost (cost-track-call :deepseek "test")))
|
||||
(is (> cost 0.0))))
|
||||
|
||||
(test test-cost-session-summary
|
||||
"Contract 5: cost-session-summary returns plist with total, calls, by-provider."
|
||||
(cost-session-reset)
|
||||
(cost-track-call :deepseek "hello")
|
||||
(cost-track-call :groq "world")
|
||||
(let ((s (cost-session-summary)))
|
||||
(is (> (getf s :total) 0.0))
|
||||
(is (= 2 (getf s :calls)))
|
||||
(let ((by (getf s :by-provider)))
|
||||
(is (assoc :deepseek by))
|
||||
(is (assoc :groq by)))))
|
||||
(defun budget-exhaustion-message ()
|
||||
"Returns a user-facing plist explaining that the budget is spent."
|
||||
(let ((total (cost-session-total))
|
||||
(cap *session-budget*))
|
||||
(list :TYPE :REQUEST
|
||||
:PAYLOAD (list :ACTION :MESSAGE
|
||||
:TEXT (format nil "Session budget exhausted: $~,4f of $~,2f spent. Raise SESSION_BUDGET_USD or reset with /cost-reset to continue."
|
||||
total cap)
|
||||
:EXPLANATION "Budget cap reached. No LLM calls will be made until the limit is raised."))))
|
||||
|
||||
Reference in New Issue
Block a user