#+TITLE: Token Economics — caching, budget, and cost wiring #+AUTHOR: Agent #+FILETAGS: :token-economics:budget:caching: #+PROPERTY: header-args:lisp :tangle ../lisp/token-economics.lisp * Architectural Intent Token economics transforms the architecture's theoretical cost advantage into operational reality. Three subsystems work together: 1. ~Prompt prefix caching~ — the IDENTITY+TOOLS portion of the system prompt is static across calls (changes only on skill load or identity config). Hashing and caching it avoids retransmitting ~500-1500 tokens per call. 2. ~Incremental context assembly~ — the CONTEXT section is only regenerated when the foveal focus, scope, or memory state changes. Heartbeat ticks and tool-output feedback produce no context change, so assembly is skipped, saving ~200-800 tokens per heartbeat. 3. ~Token budget enforcement~ — when the total prompt exceeds ~CONTEXT_MAX_TOKENS~ (default 16384), the system progressively trims less-essential sections (logs first, then standing mandates, then peripheral context). These functions are called from ~think()~ via ~fboundp~ guards, keeping core-reason thin while enabling token economics as a hot-loadable skill. Depends on: tokenizer.lisp, cost-tracker.lisp ** Contract 1. (prompt-prefix-cached assistant-name feedback mandates-text tool-belt): Build the IDENTITY+TOOLS system prompt prefix. Uses ~sxhash~ on the inputs to detect changes. Returns the cached string when unchanged. 2. (context-assemble-cached context sensor): Incrementally assemble awareness context. Skips assembly entirely for ~:heartbeat~ / ~:delegation~ sensors. Returns cached context when foveal-id, scope, and memory timestamp are unchanged. Falls back to ~[Awareness skill not loaded]~ when ~context-assemble-global-awareness~ is not ~fboundp~. 3. (enforce-token-budget prefix ctxt logs user prompt mandates &optional max): Enforce per-call token budget via progressive trimming: L1: truncate logs to last 5 lines L2: drop standing mandates L3: downgrade context to single-line summary Returns (values trimmed-prefix trimmed-ctxt trimmed-logs trimmed-user trimmed-mandates). 4. (token-economics-initialize): zeroes the cache state at daemon boot. * Implementation ** Package context #+begin_src lisp (in-package :passepartout) #+end_src ** Cache state #+begin_src lisp (defvar *prompt-prefix-cache* (cons nil "") "Prompt prefix cache: (sxhash . cached-string). Rebuilt when IDENTITY or TOOLS change.") (defvar *context-cache* (list :foveal-id nil :scope nil :memory-timestamp 0 :rendered "") "Context assembly cache: metadata + last rendered context string.") #+end_src ** Contract 1: prompt prefix caching #+begin_src lisp (defun prompt-prefix-cached (assistant-name feedback mandates-text tool-belt) "Build the static IDENTITY+TOOLS system prompt prefix. Uses sxhash on inputs to detect changes; returns cached string on cache hit." (let* ((hash-key (sxhash (list assistant-name feedback mandates-text tool-belt))) (cached-hash (car *prompt-prefix-cache*)) (cached-str (cdr *prompt-prefix-cache*))) (if (and cached-str (> (length cached-str) 0) (= hash-key cached-hash)) cached-str (let ((new-prefix (format nil "IDENTITY: ~a~a~a~%~%TOOLS:~%~a" assistant-name feedback (if (and mandates-text (> (length mandates-text) 0)) (concatenate 'string (string #\Newline) mandates-text) "") tool-belt))) (setf (car *prompt-prefix-cache*) hash-key (cdr *prompt-prefix-cache*) new-prefix) new-prefix)))) #+end_src ** Contract 2: incremental context assembly #+begin_src lisp (defun context-assemble-cached (context sensor) "Incrementally assemble awareness context. Skips assembly for heartbeat/delegation sensors. Uses cache when foveal, scope, and memory timestamp are unchanged." (when (member sensor '(:heartbeat :delegation)) (return-from context-assemble-cached nil)) (unless (fboundp 'context-assemble-global-awareness) (return-from context-assemble-cached "[Awareness skill not loaded]")) (let* ((foveal-id (getf context :foveal-focus)) (scope (if (and (boundp '*scope-resolver*) *scope-resolver*) (funcall *scope-resolver*) nil)) (mem-ts (hash-table-count *memory-store*)) (cache-foveal (getf *context-cache* :foveal-id)) (cache-scope (getf *context-cache* :scope)) (cache-ts (getf *context-cache* :memory-timestamp)) (cache-rendered (getf *context-cache* :rendered))) (if (and (equal foveal-id cache-foveal) (eq scope cache-scope) (= mem-ts cache-ts) cache-rendered (> (length cache-rendered) 0)) cache-rendered (let ((rendered (funcall (symbol-function 'context-assemble-global-awareness)))) (setf (getf *context-cache* :foveal-id) foveal-id (getf *context-cache* :scope) scope (getf *context-cache* :memory-timestamp) mem-ts (getf *context-cache* :rendered) rendered) rendered)))) #+end_src ** Contract 3: token budget enforcement #+begin_src lisp (defun enforce-token-budget (prefix context-text logs-text user-prompt mandates-text &optional (max-tokens nil)) "Enforce per-call token budget via progressive trimming. Returns (values prefix context-text logs-text user-prompt mandates-text) with trimmed sections." (let ((max (or max-tokens (ignore-errors (parse-integer (uiop:getenv "CONTEXT_MAX_TOKENS"))) 16384))) (labels ((ct (s) (funcall (symbol-function 'count-tokens) s)) (total-tokens (p c l u m) (+ (ct p) (if c (ct c) 0) (ct l) (ct u) (if m (ct m) 0)))) (let ((total (total-tokens prefix context-text logs-text user-prompt mandates-text))) (when (> total max) (log-message "TOKEN BUDGET: ~d tokens exceeds max ~d, trimming..." total max) ;; L1: truncate logs to last 5 lines (let* ((log-lines (uiop:split-string logs-text :separator '(#\Newline))) (trimmed (if (> (length log-lines) 5) (format nil "~{~a~^~%~}" (last log-lines 5)) logs-text))) (setf total (total-tokens prefix context-text trimmed user-prompt mandates-text) logs-text trimmed) (when (> total max) ;; L2: drop standing mandates (setf total (total-tokens prefix context-text logs-text user-prompt nil) mandates-text nil) (when (> total max) ;; L3: downgrade context to summary (let ((ctxt-lines (uiop:split-string (or context-text "") :separator '(#\Newline)))) (setf context-text (format nil "[Context trimmed: ~d items]" (length ctxt-lines))))))))) (values prefix context-text logs-text user-prompt mandates-text)))) #+end_src ** Contract 4: initialization #+begin_src lisp (defun token-economics-initialize () "Zero cache state at daemon boot." (setf (car *prompt-prefix-cache*) nil (cdr *prompt-prefix-cache*) "" (getf *context-cache* :foveal-id) nil (getf *context-cache* :scope) nil (getf *context-cache* :memory-timestamp) 0 (getf *context-cache* :rendered) "")) #+end_src * Test Suite #+begin_src lisp (eval-when (:compile-toplevel :load-toplevel :execute) (ql:quickload :fiveam :silent t)) (defpackage :passepartout-token-economics-tests (:use :cl :fiveam :passepartout) (:export #:token-economics-suite)) (in-package :passepartout-token-economics-tests) (def-suite token-economics-suite :description "Prompt prefix caching, incremental context, token budget") (in-suite token-economics-suite) (test test-prompt-prefix-cached-builds "Contract 1: prompt-prefix-cached returns a string containing IDENTITY." (setf (car passepartout::*prompt-prefix-cache*) nil (cdr passepartout::*prompt-prefix-cache*) "") (let ((prefix (passepartout::prompt-prefix-cached "Agent" "" nil "No tools"))) (is (stringp prefix)) (is (search "IDENTITY" prefix)) (is (search "TOOLS" prefix)))) (test test-prompt-prefix-cached-hits "Contract 1: second call with same inputs returns cached result." (setf (car passepartout::*prompt-prefix-cache*) nil (cdr passepartout::*prompt-prefix-cache*) "") (let ((p1 (passepartout::prompt-prefix-cached "Agent" "" nil "No tools")) (p2 (passepartout::prompt-prefix-cached "Agent" "" nil "No tools"))) (is (string= p1 p2)))) (test test-prompt-prefix-cached-miss "Contract 1: different inputs rebuild the cache." (setf (car passepartout::*prompt-prefix-cache*) nil (cdr passepartout::*prompt-prefix-cache*) "") (let ((p1 (passepartout::prompt-prefix-cached "Agent" "" nil "No tools")) (p2 (passepartout::prompt-prefix-cached "Bot" "" nil "No tools"))) (is (not (string= p1 p2))) (is (search "Bot" p2)))) (test test-context-assemble-cached-skips-heartbeat "Contract 2: heartbeat sensors skip context assembly, return nil." (let ((result (passepartout::context-assemble-cached '(:foveal-focus "id1") :heartbeat))) (is (null result)))) (test test-context-assemble-cached-skips-delegation "Contract 2: delegation sensors also skip assembly." (let ((result (passepartout::context-assemble-cached '(:foveal-focus "id1") :delegation))) (is (null result)))) (test test-context-assemble-cached-non-skip "Contract 2: user-input sensors attempt assembly (fails gracefully without awareness)." (let ((result (passepartout::context-assemble-cached '(:foveal-focus "id1") :user-input))) (is (stringp result)) (is (> (length result) 0)))) (test test-enforce-token-budget-passthrough "Contract 3: under-budget prompts pass through unchanged." (multiple-value-bind (p c l u m) (passepartout::enforce-token-budget "hi" "ctxt" "log" "user" nil 100000) (is (string= "hi" p)) (is (string= "ctxt" c)) (is (string= "log" l)) (is (string= "user" u)) (is (null m)))) (test test-enforce-token-budget-trims "Contract 3: over-budget prompts get trimmed." (let ((big-prefix (make-string 20000 :initial-element #\x))) (multiple-value-bind (p c l u m) (passepartout::enforce-token-budget big-prefix "ctxt" "logs\nlogs\nlogs\nlogs\nlogs\nlogs\nlogs" "user" nil 10) (declare (ignore p l u m)) ;; The prefix itself exceeds the tiny 10-token budget, so everything gets trimmed (is (or (stringp c) (null c))) (is (search "[Context trimmed" (or c "")))))) (test test-token-economics-initialize "Contract 4: initialize zeroes all cache state." (setf (car passepartout::*prompt-prefix-cache*) 12345 (cdr passepartout::*prompt-prefix-cache*) "stale") (setf (getf passepartout::*context-cache* :rendered) "stale context") (passepartout::token-economics-initialize) (is (null (car passepartout::*prompt-prefix-cache*))) (is (string= "" (cdr passepartout::*prompt-prefix-cache*))) (is (string= "" (getf passepartout::*context-cache* :rendered)))) #+end_src