- Changed all 50 org file :tangle targets from ../lisp/ to ~/.local/share/passepartout/lisp/ (XDG data dir) - Removed 49 generated .lisp files from project lisp/ directory - Removed tests/system-integration-tests.lisp (generated) - Removed lisp/*.fasl (compiled, stale) - Updated core-manifest.org to tangle .asd to XDG root - Remapped quicklisp symlink: local-projects/passepartout → XDG TUI fixes in channel-tui-main.org: - Removed with-raw-terminal (stty raw breaks fd 0 reads in this SBCL) - Use cat subprocess + pipe for keyboard input (via :input :interactive) - Blocking read-char on pipe with with-timeout 0.1s for daemon processing - Key events queued via drain-queue alongside daemon messages - Full dialog key routing (Escape, Up/Down, Enter, filters, Backspace) - SIGWINCH resize handling - Post-handshake backend-size re-query - Daemon version in status bar (was v0.5.0 hardcoded) - Handshake version stored in state, no add-msg - :daemon-version and :size-queried in state plist - view-status uses draw-rect for background - Test section gated with #+passepartout-tests
226 lines
8.0 KiB
Org Mode
226 lines
8.0 KiB
Org Mode
#+TITLE: Tokenizer — token counting and cost estimation
|
||
#+AUTHOR: Agent
|
||
#+FILETAGS: :tokenizer:token-economics:
|
||
#+PROPERTY: header-args:lisp :tangle /home/user/.local/share/passepartout/lisp/tokenizer.lisp
|
||
|
||
* Architectural Intent
|
||
|
||
Token counting is the foundation of token economics — without it, there is
|
||
no budget enforcement, no cost estimation, and no prompt optimization.
|
||
Passepartout needs to know how many tokens it is sending to the LLM.
|
||
|
||
The immediate implementation uses a character-ratio heuristic calibrated
|
||
per model family. This is accurate to within ~10-15% for English text,
|
||
which is sufficient for budget enforcement and cost estimation. A proper
|
||
BPE tokenizer (cl100k_base) can be loaded optionally for exact counts.
|
||
|
||
The tokenizer feeds three subsystems:
|
||
1. ~CONTEXT_MAX_TOKENS~ budget enforcement in ~think()~
|
||
2. Cost tracking (~$0.002/1K tokens × count~)
|
||
3. Prompt optimization (measure which sections consume the most budget)
|
||
|
||
** Contract
|
||
|
||
1. (count-tokens text &key model): returns the estimated token count for
|
||
a string. Default: character-count / 4.0, rounded up. Model-specific
|
||
ratios for accuracy.
|
||
2. (model-token-ratio model): returns the chars-per-token ratio for a
|
||
model family keyword.
|
||
3. (token-cost model tokens): returns estimated cost in USD for the given
   model and token count. Input and output tokens are both priced at the
   input rate; because output tokens cost more than input tokens, the
   result is a lower bound on real spend, so budgets should leave headroom.
|
||
|
||
* Implementation
|
||
|
||
** Package Context
|
||
#+begin_src lisp
|
||
(in-package :passepartout)
|
||
#+end_src
|
||
|
||
** Model token ratios (chars per token)
|
||
|
||
Different model families use different tokenizers, producing different
|
||
character-to-token ratios. These ratios were measured empirically on
|
||
English technical text and are accurate to within ~10%.
|
||
|
||
;; REPL-VERIFIED: loaded
|
||
#+begin_src lisp
|
||
(defparameter *model-token-ratios*
  '(;; GPT family
    (:gpt-4o-mini       . 4.0)
    (:gpt-4o            . 4.0)
    (:gpt-3.5-turbo     . 4.0)
    ;; Claude family
    (:claude-3-5-sonnet . 4.5)
    (:claude-3-opus     . 4.5)
    (:claude-3-haiku    . 4.5)
    ;; DeepSeek family
    (:deepseek-chat     . 4.0)
    (:deepseek-reasoner . 4.0)
    ;; Llama family
    (:llama-3.1-70b     . 3.5)
    (:llama-3.1-405b    . 3.5)
    ;; Gemini family
    (:gemini-2.0-flash  . 4.0)
    (:gemini-1.5-pro    . 4.0)
    ;; Router pseudo-model
    (:openrouter/auto   . 4.0))
  "Alist from model keyword to its estimated characters-per-token ratio.")
|
||
|
||
(defparameter *default-token-ratio*
  4.0
  "Characters-per-token ratio to fall back on when the model is unknown.")
|
||
#+end_src
|
||
|
||
** Token ratio lookup
|
||
#+begin_src lisp
|
||
(defun model-token-ratio (model-keyword)
  "Look up the estimated characters-per-token ratio for MODEL-KEYWORD.

MODEL-KEYWORD is matched against the keys of *MODEL-TOKEN-RATIOS*.
When there is no matching entry, *DEFAULT-TOKEN-RATIO* is returned instead."
  (let ((entry (assoc model-keyword *model-token-ratios*)))
    ;; (cdr nil) is NIL, so a missing entry falls through to the default.
    (or (cdr entry)
        *default-token-ratio*)))
|
||
#+end_src
|
||
|
||
** Token counting
|
||
#+begin_src lisp
|
||
(defun count-tokens (text &key model)
  "Returns the estimated token count for TEXT as an integer.

TEXT is normally a string. NIL counts as empty (0 tokens) instead of being
printed as the three characters \"NIL\"; any other non-string object is
measured by the length of its PRINC representation.
MODEL is a keyword identifying the model (e.g. :gpt-4o-mini); it selects the
characters-per-token ratio through MODEL-TOKEN-RATIO.
The character count is divided by that ratio and rounded up."
  (let ((char-count (cond ((null text) 0)
                          ((stringp text) (length text))
                          ;; Same rendering as (format nil "~a" text).
                          (t (length (princ-to-string text))))))
    (ceiling char-count (model-token-ratio model))))
|
||
#+end_src
|
||
|
||
** Cost estimation per model
|
||
|
||
Prices are in USD per 1M input tokens. Note: output tokens typically
cost 2-5× more, but we bill every token at the input price for
simplicity. This underestimates the cost of the output share, so treat
the resulting figures as a lower bound when enforcing budgets.
|
||
|
||
Prices sourced from provider pricing pages as of 2026-05.
|
||
|
||
;; REPL-VERIFIED: loaded
|
||
#+begin_src lisp
|
||
(defparameter *token-prices*
  '((:gpt-4o-mini       . 0.15)    ; $0.15/1M input tokens
    (:gpt-4o            . 2.50)    ; $2.50/1M input tokens
    (:gpt-3.5-turbo     . 0.50)    ; $0.50/1M input tokens
    (:claude-3-5-sonnet . 3.00)    ; $3.00/1M input tokens
    (:claude-3-opus     . 15.00)   ; $15.00/1M input tokens
    (:claude-3-haiku    . 0.25)    ; $0.25/1M input tokens
    (:deepseek-chat     . 0.27)    ; $0.27/1M input tokens
    (:deepseek-reasoner . 0.55)    ; $0.55/1M input tokens
    (:llama-3.1-70b     . 0.59)    ; Groq: $0.59/1M
    (:llama-3.1-405b    . 1.30)    ; NVIDIA NIM: ~$1.30/1M
    (:gemini-2.0-flash  . 0.10)    ; $0.10/1M input
    (:gemini-1.5-pro    . 1.25))   ; $1.25/1M input
  "Alist from model keyword to provider pricing in USD per 1M input tokens.
Prices sourced as of 2026-05. Output tokens cost 2-5× more, so pricing a
combined input+output token count at these input rates understates the
true cost; treat the result as a lower bound, not a conservative estimate.")
|
||
#+end_src
|
||
|
||
** Per-call cost computation
|
||
#+begin_src lisp
|
||
(defun token-cost (model token-count)
  "Estimate the USD cost of TOKEN-COUNT tokens billed at MODEL's price.

MODEL is looked up in *TOKEN-PRICES* (USD per 1M tokens).
A model without a price entry is costed at 0.0."
  (let* ((entry (assoc model *token-prices*))
         (usd-per-million (or (cdr entry) 0.0))
         ;; Convert the per-million price to a per-token price first.
         (usd-per-token (/ usd-per-million 1000000.0)))
    (* usd-per-token token-count)))
|
||
#+end_src
|
||
|
||
** Provider-to-model mapping
|
||
|
||
The provider cascade uses provider keywords (:deepseek, :openrouter,
etc.), but token ratios and prices are keyed by model family. This
table maps provider keywords to their default model families.
|
||
|
||
#+begin_src lisp
|
||
(defparameter *provider-default-models*
  '((:deepseek   . :deepseek-chat)
    (:openai     . :gpt-4o-mini)
    (:anthropic  . :claude-3-5-sonnet)
    (:groq       . :llama-3.1-70b)
    (:gemini     . :gemini-2.0-flash)
    (:nvidia     . :llama-3.1-405b)
    (:openrouter . :openrouter/auto))
  "Alist from provider keyword to the model family used for its cost tracking.")
|
||
#+end_src
|
||
|
||
** Provider token cost
|
||
#+begin_src lisp
|
||
(defun provider-token-cost (provider token-count)
  "Estimate the USD cost of TOKEN-COUNT tokens for PROVIDER.

PROVIDER is resolved to its default model through *PROVIDER-DEFAULT-MODELS*
and the cost is delegated to TOKEN-COST. A provider with no default model
yields 0.0."
  (let ((default-model (cdr (assoc provider *provider-default-models*))))
    (cond (default-model (token-cost default-model token-count))
          (t 0.0))))
|
||
#+end_src
|
||
|
||
* Test Suite
|
||
#+begin_src lisp
|
||
;; NOTE(review): this src block carries no :tangle override, so it appears to
;; be written to the same file as the implementation above — confirm that
;; loading FiveAM through Quicklisp whenever that file loads is intended.
(eval-when (:compile-toplevel :load-toplevel :execute)
  (ql:quickload :fiveam :silent t))

;; Dedicated package so test names stay out of the main package.
(defpackage #:passepartout-tokenizer-tests
  (:use #:cl #:fiveam #:passepartout)
  (:export #:tokenizer-suite))

(in-package #:passepartout-tokenizer-tests)

(def-suite tokenizer-suite
  :description "Token counting and cost estimation")

;; Every TEST form below is registered in this suite.
(in-suite tokenizer-suite)
|
||
|
||
(test test-count-tokens-default
  "Contract 1: a non-empty string yields a positive integer count."
  (let ((estimate (count-tokens "hello world")))
    (is (> estimate 0))
    (is (integerp estimate))))
|
||
|
||
(test test-count-tokens-known-model
  "Contract 1: a known model keyword yields a positive integer count."
  (let ((estimate (count-tokens "hello world" :model :gpt-4o-mini)))
    (is (> estimate 0))
    (is (integerp estimate))))
|
||
|
||
(test test-count-tokens-unknown-model
  "Contract 1: count-tokens with an unknown model falls back to default."
  (let ((fallback (count-tokens "hello world" :model :unknown-model-xyz))
        (baseline (count-tokens "hello world")))
    (is (> fallback 0))
    (is (integerp fallback))
    ;; The fallback itself: an unrecognised model must count exactly like
    ;; a call that names no model at all.
    (is (= baseline fallback))))
|
||
|
||
(test test-count-tokens-empty
  "Contract 1: the empty string counts as zero tokens."
  (let ((estimate (count-tokens "")))
    (is (= 0 estimate))))
|
||
|
||
(test test-model-token-ratio-known
  "Contract 2: each known model reports its configured ratio."
  ;; One check per (model . expected-ratio) pair.
  (dolist (expected '((:gpt-4o-mini       . 4.0)
                      (:claude-3-5-sonnet . 4.5)
                      (:llama-3.1-70b     . 3.5)))
    (is (= (cdr expected) (model-token-ratio (car expected))))))
|
||
|
||
(test test-model-token-ratio-unknown
  "Contract 2: a model with no entry reports the default ratio."
  (let ((ratio (model-token-ratio :unknown-model-abc)))
    (is (= 4.0 ratio))))
|
||
|
||
(test test-token-cost-known
  "Contract 3: a known model yields a positive numeric cost."
  (let ((usd (token-cost :gpt-4o-mini 1000)))
    (is (numberp usd))
    (is (> usd 0.0))))
|
||
|
||
(test test-token-cost-unknown
  "Contract 3: a model with no price entry costs 0.0."
  (let ((usd (token-cost :no-such-model 1000)))
    (is (= 0.0 usd))))
|
||
|
||
(test test-provider-token-cost
  "Contract: a provider keyword is priced via its default model."
  (let ((usd (provider-token-cost :deepseek 1000)))
    (is (numberp usd))
    (is (> usd 0.0))))
|
||
|
||
(test test-count-tokens-ratio-sensitivity
  "Contract 1: a longer text yields a larger count than a shorter one."
  (let* ((model :gpt-4o-mini)
         (brief (count-tokens "hi" :model model))
         (lengthy (count-tokens
                   "this is a much longer piece of text with many words in it"
                   :model model)))
    (is (> lengthy brief))))
|
||
|
||
(test test-count-tokens-non-string
  "Contract 1: a non-string argument is coerced to text and counted."
  (let ((estimate (count-tokens 12345)))
    (is (> estimate 0))))
|
||
#+end_src