;;; passepartout/lisp/tokenizer.lisp

;; Load the FiveAM test framework at compile, load and execute time so the
;; DEFPACKAGE below can :use it.
;; NOTE(review): the QL: prefix must resolve at read time, so this assumes
;; Quicklisp is already loaded in the image -- confirm against the build.
(eval-when (:compile-toplevel :load-toplevel :execute)
  (ql:quickload :fiveam :silent t))

;; Test package for the tokenizer. It :uses :passepartout, so the tests see
;; whatever symbols that package exports.
;; NOTE(review): presumably COUNT-TOKENS, MODEL-TOKEN-RATIO, TOKEN-COST and
;; PROVIDER-TOKEN-COST are exported from :passepartout -- verify in its
;; DEFPACKAGE, which is not visible in this file.
(defpackage :passepartout-tokenizer-tests
  (:use :cl :fiveam :passepartout)
  (:export #:tokenizer-suite))

(in-package :passepartout-tokenizer-tests)

;; Declare the suite and make it current: every TEST form that follows is
;; registered in TOKENIZER-SUITE.
(def-suite tokenizer-suite :description "Token counting and cost estimation")
(in-suite tokenizer-suite)

(test test-count-tokens-default
  "Contract 1: with no :model supplied, count-tokens yields a positive
integer for non-empty text."
  (let ((n-tokens (count-tokens "hello world")))
    ;; Positivity first, then integer-ness.
    (is (plusp n-tokens))
    (is (integerp n-tokens))))

(test test-count-tokens-known-model
  "Contract 1: a model keyword present in the ratio table still produces a
positive integer count."
  (let ((n-tokens (count-tokens "hello world" :model :gpt-4o-mini)))
    (is (plusp n-tokens))
    (is (integerp n-tokens))))

(test test-count-tokens-unknown-model
  "Contract 1: an unrecognised model keyword does not break counting; the
result is still a positive integer (the default ratio is used)."
  (let ((n-tokens (count-tokens "hello world" :model :unknown-model-xyz)))
    (is (plusp n-tokens))
    (is (integerp n-tokens))))

(test test-count-tokens-empty
  "Contract 1: the empty string costs zero tokens."
  (let ((n-tokens (count-tokens "")))
    (is (zerop n-tokens))))

(test test-model-token-ratio-known
  "Contract 2: each known model keyword maps to its configured
characters-per-token ratio."
  ;; Table-driven: one IS check per (model . expected-ratio) pair, in order.
  (loop for (model . expected-ratio) in '((:gpt-4o-mini       . 4.0)
                                          (:claude-3-5-sonnet . 4.5)
                                          (:llama-3.1-70b     . 3.5))
        do (is (= expected-ratio (model-token-ratio model)))))

(test test-model-token-ratio-unknown
  "Contract 2: a model keyword missing from the table gets the default
ratio of 4.0."
  (let ((fallback-ratio (model-token-ratio :unknown-model-abc)))
    (is (= 4.0 fallback-ratio))))

(test test-token-cost-known
  "Contract 3: pricing 1000 tokens for a known model gives a strictly
positive number."
  (let ((usd (token-cost :gpt-4o-mini 1000)))
    (is (numberp usd))
    (is (plusp usd))))

(test test-token-cost-unknown
  "Contract 3: a model with no price entry costs exactly zero."
  (let ((usd (token-cost :no-such-model 1000)))
    (is (zerop usd))))

(test test-provider-token-cost
  "Contract: provider-token-cost resolves a provider to a model price and
returns a strictly positive number for :deepseek."
  (let ((usd (provider-token-cost :deepseek 1000)))
    (is (numberp usd))
    (is (plusp usd))))

(test test-count-tokens-ratio-sensitivity
  "Contract 1: for the same model, a longer text yields strictly more
tokens than a shorter one."
  (let* ((brief-text "hi")
         (lengthy-text "this is a much longer piece of text with many words in it")
         (brief-count (count-tokens brief-text :model :gpt-4o-mini))
         (lengthy-count (count-tokens lengthy-text :model :gpt-4o-mini)))
    (is (< brief-count lengthy-count))))

(test test-count-tokens-non-string
  "Contract 1: a non-string argument (here an integer) is converted to
text and yields a positive count."
  (is (plusp (count-tokens 12345))))

;; Leave the test package: every definition from here on is read into and
;; defined in :passepartout.
(in-package :passepartout)

;; Lookup table consulted by MODEL-TOKEN-RATIO. Keys are model keywords,
;; values are floats; a model absent from this list falls back to
;; *DEFAULT-TOKEN-RATIO*.
(defparameter *model-token-ratios*
  '((:gpt-4o-mini        . 4.0)
    (:gpt-4o             . 4.0)
    (:gpt-3.5-turbo      . 4.0)
    (:claude-3-5-sonnet  . 4.5)
    (:claude-3-opus      . 4.5)
    (:claude-3-haiku     . 4.5)
    (:deepseek-chat      . 4.0)
    (:deepseek-reasoner  . 4.0)
    (:llama-3.1-70b      . 3.5)
    (:llama-3.1-405b     . 3.5)
    (:gemini-2.0-flash   . 4.0)
    (:gemini-1.5-pro     . 4.0)
    (:openrouter/auto    . 4.0))
  "Alist of (MODEL-KEYWORD . RATIO): estimated characters per token for each
model family. These are heuristic estimates, not tokenizer output.")

;; Returned by MODEL-TOKEN-RATIO whenever the model keyword has no entry in
;; *MODEL-TOKEN-RATIOS* (including when no model is given at all).
(defparameter *default-token-ratio* 4.0
  "Fallback characters-per-token ratio when model is unknown.")

(defun model-token-ratio (model-keyword)
  "Look up the estimated characters-per-token ratio for MODEL-KEYWORD.

MODEL-KEYWORD is matched against the keys of *MODEL-TOKEN-RATIOS*.
Returns the ratio stored there, or *DEFAULT-TOKEN-RATIO* when the keyword
has no (non-NIL) entry."
  (let ((known-ratio (cdr (assoc model-keyword *model-token-ratios*))))
    ;; (cdr nil) is NIL, so a missing entry and a NIL ratio both fall through.
    (if known-ratio
        known-ratio
        *default-token-ratio*)))

(defun count-tokens (text &key model)
  "Returns the estimated token count for TEXT as a single non-negative integer.

Uses a character-count / ratio heuristic: the length of TEXT is divided by
the characters-per-token ratio from MODEL-TOKEN-RATIO and rounded up.

TEXT  - a string, counted as-is; NIL, treated as empty (0 tokens); or any
        other object, which is converted with PRINC-TO-STRING first.
MODEL - optional keyword identifying the model (e.g. :gpt-4o-mini); unknown
        or omitted models use the default ratio."
  (let ((clean (cond
                 ;; Absent content must not be counted as the text \"NIL\".
                 ((null text) "")
                 ((stringp text) text)
                 (t (princ-to-string text)))))
    ;; CEILING also returns the remainder; VALUES keeps only the count so
    ;; callers and the REPL see exactly one return value.
    (values (ceiling (length clean) (model-token-ratio model)))))

;; Price table consulted by TOKEN-COST. Keys are model keywords, values are
;; USD per one million input tokens. A model absent from this list is priced
;; at 0.0 by TOKEN-COST.
(defparameter *token-prices*
  '((:gpt-4o-mini        . 0.15)     ; $0.15/1M input tokens
    (:gpt-4o             . 2.50)     ; $2.50/1M input tokens
    (:gpt-3.5-turbo      . 0.50)     ; $0.50/1M input tokens
    (:claude-3-5-sonnet  . 3.00)     ; $3.00/1M input tokens
    (:claude-3-opus      . 15.00)    ; $15.00/1M input tokens
    (:claude-3-haiku     . 0.25)     ; $0.25/1M input tokens
    (:deepseek-chat      . 0.27)     ; $0.27/1M input tokens
    (:deepseek-reasoner  . 0.55)     ; $0.55/1M input tokens
    (:llama-3.1-70b      . 0.59)     ; Groq: $0.59/1M
    (:llama-3.1-405b     . 1.30)     ; NVIDIA NIM: ~$1.30/1M
    (:gemini-2.0-flash   . 0.10)     ; $0.10/1M input
    (:gemini-1.5-pro     . 1.25))    ; $1.25/1M input
  "Provider pricing in USD per 1M input tokens.
Prices sourced as of 2026-05. Output tokens cost 2-5x more; only the
input rate is stored here, so costs derived from this table understate
spend on output-heavy usage.")

(defun token-cost (model token-count)
  "Estimate the USD cost of TOKEN-COUNT tokens billed at MODEL's rate.

MODEL is a keyword looked up in *TOKEN-PRICES* (USD per one million
tokens). A model with no price entry is billed at 0.0, so the result is
zero for unknown models."
  (let* ((price-entry (assoc model *token-prices*))
         (usd-per-million (if (cdr price-entry) (cdr price-entry) 0.0))
         ;; Convert the per-million rate to a per-token rate before scaling.
         (usd-per-token (/ usd-per-million 1000000.0)))
    (* usd-per-token token-count)))

;; Lookup table consulted by PROVIDER-TOKEN-COST. Keys are provider keywords,
;; values are the model keyword whose price stands in for that provider.
;; Note: :openrouter/auto has no entry in *TOKEN-PRICES*, so costs for the
;; :openrouter provider come out as 0.0.
(defparameter *provider-default-models*
  '((:deepseek   . :deepseek-chat)
    (:openai     . :gpt-4o-mini)
    (:anthropic  . :claude-3-5-sonnet)
    (:groq       . :llama-3.1-70b)
    (:gemini     . :gemini-2.0-flash)
    (:nvidia     . :llama-3.1-405b)
    (:openrouter . :openrouter/auto))
  "Maps provider keywords to their default model families for cost tracking.")

(defun provider-token-cost (provider token-count)
  "Estimate the USD cost of TOKEN-COUNT tokens for PROVIDER.

PROVIDER is a keyword resolved to its default model through
*PROVIDER-DEFAULT-MODELS*; that model's price is then applied via
TOKEN-COST. Returns 0.0 when the provider has no default model."
  (let ((default-model (cdr (assoc provider *provider-default-models*))))
    (cond
      ;; Unknown provider: nothing to price against.
      ((null default-model) 0.0)
      (t (token-cost default-model token-count)))))