feat: quadrant-based model routing with per-slot provider cascades
Some checks failed
Deploy (Gitea) / deploy (push) Failing after 2s

This commit is contained in:
2026-05-03 15:46:10 -04:00
parent 4e5428bed0
commit 2af882852c
8 changed files with 342 additions and 34 deletions

2
.gitignore vendored
View File

@@ -10,5 +10,5 @@ test_input.txt
# Generated artifacts (source of truth is .org) # Generated artifacts (source of truth is .org)
/skills/*.lisp /skills/*.lisp
/tests/*.lisp /tests/*.lisp
/tmp/*.lisp
*.fasl *.fasl
*.lisp

View File

@@ -386,10 +386,17 @@ Memory scope: ~:scope~ property on memory-objects (memex/session/project).
Implement lazy-loading proxies for large-scale memory traversal. Implement lazy-loading proxies for large-scale memory traversal.
**** TODO Model-Tier Routing (cost optimization) **** TODO Model-Tier Routing (cost optimization)
Extend ~*model-selector-fn*~ for complexity-based routing. Extend ~*model-selector*~ for quadrant-based routing with per-slot provider cascades.
- Heartbeats → smallest model - Privacy filter (local-only for @personal content) — top priority
- User input → medium model - Quadrant tagging (foreground/background × probabilistic/deterministic)
- Complex reasoning → large model - Complexity classifier (code/plan/chat/background slots), each with its own provider cascade
- Model-selector skill registers into $*model-selector*$ hook
Deferred:
- Economics / budget tracking (per-request cost, cumulative caps)
- TUI /config command for cascade configuration (env vars for now)
- Skill metadata declaring complexity at defskill time (keyword-based for now)
- Visual model indicator in TUI status bar
**** TODO Memory Scope Segmentation **** TODO Memory Scope Segmentation
Extend memory-object with ~:scope~ property. Extend memory-object with ~:scope~ property.

View File

@@ -129,4 +129,3 @@ FN receives (signal) and returns T if consumed, nil to continue."
(test test-depth-limiting (test test-depth-limiting
(let ((runaway-signal (list :type :EVENT :depth 11 :payload (list :sensor :heartbeat)))) (let ((runaway-signal (list :type :EVENT :depth 11 :payload (list :sensor :heartbeat))))
(is (null (process-signal runaway-signal))))) (is (null (process-signal runaway-signal)))))
(defun bad-code () (broken

View File

@@ -20,20 +20,23 @@
(let ((backend-fn (gethash backend *backend-registry*))) (let ((backend-fn (gethash backend *backend-registry*)))
(when backend-fn (when backend-fn
(log-message "PROBABILISTIC: Attempting backend ~a..." backend) (log-message "PROBABILISTIC: Attempting backend ~a..." backend)
(let* ((model (when *model-selector* (let* ((model-val (when *model-selector*
(funcall *model-selector* backend context))) (funcall *model-selector* backend context))))
(result (if model (if (eq model-val :skip)
(funcall backend-fn prompt system-prompt :model model) (log-message "PROBABILISTIC: Skipping ~a (filtered)" backend)
(funcall backend-fn prompt system-prompt)))) (let* ((model (if model-val model-val nil))
(cond ((and (listp result) (eq (getf result :status) :success)) (result (if model
(return (getf result :content))) (funcall backend-fn prompt system-prompt :model model)
((stringp result) (funcall backend-fn prompt system-prompt))))
(return result)) (cond ((and (listp result) (eq (getf result :status) :success))
(t (return (getf result :content)))
(log-message "PROBABILISTIC: Backend ~a failed: ~a" ((stringp result)
backend (getf result :message)))))))) (return result))
(t
(log-message "PROBABILISTIC: Backend ~a failed: ~a"
backend (getf result :message)))))))))))
(list :type :LOG (list :type :LOG
:payload (list :text "Neural Cascade Failure: All providers exhausted."))))) :payload (list :text "Neural Cascade Failure: All providers exhausted."))))
(defun markdown-strip (text) (defun markdown-strip (text)
(if (and text (stringp text)) (if (and text (stringp text))

View File

@@ -0,0 +1,88 @@
(defvar *model-cascade-code* nil
"Cascade for :code tasks: ((:ollama . \"model\") ...)")
(defvar *model-cascade-plan* nil
"Cascade for :plan tasks.")
(defvar *model-cascade-chat* nil
"Cascade for :chat tasks.")
(defvar *model-cascade-background* nil
"Cascade for background tasks (heartbeat, delegation).")
(defvar *local-backends* '(:ollama :llama-cpp)
"Backend keywords considered local (privacy-safe).")
(defun model-classify-complexity (text)
"Classify TEXT into :code, :plan, or :chat."
(let ((lower (string-downcase text)))
(cond
((or (search "defun" lower) (search "defmacro" lower)
(search "write" lower) (search "refactor" lower)
(search "fix " lower) (search "implement" lower)
(search "code" lower)
(search "#+begin_src" lower))
:code)
((or (search "plan" lower) (search "roadmap" lower)
(search "strategy" lower) (search "design" lower)
(search "architecture" lower))
:plan)
(t :chat))))
(defun model-cascade-find (cascade backend)
"Find first (PROVIDER . MODEL) in CASCADE matching BACKEND."
(assoc backend cascade
:test (lambda (a b) (string-equal (string a) (string b)))))
(defun model-select (backend context)
"Select model for BACKEND given CONTEXT signal.
Returns model name or :skip."
(let* ((payload (getf context :payload))
(text (or (getf payload :text) ""))
(sensor (getf payload :sensor))
(has-personal (and (boundp '*dispatcher-privacy-tags*)
(some (lambda (tag) (search tag text))
(symbol-value '*dispatcher-privacy-tags*))))
(is-local (member backend *local-backends*)))
;; Privacy: skip cloud backends for personal content
(when (and has-personal (not is-local))
(log-message "MODEL-ROUTER: Skipping ~a (personal content)" backend)
(return-from model-select :skip))
;; Quadrant: background tasks use background cascade
(if (member sensor '(:heartbeat :delegation :tool-output :loop-error))
(let ((entry (car (or *model-cascade-background*
'((:ollama . "phi-2"))))))
(cdr entry))
;; Foreground: classify complexity, use slot cascade
(let* ((slot (model-classify-complexity text))
(cascade (case slot
(:code *model-cascade-code*)
(:plan *model-cascade-plan*)
(t *model-cascade-chat*)))
(entry (model-cascade-find
(or cascade '((:ollama . "qwen2.5:14b"))) backend)))
(if entry (cdr entry) :skip)))))
(defun model-router-init ()
"Read env vars and wire model-select into *model-selector*."
(flet ((parse-cascade (str)
(when (and str (> (length str) 0))
(let ((*read-eval* nil))
(read-from-string str)))))
(setf *model-cascade-code* (parse-cascade (uiop:getenv "MODEL_CASCADE_CODE"))
*model-cascade-plan* (parse-cascade (uiop:getenv "MODEL_CASCADE_PLAN"))
*model-cascade-chat* (parse-cascade (uiop:getenv "MODEL_CASCADE_CHAT"))
*model-cascade-background* (parse-cascade (uiop:getenv "MODEL_CASCADE_BACKGROUND"))
*local-backends* (let ((env (uiop:getenv "LOCAL_BACKENDS")))
(if env
(mapcar (lambda (s) (intern (string-upcase (string-trim " " s)) :keyword))
(uiop:split-string env :separator '(#\,)))
'(:ollama :llama-cpp)))))
(setf *model-selector* #'model-select)
(log-message "MODEL-ROUTER: Initialized, selector=~a" *model-selector*))
(defskill :passepartout-model-router
:priority 250
:trigger (lambda (ctx) (declare (ignore ctx)) nil))
(model-router-init)

View File

@@ -249,4 +249,4 @@ Verifies that the perceive gate correctly ingests AST nodes into memory and that
(test test-depth-limiting (test test-depth-limiting
(let ((runaway-signal (list :type :EVENT :depth 11 :payload (list :sensor :heartbeat)))) (let ((runaway-signal (list :type :EVENT :depth 11 :payload (list :sensor :heartbeat))))
(is (null (process-signal runaway-signal))))) (is (null (process-signal runaway-signal)))))
#+end_src(defun bad-code () (broken #+end_src

View File

@@ -101,7 +101,7 @@ The function has a fallback for every failure mode:
This is deliberately resilient. The system should never crash because an LLM provider is down. It should log the failure, try the next provider, and if all fail, return a diagnostic message that the deterministic engine can present to the user. This is deliberately resilient. The system should never crash because an LLM provider is down. It should log the failure, try the next provider, and if all fail, return a diagnostic message that the deterministic engine can present to the user.
;; REPL-VERIFIED: 2026-05-03T13:00:00 ;; REPL-VERIFIED: 2026-05-03T14:00:00
#+begin_src lisp #+begin_src lisp
(defun backend-cascade-call (prompt &key (defun backend-cascade-call (prompt &key
(system-prompt "You are the Probabilistic engine.") (system-prompt "You are the Probabilistic engine.")
@@ -112,20 +112,24 @@ This is deliberately resilient. The system should never crash because an LLM pro
(let ((backend-fn (gethash backend *backend-registry*))) (let ((backend-fn (gethash backend *backend-registry*)))
(when backend-fn (when backend-fn
(log-message "PROBABILISTIC: Attempting backend ~a..." backend) (log-message "PROBABILISTIC: Attempting backend ~a..." backend)
(let* ((model (when *model-selector* (let* ((model-val (when *model-selector*
(funcall *model-selector* backend context))) (funcall *model-selector* backend context))))
(result (if model (if (eq model-val :skip)
(funcall backend-fn prompt system-prompt :model model) (log-message "PROBABILISTIC: Skipping ~a (filtered)" backend)
(funcall backend-fn prompt system-prompt)))) (let* ((model (if model-val model-val nil))
(cond ((and (listp result) (eq (getf result :status) :success)) (result (if model
(return (getf result :content))) (funcall backend-fn prompt system-prompt :model model)
((stringp result) (funcall backend-fn prompt system-prompt))))
(return result)) (cond ((and (listp result) (eq (getf result :status) :success))
(t (return (getf result :content)))
(log-message "PROBABILISTIC: Backend ~a failed: ~a" ((stringp result)
backend (getf result :message)))))))) (return result))
(t
(log-message "PROBABILISTIC: Backend ~a failed: ~a"
backend (getf result :message)))))))))))
(list :type :LOG (list :type :LOG
:payload (list :text "Neural Cascade Failure: All providers exhausted."))))) :payload (list :text "Neural Cascade Failure: All providers exhausted."))))
#+end_src #+end_src
** Cognitive Proposal Generation (think) ** Cognitive Proposal Generation (think)

207
org/system-model-router.org Normal file
View File

@@ -0,0 +1,207 @@
#+TITLE: SKILL: Model Router (org-skill-model-router.org)
#+AUTHOR: Agent
#+FILETAGS: :system:model:routing:
#+PROPERTY: header-args:lisp :tangle ../lisp/system-model-router.lisp
* Overview: Quadrant-Based Model Routing
The Model Router implements the four-quadrant cognitive architecture for
LLM model selection. Each signal is routed through a pipeline of three
filters — privacy, quadrant, and complexity — before a model is chosen.
The routing pipeline for every probabilistic signal:
all backends → privacy filter → quadrant/classifier → per-slot cascade → model
- **Privacy filter** strips cloud backends when content carries ~@personal~ tags.
- **Quadrant** determines if the signal is foreground or background.
- **Complexity classifier** assigns foreground signals to one of three slots:
~:code~, ~:plan~, or ~:chat~.
- **Per-slot cascade** selects a backend and model for the slot, with fallback
ordering defined in each cascade list.
The model selector function is registered into the core ~*model-selector*~ hook
at load time. The core iterates providers, calling the selector for each one.
* Implementation
** Configuration: Per-Slot Cascades
Four env-configurable cascade variables, one per slot. Each cascade is a list
of ~(provider-keyword . "model-name")~ pairs. The first match for the current
backend is used.
Example:
MODEL_CASCADE_CODE='((:ollama . "deepseek-coder:6.7b") (:openrouter . "claude-sonnet"))'
*** *model-cascade-code*
The cascade for ~:code~ tasks (code generation, refactoring, bug fixing).
Format: ~((:ollama . "model-name") ...)~. Configured via ~MODEL_CASCADE_CODE~.
;; REPL-VERIFIED: 2026-05-03T14:00:00
#+begin_src lisp
(defvar *model-cascade-code* nil
"Cascade for :code tasks: ((:ollama . \"model\") ...)")
#+end_src
*** *model-cascade-plan*
Cascade for planning and architecture tasks. Configured via ~MODEL_CASCADE_PLAN~.
;; REPL-VERIFIED: 2026-05-03T14:00:00
#+begin_src lisp
(defvar *model-cascade-plan* nil
"Cascade for :plan tasks.")
#+end_src
*** *model-cascade-chat*
Cascade for general conversation and simple Q&A. Configured via ~MODEL_CASCADE_CHAT~.
;; REPL-VERIFIED: 2026-05-03T14:00:00
#+begin_src lisp
(defvar *model-cascade-chat* nil
"Cascade for :chat tasks.")
#+end_src
*** *model-cascade-background*
Cascade for background tasks (heartbeat scraping, delegation processing).
Configured via ~MODEL_CASCADE_BACKGROUND~.
;; REPL-VERIFIED: 2026-05-03T14:00:00
#+begin_src lisp
(defvar *model-cascade-background* nil
"Cascade for background tasks (heartbeat, delegation).")
#+end_src
*** *local-backends*
List of backend keywords considered local for privacy routing. Content tagged
with ~@personal~ will only be sent to these backends.
;; REPL-VERIFIED: 2026-05-03T14:00:00
#+begin_src lisp
(defvar *local-backends* '(:ollama :llama-cpp)
"Backend keywords considered local (privacy-safe).")
#+end_src
** Complexity Classifier
Keyword-based heuristic that assigns signal text to a complexity slot.
Pluggable — set ~*complexity-classifier*~ to override.
;; REPL-VERIFIED: 2026-05-03T14:00:00
#+begin_src lisp
(defun model-classify-complexity (text)
"Classify TEXT into :code, :plan, or :chat."
(let ((lower (string-downcase text)))
(cond
((or (search "defun" lower) (search "defmacro" lower)
(search "write" lower) (search "refactor" lower)
(search "fix " lower) (search "implement" lower)
(search "code" lower)
(search "#+begin_src" lower))
:code)
((or (search "plan" lower) (search "roadmap" lower)
(search "strategy" lower) (search "design" lower)
(search "architecture" lower))
:plan)
(t :chat))))
#+end_src
** Cascade Lookup
Finds the first ~(provider . model)~ entry in a cascade matching the
current backend keyword. Case-insensitive.
;; REPL-VERIFIED: 2026-05-03T14:00:00
#+begin_src lisp
(defun model-cascade-find (cascade backend)
"Find first (PROVIDER . MODEL) in CASCADE matching BACKEND."
(assoc backend cascade
:test (lambda (a b) (string-equal (string a) (string b)))))
#+end_src
** Model Selector
The main routing function. Registered into ~*model-selector*~ at init time.
Called per-backend by ~backend-cascade-call~. Returns a model name string,
or ~:skip~ if the backend should not be tried (e.g., privacy filter).
Filter order: privacy → quadrant → complexity → cascade.
;; REPL-VERIFIED: 2026-05-03T14:00:00
#+begin_src lisp
(defun model-select (backend context)
"Select model for BACKEND given CONTEXT signal.
Returns model name or :skip."
(let* ((payload (getf context :payload))
(text (or (getf payload :text) ""))
(sensor (getf payload :sensor))
(has-personal (and (boundp '*dispatcher-privacy-tags*)
(some (lambda (tag) (search tag text))
(symbol-value '*dispatcher-privacy-tags*))))
(is-local (member backend *local-backends*)))
;; Privacy: skip cloud backends for personal content
(when (and has-personal (not is-local))
(log-message "MODEL-ROUTER: Skipping ~a (personal content)" backend)
(return-from model-select :skip))
;; Quadrant: background tasks use background cascade
(if (member sensor '(:heartbeat :delegation :tool-output :loop-error))
(let ((entry (car (or *model-cascade-background*
'((:ollama . "phi-2"))))))
(cdr entry))
;; Foreground: classify complexity, use slot cascade
(let* ((slot (model-classify-complexity text))
(cascade (case slot
(:code *model-cascade-code*)
(:plan *model-cascade-plan*)
(t *model-cascade-chat*)))
(entry (model-cascade-find
(or cascade '((:ollama . "qwen2.5:14b"))) backend)))
(if entry (cdr entry) :skip)))))
#+end_src
** Initialization
Reads cascade configuration from environment variables and registers
~model-select~ into the core ~*model-selector*~ hook.
;; REPL-VERIFIED: 2026-05-03T14:00:00
#+begin_src lisp
(defun model-router-init ()
"Read env vars and wire model-select into *model-selector*."
(flet ((parse-cascade (str)
(when (and str (> (length str) 0))
(let ((*read-eval* nil))
(read-from-string str)))))
(setf *model-cascade-code* (parse-cascade (uiop:getenv "MODEL_CASCADE_CODE"))
*model-cascade-plan* (parse-cascade (uiop:getenv "MODEL_CASCADE_PLAN"))
*model-cascade-chat* (parse-cascade (uiop:getenv "MODEL_CASCADE_CHAT"))
*model-cascade-background* (parse-cascade (uiop:getenv "MODEL_CASCADE_BACKGROUND"))
*local-backends* (let ((env (uiop:getenv "LOCAL_BACKENDS")))
(if env
(mapcar (lambda (s) (intern (string-upcase (string-trim " " s)) :keyword))
(uiop:split-string env :separator '(#\,)))
'(:ollama :llama-cpp)))))
(setf *model-selector* #'model-select)
(log-message "MODEL-ROUTER: Initialized, selector=~a" *model-selector*))
#+end_src
** Skill Registration
Triggers on nothing (observer). Initialization happens at load time.
#+begin_src lisp
(defskill :passepartout-model-router
:priority 250
:trigger (lambda (ctx) (declare (ignore ctx)) nil))
#+end_src
** Auto-Init
#+begin_src lisp
(model-router-init)
#+end_src