feat: quadrant-based model routing with per-slot provider cascades
Some checks failed
Deploy (Gitea) / deploy (push) Failing after 2s

This commit is contained in:
2026-05-03 15:46:10 -04:00
parent 4e5428bed0
commit 2af882852c
8 changed files with 342 additions and 34 deletions

2
.gitignore vendored
View File

@@ -10,5 +10,5 @@ test_input.txt
# Generated artifacts (source of truth is .org)
/skills/*.lisp
/tests/*.lisp
/tmp/*.lisp
*.fasl
*.lisp

View File

@@ -386,10 +386,17 @@ Memory scope: ~:scope~ property on memory-objects (memex/session/project).
Implement lazy-loading proxies for large-scale memory traversal.
**** TODO Model-Tier Routing (cost optimization)
Extend ~*model-selector-fn*~ for complexity-based routing.
- Heartbeats → smallest model
- User input → medium model
- Complex reasoning → large model
Extend ~*model-selector*~ for quadrant-based routing with per-slot provider cascades.
- Privacy filter (local-only for @personal content) — top priority
- Quadrant tagging (foreground/background × probabilistic/deterministic)
- Complexity classifier (code/plan/chat/background slots), each with its own provider cascade
- Model-selector skill registers into $*model-selector*$ hook
Deferred:
- Economics / budget tracking (per-request cost, cumulative caps)
- TUI /config command for cascade configuration (env vars for now)
- Skill metadata declaring complexity at defskill time (keyword-based for now)
- Visual model indicator in TUI status bar
**** TODO Memory Scope Segmentation
Extend memory-object with ~:scope~ property.

View File

@@ -129,4 +129,3 @@ FN receives (signal) and returns T if consumed, nil to continue."
(test test-depth-limiting
(let ((runaway-signal (list :type :EVENT :depth 11 :payload (list :sensor :heartbeat))))
(is (null (process-signal runaway-signal)))))
(defun bad-code () (broken

View File

@@ -20,20 +20,23 @@
(let ((backend-fn (gethash backend *backend-registry*)))
(when backend-fn
(log-message "PROBABILISTIC: Attempting backend ~a..." backend)
(let* ((model (when *model-selector*
(funcall *model-selector* backend context)))
(result (if model
(funcall backend-fn prompt system-prompt :model model)
(funcall backend-fn prompt system-prompt))))
(cond ((and (listp result) (eq (getf result :status) :success))
(return (getf result :content)))
((stringp result)
(return result))
(t
(log-message "PROBABILISTIC: Backend ~a failed: ~a"
backend (getf result :message))))))))
(let* ((model-val (when *model-selector*
(funcall *model-selector* backend context))))
(if (eq model-val :skip)
(log-message "PROBABILISTIC: Skipping ~a (filtered)" backend)
(let* ((model (if model-val model-val nil))
(result (if model
(funcall backend-fn prompt system-prompt :model model)
(funcall backend-fn prompt system-prompt))))
(cond ((and (listp result) (eq (getf result :status) :success))
(return (getf result :content)))
((stringp result)
(return result))
(t
(log-message "PROBABILISTIC: Backend ~a failed: ~a"
backend (getf result :message)))))))))))
(list :type :LOG
:payload (list :text "Neural Cascade Failure: All providers exhausted.")))))
:payload (list :text "Neural Cascade Failure: All providers exhausted."))))
(defun markdown-strip (text)
(if (and text (stringp text))

View File

@@ -0,0 +1,88 @@
(defvar *model-cascade-code* nil
"Cascade for :code tasks: ((:ollama . \"model\") ...)")
(defvar *model-cascade-plan* nil
"Cascade for :plan tasks.")
(defvar *model-cascade-chat* nil
"Cascade for :chat tasks.")
(defvar *model-cascade-background* nil
"Cascade for background tasks (heartbeat, delegation).")
(defvar *local-backends* '(:ollama :llama-cpp)
"Backend keywords considered local (privacy-safe).")
(defun model-classify-complexity (text)
"Classify TEXT into :code, :plan, or :chat."
(let ((lower (string-downcase text)))
(cond
((or (search "defun" lower) (search "defmacro" lower)
(search "write" lower) (search "refactor" lower)
(search "fix " lower) (search "implement" lower)
(search "code" lower)
(search "#+begin_src" lower))
:code)
((or (search "plan" lower) (search "roadmap" lower)
(search "strategy" lower) (search "design" lower)
(search "architecture" lower))
:plan)
(t :chat))))
(defun model-cascade-find (cascade backend)
"Find first (PROVIDER . MODEL) in CASCADE matching BACKEND."
(assoc backend cascade
:test (lambda (a b) (string-equal (string a) (string b)))))
(defun model-select (backend context)
"Select model for BACKEND given CONTEXT signal.
Returns model name or :skip."
(let* ((payload (getf context :payload))
(text (or (getf payload :text) ""))
(sensor (getf payload :sensor))
(has-personal (and (boundp '*dispatcher-privacy-tags*)
(some (lambda (tag) (search tag text))
(symbol-value '*dispatcher-privacy-tags*))))
(is-local (member backend *local-backends*)))
;; Privacy: skip cloud backends for personal content
(when (and has-personal (not is-local))
(log-message "MODEL-ROUTER: Skipping ~a (personal content)" backend)
(return-from model-select :skip))
;; Quadrant: background tasks use background cascade
(if (member sensor '(:heartbeat :delegation :tool-output :loop-error))
(let ((entry (car (or *model-cascade-background*
'((:ollama . "phi-2"))))))
(cdr entry))
;; Foreground: classify complexity, use slot cascade
(let* ((slot (model-classify-complexity text))
(cascade (case slot
(:code *model-cascade-code*)
(:plan *model-cascade-plan*)
(t *model-cascade-chat*)))
(entry (model-cascade-find
(or cascade '((:ollama . "qwen2.5:14b"))) backend)))
(if entry (cdr entry) :skip)))))
(defun model-router-init ()
"Read env vars and wire model-select into *model-selector*."
(flet ((parse-cascade (str)
(when (and str (> (length str) 0))
(let ((*read-eval* nil))
(read-from-string str)))))
(setf *model-cascade-code* (parse-cascade (uiop:getenv "MODEL_CASCADE_CODE"))
*model-cascade-plan* (parse-cascade (uiop:getenv "MODEL_CASCADE_PLAN"))
*model-cascade-chat* (parse-cascade (uiop:getenv "MODEL_CASCADE_CHAT"))
*model-cascade-background* (parse-cascade (uiop:getenv "MODEL_CASCADE_BACKGROUND"))
*local-backends* (let ((env (uiop:getenv "LOCAL_BACKENDS")))
(if env
(mapcar (lambda (s) (intern (string-upcase (string-trim " " s)) :keyword))
(uiop:split-string env :separator '(#\,)))
'(:ollama :llama-cpp)))))
(setf *model-selector* #'model-select)
(log-message "MODEL-ROUTER: Initialized, selector=~a" *model-selector*))
(defskill :passepartout-model-router
:priority 250
:trigger (lambda (ctx) (declare (ignore ctx)) nil))
(model-router-init)

View File

@@ -249,4 +249,4 @@ Verifies that the perceive gate correctly ingests AST nodes into memory and that
(test test-depth-limiting
(let ((runaway-signal (list :type :EVENT :depth 11 :payload (list :sensor :heartbeat))))
(is (null (process-signal runaway-signal)))))
#+end_src(defun bad-code () (broken
#+end_src

View File

@@ -101,7 +101,7 @@ The function has a fallback for every failure mode:
This is deliberately resilient. The system should never crash because an LLM provider is down. It should log the failure, try the next provider, and if all fail, return a diagnostic message that the deterministic engine can present to the user.
;; REPL-VERIFIED: 2026-05-03T13:00:00
;; REPL-VERIFIED: 2026-05-03T14:00:00
#+begin_src lisp
(defun backend-cascade-call (prompt &key
(system-prompt "You are the Probabilistic engine.")
@@ -112,20 +112,24 @@ This is deliberately resilient. The system should never crash because an LLM pro
(let ((backend-fn (gethash backend *backend-registry*)))
(when backend-fn
(log-message "PROBABILISTIC: Attempting backend ~a..." backend)
(let* ((model (when *model-selector*
(funcall *model-selector* backend context)))
(result (if model
(funcall backend-fn prompt system-prompt :model model)
(funcall backend-fn prompt system-prompt))))
(cond ((and (listp result) (eq (getf result :status) :success))
(return (getf result :content)))
((stringp result)
(return result))
(t
(log-message "PROBABILISTIC: Backend ~a failed: ~a"
backend (getf result :message))))))))
(let* ((model-val (when *model-selector*
(funcall *model-selector* backend context))))
(if (eq model-val :skip)
(log-message "PROBABILISTIC: Skipping ~a (filtered)" backend)
(let* ((model (if model-val model-val nil))
(result (if model
(funcall backend-fn prompt system-prompt :model model)
(funcall backend-fn prompt system-prompt))))
(cond ((and (listp result) (eq (getf result :status) :success))
(return (getf result :content)))
((stringp result)
(return result))
(t
(log-message "PROBABILISTIC: Backend ~a failed: ~a"
backend (getf result :message)))))))))))
(list :type :LOG
:payload (list :text "Neural Cascade Failure: All providers exhausted.")))))
:payload (list :text "Neural Cascade Failure: All providers exhausted."))))
#+end_src
** Cognitive Proposal Generation (think)

207
org/system-model-router.org Normal file
View File

@@ -0,0 +1,207 @@
#+TITLE: SKILL: Model Router (org-skill-model-router.org)
#+AUTHOR: Agent
#+FILETAGS: :system:model:routing:
#+PROPERTY: header-args:lisp :tangle ../lisp/system-model-router.lisp
* Overview: Quadrant-Based Model Routing
The Model Router implements the four-quadrant cognitive architecture for
LLM model selection. Each signal is routed through a pipeline of three
filters — privacy, quadrant, and complexity — before a model is chosen.
The routing pipeline for every probabilistic signal:
all backends → privacy filter → quadrant/classifier → per-slot cascade → model
- **Privacy filter** strips cloud backends when content carries ~@personal~ tags.
- **Quadrant** determines if the signal is foreground or background.
- **Complexity classifier** assigns foreground signals to one of three slots:
~:code~, ~:plan~, or ~:chat~.
- **Per-slot cascade** selects a backend and model for the slot, with fallback
ordering defined in each cascade list.
The model selector function is registered into the core ~*model-selector*~ hook
at load time. The core iterates providers, calling the selector for each one.
* Implementation
** Configuration: Per-Slot Cascades
Four env-configurable cascade variables, one per slot. Each cascade is a list
of ~(provider-keyword . "model-name")~ pairs. The first match for the current
backend is used.
Example:
MODEL_CASCADE_CODE='((:ollama . "deepseek-coder:6.7b") (:openrouter . "claude-sonnet"))'
*** *model-cascade-code*
The cascade for ~:code~ tasks (code generation, refactoring, bug fixing).
Format: ~((:ollama . "model-name") ...)~. Configured via ~MODEL_CASCADE_CODE~.
;; REPL-VERIFIED: 2026-05-03T14:00:00
#+begin_src lisp
(defvar *model-cascade-code* nil
"Cascade for :code tasks: ((:ollama . \"model\") ...)")
#+end_src
*** *model-cascade-plan*
Cascade for planning and architecture tasks. Configured via ~MODEL_CASCADE_PLAN~.
;; REPL-VERIFIED: 2026-05-03T14:00:00
#+begin_src lisp
(defvar *model-cascade-plan* nil
"Cascade for :plan tasks.")
#+end_src
*** *model-cascade-chat*
Cascade for general conversation and simple Q&A. Configured via ~MODEL_CASCADE_CHAT~.
;; REPL-VERIFIED: 2026-05-03T14:00:00
#+begin_src lisp
(defvar *model-cascade-chat* nil
"Cascade for :chat tasks.")
#+end_src
*** *model-cascade-background*
Cascade for background tasks (heartbeat scraping, delegation processing).
Configured via ~MODEL_CASCADE_BACKGROUND~.
;; REPL-VERIFIED: 2026-05-03T14:00:00
#+begin_src lisp
(defvar *model-cascade-background* nil
"Cascade for background tasks (heartbeat, delegation).")
#+end_src
*** *local-backends*
List of backend keywords considered local for privacy routing. Content tagged
with ~@personal~ will only be sent to these backends.
;; REPL-VERIFIED: 2026-05-03T14:00:00
#+begin_src lisp
(defvar *local-backends* '(:ollama :llama-cpp)
"Backend keywords considered local (privacy-safe).")
#+end_src
** Complexity Classifier
Keyword-based heuristic that assigns signal text to a complexity slot.
Pluggable — set ~*complexity-classifier*~ to override.
;; REPL-VERIFIED: 2026-05-03T14:00:00
#+begin_src lisp
(defun model-classify-complexity (text)
"Classify TEXT into :code, :plan, or :chat."
(let ((lower (string-downcase text)))
(cond
((or (search "defun" lower) (search "defmacro" lower)
(search "write" lower) (search "refactor" lower)
(search "fix " lower) (search "implement" lower)
(search "code" lower)
(search "#+begin_src" lower))
:code)
((or (search "plan" lower) (search "roadmap" lower)
(search "strategy" lower) (search "design" lower)
(search "architecture" lower))
:plan)
(t :chat))))
#+end_src
** Cascade Lookup
Finds the first ~(provider . model)~ entry in a cascade matching the
current backend keyword. Case-insensitive.
;; REPL-VERIFIED: 2026-05-03T14:00:00
#+begin_src lisp
(defun model-cascade-find (cascade backend)
"Find first (PROVIDER . MODEL) in CASCADE matching BACKEND."
(assoc backend cascade
:test (lambda (a b) (string-equal (string a) (string b)))))
#+end_src
** Model Selector
The main routing function. Registered into ~*model-selector*~ at init time.
Called per-backend by ~backend-cascade-call~. Returns a model name string,
or ~:skip~ if the backend should not be tried (e.g., privacy filter).
Filter order: privacy → quadrant → complexity → cascade.
;; REPL-VERIFIED: 2026-05-03T14:00:00
#+begin_src lisp
(defun model-select (backend context)
"Select model for BACKEND given CONTEXT signal.
Returns model name or :skip."
(let* ((payload (getf context :payload))
(text (or (getf payload :text) ""))
(sensor (getf payload :sensor))
(has-personal (and (boundp '*dispatcher-privacy-tags*)
(some (lambda (tag) (search tag text))
(symbol-value '*dispatcher-privacy-tags*))))
(is-local (member backend *local-backends*)))
;; Privacy: skip cloud backends for personal content
(when (and has-personal (not is-local))
(log-message "MODEL-ROUTER: Skipping ~a (personal content)" backend)
(return-from model-select :skip))
;; Quadrant: background tasks use background cascade
(if (member sensor '(:heartbeat :delegation :tool-output :loop-error))
(let ((entry (car (or *model-cascade-background*
'((:ollama . "phi-2"))))))
(cdr entry))
;; Foreground: classify complexity, use slot cascade
(let* ((slot (model-classify-complexity text))
(cascade (case slot
(:code *model-cascade-code*)
(:plan *model-cascade-plan*)
(t *model-cascade-chat*)))
(entry (model-cascade-find
(or cascade '((:ollama . "qwen2.5:14b"))) backend)))
(if entry (cdr entry) :skip)))))
#+end_src
** Initialization
Reads cascade configuration from environment variables and registers
~model-select~ into the core ~*model-selector*~ hook.
;; REPL-VERIFIED: 2026-05-03T14:00:00
#+begin_src lisp
(defun model-router-init ()
"Read env vars and wire model-select into *model-selector*."
(flet ((parse-cascade (str)
(when (and str (> (length str) 0))
(let ((*read-eval* nil))
(read-from-string str)))))
(setf *model-cascade-code* (parse-cascade (uiop:getenv "MODEL_CASCADE_CODE"))
*model-cascade-plan* (parse-cascade (uiop:getenv "MODEL_CASCADE_PLAN"))
*model-cascade-chat* (parse-cascade (uiop:getenv "MODEL_CASCADE_CHAT"))
*model-cascade-background* (parse-cascade (uiop:getenv "MODEL_CASCADE_BACKGROUND"))
*local-backends* (let ((env (uiop:getenv "LOCAL_BACKENDS")))
(if env
(mapcar (lambda (s) (intern (string-upcase (string-trim " " s)) :keyword))
(uiop:split-string env :separator '(#\,)))
'(:ollama :llama-cpp)))))
(setf *model-selector* #'model-select)
(log-message "MODEL-ROUTER: Initialized, selector=~a" *model-selector*))
#+end_src
** Skill Registration
Triggers on nothing (observer). Initialization happens at load time.
#+begin_src lisp
(defskill :passepartout-model-router
:priority 250
:trigger (lambda (ctx) (declare (ignore ctx)) nil))
#+end_src
** Auto-Init
#+begin_src lisp
(model-router-init)
#+end_src