From 2af882852c2c157484ff421e0bb3f380b50caf5e Mon Sep 17 00:00:00 2001 From: Amr Gharbeia Date: Sun, 3 May 2026 15:46:10 -0400 Subject: [PATCH] feat: quadrant-based model routing with per-slot provider cascades --- .gitignore | 2 +- docs/ROADMAP.org | 15 ++- lisp/core-loop-perceive.lisp | 1 - lisp/core-loop-reason.lisp | 29 ++--- lisp/system-model-router.lisp | 88 +++++++++++++++ org/core-loop-perceive.org | 2 +- org/core-loop-reason.org | 32 +++--- org/system-model-router.org | 207 ++++++++++++++++++++++++++++++++++ 8 files changed, 342 insertions(+), 34 deletions(-) create mode 100644 lisp/system-model-router.lisp create mode 100644 org/system-model-router.org diff --git a/.gitignore b/.gitignore index 2de8bae..f431220 100644 --- a/.gitignore +++ b/.gitignore @@ -10,5 +10,5 @@ test_input.txt # Generated artifacts (source of truth is .org) /skills/*.lisp /tests/*.lisp +/tmp/*.lisp *.fasl -*.lisp diff --git a/docs/ROADMAP.org b/docs/ROADMAP.org index e409409..0355b7e 100644 --- a/docs/ROADMAP.org +++ b/docs/ROADMAP.org @@ -386,10 +386,17 @@ Memory scope: ~:scope~ property on memory-objects (memex/session/project). Implement lazy-loading proxies for large-scale memory traversal. **** TODO Model-Tier Routing (cost optimization) -Extend ~*model-selector-fn*~ for complexity-based routing. -- Heartbeats → smallest model -- User input → medium model -- Complex reasoning → large model +Extend ~*model-selector*~ for quadrant-based routing with per-slot provider cascades. 
+- Privacy filter (local-only for @personal content) — top priority +- Quadrant tagging (foreground/background × probabilistic/deterministic) +- Complexity classifier (code/plan/chat/background slots), each with its own provider cascade +- Model-selector skill registers into the ~*model-selector*~ hook + +Deferred: +- Economics / budget tracking (per-request cost, cumulative caps) +- TUI /config command for cascade configuration (env vars for now) +- Skill metadata declaring complexity at defskill time (keyword-based for now) +- Visual model indicator in TUI status bar **** TODO Memory Scope Segmentation Extend memory-object with ~:scope~ property. diff --git a/lisp/core-loop-perceive.lisp b/lisp/core-loop-perceive.lisp index 4f90f7c..0bd1a59 100644 --- a/lisp/core-loop-perceive.lisp +++ b/lisp/core-loop-perceive.lisp @@ -129,4 +129,3 @@ FN receives (signal) and returns T if consumed, nil to continue." (test test-depth-limiting (let ((runaway-signal (list :type :EVENT :depth 11 :payload (list :sensor :heartbeat)))) (is (null (process-signal runaway-signal))))) -(defun bad-code () (broken diff --git a/lisp/core-loop-reason.lisp b/lisp/core-loop-reason.lisp index 5c9cd56..647a3cb 100644 --- a/lisp/core-loop-reason.lisp +++ b/lisp/core-loop-reason.lisp @@ -20,20 +20,23 @@ (let ((backend-fn (gethash backend *backend-registry*))) (when backend-fn (log-message "PROBABILISTIC: Attempting backend ~a..." 
backend) - (let* ((model (when *model-selector* - (funcall *model-selector* backend context))) - (result (if model - (funcall backend-fn prompt system-prompt :model model) - (funcall backend-fn prompt system-prompt)))) - (cond ((and (listp result) (eq (getf result :status) :success)) - (return (getf result :content))) - ((stringp result) - (return result)) - (t - (log-message "PROBABILISTIC: Backend ~a failed: ~a" - backend (getf result :message)))))))) + (let* ((model-val (when *model-selector* + (funcall *model-selector* backend context)))) + (if (eq model-val :skip) + (log-message "PROBABILISTIC: Skipping ~a (filtered)" backend) + (let* ((model (if model-val model-val nil)) + (result (if model + (funcall backend-fn prompt system-prompt :model model) + (funcall backend-fn prompt system-prompt)))) + (cond ((and (listp result) (eq (getf result :status) :success)) + (return (getf result :content))) + ((stringp result) + (return result)) + (t + (log-message "PROBABILISTIC: Backend ~a failed: ~a" + backend (getf result :message))))))))))) (list :type :LOG - :payload (list :text "Neural Cascade Failure: All providers exhausted."))))) + :payload (list :text "Neural Cascade Failure: All providers exhausted.")))) (defun markdown-strip (text) (if (and text (stringp text)) diff --git a/lisp/system-model-router.lisp b/lisp/system-model-router.lisp new file mode 100644 index 0000000..eb633e2 --- /dev/null +++ b/lisp/system-model-router.lisp @@ -0,0 +1,88 @@ +(defvar *model-cascade-code* nil + "Cascade for :code tasks: ((:ollama . \"model\") ...)") + +(defvar *model-cascade-plan* nil + "Cascade for :plan tasks.") + +(defvar *model-cascade-chat* nil + "Cascade for :chat tasks.") + +(defvar *model-cascade-background* nil + "Cascade for background tasks (heartbeat, delegation).") + +(defvar *local-backends* '(:ollama :llama-cpp) + "Backend keywords considered local (privacy-safe).") + +(defun model-classify-complexity (text) + "Classify TEXT into :code, :plan, or :chat." 
+ (let ((lower (string-downcase text))) + (cond + ((or (search "defun" lower) (search "defmacro" lower) + (search "write" lower) (search "refactor" lower) + (search "fix " lower) (search "implement" lower) + (search "code" lower) + (search "#+begin_src" lower)) + :code) + ((or (search "plan" lower) (search "roadmap" lower) + (search "strategy" lower) (search "design" lower) + (search "architecture" lower)) + :plan) + (t :chat)))) + +(defun model-cascade-find (cascade backend) + "Find first (PROVIDER . MODEL) in CASCADE matching BACKEND." + (assoc backend cascade + :test (lambda (a b) (string-equal (string a) (string b))))) + +(defun model-select (backend context) + "Select model for BACKEND given CONTEXT signal. +Returns BACKEND's model from the matching cascade, or :skip if filtered or absent." + (let* ((payload (getf context :payload)) + (text (or (getf payload :text) "")) + (sensor (getf payload :sensor)) + (has-personal (and (boundp '*dispatcher-privacy-tags*) + (some (lambda (tag) (search tag text)) + (symbol-value '*dispatcher-privacy-tags*)))) + (is-local (member backend *local-backends*))) + ;; Privacy: skip cloud backends for personal content + (when (and has-personal (not is-local)) + (log-message "MODEL-ROUTER: Skipping ~a (personal content)" backend) + (return-from model-select :skip)) + ;; Quadrant: background tasks look BACKEND up in the background cascade + (if (member sensor '(:heartbeat :delegation :tool-output :loop-error)) + (let ((entry (model-cascade-find (or *model-cascade-background* + '((:ollama . "phi-2"))) backend))) + (if entry (cdr entry) :skip)) + ;; Foreground: classify complexity, use slot cascade + (let* ((slot (model-classify-complexity text)) + (cascade (case slot + (:code *model-cascade-code*) + (:plan *model-cascade-plan*) + (t *model-cascade-chat*))) + (entry (model-cascade-find + (or cascade '((:ollama . "qwen2.5:14b"))) backend))) + (if entry (cdr entry) :skip))))) + +(defun model-router-init () + "Read env vars and wire model-select into *model-selector*." 
+ (flet ((parse-cascade (str) + (when (and str (> (length str) 0)) + (let ((*read-eval* nil)) + (read-from-string str))))) + (setf *model-cascade-code* (parse-cascade (uiop:getenv "MODEL_CASCADE_CODE")) + *model-cascade-plan* (parse-cascade (uiop:getenv "MODEL_CASCADE_PLAN")) + *model-cascade-chat* (parse-cascade (uiop:getenv "MODEL_CASCADE_CHAT")) + *model-cascade-background* (parse-cascade (uiop:getenv "MODEL_CASCADE_BACKGROUND")) + *local-backends* (let ((env (uiop:getenv "LOCAL_BACKENDS"))) + (if env + (mapcar (lambda (s) (intern (string-upcase (string-trim " " s)) :keyword)) + (uiop:split-string env :separator '(#\,))) + '(:ollama :llama-cpp))))) + (setf *model-selector* #'model-select) + (log-message "MODEL-ROUTER: Initialized, selector=~a" *model-selector*)) + +(defskill :passepartout-model-router + :priority 250 + :trigger (lambda (ctx) (declare (ignore ctx)) nil)) + +(model-router-init) diff --git a/org/core-loop-perceive.org b/org/core-loop-perceive.org index 22f93f9..6015194 100644 --- a/org/core-loop-perceive.org +++ b/org/core-loop-perceive.org @@ -249,4 +249,4 @@ Verifies that the perceive gate correctly ingests AST nodes into memory and that (test test-depth-limiting (let ((runaway-signal (list :type :EVENT :depth 11 :payload (list :sensor :heartbeat)))) (is (null (process-signal runaway-signal))))) -#+end_src(defun bad-code () (broken +#+end_src \ No newline at end of file diff --git a/org/core-loop-reason.org b/org/core-loop-reason.org index d0dadf4..025d382 100644 --- a/org/core-loop-reason.org +++ b/org/core-loop-reason.org @@ -101,7 +101,7 @@ The function has a fallback for every failure mode: This is deliberately resilient. The system should never crash because an LLM provider is down. It should log the failure, try the next provider, and if all fail, return a diagnostic message that the deterministic engine can present to the user. 
-;; REPL-VERIFIED: 2026-05-03T13:00:00 +;; REPL-VERIFIED: 2026-05-03T14:00:00 #+begin_src lisp (defun backend-cascade-call (prompt &key (system-prompt "You are the Probabilistic engine.") @@ -112,20 +112,24 @@ This is deliberately resilient. The system should never crash because an LLM pro (let ((backend-fn (gethash backend *backend-registry*))) (when backend-fn (log-message "PROBABILISTIC: Attempting backend ~a..." backend) - (let* ((model (when *model-selector* - (funcall *model-selector* backend context))) - (result (if model - (funcall backend-fn prompt system-prompt :model model) - (funcall backend-fn prompt system-prompt)))) - (cond ((and (listp result) (eq (getf result :status) :success)) - (return (getf result :content))) - ((stringp result) - (return result)) - (t - (log-message "PROBABILISTIC: Backend ~a failed: ~a" - backend (getf result :message)))))))) + (let* ((model-val (when *model-selector* + (funcall *model-selector* backend context)))) + (if (eq model-val :skip) + (log-message "PROBABILISTIC: Skipping ~a (filtered)" backend) + (let* ((model (if model-val model-val nil)) + (result (if model + (funcall backend-fn prompt system-prompt :model model) + (funcall backend-fn prompt system-prompt)))) + (cond ((and (listp result) (eq (getf result :status) :success)) + (return (getf result :content))) + ((stringp result) + (return result)) + (t + (log-message "PROBABILISTIC: Backend ~a failed: ~a" + backend (getf result :message))))))))))) (list :type :LOG - :payload (list :text "Neural Cascade Failure: All providers exhausted."))))) + :payload (list :text "Neural Cascade Failure: All providers exhausted.")))) + #+end_src ** Cognitive Proposal Generation (think) diff --git a/org/system-model-router.org b/org/system-model-router.org new file mode 100644 index 0000000..c805776 --- /dev/null +++ b/org/system-model-router.org @@ -0,0 +1,207 @@ +#+TITLE: SKILL: Model Router (org-skill-model-router.org) +#+AUTHOR: Agent +#+FILETAGS: :system:model:routing: 
+#+PROPERTY: header-args:lisp :tangle ../lisp/system-model-router.lisp + +* Overview: Quadrant-Based Model Routing + +The Model Router implements the four-quadrant cognitive architecture for +LLM model selection. Each signal is routed through a pipeline of three +filters — privacy, quadrant, and complexity — before a model is chosen. + +The routing pipeline for every probabilistic signal: + + all backends → privacy filter → quadrant/classifier → per-slot cascade → model + +- **Privacy filter** strips cloud backends when content carries ~@personal~ tags. +- **Quadrant** determines if the signal is foreground or background. +- **Complexity classifier** assigns foreground signals to one of three slots: + ~:code~, ~:plan~, or ~:chat~. +- **Per-slot cascade** selects a backend and model for the slot, with fallback + ordering defined in each cascade list. + +The model selector function is registered into the core ~*model-selector*~ hook +at load time. The core iterates providers, calling the selector for each one. + +* Implementation + +** Configuration: Per-Slot Cascades + +Four env-configurable cascade variables, one per slot. Each cascade is a list +of ~(provider-keyword . "model-name")~ pairs. The first match for the current +backend is used. + +Example: + MODEL_CASCADE_CODE='((:ollama . "deepseek-coder:6.7b") (:openrouter . "claude-sonnet"))' + +*** *model-cascade-code* + +The cascade for ~:code~ tasks (code generation, refactoring, bug fixing). +Format: ~((:ollama . "model-name") ...)~. Configured via ~MODEL_CASCADE_CODE~. + +;; REPL-VERIFIED: 2026-05-03T14:00:00 +#+begin_src lisp +(defvar *model-cascade-code* nil + "Cascade for :code tasks: ((:ollama . \"model\") ...)") +#+end_src + +*** *model-cascade-plan* + +Cascade for planning and architecture tasks. Configured via ~MODEL_CASCADE_PLAN~. 
+ +;; REPL-VERIFIED: 2026-05-03T14:00:00 +#+begin_src lisp +(defvar *model-cascade-plan* nil + "Cascade for :plan tasks.") +#+end_src + +*** *model-cascade-chat* + +Cascade for general conversation and simple Q&A. Configured via ~MODEL_CASCADE_CHAT~. + +;; REPL-VERIFIED: 2026-05-03T14:00:00 +#+begin_src lisp +(defvar *model-cascade-chat* nil + "Cascade for :chat tasks.") +#+end_src + +*** *model-cascade-background* + +Cascade for background tasks (heartbeat scraping, delegation processing). +Configured via ~MODEL_CASCADE_BACKGROUND~. + +;; REPL-VERIFIED: 2026-05-03T14:00:00 +#+begin_src lisp +(defvar *model-cascade-background* nil + "Cascade for background tasks (heartbeat, delegation).") +#+end_src + +*** *local-backends* + +List of backend keywords considered local for privacy routing. Content tagged +with ~@personal~ will only be sent to these backends. + +;; REPL-VERIFIED: 2026-05-03T14:00:00 +#+begin_src lisp +(defvar *local-backends* '(:ollama :llama-cpp) + "Backend keywords considered local (privacy-safe).") +#+end_src + +** Complexity Classifier + +Keyword-based heuristic that assigns signal text to a complexity slot. +Not yet pluggable: ~model-select~ calls ~model-classify-complexity~ directly (no ~*complexity-classifier*~ hook exists yet). + +;; REPL-VERIFIED: 2026-05-03T14:00:00 +#+begin_src lisp +(defun model-classify-complexity (text) + "Classify TEXT into :code, :plan, or :chat." + (let ((lower (string-downcase text))) + (cond + ((or (search "defun" lower) (search "defmacro" lower) + (search "write" lower) (search "refactor" lower) + (search "fix " lower) (search "implement" lower) + (search "code" lower) + (search "#+begin_src" lower)) + :code) + ((or (search "plan" lower) (search "roadmap" lower) + (search "strategy" lower) (search "design" lower) + (search "architecture" lower)) + :plan) + (t :chat)))) +#+end_src + +** Cascade Lookup + +Finds the first ~(provider . model)~ entry in a cascade matching the +current backend keyword. Case-insensitive. 
+ +;; REPL-VERIFIED: 2026-05-03T14:00:00 +#+begin_src lisp +(defun model-cascade-find (cascade backend) + "Find first (PROVIDER . MODEL) in CASCADE matching BACKEND." + (assoc backend cascade + :test (lambda (a b) (string-equal (string a) (string b))))) +#+end_src + +** Model Selector + +The main routing function. Registered into ~*model-selector*~ at init time. +Called per-backend by ~backend-cascade-call~. Returns a model name string, +or ~:skip~ if the backend should not be tried (privacy filter, or no entry for it in the slot's cascade). + +Filter order: privacy → quadrant → complexity → cascade. + +;; REPL-VERIFIED: 2026-05-03T14:00:00 +#+begin_src lisp +(defun model-select (backend context) + "Select model for BACKEND given CONTEXT signal. +Returns BACKEND's model from the matching cascade, or :skip if filtered or absent." + (let* ((payload (getf context :payload)) + (text (or (getf payload :text) "")) + (sensor (getf payload :sensor)) + (has-personal (and (boundp '*dispatcher-privacy-tags*) + (some (lambda (tag) (search tag text)) + (symbol-value '*dispatcher-privacy-tags*)))) + (is-local (member backend *local-backends*))) + ;; Privacy: skip cloud backends for personal content + (when (and has-personal (not is-local)) + (log-message "MODEL-ROUTER: Skipping ~a (personal content)" backend) + (return-from model-select :skip)) + ;; Quadrant: background tasks look BACKEND up in the background cascade + (if (member sensor '(:heartbeat :delegation :tool-output :loop-error)) + (let ((entry (model-cascade-find (or *model-cascade-background* + '((:ollama . "phi-2"))) backend))) + (if entry (cdr entry) :skip)) + ;; Foreground: classify complexity, use slot cascade + (let* ((slot (model-classify-complexity text)) + (cascade (case slot + (:code *model-cascade-code*) + (:plan *model-cascade-plan*) + (t *model-cascade-chat*))) + (entry (model-cascade-find + (or cascade '((:ollama . "qwen2.5:14b"))) backend))) + (if entry (cdr entry) :skip))))) +#+end_src + +** Initialization + +Reads cascade configuration from environment variables and registers +~model-select~ into the core ~*model-selector*~ hook. 
+ +;; REPL-VERIFIED: 2026-05-03T14:00:00 +#+begin_src lisp +(defun model-router-init () + "Read env vars and wire model-select into *model-selector*." + (flet ((parse-cascade (str) + (when (and str (> (length str) 0)) + (let ((*read-eval* nil)) + (read-from-string str))))) + (setf *model-cascade-code* (parse-cascade (uiop:getenv "MODEL_CASCADE_CODE")) + *model-cascade-plan* (parse-cascade (uiop:getenv "MODEL_CASCADE_PLAN")) + *model-cascade-chat* (parse-cascade (uiop:getenv "MODEL_CASCADE_CHAT")) + *model-cascade-background* (parse-cascade (uiop:getenv "MODEL_CASCADE_BACKGROUND")) + *local-backends* (let ((env (uiop:getenv "LOCAL_BACKENDS"))) + (if env + (mapcar (lambda (s) (intern (string-upcase (string-trim " " s)) :keyword)) + (uiop:split-string env :separator '(#\,))) + '(:ollama :llama-cpp))))) + (setf *model-selector* #'model-select) + (log-message "MODEL-ROUTER: Initialized, selector=~a" *model-selector*)) +#+end_src + +** Skill Registration + +Triggers on nothing (observer). Initialization happens at load time. + +#+begin_src lisp +(defskill :passepartout-model-router + :priority 250 + :trigger (lambda (ctx) (declare (ignore ctx)) nil)) +#+end_src + +** Auto-Init + +#+begin_src lisp +(model-router-init) +#+end_src