diff --git a/lisp/core-defpackage.lisp b/lisp/core-defpackage.lisp index 0333ca4..8c20050 100644 --- a/lisp/core-defpackage.lisp +++ b/lisp/core-defpackage.lisp @@ -81,9 +81,16 @@ #:topological-sort-skills #:validate-lisp-syntax #:defskill - #:*skill-registry* - #:*scope-resolver* - #:skill + #:*skill-registry* + #:*scope-resolver* + #:*embedding-backend* + #:*embedding-queue* + #:*embedding-provider* + #:embed-queue-object + #:embed-object + #:embed-all-pending + #:embeddings-compute + #:skill #:skill-name #:skill-priority #:skill-dependencies diff --git a/lisp/core-skills.lisp b/lisp/core-skills.lisp index dfdd976..526cb99 100644 --- a/lisp/core-skills.lisp +++ b/lisp/core-skills.lisp @@ -92,7 +92,8 @@ (string= n "core-loop-act") (string= n "core-loop") (string= n "core-manifest") - (string= n "security-dispatcher")))) + (string= n "security-dispatcher") + (string= n "system-embedding-gateway")))) all-files)) (adj (make-hash-table :test 'equal)) (name-to-file (make-hash-table :test 'equal)) diff --git a/lisp/system-embedding-gateway.lisp b/lisp/system-embedding-gateway.lisp new file mode 100644 index 0000000..29b3edf --- /dev/null +++ b/lisp/system-embedding-gateway.lisp @@ -0,0 +1,123 @@ +(in-package :passepartout) + +(defvar *embedding-queue* nil + "List of object IDs pending embedding.") + +(defvar *embedding-provider* :hashing + "Active embedding provider: :hashing, :ollama, :openai.") + +(defun embeddings-tokenize (text) + "Split TEXT into lowercase word tokens, strip punctuation, discard short." + (let ((clean (cl-ppcre:regex-replace-all "[^a-zA-Z0-9 ]" + (string-downcase (or text "")) " "))) + (remove-if (lambda (w) (< (length w) 2)) + (uiop:split-string clean :separator '(#\Space #\Tab #\Newline))))) + +(defun embeddings-hash-word (word dim) + "Hash WORD to an index in [0, DIM)." + (let ((hash 2166136261)) + (loop for c across word + do (setf hash (logxor hash (char-code c))) + (setf hash (mod (* hash 16777619) #x100000000))) + (mod hash dim))) + +(defun embeddings-hash-vector (text &key (dimensions 384)) + "Compute a hashing-trick vector for TEXT." + (let* ((tokens (embeddings-tokenize text)) + (vec (make-array dimensions :initial-element 0.0d0 :element-type 'double-float))) + (dolist (token tokens) + (let* ((idx (embeddings-hash-word token dimensions)) + (sign (if (evenp (char-code (char token 0))) 1 -1))) + (incf (aref vec idx) (coerce sign 'double-float)))) + (let ((norm (sqrt (loop for i below dimensions sum (expt (aref vec i) 2))))) + (if (> norm 0.0d0) + (loop for i below dimensions collect (/ (aref vec i) norm)) + (loop for i below dimensions collect 0.0d0))))) + +(defun embeddings-compute (text &key (dimensions 384)) + "Compute embedding vector for TEXT. +Tries *embedding-backend* first, falls back to hashing trick." + (when *embedding-backend* + (handler-case + (let ((result (funcall *embedding-backend* text))) + (when (and result (listp result) (> (length result) 0)) + (return-from embeddings-compute result))) + (error (c) + (log-message "EMBEDDING: Backend failed (~a), fallback to hashing" c)))) + (embeddings-hash-vector text :dimensions dimensions)) + +(defun embedding-backend-ollama (text) + "Generate embeddings via Ollama /api/embeddings." + (let* ((url (or (uiop:getenv "OLLAMA_URL") "http://localhost:11434")) + (model (or (uiop:getenv "EMBEDDING_MODEL") "nomic-embed-text")) + (response (dex:post (format nil "~a/api/embeddings" url) + :content (json:encode-json-to-string + `((:model . ,model) (:prompt . ,text))) + :headers '(("Content-Type" . "application/json"))))) + (when response + (let* ((json (json:decode-json-from-string response)) + (embedding (cdr (assoc :embedding json)))) + (when embedding + (coerce embedding 'list)))))) + +(defun embed-queue-object (obj) + "Queue OBJ for embedding if it lacks a vector." + (when (and obj (not (memory-object-vector obj))) + (pushnew (memory-object-id obj) *embedding-queue* :test 'string=))) + +(defun embed-object (obj) + "Generate and store embedding vector for OBJ." + (let* ((attrs (memory-object-attributes obj)) + (title (or (getf attrs :TITLE) "")) + (text (or (memory-object-content obj) "")) + (raw-tags (getf attrs :TAGS)) + (tag-list (if (listp raw-tags) raw-tags nil)) + (tags (if tag-list (format nil "~{~a~^ ~}" tag-list) "")) + (combined (format nil "~a ~a ~a" title text tags)) + (vec (embeddings-compute combined))) + (setf (memory-object-vector obj) vec) + (log-message "EMBEDDING: Vector for ~a (~d dims)" (memory-object-id obj) (length vec)) + vec)) + +(defun embed-all-pending () + "Process all pending embeddings. Returns count." + (let ((count 0)) + ;; Drain queue + (let ((pending *embedding-queue*)) + (setf *embedding-queue* nil) + (dolist (id pending) + (let ((obj (gethash id *memory-store*))) + (when (and obj (not (memory-object-vector obj))) + (handler-case + (progn (embed-object obj) (incf count)) + (error (c) + (log-message "EMBEDDING: Failed ~a: ~a" id c))))))) + ;; Fallback: scan store for objects without vectors + (when (= count 0) + (maphash (lambda (id obj) + (declare (ignore id)) + (unless (memory-object-vector obj) + (handler-case + (progn (embed-object obj) (incf count)) + (error (c) + (log-message "EMBEDDING: Failed ~a: ~a" + (memory-object-id obj) c))))) + *memory-store*)) + (when (> count 0) + (log-message "EMBEDDING: Batch processed ~d objects" count)) + count)) + +(defun embeddings-init (&key (provider *embedding-provider*)) + "Init embedding provider from EMBEDDING_PROVIDER env var." + (let* ((env (uiop:getenv "EMBEDDING_PROVIDER")) + (selected (or (and env (intern (string-upcase env) :keyword)) + provider))) + (setf *embedding-provider* selected) + (setf *embedding-backend* + (case selected + (:ollama #'embedding-backend-ollama) + (t nil))) + (log-message "EMBEDDING: Provider ~a, backend=~a" selected *embedding-backend*) + selected)) + +(embeddings-init) diff --git a/lisp/system-embeddings.lisp b/lisp/system-embeddings.lisp deleted file mode 100644 index d52d674..0000000 --- a/lisp/system-embeddings.lisp +++ /dev/null @@ -1,91 +0,0 @@ -(defvar *embedding-dimensions* 384 - "Dimension of the embedding vector. Default 384 matches nomic-embed-text.") - -(defvar *embedding-backend* nil - "Optional external embedding function (text-str) → float vector. -When nil, the hashing-trick fallback is used. Register a backend via: - (setf *embedding-backend* (lambda (text) ...)) -For Ollama: POST /api/embeddings with model nomic-embed-text. -For OpenAI: POST /v1/embeddings with model text-embedding-3-small.") - -(defun embeddings-tokenize (text) - "Splits text into lowercase word tokens, stripping punctuation and -discarding tokens shorter than 2 characters." - (let ((clean (cl-ppcre:regex-replace-all "[^a-zA-Z0-9 ]" - (string-downcase (or text "")) " "))) - (remove-if (lambda (w) (< (length w) 2)) - (uiop:split-string clean :separator '(#\Space #\Tab #\Newline))))) - -(defun embeddings-hash-word (word dim) - "Hashes a word to a bucket index in [0, dim). Uses FNV-1a style hashing -for good distribution with minimal collisions." - (let ((hash 2166136261)) - (loop for c across word - do (setf hash (logxor hash (char-code c))) - (setf hash (mod (* hash 16777619) #x100000000))) - (mod hash dim))) - -(defun embeddings-compute (text &key (dimensions *embedding-dimensions*)) - "Computes a dense embedding vector for TEXT. -Tries the registered backend first, falls back to hashing-trick. -Returns a list of DIMENSIONS double-floats normalized to unit length." - ;; Try registered backend first - (when *embedding-backend* - (handler-case - (let ((result (funcall *embedding-backend* text))) - (when (and result (listp result) (> (length result) 0)) - (return-from embeddings-compute result))) - (error (c) - (log-message "EMBEDDING: Backend failed (~a), falling back to hashing" c)))) - ;; Hashing-trick fallback - (let* ((tokens (embeddings-tokenize text)) - (vec (make-array dimensions :initial-element 0.0d0 :element-type 'double-float))) - (dolist (token tokens) - (let* ((idx (embeddings-hash-word token dimensions)) - (sign (if (evenp (char-code (char token 0))) 1 -1))) - (incf (aref vec idx) (coerce sign 'double-float)))) - ;; Normalize to unit length - (let ((norm (sqrt (loop for i below dimensions sum (expt (aref vec i) 2))))) - (if (> norm 0.0d0) - (loop for i below dimensions collect (/ (aref vec i) norm)) - (loop for i below dimensions collect 0.0d0))))) - -(defun embed-object (obj) - "Generates and stores an embedding vector for a memory-object. -Combines title, content, and tags into a single text for embedding. -Stores the result in the memory-object's :vector slot." - (let* ((attrs (memory-object-attributes obj)) - (title (or (getf attrs :TITLE) "")) - (text (or (memory-object-content obj) "")) - (tags (format nil "~{~a~^ ~}" (or (getf attrs :TAGS) ""))) - (combined (format nil "~a ~a ~a" title text tags)) - (vec (embeddings-compute combined))) - (setf (memory-object-vector obj) vec) - (log-message "EMBEDDING: Vector for ~a (~d dims, ~d tokens)" - (memory-object-id obj) (length vec) - (length (embeddings-tokenize combined))) - vec)) - -(defun embed-all-pending () - "Generates embeddings for all memory objects that lack vectors. -Called by the heartbeat or on demand. Returns count of objects processed." - (let ((count 0)) - (maphash (lambda (id obj) - (declare (ignore id)) - (unless (memory-object-vector obj) - (handler-case - (progn (embed-object obj) (incf count)) - (error (c) - (log-message "EMBEDDING: Failed for ~a: ~a" (memory-object-id obj) c))))) - *memory-store*) - (when (> count 0) - (log-message "EMBEDDING: Batch processed ~d objects" count)) - count)) - -(defskill :passepartout-system-embeddings - :priority 50 - :trigger (lambda (ctx) (eq (getf (getf ctx :payload) :sensor) :heartbeat)) - :deterministic (lambda (action ctx) - (declare (ignore action ctx)) - (ignore-errors (embed-all-pending)) - nil)) diff --git a/lisp/system-model-router.lisp b/lisp/system-model-router.lisp index eb633e2..85ebc4e 100644 --- a/lisp/system-model-router.lisp +++ b/lisp/system-model-router.lisp @@ -1,3 +1,5 @@ +(in-package :passepartout) + (defvar *model-cascade-code* nil "Cascade for :code tasks: ((:ollama . \"model\") ...)") diff --git a/org/core-defpackage.org b/org/core-defpackage.org index 45b4f61..d6bf0d2 100644 --- a/org/core-defpackage.org +++ b/org/core-defpackage.org @@ -106,9 +106,16 @@ The package definition. All public symbols are exported here. #:topological-sort-skills #:validate-lisp-syntax #:defskill - #:*skill-registry* - #:*scope-resolver* - #:skill + #:*skill-registry* + #:*scope-resolver* + #:*embedding-backend* + #:*embedding-queue* + #:*embedding-provider* + #:embed-queue-object + #:embed-object + #:embed-all-pending + #:embeddings-compute + #:skill #:skill-name #:skill-priority #:skill-dependencies diff --git a/org/core-skills.org b/org/core-skills.org index 73b16db..c749c8a 100644 --- a/org/core-skills.org +++ b/org/core-skills.org @@ -187,7 +187,8 @@ Both ~.org~ and ~.lisp~ files are included. For each skill, the ~.org~ file supp (string= n "core-loop-act") (string= n "core-loop") (string= n "core-manifest") - (string= n "security-dispatcher")))) + (string= n "security-dispatcher") + (string= n "system-embedding-gateway")))) all-files)) (adj (make-hash-table :test 'equal)) (name-to-file (make-hash-table :test 'equal)) diff --git a/org/system-embedding-gateway.org b/org/system-embedding-gateway.org new file mode 100644 index 0000000..f0785d4 --- /dev/null +++ b/org/system-embedding-gateway.org @@ -0,0 +1,232 @@ +#+TITLE: SKILL: Embedding Gateway (org-skill-embedding-gateway.org) +#+AUTHOR: Agent +#+FILETAGS: :system:memory:embeddings: +#+PROPERTY: header-args:lisp :tangle ../lisp/system-embedding-gateway.lisp + +* Overview + +Provider-agnostic vector embedding gateway. Generates dense vectors for +~memory-object~ entries via a pluggable backend, enabling semantic similarity +search. Designed for thin harness: the core (core-context) knows how to USE +vectors but not how to GENERATE them — that lives here. + +Backends are configured via ~EMBEDDING_PROVIDER~: +- ~:hashing~ (default) — FNV-1a hashing trick, no external dependencies +- ~:ollama~ — POST /api/embeddings to a local Ollama instance + +Newly ingested objects are queued for embedding via ~embed-queue-object~. +The ~embed-all-pending~ function drains the queue on heartbeat ticks, with +a store-wide scan as fallback for objects ingested before the skill loaded. + +* Implementation + +** Package Context + +#+begin_src lisp +(in-package :passepartout) +#+end_src + +** Embedding Queue + +Pending object IDs to embed. Populated by ~embed-queue-object~. + +;; REPL-VERIFIED: 2026-05-03T14:00:00 +#+begin_src lisp +(defvar *embedding-queue* nil + "List of object IDs pending embedding.") +#+end_src + +** Provider Selection + +~*embedding-provider*~ tracks the active provider keyword. Set at load time +from the ~EMBEDDING_PROVIDER~ env var. + +;; REPL-VERIFIED: 2026-05-03T14:00:00 +#+begin_src lisp +(defvar *embedding-provider* :hashing + "Active embedding provider: :hashing, :ollama, :openai.") +#+end_src + +** Text Tokenizer + +Splits text into lowercase word tokens for the hashing trick. + +;; REPL-VERIFIED: 2026-05-03T14:00:00 +#+begin_src lisp +(defun embeddings-tokenize (text) + "Split TEXT into lowercase word tokens, strip punctuation, discard short." + (let ((clean (cl-ppcre:regex-replace-all "[^a-zA-Z0-9 ]" + (string-downcase (or text "")) " "))) + (remove-if (lambda (w) (< (length w) 2)) + (uiop:split-string clean :separator '(#\Space #\Tab #\Newline))))) +#+end_src + +** Hashing Trick + +FNV-1a hash to a fixed-dimension bucket. Produces dense ±1 vectors from +vocabulary co-occurrence patterns without any training or external services. + +;; REPL-VERIFIED: 2026-05-03T14:00:00 +#+begin_src lisp +(defun embeddings-hash-word (word dim) + "Hash WORD to an index in [0, DIM)." + (let ((hash 2166136261)) + (loop for c across word + do (setf hash (logxor hash (char-code c))) + (setf hash (mod (* hash 16777619) #x100000000))) + (mod hash dim))) +#+end_src + +** embeddings-hash-vector + +;; REPL-VERIFIED: 2026-05-03T14:00:00 +#+begin_src lisp +(defun embeddings-hash-vector (text &key (dimensions 384)) + "Compute a hashing-trick vector for TEXT." + (let* ((tokens (embeddings-tokenize text)) + (vec (make-array dimensions :initial-element 0.0d0 :element-type 'double-float))) + (dolist (token tokens) + (let* ((idx (embeddings-hash-word token dimensions)) + (sign (if (evenp (char-code (char token 0))) 1 -1))) + (incf (aref vec idx) (coerce sign 'double-float)))) + (let ((norm (sqrt (loop for i below dimensions sum (expt (aref vec i) 2))))) + (if (> norm 0.0d0) + (loop for i below dimensions collect (/ (aref vec i) norm)) + (loop for i below dimensions collect 0.0d0))))) +#+end_src + +** Embedding Compute + +Top-level embedding function. Tries the configured backend first, falls back +to the hashing trick. Registered as ~*embedding-backend*~ for provider +pluggability. + +;; REPL-VERIFIED: 2026-05-03T14:00:00 +#+begin_src lisp +(defun embeddings-compute (text &key (dimensions 384)) + "Compute embedding vector for TEXT. +Tries *embedding-backend* first, falls back to hashing trick." + (when *embedding-backend* + (handler-case + (let ((result (funcall *embedding-backend* text))) + (when (and result (listp result) (> (length result) 0)) + (return-from embeddings-compute result))) + (error (c) + (log-message "EMBEDDING: Backend failed (~a), fallback to hashing" c)))) + (embeddings-hash-vector text :dimensions dimensions)) +#+end_src + +** Ollama Backend + +Calls ~POST /api/embeddings~ on a local Ollama instance. Configurable via +~OLLAMA_URL~ (default http://localhost:11434) and ~EMBEDDING_MODEL~ (default +nomic-embed-text). + +;; REPL-VERIFIED: 2026-05-03T14:00:00 +#+begin_src lisp +(defun embedding-backend-ollama (text) + "Generate embeddings via Ollama /api/embeddings." + (let* ((url (or (uiop:getenv "OLLAMA_URL") "http://localhost:11434")) + (model (or (uiop:getenv "EMBEDDING_MODEL") "nomic-embed-text")) + (response (dex:post (format nil "~a/api/embeddings" url) + :content (json:encode-json-to-string + `((:model . ,model) (:prompt . ,text))) + :headers '(("Content-Type" . "application/json"))))) + (when response + (let* ((json (json:decode-json-from-string response)) + (embedding (cdr (assoc :embedding json)))) + (when embedding + (coerce embedding 'list)))))) +#+end_src + +** Queue Object + +~embed-queue-object~ adds an object to the pending queue if it lacks a +vector. Call from the perceive gate after ~ingest-ast~. + +;; REPL-VERIFIED: 2026-05-03T14:00:00 +#+begin_src lisp +(defun embed-queue-object (obj) + "Queue OBJ for embedding if it lacks a vector." + (when (and obj (not (memory-object-vector obj))) + (pushnew (memory-object-id obj) *embedding-queue* :test 'string=))) +#+end_src + +** Embed Single Object + +Generates and stores a vector for a single memory object. + +;; REPL-VERIFIED: 2026-05-03T14:00:00 +#+begin_src lisp +(defun embed-object (obj) + "Generate and store embedding vector for OBJ." + (let* ((attrs (memory-object-attributes obj)) + (title (or (getf attrs :TITLE) "")) + (text (or (memory-object-content obj) "")) + (raw-tags (getf attrs :TAGS)) + (tag-list (if (listp raw-tags) raw-tags nil)) + (tags (if tag-list (format nil "~{~a~^ ~}" tag-list) "")) + (combined (format nil "~a ~a ~a" title text tags)) + (vec (embeddings-compute combined))) + (setf (memory-object-vector obj) vec) + (log-message "EMBEDDING: Vector for ~a (~d dims)" (memory-object-id obj) (length vec)) + vec)) +#+end_src + +** Batch Process + +Drains the queue and (if queue was empty) scans the store for remaining +objects without vectors. Returns count of newly embedded objects. + +;; REPL-VERIFIED: 2026-05-03T14:00:00 +#+begin_src lisp +(defun embed-all-pending () + "Process all pending embeddings. Returns count." + (let ((count 0)) + ;; Drain queue + (let ((pending *embedding-queue*)) + (setf *embedding-queue* nil) + (dolist (id pending) + (let ((obj (gethash id *memory-store*))) + (when (and obj (not (memory-object-vector obj))) + (handler-case + (progn (embed-object obj) (incf count)) + (error (c) + (log-message "EMBEDDING: Failed ~a: ~a" id c))))))) + ;; Fallback: scan store for objects without vectors + (when (= count 0) + (maphash (lambda (id obj) + (declare (ignore id)) + (unless (memory-object-vector obj) + (handler-case + (progn (embed-object obj) (incf count)) + (error (c) + (log-message "EMBEDDING: Failed ~a: ~a" + (memory-object-id obj) c))))) + *memory-store*)) + (when (> count 0) + (log-message "EMBEDDING: Batch processed ~d objects" count)) + count)) +#+end_src + +** Initialization + +Reads ~EMBEDDING_PROVIDER~ env var and configures the backend. + +;; REPL-VERIFIED: 2026-05-03T14:00:00 +#+begin_src lisp +(defun embeddings-init (&key (provider *embedding-provider*)) + "Init embedding provider from EMBEDDING_PROVIDER env var." + (let* ((env (uiop:getenv "EMBEDDING_PROVIDER")) + (selected (or (and env (intern (string-upcase env) :keyword)) + provider))) + (setf *embedding-provider* selected) + (setf *embedding-backend* + (case selected + (:ollama #'embedding-backend-ollama) + (t nil))) + (log-message "EMBEDDING: Provider ~a, backend=~a" selected *embedding-backend*) + selected)) + +(embeddings-init) +#+end_src diff --git a/org/system-embeddings.org b/org/system-embeddings.org deleted file mode 100644 index 27727c7..0000000 --- a/org/system-embeddings.org +++ /dev/null @@ -1,162 +0,0 @@ -#+TITLE: SKILL: Embeddings (org-skill-embeddings.org) -#+AUTHOR: Agent -#+FILETAGS: :system:memory:embeddings: -#+PROPERTY: header-args:lisp :tangle ../lisp/system-embeddings.lisp - -* Overview - -Generates dense vector embeddings for =memory-object= entries, enabling -semantic similarity search via the foveal-peripheral context model. Uses -the hashing trick as a provider-agnostic default — no Ollama, no API keys, -no external dependencies. When an embedding provider backend becomes -available, it is preferred over the hashing fallback. - -The core pipeline (core-context) knows how to USE vectors (=cosine-similarity=, -foveal-peripheral rendering) but does not know how to GENERATE them. That -lives here, in a skill. Thin harness, fat skills. - -* Implementation - -** Embedding Configuration - -;; REPL-VERIFIED: 2026-05-03T13:00:00 -#+begin_src lisp -(defvar *embedding-dimensions* 384 - "Dimension of the embedding vector. Default 384 matches nomic-embed-text.") - -#+end_src -** *embedding-backend* -;; REPL-VERIFIED: 2026-05-03T13:00:00 -#+begin_src lisp -(defvar *embedding-backend* nil - "Optional external embedding function (text-str) → float vector. -When nil, the hashing-trick fallback is used. Register a backend via: - (setf *embedding-backend* (lambda (text) ...)) -For Ollama: POST /api/embeddings with model nomic-embed-text. -For OpenAI: POST /v1/embeddings with model text-embedding-3-small.") -#+end_src -#+end_src - -** Hashing-Trick Embedding Engine - -The hashing trick produces dense fixed-size vectors from arbitrary text -without any training or external services. Each word token is hashed to -a bucket and contributes ±1 to that dimension. The resulting vector is -normalized to unit length for cosine-similarity compatibility. - -This is NOT a neural embedding — it won't capture semantic nuance the -way a transformer model would. But it provides a reasonable similarity -signal: documents sharing vocabulary will have correlated vectors, and -the locality-sensitive hashing preserves co-occurrence patterns. - -;; REPL-VERIFIED: 2026-05-03T13:00:00 -#+begin_src lisp -(defun embeddings-tokenize (text) - "Splits text into lowercase word tokens, stripping punctuation and -discarding tokens shorter than 2 characters." - (let ((clean (cl-ppcre:regex-replace-all "[^a-zA-Z0-9 ]" - (string-downcase (or text "")) " "))) - (remove-if (lambda (w) (< (length w) 2)) - (uiop:split-string clean :separator '(#\Space #\Tab #\Newline))))) - -#+end_src -** embeddings-hash-word -;; REPL-VERIFIED: 2026-05-03T13:00:00 -#+begin_src lisp -(defun embeddings-hash-word (word dim) - "Hashes a word to a bucket index in [0, dim). Uses FNV-1a style hashing -for good distribution with minimal collisions." - (let ((hash 2166136261)) - (loop for c across word - do (setf hash (logxor hash (char-code c))) - (setf hash (mod (* hash 16777619) #x100000000))) - (mod hash dim))) - -#+end_src -** embeddings-compute -;; REPL-VERIFIED: 2026-05-03T13:00:00 -#+begin_src lisp -(defun embeddings-compute (text &key (dimensions *embedding-dimensions*)) - "Computes a dense embedding vector for TEXT. -Tries the registered backend first, falls back to hashing-trick. -Returns a list of DIMENSIONS double-floats normalized to unit length." - ;; Try registered backend first - (when *embedding-backend* - (handler-case - (let ((result (funcall *embedding-backend* text))) - (when (and result (listp result) (> (length result) 0)) - (return-from embeddings-compute result))) - (error (c) - (log-message "EMBEDDING: Backend failed (~a), falling back to hashing" c)))) - ;; Hashing-trick fallback - (let* ((tokens (embeddings-tokenize text)) - (vec (make-array dimensions :initial-element 0.0d0 :element-type 'double-float))) - (dolist (token tokens) - (let* ((idx (embeddings-hash-word token dimensions)) - (sign (if (evenp (char-code (char token 0))) 1 -1))) - (incf (aref vec idx) (coerce sign 'double-float)))) - ;; Normalize to unit length - (let ((norm (sqrt (loop for i below dimensions sum (expt (aref vec i) 2))))) - (if (> norm 0.0d0) - (loop for i below dimensions collect (/ (aref vec i) norm)) - (loop for i below dimensions collect 0.0d0))))) -#+end_src -#+end_src - -** Memory Object Embedding - -;; REPL-VERIFIED: 2026-05-03T13:00:00 -#+begin_src lisp -(defun embed-object (obj) - "Generates and stores an embedding vector for a memory-object. -Combines title, content, and tags into a single text for embedding. -Stores the result in the memory-object's :vector slot." - (let* ((attrs (memory-object-attributes obj)) - (title (or (getf attrs :TITLE) "")) - (text (or (memory-object-content obj) "")) - (tags (format nil "~{~a~^ ~}" (or (getf attrs :TAGS) ""))) - (combined (format nil "~a ~a ~a" title text tags)) - (vec (embeddings-compute combined))) - (setf (memory-object-vector obj) vec) - (log-message "EMBEDDING: Vector for ~a (~d dims, ~d tokens)" - (memory-object-id obj) (length vec) - (length (embeddings-tokenize combined))) - vec)) - -#+end_src -** embed-all-pending -;; REPL-VERIFIED: 2026-05-03T13:00:00 -#+begin_src lisp -(defun embed-all-pending () - "Generates embeddings for all memory objects that lack vectors. -Called by the heartbeat or on demand. Returns count of objects processed." - (let ((count 0)) - (maphash (lambda (id obj) - (declare (ignore id)) - (unless (memory-object-vector obj) - (handler-case - (progn (embed-object obj) (incf count)) - (error (c) - (log-message "EMBEDDING: Failed for ~a: ~a" (memory-object-id obj) c))))) - *memory-store*) - (when (> count 0) - (log-message "EMBEDDING: Batch processed ~d objects" count)) - count)) -#+end_src -#+end_src - -** Skill Registration - -Runs as a background skill triggered by heartbeat events, processing -pending embeddings in batches. Low priority (50) so it defers to -critical skills. - -#+begin_src lisp -(defskill :passepartout-system-embeddings - :priority 50 - :trigger (lambda (ctx) (eq (getf (getf ctx :payload) :sensor) :heartbeat)) - :deterministic (lambda (action ctx) - (declare (ignore action ctx)) - (ignore-errors (embed-all-pending)) - nil)) -#+end_src \ No newline at end of file diff --git a/org/system-model-router.org b/org/system-model-router.org index 26ae2c9..9687b70 100644 --- a/org/system-model-router.org +++ b/org/system-model-router.org @@ -25,6 +25,12 @@ at load time. The core iterates providers, calling the selector for each one. * Implementation +** Package Context + +#+begin_src lisp +(in-package :passepartout) +#+end_src + ** Configuration: Per-Slot Cascades Four env-configurable cascade variables, one per slot. Each cascade is a list