#+TITLE: SKILL: Embedding Gateway (org-skill-embedding-gateway.org) #+AUTHOR: Agent #+FILETAGS: :skill:system:embedding: #+PROPERTY: header-args:lisp :tangle ../lisp/system-model-embedding.lisp * Architectural Intent ~system-model-embedding~ converts text into vector representations for semantic search and memory retrieval. It provides three backends: - ~:trigram~ — a zero-dependency fallback that uses character-trigram Jaccard similarity. Pure Lisp, works fully offline, captures lexical overlap. - ~:sha256~ — integrity-only (explicit opt-in). SHA-256 hashing for environments where even trivial computation is undesirable. - ~:local~ — any OpenAI-compatible ~/api/embeddings~ endpoint (Ollama, vLLM, etc.) - ~:openai~ — the OpenAI ~/v1/embeddings~ API with an API key - ~:native~ — in-process inference via llama.cpp / CFFI. 768-dim nomic-embed-text-v1.5, zero network calls, <100ms per document on CPU. Requires model file at ~/.local/share/passepartout/models/nomic-embed-text-v1.5.Q4_K_M.gguf and libllama_wrap.so at /usr/local/lib. The embedding queue (~embed-queue-object~ / ~embed-all-pending~) decouples document indexing from the main loop. On each heartbeat tick, ~embed-all-pending~ drains the queue and embeds all accumulated objects. This prevents indexing traffic from blocking conversational responses. The default provider is ~:trigram~ — it captures lexical overlap (character trigram bloom filter → cosine similarity approximates Jaccard) and works immediately with zero configuration. Switch to ~:local~ or ~:openai~ when you have an embedding server; switch to ~:sha256~ for integrity-only deployments. **Why not SHA-256 by default?** SHA-256 is a cryptographic hash with the avalanche property — one-bit input differences produce entirely different outputs. "implement user login form" and "implement user login forn" (one character difference) have completely different SHA-256 values → cosine similarity near zero. This makes SHA-256 correct for integrity verification (Merkle tree) but useless for similarity-based retrieval. The trigram Jaccard approach captures lexical overlap: "authentication" and "authenticate" share trigrams "aut", "uth", "the", "hen", "ent", "nti", "tic", "ica", producing high cosine similarity (0.80). "authentication" and "banana" share zero trigrams → 0.0 similarity. This replaces the old ~system-embedding-gateway~ with the same logic but renamed to ~system-model-embedding~ to live alongside the other ~system-model-*~ skills. * Implementation ** State #+begin_src lisp (in-package :passepartout) (defvar *embedding-provider* :trigram "Active embedding provider: :trigram, :sha256, :local, :openai, :native.") (defvar *embedding-queue* nil "Queue of text objects awaiting embedding.") (defvar *embedding-batch-size* 10 "Maximum texts per embedding API call.") #+end_src ** Local backend (OpenAI-compatible) #+begin_src lisp (defun embedding-backend-local (text) "Generate embeddings via a local OpenAI-compatible endpoint." (let* ((url (or (uiop:getenv "LOCAL_BASE_URL") (format nil "http://~a" (or (uiop:getenv "OLLAMA_HOST") "localhost:11434")))) (model (or (uiop:getenv "EMBEDDING_MODEL") "nomic-embed-text")) (body (cl-json:encode-json-to-string `((model . ,model) (input . ,text))))) (handler-case (let* ((response (dex:post (format nil "~a/api/embeddings" url) :headers '(("Content-Type" . "application/json")) :content body :connect-timeout 5 :read-timeout 30)) (json (cl-json:decode-json-from-string response)) (data (car (cdr (assoc :data json))))) (or (cdr (assoc :embedding data)) (list :error "No embedding in response"))) (error (c) (list :error (format nil "Embedding failed: ~a" c)))))) #+end_src ** OpenAI backend #+begin_src lisp (defun embedding-backend-openai (text) "Generate embeddings via OpenAI compatible /v1/embeddings endpoint." (let* ((api-key (uiop:getenv "OPENAI_API_KEY")) (base-url (or (uiop:getenv "EMBEDDING_BASE_URL") "https://api.openai.com/v1")) (model (or (uiop:getenv "EMBEDDING_MODEL") "text-embedding-3-small")) (body (cl-json:encode-json-to-string `((model . ,model) (input . ,text))))) (handler-case (let* ((response (dex:post (format nil "~a/embeddings" base-url) :headers `(("Content-Type" . "application/json") ("Authorization" . ,(format nil "Bearer ~a" api-key))) :content body :connect-timeout 5 :read-timeout 30)) (json (cl-json:decode-json-from-string response)) (data (car (cdr (assoc :data json))))) (or (cdr (assoc :embedding data)) (list :error "No embedding in response"))) (error (c) (list :error (format nil "OpenAI Embedding failed: ~a" c)))))) #+end_src ** Trigram backend (v0.4.0) #+begin_src lisp (defun embedding-backend-sha256 (text) "SHA-256 based vector — integrity only, no semantic retrieval capability. For environments where even trivial computation is undesirable." (let* ((digest (ironclad:digest-sequence :sha256 (babel:string-to-octets text))) (vec (make-array 8 :element-type 'single-float :initial-element 0.0))) (dotimes (i (min (length digest) 8)) (setf (aref vec i) (float (/ (aref digest i) 255.0) 0.0))) vec)) (defun embedding-backend-hashing (text) "Backward-compatibility alias for SHA-256 hashing." (embedding-backend-sha256 text)) (defun embedding-backend-trigram (text) "Trigram bloom filter — captures lexical overlap for semantic retrieval. Returns a 128-dim float vector where each position corresponds to a trigram hash. Pure Lisp, zero external dependencies, works fully offline." (let* ((s (string-trim '(#\Space #\Newline #\Tab) (string-downcase text))) (trigrams (make-hash-table :test 'equal)) (result (make-array 128 :element-type 'single-float :initial-element 0.0))) (when (>= (length s) 3) (loop for i from 0 to (- (length s) 3) for tri = (subseq s i (+ i 3)) do (setf (gethash tri trigrams) t))) (maphash (lambda (tri _) (declare (ignore _)) (setf (aref result (mod (sxhash tri) 128)) 1.0)) trigrams) result)) #+end_src ** Object embedding and queuing #+begin_src lisp (defvar *embedding-backend* nil "Explicit backend override (nil = use *embedding-provider*).") (defun embeddings-compute (text) "Compute an embedding vector for text using the active backend." (embed-object text)) (defun embed-object (text) "Embed a single text string using the active backend." (let* ((selected (or *embedding-backend* *embedding-provider* :trigram)) (backend (case selected (:local #'embedding-backend-local) (:openai #'embedding-backend-openai) (:native (unless (fboundp 'embedding-backend-native) (embedding-native-ensure-loaded)) #'embedding-backend-native) (:sha256 #'embedding-backend-sha256) (t #'embedding-backend-trigram)))) (if backend (progn (log-message "EMBEDDING: Provider ~a, backend=~a" selected backend) (funcall backend text)) (progn (log-message "EMBEDDING: No backend for provider ~a, using hashing" selected) (embedding-backend-hashing text))))) (defun embed-queue-object (object) "Queue a text object for async embedding." (push object *embedding-queue*) (log-message "EMBEDDING: Queued object")) (defun embed-all-pending () "Drain the embedding queue, store vectors in the store-keyed objects." (let ((batch (nreverse *embedding-queue*))) (setf *embedding-queue* nil) (dolist (item batch) (handler-case (let ((id (getf item :id)) (text (getf item :text))) (when (and id text) (let ((vec (embeddings-compute text)) (obj (gethash id *memory-store*))) (when (and obj vec (not (listp vec))) (setf (memory-object-vector obj) vec)) (log-message "EMBEDDING: Computed vector for ~a (~d dims)" id (length vec))))) (error (c) (log-message "EMBEDDING: Failed to embed object: ~a" c)))))) ;; Apply env var override at load time (let ((provider-env (uiop:getenv "EMBEDDING_PROVIDER"))) (when provider-env (let ((kw (intern (string-upcase provider-env) :keyword))) (setf *embedding-provider* kw) (log-message "EMBEDDING: Set provider to ~a from EMBEDDING_PROVIDER env" kw)))) (defun embedding-native-ensure-loaded () "Lazy-load the native CFFI backend. First call blocks ~30s for model init." (when (fboundp 'embedding-backend-native) (return-from embedding-native-ensure-loaded t)) (let* ((data-dir (uiop:ensure-directory-pathname (or (uiop:getenv "PASSEPARTOUT_DATA_DIR") (namestring (merge-pathnames ".local/share/passepartout/" (user-homedir-pathname)))))) (native-file (merge-pathnames "lisp/system-model-embedding-native.lisp" data-dir))) (handler-case (progn (load native-file :verbose nil :print nil) (log-message "EMBEDDING: Native backend loaded from ~a" native-file)) (error (c) (error "Failed to load native embedding backend (~a): ~a" native-file c))))) ;; Preload native model if configured at startup (when (eq *embedding-provider* :native) (log-message "EMBEDDING: Native provider configured, preloading model...") (embedding-native-ensure-loaded) (handler-case (progn (embedding-native-load-model) (log-message "EMBEDDING: Native model preloaded (~d dims)" (embedding-native-get-dim))) (error (c) (log-message "EMBEDDING: Preload deferred: ~a (will retry on first call)" c)))) (log-message "EMBEDDING: Gateway loaded with provider ~a" *embedding-provider*) #+end_src ** Stale vector marking #+begin_src lisp (defun mark-vector-stale (id &optional content) "Mark a memory object's vector as :pending and queue it for re-embedding. When content is not supplied, reads from the object in *memory-store*." (let* ((obj (gethash id *memory-store*)) (text (or content (and obj (memory-object-content obj))))) (when obj (setf (memory-object-vector obj) :pending)) (when text (push (list :id id :text text) *embedding-queue*) (log-message "EMBEDDING: Marked ~a vector stale, queued for re-embed" id)) (or obj text))) #+end_src ** Skill Registration and Cron #+begin_src lisp (defskill :passepartout-system-model-embedding :priority 70 :trigger (lambda (ctx) (declare (ignore ctx)) nil)) ;; Register periodic batch embedding via cron (when orchestrator available) (when (fboundp 'orchestrator-register-cron) (handler-case (orchestrator-register-cron :embed-batch "<2026-05-05 Tue +10m>" 'embed-all-pending :reflex) (error (c) (log-message "EMBEDDING: Cron registration failed: ~a" c)))) #+end_src * Contract 1. (embeddings-compute text): produces a vector (single-float array) for any text string using the active backend (~*embedding-backend*~ or ~*embedding-provider*~). 2. (embedding-backend-hashing text): zero-dependency fallback. Returns an 8-element single-float vector deterministically from SHA-256. 3. (embed-all-pending): drains ~*embedding-queue*~, computes vectors for all queued objects, and stores them in ~*memory-store*~ entries. 4. (mark-vector-stale id &optional content): sets ~:vector~ to ~:pending~ and pushes object to ~*embedding-queue*~ for background re-embedding. 5. Cron: ~embed-all-pending~ is registered with the orchestrator to run on ~:reflex~ tier every 10 minutes for background batch processing. * Test Suite #+begin_src lisp (eval-when (:compile-toplevel :load-toplevel :execute) (ql:quickload :fiveam :silent t)) (defpackage :passepartout-embedding-tests (:use :cl :passepartout) (:export #:embedding-suite)) (in-package :passepartout-embedding-tests) (fiveam:def-suite embedding-suite :description "Embedding gateway verification") (fiveam:in-suite embedding-suite) (fiveam:test test-embedding-backend-hashing "Contract 2: hashing backend produces 8-element float vector." (let ((vec (embedding-backend-hashing "hello world"))) (fiveam:is (arrayp vec)) (fiveam:is (= 8 (length vec))) (fiveam:is (every #'numberp (coerce vec 'list))))) (fiveam:test test-embedding-backend-hashing-deterministic "Contract 2: same input produces same vector." (let ((v1 (embedding-backend-hashing "test")) (v2 (embedding-backend-hashing "test"))) (fiveam:is (equalp v1 v2)))) (fiveam:test test-embeddings-compute "Contract 1: embeddings-compute returns a float vector." (let ((vec (embeddings-compute "some text"))) (fiveam:is (arrayp vec)) (fiveam:is (> (length vec) 0)))) (fiveam:test test-embed-queue-and-drain "Contract 3: embed-all-pending drains queue and stores vectors." (let ((*embedding-queue* nil)) (embed-queue-object '(:id "test-obj" :text "sample text")) (fiveam:is (= 1 (length *embedding-queue*))) (embed-all-pending) (fiveam:is (null *embedding-queue*)))) (fiveam:test test-mark-vector-stale "Contract 4: mark-vector-stale sets vector to :pending and queues for re-embed." (let ((*embedding-queue* nil)) ;; Create an object in memory with a vector (let ((obj (make-memory-object :id "stale-test" :content "stale content" :vector #(1.0 2.0 3.0)))) (setf (gethash "stale-test" *memory-store*) obj) (mark-vector-stale "stale-test") (fiveam:is (eq :pending (memory-object-vector obj))) (fiveam:is (= 1 (length *embedding-queue*))) (let ((item (first *embedding-queue*))) (fiveam:is (string= "stale-test" (getf item :id))) (fiveam:is (string= "stale content" (getf item :text)))) ;; Clean up (remhash "stale-test" *memory-store*)))) #+end_src