#+TITLE: SKILL: Embedding Gateway (org-skill-embedding-gateway.org)
#+AUTHOR: Agent
#+FILETAGS: :skill:system:embedding:
#+PROPERTY: header-args:lisp :tangle ../lisp/system-model-embedding.lisp

* Architectural Intent

~system-model-embedding~ converts text into vector representations for
semantic search and memory retrieval. It provides three backends:

- ~:local~ — a local ~/api/embeddings~ endpoint (Ollama, vLLM, etc.); both
  the Ollama-native and the OpenAI-compatible request/response shapes are
  handled
- ~:openai~ — the OpenAI ~/v1/embeddings~ API with an API key
- ~:hashing~ — a zero-dependency fallback that produces deterministic vectors
  from SHA-256 hashes. No server, no config, works offline.

The embedding queue (~embed-queue-object~ / ~embed-all-pending~) decouples
document indexing from the main loop. On each heartbeat tick,
~embed-all-pending~ drains the queue and embeds all accumulated objects. This
prevents indexing traffic from blocking conversational responses.
~embed-all-pending~ returns the embeddings it produced so the caller can
store them.

The default provider is ~:hashing~ — useful for bootstrapping with zero
configuration and for deployments where embedding quality isn't critical.
Switch to ~:local~ or ~:openai~ when you have an embedding server available.

This replaces the old ~system-embedding-gateway~ with the same logic but
renamed to ~system-model-embedding~ to live alongside the other
~system-model-*~ skills.

* Implementation

** State

#+begin_src lisp
(in-package :passepartout)

(defvar *embedding-provider* :hashing
  "Active embedding provider: :hashing, :local, :openai.")

(defvar *embedding-queue* nil
  "Queue of text objects awaiting embedding.
Newest entries are at the front; EMBED-ALL-PENDING reverses it before use.")

(defvar *embedding-batch-size* 10
  "Maximum texts per embedding API call.
NOTE(review): nothing in this file reads this variable yet; the backends
embed one text per request.")
#+end_src

** Response parsing

#+begin_src lisp
(defun %embedding-from-response (json)
  "Extract the embedding from JSON, a response decoded by cl-json into an alist.
Three response shapes are recognised, tried in this order:
  - data[0].embedding   (OpenAI-style)
  - embedding           (Ollama /api/embeddings)
  - embeddings[0]       (Ollama /api/embed)
Returns the embedding, or NIL when no shape matches or the embedding is
empty. May signal an error on a malformed structure; callers invoke it
inside their HANDLER-CASE."
  (when (consp json)
    (or (let ((first-item (car (cdr (assoc :data json)))))
          (and (consp first-item)
               (cdr (assoc :embedding first-item))))
        (cdr (assoc :embedding json))
        (car (cdr (assoc :embeddings json))))))
#+end_src

** Local backend (OpenAI-compatible)

#+begin_src lisp
(defun embedding-backend-local (text)
  "Generate embeddings via a local /api/embeddings endpoint.

The base URL is LOCAL_BASE_URL, else OLLAMA_HOST (default localhost:11434);
the model is EMBEDDING_MODEL (default nomic-embed-text). Empty environment
variables are treated as unset.

Returns the embedding as decoded by cl-json, or (:error MESSAGE) when the
request fails or the response holds no embedding."
  (let* ((host (or (uiop:getenvp "OLLAMA_HOST") "localhost:11434"))
         (url (string-right-trim
               "/"
               (or (uiop:getenvp "LOCAL_BASE_URL")
                   ;; OLLAMA_HOST may already carry a scheme; only add
                   ;; one when it is missing.
                   (if (search "://" host)
                       host
                       (format nil "http://~a" host)))))
         (model (or (uiop:getenvp "EMBEDDING_MODEL") "nomic-embed-text"))
         ;; OpenAI-compatible servers read `input'; Ollama's
         ;; /api/embeddings reads `prompt'. Send both so either works.
         (body (cl-json:encode-json-to-string
                `((model . ,model)
                  (input . ,text)
                  (prompt . ,text)))))
    (handler-case
        (let* ((response (dex:post (format nil "~a/api/embeddings" url)
                                   :headers '(("Content-Type" . "application/json"))
                                   :content body
                                   :connect-timeout 5
                                   :read-timeout 30))
               (json (cl-json:decode-json-from-string response)))
          (or (%embedding-from-response json)
              (list :error "No embedding in response")))
      (error (c)
        (list :error (format nil "Embedding failed: ~a" c))))))
#+end_src

** OpenAI backend

#+begin_src lisp
(defun embedding-backend-openai (text)
  "Generate embeddings via OpenAI compatible /v1/embeddings endpoint.

The base URL is EMBEDDING_BASE_URL (default https://api.openai.com/v1); the
model is EMBEDDING_MODEL (default text-embedding-3-small). The Authorization
header is sent only when OPENAI_API_KEY is set and non-empty, so keyless
compatible servers still work and no literal \"Bearer NIL\" is transmitted.

Returns the embedding as decoded by cl-json, or (:error MESSAGE) when the
request fails or the response holds no embedding."
  (let* ((api-key (uiop:getenvp "OPENAI_API_KEY"))
         (base-url (string-right-trim
                    "/"
                    (or (uiop:getenvp "EMBEDDING_BASE_URL")
                        "https://api.openai.com/v1")))
         (model (or (uiop:getenvp "EMBEDDING_MODEL") "text-embedding-3-small"))
         (headers (append
                   '(("Content-Type" . "application/json"))
                   (when api-key
                     `(("Authorization" . ,(format nil "Bearer ~a" api-key))))))
         (body (cl-json:encode-json-to-string
                `((model . ,model) (input . ,text)))))
    (handler-case
        (let* ((response (dex:post (format nil "~a/embeddings" base-url)
                                   :headers headers
                                   :content body
                                   :connect-timeout 5
                                   :read-timeout 30))
               (json (cl-json:decode-json-from-string response)))
          (or (%embedding-from-response json)
              (list :error "No embedding in response")))
      (error (c)
        (list :error (format nil "OpenAI Embedding failed: ~a" c))))))
#+end_src

** Hashing fallback

#+begin_src lisp
(defun embedding-backend-hashing (text)
  "Fallback: produces a deterministic vector from the text hash.

Returns an 8-element SINGLE-FLOAT vector whose entries are the first eight
bytes of the SHA-256 digest of TEXT, each divided by 255.0."
  (let* ((octets (babel:string-to-octets text))
         (digest (ironclad:digest-sequence :sha256 octets))
         (vec (make-array 8 :element-type 'single-float :initial-element 0.0)))
    (loop for i from 0 below (min (length digest) 8)
          do (setf (aref vec i)
                   (float (/ (aref digest i) 255.0) 0.0)))
    vec))
#+end_src

** Object embedding and queuing

#+begin_src lisp
(defun embed-object (text)
  "Embed a single text string using the active backend.

Dispatches on *EMBEDDING-PROVIDER* (NIL counts as :hashing). An unrecognised
provider is logged and served by the hashing backend. Returns whatever the
chosen backend returns."
  (let* ((selected (or *embedding-provider* :hashing))
         ;; No default clause: an unknown provider yields NIL here so the
         ;; fallback branch below is reachable and gets logged.
         (backend (case selected
                    (:local #'embedding-backend-local)
                    (:openai #'embedding-backend-openai)
                    (:hashing #'embedding-backend-hashing))))
    (cond (backend
           (log-message "EMBEDDING: Provider ~a, backend=~a" selected backend)
           (funcall backend text))
          (t
           (log-message "EMBEDDING: No backend for provider ~a, using hashing"
                        selected)
           (embedding-backend-hashing text)))))

(defun embed-queue-object (object)
  "Queue a text object for async embedding."
  (push object *embedding-queue*)
  (log-message "EMBEDDING: Queued object"))

(defun embed-all-pending ()
  "Drain the embedding queue and embed every queued object, oldest first.

Non-string objects are rendered with ~a before embedding. Failures, whether
signalled as conditions or returned by a backend as (:error MESSAGE), are
logged and skipped. Returns a list of (OBJECT . EMBEDDING) pairs for the
objects that were embedded successfully, in queue order."
  ;; Detach the queue before reversing it, so the destructive NREVERSE
  ;; never operates on a list the global still points at.
  (let ((batch (nreverse (shiftf *embedding-queue* nil)))
        (results '()))
    (dolist (item batch (nreverse results))
      (handler-case
          (let* ((text (if (stringp item) item (format nil "~a" item)))
                 (embedding (embed-object text)))
            ;; Backends report failure as a (:error MESSAGE) list, which
            ;; HANDLER-CASE cannot see; check for it explicitly.
            (if (and (consp embedding) (eq (first embedding) :error))
                (log-message "EMBEDDING: Failed to embed object: ~a"
                             (second embedding))
                (push (cons item embedding) results)))
        (error (c)
          (log-message "EMBEDDING: Failed to embed object: ~a" c))))))

(log-message "EMBEDDING: Gateway loaded with provider ~a" *embedding-provider*)
#+end_src