(in-package :passepartout) (defvar *embedding-queue* nil "List of object IDs pending embedding.") (defvar *embedding-provider* :hashing "Active embedding provider: :hashing, :ollama, :openai.") (defun embeddings-tokenize (text) "Split TEXT into lowercase word tokens, strip punctuation, discard short." (let ((clean (cl-ppcre:regex-replace-all "[^a-zA-Z0-9 ]" (string-downcase (or text "")) " "))) (remove-if (lambda (w) (< (length w) 2)) (uiop:split-string clean :separator '(#\Space #\Tab #\Newline))))) (defun embeddings-hash-word (word dim) "Hash WORD to an index in [0, DIM)." (let ((hash 2166136261)) (loop for c across word do (setf hash (logxor hash (char-code c))) (setf hash (mod (* hash 16777619) #x100000000))) (mod hash dim))) (defun embeddings-hash-vector (text &key (dimensions 384)) "Compute a hashing-trick vector for TEXT." (let* ((tokens (embeddings-tokenize text)) (vec (make-array dimensions :initial-element 0.0d0 :element-type 'double-float))) (dolist (token tokens) (let* ((idx (embeddings-hash-word token dimensions)) (sign (if (evenp (char-code (char token 0))) 1 -1))) (incf (aref vec idx) (coerce sign 'double-float)))) (let ((norm (sqrt (loop for i below dimensions sum (expt (aref vec i) 2))))) (if (> norm 0.0d0) (loop for i below dimensions collect (/ (aref vec i) norm)) (loop for i below dimensions collect 0.0d0))))) (defun embeddings-compute (text &key (dimensions 384)) "Compute embedding vector for TEXT. Tries *embedding-backend* first, falls back to hashing trick." (when *embedding-backend* (handler-case (let ((result (funcall *embedding-backend* text))) (when (and result (listp result) (> (length result) 0)) (return-from embeddings-compute result))) (error (c) (log-message "EMBEDDING: Backend failed (~a), fallback to hashing" c)))) (embeddings-hash-vector text :dimensions dimensions)) (defun embedding-backend-ollama (text) "Generate embeddings via Ollama /api/embeddings." (let* ((url (or (uiop:getenv "OLLAMA_URL") "http://localhost:11434")) (model (or (uiop:getenv "EMBEDDING_MODEL") "nomic-embed-text")) (response (dex:post (format nil "~a/api/embeddings" url) :content (json:encode-json-to-string `((:model . ,model) (:prompt . ,text))) :headers '(("Content-Type" . "application/json"))))) (when response (let* ((json (json:decode-json-from-string response)) (embedding (cdr (assoc :embedding json)))) (when embedding (coerce embedding 'list)))))) (defun embed-queue-object (obj) "Queue OBJ for embedding if it lacks a vector." (when (and obj (not (memory-object-vector obj))) (pushnew (memory-object-id obj) *embedding-queue* :test 'string=))) (defun embed-object (obj) "Generate and store embedding vector for OBJ." (let* ((attrs (memory-object-attributes obj)) (title (or (getf attrs :TITLE) "")) (text (or (memory-object-content obj) "")) (raw-tags (getf attrs :TAGS)) (tag-list (if (listp raw-tags) raw-tags nil)) (tags (if tag-list (format nil "~{~a~^ ~}" tag-list) "")) (combined (format nil "~a ~a ~a" title text tags)) (vec (embeddings-compute combined))) (setf (memory-object-vector obj) vec) (log-message "EMBEDDING: Vector for ~a (~d dims)" (memory-object-id obj) (length vec)) vec)) (defun embed-all-pending () "Process all pending embeddings. Returns count." (let ((count 0)) ;; Drain queue (let ((pending *embedding-queue*)) (setf *embedding-queue* nil) (dolist (id pending) (let ((obj (gethash id *memory-store*))) (when (and obj (not (memory-object-vector obj))) (handler-case (progn (embed-object obj) (incf count)) (error (c) (log-message "EMBEDDING: Failed ~a: ~a" id c))))))) ;; Fallback: scan store for objects without vectors (when (= count 0) (maphash (lambda (id obj) (declare (ignore id)) (unless (memory-object-vector obj) (handler-case (progn (embed-object obj) (incf count)) (error (c) (log-message "EMBEDDING: Failed ~a: ~a" (memory-object-id obj) c))))) *memory-store*)) (when (> count 0) (log-message "EMBEDDING: Batch processed ~d objects" count)) count)) (defun embeddings-init (&key (provider *embedding-provider*)) "Init embedding provider from EMBEDDING_PROVIDER env var." (let* ((env (uiop:getenv "EMBEDDING_PROVIDER")) (selected (or (and env (intern (string-upcase env) :keyword)) provider))) (setf *embedding-provider* selected) (setf *embedding-backend* (case selected (:ollama #'embedding-backend-ollama) (t nil))) (log-message "EMBEDDING: Provider ~a, backend=~a" selected *embedding-backend*) selected)) (embeddings-init)