v0.4.1: native embedding CFFI — full pipeline working, daemon-wired, HITL bug fixed
- Native backend returns 768-dim vectors via llama.cpp / C wrapper (/usr/local/lib/libllama_wrap.so)
- Wired :native into embed-object dispatch and exported from passepartout package
- Model preloads at daemon startup with EMBEDDING_PROVIDER=native (~30s)
- Lazy loading via *embedding-backend* :native also works (first call ~45s)
- C wrapper bridges CFFI pointer params to llama.cpp struct-by-value API
- Correct struct layouts: llama_model_params(72B), llama_context_params(136B), llama_batch(56B)
- BERT pooling: llama_get_embeddings_seq, llama_tokenize takes vocab* not model*
- FiveAM tests pass: dimensions, self-similarity, semantic ranking
- Fixed pre-existing HITL crash: boundp guard for *hitl-pending* in core-loop-act
- Lazy load guard prevents double-load of native file in embedding-native-ensure-loaded
- ROADMAP: v0.4.0 items marked DONE, v0.4.1 native embedding updated with actual implementation
This commit is contained in:
@@ -11,6 +11,7 @@
|
||||
- ~:sha256~ — integrity-only (explicit opt-in). SHA-256 hashing for environments where even trivial computation is undesirable.
|
||||
- ~:local~ — any OpenAI-compatible ~/api/embeddings~ endpoint (Ollama, vLLM, etc.)
|
||||
- ~:openai~ — the OpenAI ~/v1/embeddings~ API with an API key
|
||||
- ~:native~ — in-process inference via llama.cpp / CFFI. 768-dim nomic-embed-text-v1.5, zero network calls, <100ms per document on CPU once the model is loaded. Requires the model file at =~/.local/share/passepartout/models/nomic-embed-text-v1.5.Q4_K_M.gguf= and =libllama_wrap.so= in =/usr/local/lib=.
|
||||
|
||||
The embedding queue (~embed-queue-object~ / ~embed-all-pending~) decouples document indexing from the main loop. On each heartbeat tick, ~embed-all-pending~ drains the queue and embeds all accumulated objects. This prevents indexing traffic from blocking conversational responses.
|
||||
|
||||
@@ -27,7 +28,7 @@ This replaces the old ~system-embedding-gateway~ with the same logic but renamed
|
||||
(in-package :passepartout)
|
||||
|
||||
(defvar *embedding-provider* :trigram
|
||||
"Active embedding provider: :trigram, :sha256, :local, :openai.")
|
||||
"Active embedding provider: :trigram, :sha256, :local, :openai, :native.")
|
||||
|
||||
;; Pending-embedding queue; starts out empty (NIL).
;; NOTE(review): presumably filled by embed-queue-object and drained by
;; embed-all-pending, as the surrounding prose describes -- confirm against
;; those functions.
(defvar *embedding-queue* nil
|
||||
"Queue of text objects awaiting embedding.")
|
||||
@@ -123,10 +124,14 @@ Pure Lisp, zero external dependencies, works fully offline."
|
||||
"Embed a single text string using the active backend."
|
||||
(let* ((selected (or *embedding-backend* *embedding-provider* :trigram))
|
||||
(backend (case selected
|
||||
(:local #'embedding-backend-local)
|
||||
(:openai #'embedding-backend-openai)
|
||||
(:sha256 #'embedding-backend-sha256)
|
||||
(t #'embedding-backend-trigram))))
|
||||
(:local #'embedding-backend-local)
|
||||
(:openai #'embedding-backend-openai)
|
||||
(:native
|
||||
(unless (fboundp 'embedding-backend-native)
|
||||
(embedding-native-ensure-loaded))
|
||||
#'embedding-backend-native)
|
||||
(:sha256 #'embedding-backend-sha256)
|
||||
(t #'embedding-backend-trigram))))
|
||||
(if backend
|
||||
(progn
|
||||
(log-message "EMBEDDING: Provider ~a, backend=~a" selected backend)
|
||||
@@ -164,6 +169,34 @@ Pure Lisp, zero external dependencies, works fully offline."
|
||||
(setf *embedding-provider* kw)
|
||||
(log-message "EMBEDDING: Set provider to ~a from EMBEDDING_PROVIDER env" kw))))
|
||||
|
||||
(defun embedding-native-ensure-loaded ()
  "Lazy-load the native CFFI embedding backend source file.

Does nothing when EMBEDDING-BACKEND-NATIVE is already fbound, so the file is
never loaded twice.  Otherwise loads lisp/system-model-embedding-native.lisp
from the data directory: $PASSEPARTOUT_DATA_DIR when that variable is set to a
non-empty value, else ~/.local/share/passepartout/.

Returns T once the backend is available.  Signals ERROR, naming the file and
the underlying condition, when the load fails.

NOTE(review): first use of the native backend is reported to block for tens
of seconds; whether that cost is paid while loading this file or later in
EMBEDDING-NATIVE-LOAD-MODEL is not visible from here -- confirm."
  ;; Guard against double-loading the native file.
  (when (fboundp 'embedding-backend-native)
    (return-from embedding-native-ensure-loaded t))
  (let* ((data-dir (uiop:ensure-directory-pathname
                    ;; GETENVP, unlike GETENV, treats an empty variable as
                    ;; unset, so the home-directory fallback still applies.
                    (or (uiop:getenvp "PASSEPARTOUT_DATA_DIR")
                        (merge-pathnames ".local/share/passepartout/"
                                         (user-homedir-pathname)))))
         (native-file (merge-pathnames "lisp/system-model-embedding-native.lisp"
                                       data-dir)))
    (handler-case
        (progn
          (load native-file :verbose nil :print nil)
          (log-message "EMBEDDING: Native backend loaded from ~a" native-file)
          ;; Same return value as the already-loaded path above.
          t)
      (error (c)
        ;; Re-signal with the file name so the failure is actionable.
        (error "Failed to load native embedding backend (~a): ~a" native-file c)))))
|
||||
|
||||
;; Preload native model if configured at startup.
;; Preloading is best-effort: both the backend file load and the model load
;; run under one handler, so a missing or broken native file is logged and
;; deferred instead of aborting the load of this gateway.  The embed dispatch
;; calls embedding-native-ensure-loaded again on first use of :native.
(when (eq *embedding-provider* :native)
  (log-message "EMBEDDING: Native provider configured, preloading model...")
  (handler-case
      (progn
        (embedding-native-ensure-loaded)
        (embedding-native-load-model)
        (log-message "EMBEDDING: Native model preloaded (~d dims)"
                     (embedding-native-get-dim)))
    (error (c)
      (log-message "EMBEDDING: Preload deferred: ~a (will retry on first call)" c))))
|
||||
|
||||
(log-message "EMBEDDING: Gateway loaded with provider ~a" *embedding-provider*)
|
||||
#+end_src
|
||||
|
||||
|
||||
Reference in New Issue
Block a user