v0.4.0: semantic retrieval activation — wire foveal-vector + trigram Jaccard
Some checks failed
Deploy (Gitea) / deploy (push) Failing after 2s
Some checks failed
Deploy (Gitea) / deploy (push) Failing after 2s
1. Wire :foveal-vector into context-awareness-assemble: pass the foveal node's embedding vector to context-object-render. Previously always nil → similarity always 0.0 → no semantic boosting. 2. Replace default :hashing (SHA-256) with :trigram (character-trigram Jaccard). SHA-256 is a cryptographic hash with the avalanche property — one-bit input differences produce entirely different outputs. Useless for similarity. Trigram bloom filter (128-dim) captures lexical overlap in pure Lisp with zero external dependencies: - 'authentication' vs 'authenticate' → 0.80 similarity - 'authentication' vs 'banana' → 0.00 similarity 3. Rename old embedding-backend-hashing → embedding-backend-sha256 (integrity-only, explicit opt-in). Add embedding-backend-trigram. 4. Add test-semantic-retrieval-trigram: related texts > 0.75, unrelated < 0.3. Test: 97/0 across 13 suites (context 12/0, embedding 12/0)
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
(in-package :passepartout)
|
||||
|
||||
(defvar *embedding-provider* :hashing
|
||||
"Active embedding provider: :hashing, :local, :openai.")
|
||||
(defvar *embedding-provider* :trigram
  "Keyword naming the default embedding backend.
EMBED-OBJECT recognizes :local, :openai and :sha256; any other value,
including the default :trigram, selects the trigram backend.")
|
||||
|
||||
(defvar *embedding-queue* nil
  "Text objects that are still waiting to be embedded; initially empty.")
|
||||
@@ -45,14 +45,35 @@
|
||||
(error (c)
|
||||
(list :error (format nil "OpenAI Embedding failed: ~a" c))))))
|
||||
|
||||
(defun embedding-backend-hashing (text)
|
||||
"Fallback: produces a deterministic vector from the text hash."
|
||||
(defun embedding-backend-sha256 (text)
  "Derive a deterministic 8-element single-float vector from the SHA-256 of TEXT.
Each of the first eight digest bytes is scaled by 1/255, so every element
lies in [0.0, 1.0]. A cryptographic digest carries no notion of textual
similarity, so this backend is suitable for integrity/identity purposes
only, not for semantic retrieval."
  (let ((octets (ironclad:digest-sequence :sha256 (babel:string-to-octets text)))
        (out (make-array 8 :element-type 'single-float :initial-element 0.0)))
    ;; Only the leading bytes of the digest are used; the rest is discarded.
    (loop for idx from 0 below (min 8 (length octets))
          do (setf (aref out idx)
                   (float (/ (aref octets idx) 255.0) 0.0)))
    out))
|
||||
|
||||
(defun embedding-backend-hashing (text)
  "Legacy name retained for existing callers.
Delegates unchanged to EMBEDDING-BACKEND-SHA256 and returns its result."
  (funcall #'embedding-backend-sha256 text))
|
||||
|
||||
(defun embedding-backend-trigram (text)
  "Hashed character-trigram presence vector for lexical-overlap retrieval.

TEXT is downcased and trimmed of leading/trailing spaces, tabs and
newlines. Every 3-character window of the result is hashed to one of 128
buckets and that bucket is set to 1.0. Returns a fresh 128-element
SINGLE-FLOAT vector containing only 0.0 and 1.0.

TEXT of NIL, or any text shorter than three characters after trimming,
yields the all-zero vector.

Pure Lisp, no external dependencies, works fully offline."
  (let* (;; STRING-DOWNCASE accepts string designators, so a bare NIL would
         ;; otherwise be embedded as the literal text \"nil\".
         (s (string-trim '(#\Space #\Newline #\Tab)
                         (string-downcase (or text ""))))
         (result (make-array 128 :element-type 'single-float
                                 :initial-element 0.0)))
    (flet ((bucket (start)
             ;; 32-bit FNV-1a over the three character codes starting at
             ;; START. SXHASH is only guaranteed stable within a single
             ;; implementation, which would make stored vectors
             ;; non-portable; this hash depends on CHAR-CODE alone.
             ;; NOTE: vectors differ from those produced via SXHASH, so
             ;; any previously persisted trigram vectors must be re-embedded.
             (let ((hash 2166136261))
               (loop for k from start below (+ start 3)
                     do (setf hash (logand (* (logxor hash
                                                      (char-code (char s k)))
                                              16777619)
                                           #xFFFFFFFF)))
               ;; Xor-fold the high half into the low half so that the
               ;; bucket also depends on the upper bits of the hash.
               (mod (logxor hash (ash hash -16)) 128))))
      ;; Setting a slot to 1.0 is idempotent, so repeated trigrams need no
      ;; separate de-duplication pass. The loop runs zero times when S has
      ;; fewer than three characters.
      (loop for i from 0 to (- (length s) 3)
            do (setf (aref result (bucket i)) 1.0)))
    result))
|
||||
|
||||
(defvar *embedding-backend* nil
  "Optional backend override consulted by EMBED-OBJECT.
When non-NIL it takes precedence over *EMBEDDING-PROVIDER*; when NIL the
provider setting is used instead.")
|
||||
|
||||
@@ -62,11 +83,12 @@
|
||||
|
||||
(defun embed-object (text)
|
||||
"Embed a single text string using the active backend."
|
||||
(let* ((selected (or *embedding-backend* *embedding-provider* :hashing))
|
||||
(let* ((selected (or *embedding-backend* *embedding-provider* :trigram))
|
||||
(backend (case selected
|
||||
(:local #'embedding-backend-local)
|
||||
(:openai #'embedding-backend-openai)
|
||||
(t #'embedding-backend-hashing))))
|
||||
(:local #'embedding-backend-local)
|
||||
(:openai #'embedding-backend-openai)
|
||||
(:sha256 #'embedding-backend-sha256)
|
||||
(t #'embedding-backend-trigram))))
|
||||
(if backend
|
||||
(progn
|
||||
(log-message "EMBEDDING: Provider ~a, backend=~a" selected backend)
|
||||
|
||||
Reference in New Issue
Block a user