v0.4.0: semantic retrieval activation — wire foveal-vector + trigram Jaccard
Some checks failed
Deploy (Gitea) / deploy (push) Failing after 2s
Some checks failed
Deploy (Gitea) / deploy (push) Failing after 2s
1. Wire :foveal-vector into context-awareness-assemble: pass the foveal node's embedding vector to context-object-render. Previously always nil → similarity always 0.0 → no semantic boosting. 2. Replace default :hashing (SHA-256) with :trigram (character-trigram Jaccard). SHA-256 is a cryptographic hash with the avalanche property — one-bit input differences produce entirely different outputs. Useless for similarity. Trigram bloom filter (128-dim) captures lexical overlap in pure Lisp with zero external dependencies: - 'authentication' vs 'authenticate' → 0.80 similarity - 'authentication' vs 'banana' → 0.00 similarity 3. Rename old embedding-backend-hashing → embedding-backend-sha256 (integrity-only, explicit opt-in). Add embedding-backend-trigram. 4. Add test-semantic-retrieval-trigram: related texts > 0.75, unrelated < 0.3. Test: 97/0 across 13 suites (context 12/0, embedding 12/0)
This commit is contained in:
@@ -271,15 +271,17 @@ Privacy-filtered projects (those with tags matching the Dispatcher's privacy tag
|
||||
"Produces a high-level skeletal outline of the current Memory for the LLM.
|
||||
Privacy-filtered objects (matching the Dispatcher's privacy tags) are excluded."
|
||||
(let* ((foveal-id (or (getf signal :foveal-focus)
|
||||
(ignore-errors (getf (getf signal :payload) :target-id))))
|
||||
(ignore-errors (getf (getf signal :payload) :target-id))))
|
||||
(foveal-vector (when foveal-id
|
||||
(memory-object-vector (memory-object-get foveal-id))))
|
||||
(all-projects (context-active-projects))
|
||||
(projects (remove-if #'context-privacy-filtered-p all-projects))
|
||||
(output (format nil "GLOBAL MEMEX AWARENESS (Peripheral Vision):~%")))
|
||||
(if projects
|
||||
(dolist (project projects)
|
||||
(setf output (concatenate 'string output
|
||||
(context-object-render project :foveal-id foveal-id))))
|
||||
(setf output (concatenate 'string output "No active projects found.~%")))
|
||||
(context-object-render project :foveal-id foveal-id :foveal-vector foveal-vector))))
|
||||
(setf output (concatenate 'string output "No active projects found.~%")))
|
||||
output))
|
||||
#+end_src
|
||||
|
||||
@@ -348,4 +350,19 @@ Verifies that the Foveal-Peripheral rendering correctly distinguishes between fo
|
||||
(let ((output (context-awareness-assemble nil)))
|
||||
(is (stringp output))
|
||||
(is (not (search "CHILD CONTENT" output))))))
|
||||
|
||||
(test test-semantic-retrieval-trigram
|
||||
"Contract v0.4.0: trigram backend produces non-zero similarity for related content."
|
||||
(let ((v1 (passepartout::embedding-backend-trigram "implement user login form"))
|
||||
(v2 (passepartout::embedding-backend-trigram "add password authentication")))
|
||||
(let ((sim (passepartout::vector-cosine-similarity v1 v2)))
|
||||
(is (> sim 0.0))))
|
||||
(let ((v3 (passepartout::embedding-backend-trigram "authentication login form handler module"))
|
||||
(v4 (passepartout::embedding-backend-trigram "authentication login form handler fix")))
|
||||
(let ((sim (passepartout::vector-cosine-similarity v3 v4)))
|
||||
(is (> sim 0.75))))
|
||||
(let ((v5 (passepartout::embedding-backend-trigram "authentication"))
|
||||
(v6 (passepartout::embedding-backend-trigram "banana")))
|
||||
(let ((sim (passepartout::vector-cosine-similarity v5 v6)))
|
||||
(is (< sim 0.3)))))
|
||||
#+end_src
|
||||
|
||||
@@ -23,8 +23,8 @@ This replaces the old ~system-embedding-gateway~ with the same logic but renamed
|
||||
#+begin_src lisp
|
||||
(in-package :passepartout)
|
||||
|
||||
(defvar *embedding-provider* :hashing
|
||||
"Active embedding provider: :hashing, :local, :openai.")
|
||||
(defvar *embedding-provider* :trigram
|
||||
"Active embedding provider: :trigram, :sha256, :local, :openai.")
|
||||
|
||||
(defvar *embedding-queue* nil
|
||||
"Queue of text objects awaiting embedding.")
|
||||
@@ -77,13 +77,34 @@ This replaces the old ~system-embedding-gateway~ with the same logic but renamed
|
||||
|
||||
** Hashing fallback
|
||||
#+begin_src lisp
|
||||
(defun embedding-backend-hashing (text)
|
||||
"Fallback: produces a deterministic vector from the text hash."
|
||||
(defun embedding-backend-sha256 (text)
|
||||
"SHA-256 based vector — integrity only, no semantic retrieval capability.
|
||||
For environments where even trivial computation is undesirable."
|
||||
(let* ((digest (ironclad:digest-sequence :sha256 (babel:string-to-octets text)))
|
||||
(vec (make-array 8 :element-type 'single-float :initial-element 0.0)))
|
||||
(dotimes (i (min (length digest) 8))
|
||||
(setf (aref vec i) (float (/ (aref digest i) 255.0) 0.0)))
|
||||
vec))
|
||||
|
||||
(defun embedding-backend-hashing (text)
|
||||
"Backward-compatibility alias for SHA-256 hashing."
|
||||
(embedding-backend-sha256 text))
|
||||
|
||||
(defun embedding-backend-trigram (text)
|
||||
"Trigram bloom filter — captures lexical overlap for semantic retrieval.
|
||||
Returns a 128-dim float vector where each position corresponds to a trigram hash.
|
||||
Pure Lisp, zero external dependencies, works fully offline."
|
||||
(let* ((s (string-trim '(#\Space #\Newline #\Tab) (string-downcase text)))
|
||||
(trigrams (make-hash-table :test 'equal))
|
||||
(result (make-array 128 :element-type 'single-float :initial-element 0.0)))
|
||||
(when (>= (length s) 3)
|
||||
(loop for i from 0 to (- (length s) 3)
|
||||
for tri = (subseq s i (+ i 3))
|
||||
do (setf (gethash tri trigrams) t)))
|
||||
(maphash (lambda (tri _) (declare (ignore _))
|
||||
(setf (aref result (mod (sxhash tri) 128)) 1.0))
|
||||
trigrams)
|
||||
result))
|
||||
#+end_src
|
||||
|
||||
** Object embedding and queuing
|
||||
@@ -97,11 +118,12 @@ This replaces the old ~system-embedding-gateway~ with the same logic but renamed
|
||||
|
||||
(defun embed-object (text)
|
||||
"Embed a single text string using the active backend."
|
||||
(let* ((selected (or *embedding-backend* *embedding-provider* :hashing))
|
||||
(let* ((selected (or *embedding-backend* *embedding-provider* :trigram))
|
||||
(backend (case selected
|
||||
(:local #'embedding-backend-local)
|
||||
(:openai #'embedding-backend-openai)
|
||||
(t #'embedding-backend-hashing))))
|
||||
(:local #'embedding-backend-local)
|
||||
(:openai #'embedding-backend-openai)
|
||||
(:sha256 #'embedding-backend-sha256)
|
||||
(t #'embedding-backend-trigram))))
|
||||
(if backend
|
||||
(progn
|
||||
(log-message "EMBEDDING: Provider ~a, backend=~a" selected backend)
|
||||
|
||||
Reference in New Issue
Block a user