v0.4.0: semantic retrieval activation — wire foveal-vector + trigram Jaccard
Some checks failed
Deploy (Gitea) / deploy (push) Failing after 2s

1. Wire :foveal-vector into context-awareness-assemble: pass the foveal
   node's embedding vector to context-object-render. Previously always
   nil → similarity always 0.0 → no semantic boosting.

2. Replace default :hashing (SHA-256) with :trigram (character-trigram
   Jaccard). SHA-256 is a cryptographic hash with the avalanche property
   — one-bit input differences produce entirely different outputs. Useless
   for similarity. Trigram bloom filter (128-dim) captures lexical overlap
   in pure Lisp with zero external dependencies:
   - 'authentication' vs 'authenticate' → 0.80 similarity
   - 'authentication' vs 'banana' → 0.00 similarity

3. Rename old embedding-backend-hashing → embedding-backend-sha256
   (integrity-only, explicit opt-in). Add embedding-backend-trigram.

4. Add test-semantic-retrieval-trigram: related texts > 0.75, unrelated < 0.3.

Test: 97/0 across 13 suites (context 12/0, embedding 12/0)
This commit is contained in:
2026-05-06 19:04:17 -04:00
parent 33993d2d73
commit 11254b56ec
4 changed files with 100 additions and 22 deletions

View File

@@ -271,15 +271,17 @@ Privacy-filtered projects (those with tags matching the Dispatcher's privacy tag
"Produces a high-level skeletal outline of the current Memory for the LLM.
Privacy-filtered objects (matching the Dispatcher's privacy tags) are excluded."
(let* ((foveal-id (or (getf signal :foveal-focus)
(ignore-errors (getf (getf signal :payload) :target-id))))
(ignore-errors (getf (getf signal :payload) :target-id))))
(foveal-vector (when foveal-id
(memory-object-vector (memory-object-get foveal-id))))
(all-projects (context-active-projects))
(projects (remove-if #'context-privacy-filtered-p all-projects))
(output (format nil "GLOBAL MEMEX AWARENESS (Peripheral Vision):~%")))
(if projects
(dolist (project projects)
(setf output (concatenate 'string output
(context-object-render project :foveal-id foveal-id))))
(setf output (concatenate 'string output "No active projects found.~%")))
(context-object-render project :foveal-id foveal-id :foveal-vector foveal-vector))))
(setf output (concatenate 'string output "No active projects found.~%")))
output))
#+end_src
@@ -348,4 +350,19 @@ Verifies that the Foveal-Peripheral rendering correctly distinguishes between fo
(let ((output (context-awareness-assemble nil)))
(is (stringp output))
(is (not (search "CHILD CONTENT" output))))))
(test test-semantic-retrieval-trigram
"Contract v0.4.0: trigram backend produces non-zero similarity for related content."
(let ((v1 (passepartout::embedding-backend-trigram "implement user login form"))
(v2 (passepartout::embedding-backend-trigram "add password authentication")))
(let ((sim (passepartout::vector-cosine-similarity v1 v2)))
(is (> sim 0.0))))
(let ((v3 (passepartout::embedding-backend-trigram "authentication login form handler module"))
(v4 (passepartout::embedding-backend-trigram "authentication login form handler fix")))
(let ((sim (passepartout::vector-cosine-similarity v3 v4)))
(is (> sim 0.75))))
(let ((v5 (passepartout::embedding-backend-trigram "authentication"))
(v6 (passepartout::embedding-backend-trigram "banana")))
(let ((sim (passepartout::vector-cosine-similarity v5 v6)))
(is (< sim 0.3)))))
#+end_src

View File

@@ -23,8 +23,8 @@ This replaces the old ~system-embedding-gateway~ with the same logic but renamed
#+begin_src lisp
(in-package :passepartout)
(defvar *embedding-provider* :hashing
"Active embedding provider: :hashing, :local, :openai.")
(defvar *embedding-provider* :trigram
"Active embedding provider: :trigram, :sha256, :local, :openai.")
(defvar *embedding-queue* nil
"Queue of text objects awaiting embedding.")
@@ -77,13 +77,34 @@ This replaces the old ~system-embedding-gateway~ with the same logic but renamed
** Hashing fallback
#+begin_src lisp
(defun embedding-backend-hashing (text)
"Fallback: produces a deterministic vector from the text hash."
(defun embedding-backend-sha256 (text)
"SHA-256 based vector — integrity only, no semantic retrieval capability.
For environments where even trivial computation is undesirable."
(let* ((digest (ironclad:digest-sequence :sha256 (babel:string-to-octets text)))
(vec (make-array 8 :element-type 'single-float :initial-element 0.0)))
(dotimes (i (min (length digest) 8))
(setf (aref vec i) (float (/ (aref digest i) 255.0) 0.0)))
vec))
(defun embedding-backend-hashing (text)
"Backward-compatibility alias for SHA-256 hashing."
(embedding-backend-sha256 text))
(defun embedding-backend-trigram (text)
"Trigram bloom filter — captures lexical overlap for semantic retrieval.
Returns a 128-dim float vector where each position corresponds to a trigram hash.
Pure Lisp, zero external dependencies, works fully offline."
(let* ((s (string-trim '(#\Space #\Newline #\Tab) (string-downcase text)))
(trigrams (make-hash-table :test 'equal))
(result (make-array 128 :element-type 'single-float :initial-element 0.0)))
(when (>= (length s) 3)
(loop for i from 0 to (- (length s) 3)
for tri = (subseq s i (+ i 3))
do (setf (gethash tri trigrams) t)))
(maphash (lambda (tri _) (declare (ignore _))
(setf (aref result (mod (sxhash tri) 128)) 1.0))
trigrams)
result))
#+end_src
** Object embedding and queuing
@@ -97,11 +118,12 @@ This replaces the old ~system-embedding-gateway~ with the same logic but renamed
(defun embed-object (text)
"Embed a single text string using the active backend."
(let* ((selected (or *embedding-backend* *embedding-provider* :hashing))
(let* ((selected (or *embedding-backend* *embedding-provider* :trigram))
(backend (case selected
(:local #'embedding-backend-local)
(:openai #'embedding-backend-openai)
(t #'embedding-backend-hashing))))
(:local #'embedding-backend-local)
(:openai #'embedding-backend-openai)
(:sha256 #'embedding-backend-sha256)
(t #'embedding-backend-trigram))))
(if backend
(progn
(log-message "EMBEDDING: Provider ~a, backend=~a" selected backend)