diff --git a/lisp/core-context.lisp b/lisp/core-context.lisp
index caa16d5..93f4c09 100644
--- a/lisp/core-context.lisp
+++ b/lisp/core-context.lisp
@@ -140,15 +140,18 @@ or nil if the heading is not found."
   "Produces a high-level skeletal outline of the current Memory for the LLM.
 Privacy-filtered objects (matching the Dispatcher's privacy tags) are excluded."
   (let* ((foveal-id (or (getf signal :foveal-focus)
                         (ignore-errors (getf (getf signal :payload) :target-id))))
+         ;; Guard the lookup: the vector is requested only when the focus id yields an object.
+         (foveal-object (when foveal-id (memory-object-get foveal-id)))
+         (foveal-vector (when foveal-object (memory-object-vector foveal-object)))
          (all-projects (context-active-projects))
          (projects (remove-if #'context-privacy-filtered-p all-projects))
          (output (format nil "GLOBAL MEMEX AWARENESS (Peripheral Vision):~%")))
     (if projects
         (dolist (project projects)
           (setf output (concatenate 'string output
-                                    (context-object-render project :foveal-id foveal-id))))
+                                    (context-object-render project :foveal-id foveal-id :foveal-vector foveal-vector))))
         (setf output (concatenate 'string output "No active projects found.~%")))
     output))
 
 (defun context-assemble-global-awareness ()
@@ -205,3 +208,21 @@ Privacy-filtered objects (matching the Dispatcher's privacy tags) are excluded."
     (let ((output (context-awareness-assemble nil)))
       (is (stringp output))
       (is (not (search "CHILD CONTENT" output))))))
+
+(test test-semantic-retrieval-trigram
+  "Contract v0.4.0: trigram backend produces non-zero similarity for related content."
+  ;; Both texts contain the trigram "ent" (implement / authentication), so they share a slot.
+  (let ((v1 (passepartout::embedding-backend-trigram "implement user login form"))
+        (v2 (passepartout::embedding-backend-trigram "add password authentication")))
+    (let ((sim (passepartout::vector-cosine-similarity v1 v2)))
+      (is (> sim 0.0))))
+  ;; Texts that differ only in their last word share almost all of their trigrams.
+  (let ((v3 (passepartout::embedding-backend-trigram "authentication login form handler module"))
+        (v4 (passepartout::embedding-backend-trigram "authentication login form handler fix")))
+    (let ((sim (passepartout::vector-cosine-similarity v3 v4)))
+      (is (> sim 0.75))))
+  ;; No trigram in common: any similarity here can only come from bucket collisions.
+  (let ((v5 (passepartout::embedding-backend-trigram "authentication"))
+        (v6 (passepartout::embedding-backend-trigram "banana")))
+    (let ((sim (passepartout::vector-cosine-similarity v5 v6)))
+      (is (< sim 0.3)))))
diff --git a/lisp/system-model-embedding.lisp b/lisp/system-model-embedding.lisp
index 9cef9ad..b6acca4 100644
--- a/lisp/system-model-embedding.lisp
+++ b/lisp/system-model-embedding.lisp
@@ -1,7 +1,7 @@
 (in-package :passepartout)
 
-(defvar *embedding-provider* :hashing
-  "Active embedding provider: :hashing, :local, :openai.")
+(defvar *embedding-provider* :trigram
+  "Active embedding provider: :trigram, :sha256 (legacy alias :hashing), :local, :openai.")
 
 (defvar *embedding-queue* nil
   "Queue of text objects awaiting embedding.")
@@ -45,14 +45,40 @@
     (error (c)
       (list :error (format nil "OpenAI Embedding failed: ~a" c))))))
 
-(defun embedding-backend-hashing (text)
-  "Fallback: produces a deterministic vector from the text hash."
+(defun embedding-backend-sha256 (text)
+  "SHA-256 based vector — a content fingerprint with no semantic retrieval capability.
+Maps the first 8 digest bytes to floats in [0, 1]; similar texts get unrelated vectors."
   (let* ((digest (ironclad:digest-sequence :sha256 (babel:string-to-octets text)))
          (vec (make-array 8 :element-type 'single-float :initial-element 0.0)))
     (dotimes (i (min (length digest) 8))
       (setf (aref vec i) (float (/ (aref digest i) 255.0) 0.0)))
     vec))
 
+(defun embedding-backend-hashing (text)
+  "Backward-compatibility alias for SHA-256 hashing."
+  (embedding-backend-sha256 text))
+
+(defun embedding-backend-trigram (text)
+  "Trigram feature hashing — captures lexical (character-level) overlap between texts.
+Returns a 128-dim single-float vector: a slot is 1.0 when at least one character
+trigram of the trimmed, down-cased TEXT hashes to it, else 0.0.
+Texts shorter than three characters yield the all-zero vector.
+Buckets come from 32-bit FNV-1a over the character codes rather than SXHASH, whose
+values can differ between Lisp implementations, so stored vectors stay comparable.
+Pure Lisp, zero external dependencies, works fully offline."
+  (let* ((s (string-trim '(#\Space #\Newline #\Tab) (string-downcase text)))
+         (result (make-array 128 :element-type 'single-float :initial-element 0.0)))
+    ;; A negative upper bound (fewer than 3 characters) makes LOOP run zero times.
+    (loop for i from 0 to (- (length s) 3)
+          do (let ((hash 2166136261)) ; FNV-1a 32-bit offset basis
+               (loop for j from i below (+ i 3)
+                     do (setf hash (mod (* (logxor hash (char-code (char s j)))
+                                           16777619) ; FNV 32-bit prime
+                                        4294967296)))
+               ;; Setting a slot is idempotent, so repeated trigrams need no de-duplication.
+               (setf (aref result (mod hash 128)) 1.0)))
+    result))
+
 (defvar *embedding-backend* nil
   "Explicit backend override (nil = use *embedding-provider*).")
 
@@ -62,11 +88,12 @@
 
 (defun embed-object (text)
   "Embed a single text string using the active backend."
-  (let* ((selected (or *embedding-backend* *embedding-provider* :hashing))
+  (let* ((selected (or *embedding-backend* *embedding-provider* :trigram))
          (backend (case selected
                     (:local #'embedding-backend-local)
                     (:openai #'embedding-backend-openai)
-                    (t #'embedding-backend-hashing))))
+                    ((:sha256 :hashing) #'embedding-backend-sha256)
+                    (t #'embedding-backend-trigram))))
     (if backend
         (progn
           (log-message "EMBEDDING: Provider ~a, backend=~a" selected backend)
diff --git a/org/core-context.org b/org/core-context.org
index 7c3ccce..8306af6 100644
--- a/org/core-context.org
+++ b/org/core-context.org
@@ -271,15 +271,18 @@ Privacy-filtered projects (those with tags matching the Dispatcher's privacy tag
   "Produces a high-level skeletal outline of the current Memory for the LLM.
 Privacy-filtered objects (matching the Dispatcher's privacy tags) are excluded."
   (let* ((foveal-id (or (getf signal :foveal-focus)
                         (ignore-errors (getf (getf signal :payload) :target-id))))
+         ;; Guard the lookup: the vector is requested only when the focus id yields an object.
+         (foveal-object (when foveal-id (memory-object-get foveal-id)))
+         (foveal-vector (when foveal-object (memory-object-vector foveal-object)))
          (all-projects (context-active-projects))
          (projects (remove-if #'context-privacy-filtered-p all-projects))
          (output (format nil "GLOBAL MEMEX AWARENESS (Peripheral Vision):~%")))
     (if projects
         (dolist (project projects)
           (setf output (concatenate 'string output
-                                    (context-object-render project :foveal-id foveal-id))))
+                                    (context-object-render project :foveal-id foveal-id :foveal-vector foveal-vector))))
         (setf output (concatenate 'string output "No active projects found.~%")))
     output))
 #+end_src
 
@@ -348,4 +351,22 @@ Verifies that the Foveal-Peripheral rendering correctly distinguishes between fo
     (let ((output (context-awareness-assemble nil)))
       (is (stringp output))
       (is (not (search "CHILD CONTENT" output))))))
+
+(test test-semantic-retrieval-trigram
+  "Contract v0.4.0: trigram backend produces non-zero similarity for related content."
+  ;; Both texts contain the trigram "ent" (implement / authentication), so they share a slot.
+  (let ((v1 (passepartout::embedding-backend-trigram "implement user login form"))
+        (v2 (passepartout::embedding-backend-trigram "add password authentication")))
+    (let ((sim (passepartout::vector-cosine-similarity v1 v2)))
+      (is (> sim 0.0))))
+  ;; Texts that differ only in their last word share almost all of their trigrams.
+  (let ((v3 (passepartout::embedding-backend-trigram "authentication login form handler module"))
+        (v4 (passepartout::embedding-backend-trigram "authentication login form handler fix")))
+    (let ((sim (passepartout::vector-cosine-similarity v3 v4)))
+      (is (> sim 0.75))))
+  ;; No trigram in common: any similarity here can only come from bucket collisions.
+  (let ((v5 (passepartout::embedding-backend-trigram "authentication"))
+        (v6 (passepartout::embedding-backend-trigram "banana")))
+    (let ((sim (passepartout::vector-cosine-similarity v5 v6)))
+      (is (< sim 0.3)))))
 #+end_src
diff --git a/org/system-model-embedding.org b/org/system-model-embedding.org
index bec13fa..1c86f2c 100644
--- a/org/system-model-embedding.org
+++ b/org/system-model-embedding.org
@@ -23,8 +23,8 @@ This replaces the old ~system-embedding-gateway~ with the same logic but renamed
 #+begin_src lisp
 (in-package :passepartout)
 
-(defvar *embedding-provider* :hashing
-  "Active embedding provider: :hashing, :local, :openai.")
+(defvar *embedding-provider* :trigram
+  "Active embedding provider: :trigram, :sha256 (legacy alias :hashing), :local, :openai.")
 
 (defvar *embedding-queue* nil
   "Queue of text objects awaiting embedding.")
@@ -77,13 +77,39 @@ This replaces the old ~system-embedding-gateway~ with the same logic but renamed
 
 ** Hashing fallback
 #+begin_src lisp
-(defun embedding-backend-hashing (text)
-  "Fallback: produces a deterministic vector from the text hash."
+(defun embedding-backend-sha256 (text)
+  "SHA-256 based vector — a content fingerprint with no semantic retrieval capability.
+Maps the first 8 digest bytes to floats in [0, 1]; similar texts get unrelated vectors."
   (let* ((digest (ironclad:digest-sequence :sha256 (babel:string-to-octets text)))
          (vec (make-array 8 :element-type 'single-float :initial-element 0.0)))
     (dotimes (i (min (length digest) 8))
       (setf (aref vec i) (float (/ (aref digest i) 255.0) 0.0)))
     vec))
+
+(defun embedding-backend-hashing (text)
+  "Backward-compatibility alias for SHA-256 hashing."
+  (embedding-backend-sha256 text))
+
+(defun embedding-backend-trigram (text)
+  "Trigram feature hashing — captures lexical (character-level) overlap between texts.
+Returns a 128-dim single-float vector: a slot is 1.0 when at least one character
+trigram of the trimmed, down-cased TEXT hashes to it, else 0.0.
+Texts shorter than three characters yield the all-zero vector.
+Buckets come from 32-bit FNV-1a over the character codes rather than SXHASH, whose
+values can differ between Lisp implementations, so stored vectors stay comparable.
+Pure Lisp, zero external dependencies, works fully offline."
+  (let* ((s (string-trim '(#\Space #\Newline #\Tab) (string-downcase text)))
+         (result (make-array 128 :element-type 'single-float :initial-element 0.0)))
+    ;; A negative upper bound (fewer than 3 characters) makes LOOP run zero times.
+    (loop for i from 0 to (- (length s) 3)
+          do (let ((hash 2166136261)) ; FNV-1a 32-bit offset basis
+               (loop for j from i below (+ i 3)
+                     do (setf hash (mod (* (logxor hash (char-code (char s j)))
+                                           16777619) ; FNV 32-bit prime
+                                        4294967296)))
+               ;; Setting a slot is idempotent, so repeated trigrams need no de-duplication.
+               (setf (aref result (mod hash 128)) 1.0)))
+    result))
 #+end_src
 
 ** Object embedding and queuing
@@ -97,11 +123,12 @@ This replaces the old ~system-embedding-gateway~ with the same logic but renamed
 
 (defun embed-object (text)
   "Embed a single text string using the active backend."
-  (let* ((selected (or *embedding-backend* *embedding-provider* :hashing))
+  (let* ((selected (or *embedding-backend* *embedding-provider* :trigram))
          (backend (case selected
                     (:local #'embedding-backend-local)
                     (:openai #'embedding-backend-openai)
-                    (t #'embedding-backend-hashing))))
+                    ((:sha256 :hashing) #'embedding-backend-sha256)
+                    (t #'embedding-backend-trigram))))
     (if backend
         (progn
           (log-message "EMBEDDING: Provider ~a, backend=~a" selected backend)