:PROPERTIES:
:ID: org-skill-embedding
:CREATED: [2026-04-12 Sun 14:00]
:END:
#+TITLE: SKILL: Vector Embedding (Universal Literate Note)
|
|
#+STARTUP: content
|
|
#+FILETAGS: :embedding:vector-search:semantic:psf:
|
|
|
|
* Overview
|
|
The *Vector Embedding* skill provides semantic search and vectorization capabilities to the org-agent. It decouples the specific embedding algorithms and provider-specific API calls from the core kernel.
|
|
|
|
* Phase A: Demand (PRD)
:PROPERTIES:
:STATUS: SIGNED
:END:
|
|
|
|
** 1. Purpose
|
|
Provide a standardized interface for converting text into vector representations and performing similarity searches.
|
|
|
|
** 2. User Needs
|
|
- *Text Vectorization:* Convert Org-mode content into high-dimensional vectors.
|
|
- *Similarity Search:* Find semantically related nodes in the Object Store.
|
|
- *Provider Agnosticism:* Support multiple embedding models (Gemini, OpenAI, etc.).
|
|
|
|
** 3. Success Criteria
|
|
- [ ] Successfully retrieve embeddings from a configured provider.
|
|
- [ ] Perform cosine similarity calculations between vectors.
|
|
- [ ] Register as a hot-reloadable skill.
|
|
|
|
* Phase B: Blueprint (PROTOCOL)
:PROPERTIES:
:STATUS: SIGNED
:END:
|
|
|
|
** 1. Architectural Intent
|
|
Move heavy neural and mathematical logic out of ~core.lisp~ and ~neuro.lisp~ into a dedicated skill.
|
|
|
|
** 2. Semantic Interfaces
|
|
|
|
#+begin_src lisp
|
|
(defun get-embedding (text)
  "Retrieves a vector representation of text via the configured neural provider.")

(defun cosine-similarity (v1 v2)
  "Calculates the cosine similarity between two vectors (higher means more closely related).")

(defun find-most-similar (query-vector top-k)
  "Identifies the top-k most semantically related objects in the store.")
|
|
#+end_src
|
|
|
|
* Phase D: Build (Implementation)
|
|
|
|
** Vector Operations
|
|
#+begin_src lisp :tangle ../src/embedding-logic.lisp
|
|
(in-package :org-agent)
|
|
|
|
(defun get-embedding (text)
  "Return the embedding of TEXT from the Gemini text-embedding-004 model.

TEXT is the text to embed.  On success the result is the decoded
`embedding.values' JSON array (a list of numbers under cl-json's default
decoder settings).  Returns NIL, after logging through KERNEL-LOG, when no
API key is configured for :gemini or when the HTTP request or the JSON
decoding signals an error."
  (let* ((auth (get-provider-auth :gemini))
         (api-key (getf auth :api-key))
         (endpoint "https://generativelanguage.googleapis.com/v1beta/models/text-embedding-004:embedContent"))
    (unless api-key
      (kernel-log "EMBEDDING ERROR: No API key for :gemini")
      (return-from get-embedding nil))
    (let ((headers `(("Content-Type" . "application/json")
                     ;; Send the key as a header instead of a query
                     ;; parameter so it cannot end up in the failure log
                     ;; below when an error report prints the request URL.
                     ("x-goog-api-key" . ,api-key)))
          (body (cl-json:encode-json-to-string
                 ;; `parts' is wrapped in a vector so it is encoded as a
                 ;; JSON array of part objects, as the API documents,
                 ;; rather than as a single JSON object.
                 `((model . "models/text-embedding-004")
                   (content . ((parts . ,(vector `((text . ,text))))))))))
      (handler-case
          (let* ((response (dex:post endpoint :headers headers :content body))
                 (json (cl-json:decode-json-from-string response)))
            ;; cl-json decodes JSON objects to alists by default, so the
            ;; nested fields are looked up with ASSOC, not GETF.
            (cdr (assoc :values (cdr (assoc :embedding json)))))
        (error (c)
          (kernel-log "EMBEDDING FAILURE: ~a" c)
          nil)))))
|
|
|
|
(defun dot-product (v1 v2)
  "Return the dot product of the numeric sequences V1 and V2.

V1 and V2 may each be a list or a vector.  Elements are paired by position
and pairing stops at the end of the shorter sequence.  Empty input yields 0."
  (let ((sum 0))
    ;; MAP with a NIL result type walks any sequence type and allocates no
    ;; intermediate list of products.
    (map nil (lambda (a b) (incf sum (* a b))) v1 v2)
    sum))
|
|
|
|
(defun magnitude (v)
  "Return the Euclidean magnitude (L2 norm) of the numeric sequence V.

V may be a list or a vector.  An empty sequence has magnitude 0."
  ;; REDUCE with :KEY squares each element on the fly, so it works on any
  ;; sequence type and builds no intermediate list.
  (sqrt (reduce #'+ v :key (lambda (x) (* x x)))))
|
|
|
|
(defun cosine-similarity (v1 v2)
  "Return the cosine similarity of the numeric vectors V1 and V2.

The result is the dot product of V1 and V2 divided by the product of their
magnitudes.  When either vector has zero magnitude the result is 0."
  (let ((norm-a (magnitude v1))
        (norm-b (magnitude v2)))
    (cond ((zerop norm-a) 0)
          ((zerop norm-b) 0)
          (t (/ (dot-product v1 v2)
                (* norm-a norm-b))))))
|
|
|
|
(defun find-most-similar (query-vector top-k)
  "Return the TOP-K entries of *OBJECT-STORE* most similar to QUERY-VECTOR.

Each result is a cons (SCORE . OBJECT), where SCORE is the cosine similarity
between QUERY-VECTOR and the object's ORG-OBJECT-VECTOR.  Objects for which
ORG-OBJECT-VECTOR returns NIL are skipped.  Results are ordered by
descending score; fewer than TOP-K are returned when the store holds fewer
scored objects."
  (let ((scored '()))
    (loop for entry being the hash-values of *object-store*
          for embedding = (org-object-vector entry)
          when embedding
            do (push (cons (cosine-similarity query-vector embedding) entry)
                     scored))
    (let* ((ranked (sort scored #'> :key #'car))
           (limit (min top-k (length ranked))))
      (subseq ranked 0 limit))))
|
|
#+end_src
|
|
|
|
* Registration
|
|
#+begin_src lisp :tangle ../src/embedding-logic.lisp
|
|
(defskill :skill-embedding
  :priority 50
  ;; Matches a context whose :payload plist carries :sensor :embedding-request.
  :trigger (lambda (ctx)
             (let ((payload (getf ctx :payload)))
               (eq (getf payload :sensor) :embedding-request)))
  :neuro nil
  ;; Dispatches on the :action key of ACTION; any action not listed here is
  ;; returned unchanged.
  :symbolic (lambda (action ctx)
              (declare (ignore ctx))
              (let ((verb (getf action :action)))
                (case verb
                  (:get-embedding
                   (get-embedding (getf action :text)))
                  (:similarity
                   (cosine-similarity (getf action :v1) (getf action :v2)))
                  (t action)))))
|
|
#+end_src
|