- Local vector search: Ollama embeddings + semantic search - get-embedding with caching - cosine-similarity computation - semantic-search cognitive tool - :semantic-search tool for LLM queries - Tool permission tiers: security gating for cognitive tools - Three tiers: :allow, :deny, :ask - Gate in execute-tool-action before tool runs - Defaults: :deny for shell/delete-file, :ask for eval/write-file - :tool-permissions cognitive tool for management - Embedding provider support: Ollama AND llama.cpp - EMBEDDING_PROVIDER env var - EMBEDDING_MODEL env var - LLAMA_HOST for llama.cpp server - .env.example: Add embedding config variables - Fix parse-message in communication.lisp - Update ASDF: add test files, tool-permissions skill All 60 tests pass (6 suites x 100%)
9.3 KiB
9.3 KiB
SKILL: Unified LLM Gateway (Universal Literate Note)
Overview
The Unified LLM Gateway is the single sensory and reasoning interface for all neural backends. It consolidates the previously fragmented provider skills into a high-integrity dispatch layer, standardizing credential management, error handling, and payload formatting.
Phase B: Blueprint (PROTOCOL)
1. Architectural Intent
The gateway utilizes a functional dispatch pattern. A single entry point, `execute-llm-request`, resolves the provider-specific nuances (URLs, headers, JSON structures) while exposing a uniform interface to the harness.
Phase D: Build (Implementation)
Implementation
(defun get-nested (alist &rest keys)
"Recursively extracts nested values from an alist, handling both objects and arrays."
(let ((val alist))
(dolist (k keys)
;; Descend into arrays (cl-json style: ((key . val)) or ( ( (key . val) ) ))
(loop while (and (listp val) (listp (car val)) (not (keywordp (caar val))))
do (setf val (car val)))
(let ((pair (or (assoc k val)
(assoc (intern (string-upcase (string k)) :keyword) val)
(assoc (intern (string-downcase (string k)) :keyword) val))))
(if pair
(setf val (cdr pair))
(return-from get-nested nil))))
val))
(defun execute-llm-request (prompt system-prompt &key provider model)
"Unified entry point for all LLM providers. Respects the global cascade."
(let* ((active-provider (or provider (car opencortex::*provider-cascade*) :openrouter))
(api-key (vault-get-secret active-provider :type :api-key))
(full-prompt (format nil "~a~%~%Prompt: ~a" system-prompt prompt)))
(harness-log "PROBABILISTIC ENGINE: Requesting ~a (Model: ~s)"
active-provider (or model "default"))
;; If the specifically requested provider has no key, try falling back to the cascade
(when (or (null api-key) (string= api-key ""))
(harness-log "GATEWAY: Provider ~a has no key. Cascade fallback would trigger here." active-provider)
(return-from execute-llm-request (list :status :error :message "API Key missing.")))
(case active-provider
(:gemini-web
(let ((res (uiop:symbol-call :opencortex.skills.org-skill-web-research :ask-gemini-web full-prompt)))
(if res (list :status :success :content res) (list :status :error :message "Web Research Failure"))))
(:ollama
(let* ((host (or (uiop:getenv "OLLAMA_HOST") "localhost:11434"))
(url (format nil "http://~a/api/generate" host))
(body (cl-json:encode-json-to-string `((model . ,(or model "llama3")) (prompt . ,full-prompt) (stream . :false)))))
(handler-case
(progn
(harness-log "LLM DEBUG: Requesting Ollama...")
(let* ((response (dex:post url :headers '(("Content-Type" . "application/json")) :content body :connect-timeout 5 :read-timeout 60))
(json (cl-json:decode-json-from-string response)))
(list :status :success :content (cdr (assoc :response json)))))
(error (c) (list :status :error :message (format nil "Ollama Failure: ~a" c))))))
(t ;; Cloud Providers (Anthropic, Gemini API, Groq, OpenAI, OpenRouter)
(let* ((endpoint (case active-provider
(:anthropic "https://api.anthropic.com/v1/messages")
(:gemini-api (format nil "https://generativelanguage.googleapis.com/v1/models/~a:generateContent" (or model "gemini-1.5-flash-latest")))
(:groq "https://api.groq.com/openai/v1/chat/completions")
(:openai "https://api.openai.com/v1/chat/completions")
(:openrouter "https://openrouter.ai/api/v1/chat/completions")))
(headers (case active-provider
(:anthropic `(("Content-Type" . "application/json") ("x-api-key" . ,api-key) ("anthropic-version" . "2023-06-01")))
(:gemini-api `(("Content-Type" . "application/json") ("x-goog-api-key" . ,api-key)))
(:openrouter `(("Content-Type" . "application/json") ("Authorization" . ,(format nil "Bearer ~a" api-key))
("HTTP-Referer" . "https://github.com/amr/opencortex") ("X-Title" . "opencortex Autonomous Kernel")))
(t `(("Content-Type" . "application/json") ("Authorization" . ,(format nil "Bearer ~a" api-key))))))
(body (case active-provider
(:anthropic (cl-json:encode-json-to-string `((model . ,(or model "claude-3-5-sonnet-20240620")) (max_tokens . 4096) (system . ,system-prompt) (messages . (( (role . "user") (content . ,prompt) ))))))
(:gemini-api (cl-json:encode-json-to-string `((contents . (((parts . (((text . ,full-prompt))))))))))
(t (cl-json:encode-json-to-string `((model . ,(or model (case active-provider (:groq "llama-3.3-70b-versatile") (t "google/gemini-2.0-flash-001"))))
(messages . (( (role . "system") (content . ,system-prompt) ) ( (role . "user") (content . ,prompt) )))))))))
(handler-case
(progn
(harness-log "LLM DEBUG: Requesting ~a..." active-provider)
(let* ((response (dex:post endpoint :headers headers :content body :connect-timeout 10 :read-timeout 30))
(json (cl-json:decode-json-from-string response)))
(let ((content (case active-provider
(:anthropic (get-nested json :content :text))
(:gemini-api (get-nested json :candidates :parts :text))
(t (get-nested json :choices :message :content)))))
(if content
(list :status :success :content content)
(list :status :error :message (format nil "Failed to parse ~a response structure." active-provider))))))
(error (c) (list :status :error :message (format nil "LLM Gateway Failure (~a): ~a" active-provider c)))))))))
;; Initialize Cascade
(let* ((env-cascade (uiop:getenv "PROVIDER_CASCADE"))
(default-list '(:openrouter :openai :anthropic :groq :gemini-api :ollama))
(final-list (if (and env-cascade (not (string= env-cascade "")))
(mapcar (lambda (s) (intern (string-upcase (string-trim '(#\Space) s)) :keyword))
(uiop:split-string env-cascade :separator '(#\,)))
default-list)))
(setf opencortex::*provider-cascade* final-list)
(opencortex:harness-log "PROBABILISTIC: Neural Cascade Initialized -> ~a" final-list))
;; Register Providers
(dolist (p '(:anthropic :gemini-api :gemini-web :groq :ollama :openrouter :openai))
(opencortex:register-probabilistic-backend p (lambda (prompt system-prompt &key model)
(execute-llm-request prompt system-prompt :provider p :model model))))
(def-cognitive-tool :get-ollama-embedding
"Generates vector embeddings via Ollama API for semantic search."
((text :type :string :description "Text to embed."))
:body (lambda (args)
(let* ((text (getf args :text))
(host (or (uiop:getenv "OLLAMA_HOST") "localhost:11434"))
(url (format nil "http://~a/api/embeddings" host))
(model (or (uiop:getenv "OLLAMA_EMBEDDING_MODEL") "nomic-embed-text"))
(body (cl-json:encode-json-to-string `((model . ,model) (prompt . ,text)))))
(handler-case
(let* ((response (dex:post url :headers '(("Content-Type" . "application/json")) :content body :connect-timeout 5 :read-timeout 30))
(json (cl-json:decode-json-from-string response)))
(let ((embedding (cdr (assoc :embedding json))))
(if embedding
(list :status :success :vector embedding)
(list :status :error :message "No embedding in response"))))
(error (c) (list :status :error :message (format nil "Ollama Embedding Failure: ~a" c)))))))
(def-cognitive-tool :ask-llm
"Queries an LLM provider via the unified gateway."
((:prompt :type :string :description "The user prompt.")
(:system-prompt :type :string :description "The system instructions.")
(:provider :type :keyword :description "Optional specific provider.")
(:model :type :string :description "Optional specific model ID."))
:body (lambda (args)
(execute-llm-request (getf args :prompt)
(or (getf args :system-prompt) "You are a helpful assistant.")
:provider (getf args :provider)
:model (getf args :model))))
(defskill :skill-llm-gateway
:priority 150
:trigger (lambda (context) (declare (ignore context)) nil)
:probabilistic (lambda (context) (declare (ignore context)) nil)
:deterministic (lambda (action context) (declare (ignore context)) action))