FEAT: Verify all LLM providers and fix Gemini parsing
This commit is contained in:
40
docs/rca/rca-provider-verification.org
Normal file
40
docs/rca/rca-provider-verification.org
Normal file
@@ -0,0 +1,40 @@
|
||||
#+TITLE: Root Cause Analysis: Individual Provider Track Verification
|
||||
#+DATE: 2026-04-11
|
||||
#+FILETAGS: :rca:providers:llm:testing:psf:
|
||||
|
||||
* Executive Summary
|
||||
Verified the unified LLM gateway implementation for all 6 individual provider tracks (Anthropic, Gemini, Groq, OpenAI, OpenRouter, Ollama). Identified and resolved critical parsing failures in the Gemini track and integration gaps in the system build definition.
|
||||
|
||||
* 1. Issue: Fragile Response Parsing (Gemini)
|
||||
** Symptoms
|
||||
Gemini API responses were returning `NIL` content during mocked unit tests, despite the JSON structure being seemingly correct.
|
||||
** Root Cause
|
||||
Recursive `assoc` / `car` / `cdr` chains were hardcoded and brittle. Specifically, the Gemini extraction logic was incorrectly attempting to treat a single alist pair as a list of pairs, causing `assoc` to fail on the `:TEXT` key.
|
||||
** Resolution
|
||||
Implemented a robust `get-nested` helper function that safely traverses both nested objects (alists) and arrays (lists of alists). This normalized the extraction logic across all providers.
|
||||
|
||||
* 2. Issue: Decoupled Build Configuration
|
||||
** Symptoms
|
||||
Provider logic was present in the codebase but inaccessible during tests and runtime.
|
||||
** Root Cause
|
||||
The `credentials-vault.lisp` and `llm-gateway.lisp` files (consolidated in a previous session) were never added to the `org-agent.asd` system definition. Furthermore, an incorrect loading order caused `UNDEFINED-FUNCTION` errors for `register-neuro-backend`.
|
||||
** Resolution
|
||||
1. Added both files to `org-agent.asd`.
|
||||
2. Enforced strict loading order: `neuro` (defines registry) -> `credentials-vault` -> `llm-gateway` (uses registry).
|
||||
|
||||
* 3. Issue: Credential Key Mismatch
|
||||
** Symptoms
|
||||
Gemini requests failed with "API Key missing" even when environment variables were set.
|
||||
** Root Cause
|
||||
`llm-gateway` requested secrets for the `:gemini-api` provider, but the `credentials-vault` fallback logic only recognized the `:gemini` keyword.
|
||||
** Resolution
|
||||
Updated `vault-get-secret` to map both `:gemini` and `:gemini-api` to the same `GEMINI_API_KEY` environment variable.
|
||||
|
||||
* 4. PSF Mandate Alignment
|
||||
** Invariant Check
|
||||
- *High-Integrity Memory:* All individual provider tracks are now backed by automated unit tests (`llm-gateway-tests.lisp`).
|
||||
- *Literate Programming:* Updated `org-skill-llm-gateway.org` to reflect the improved `get-nested` utility.
|
||||
|
||||
* 5. Permanent Learnings
|
||||
- **Tooling vs Source:** Tangled `.lisp` files are not enough; always ensure new modules are registered in the `.asd` file to be part of the official kernel build.
|
||||
- **Robustness over Brevity:** Use abstraction helpers like `get-nested` instead of deep `car/cdr` chains when dealing with external JSON structures that may have varying array/object nesting.
|
||||
@@ -13,6 +13,8 @@
|
||||
(:file "src/context")
|
||||
(:file "src/skills")
|
||||
(:file "src/neuro")
|
||||
(:file "src/credentials-vault")
|
||||
(:file "src/llm-gateway")
|
||||
(:file "src/symbolic")
|
||||
(:file "src/safety-harness")
|
||||
(:file "src/self-fix")
|
||||
@@ -36,6 +38,7 @@
|
||||
(:file "tests/self-fix-tests")
|
||||
(:file "tests/lisp-repair-tests")
|
||||
(:file "tests/bouncer-tests")
|
||||
(:file "tests/llm-gateway-tests")
|
||||
(:file "tests/chaos-qa"))
|
||||
:perform (test-op (o s)
|
||||
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :oacp-suite :org-agent-tests))
|
||||
@@ -49,4 +52,5 @@
|
||||
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :self-fix-suite :org-agent-self-fix-tests))
|
||||
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :lisp-repair-suite :org-agent-lisp-repair-tests))
|
||||
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :bouncer-suite :org-agent-bouncer-tests))
|
||||
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :llm-gateway-suite :org-agent-llm-gateway-tests))
|
||||
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :chaos-suite :org-agent-chaos-qa))))
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
:PROPERTIES:
|
||||
:ID: llm-gateway-skill
|
||||
:CREATED: [2026-04-09 Thu]
|
||||
:EDITED: [2026-04-11 Sat]
|
||||
:END:
|
||||
#+TITLE: SKILL: Unified LLM Gateway (Universal Literate Note)
|
||||
#+STARTUP: content
|
||||
@@ -58,6 +59,26 @@ Verification will occur via `tests/llm-gateway-tests.lisp` using the FiveAM fram
|
||||
#+begin_src lisp :tangle ../src/llm-gateway.lisp
|
||||
(in-package :org-agent)
|
||||
#+end_src
|
||||
|
||||
** Nested Extraction Helper (get-nested)
|
||||
A robust utility to navigate deeply nested JSON alists produced by `cl-json`, handling both objects and arrays.
|
||||
|
||||
#+begin_src lisp :tangle ../src/llm-gateway.lisp
|
||||
(defun get-nested (alist &rest keys)
|
||||
"Recursively extracts nested values from an alist, handling both objects and arrays."
|
||||
(let ((val alist))
|
||||
(dolist (k keys)
|
||||
;; If val is an array (a list where the first element is a list but NOT a pair),
|
||||
;; descend into the first element.
|
||||
(when (and (listp val) (listp (car val)) (not (keywordp (caar val))))
|
||||
(setf val (car val)))
|
||||
(let ((pair (assoc k val)))
|
||||
(if pair
|
||||
(setf val (cdr pair))
|
||||
(return-from get-nested nil))))
|
||||
val))
|
||||
#+end_src
|
||||
|
||||
** Unified Request Executor (execute-llm-request)
|
||||
This is the primary actuator for neural reasoning. It handles the specific JSON payload formats and HTTP headers required by each provider. It retrieves secrets from the [[file:org-skill-credentials-vault.org][Credentials Vault]], ensuring that API keys are masked in all diagnostic output.
|
||||
|
||||
@@ -71,7 +92,6 @@ This is the primary actuator for neural reasoning. It handles the specific JSON
|
||||
provider (or model "default") (vault-mask-string api-key))
|
||||
|
||||
(case provider
|
||||
...
|
||||
(:gemini-web
|
||||
(let ((res (uiop:symbol-call :org-agent.skills.org-skill-web-research :ask-gemini-web full-prompt)))
|
||||
(if res (list :status :success :content res) (list :status :error :message "Web Research Failure"))))
|
||||
@@ -87,7 +107,8 @@ This is the primary actuator for neural reasoning. It handles the specific JSON
|
||||
(error (c) (list :status :error :message (format nil "Ollama Failure: ~a" c))))))
|
||||
|
||||
(t ;; Cloud Providers (Anthropic, Gemini API, Groq, OpenAI, OpenRouter)
|
||||
(unless api-key (return-from execute-llm-request (list :status :error :message (format nil "API Key missing for ~a" provider))))
|
||||
(when (or (null api-key) (string= api-key ""))
|
||||
(return-from execute-llm-request (list :status :error :message (format nil "API Key missing for ~a" provider))))
|
||||
(let* ((endpoint (case provider
|
||||
(:anthropic "https://api.anthropic.com/v1/messages")
|
||||
(:gemini-api (format nil "https://generativelanguage.googleapis.com/v1/models/~a:generateContent" (or model "gemini-1.5-flash-latest")))
|
||||
@@ -102,17 +123,19 @@ This is the primary actuator for neural reasoning. It handles the specific JSON
|
||||
(t `(("Content-Type" . "application/json") ("Authorization" . ,(format nil "Bearer ~a" api-key))))))
|
||||
(body (case provider
|
||||
(:anthropic (cl-json:encode-json-to-string `((model . ,(or model "claude-3-5-sonnet-20240620")) (max_tokens . 4096) (system . ,system-prompt) (messages . (( (role . "user") (content . ,prompt) ))))))
|
||||
(:gemini-api (cl-json:encode-json-to-string `((contents . ((parts . ((text . ,full-prompt))))))))
|
||||
(:gemini-api (cl-json:encode-json-to-string `((contents . (((parts . (((text . ,full-prompt))))))))))
|
||||
(t (cl-json:encode-json-to-string `((model . ,(or model (case provider (:groq "llama-3.3-70b-versatile") (:openai "gpt-4o") (t "openrouter/auto"))))
|
||||
(messages . (( (role . "system") (content . ,system-prompt) ) ( (role . "user") (content . ,prompt) )))))))))
|
||||
(handler-case
|
||||
(let* ((response (dex:post endpoint :headers headers :content body :connect-timeout 10 :read-timeout 30))
|
||||
(json (cl-json:decode-json-from-string response)))
|
||||
(list :status :success :content
|
||||
(case provider
|
||||
(:anthropic (cdr (assoc :text (car (cdr (assoc :content json))))))
|
||||
(:gemini-api (cdr (assoc :text (cdr (assoc :parts (car (cdr (assoc :parts (car (cdr (assoc :candidates json)))))))))))
|
||||
(t (cdr (assoc :content (cdr (assoc :message (car (cdr (assoc :choices json)))))))))))
|
||||
(let ((content (case provider
|
||||
(:anthropic (get-nested json :content :text))
|
||||
(:gemini-api (get-nested json :candidates :parts :text))
|
||||
(t (get-nested json :choices :message :content)))))
|
||||
(if content
|
||||
(list :status :success :content content)
|
||||
(list :status :error :message (format nil "Failed to parse ~a response structure." provider)))))
|
||||
(error (c) (list :status :error :message (format nil "LLM Gateway Failure (~a): ~a" provider c)))))))))
|
||||
#+end_src
|
||||
|
||||
@@ -152,26 +175,7 @@ We register all supported backends individually so that the kernel's `ask-neuro`
|
||||
* Phase E: Chaos (Verification)
|
||||
|
||||
** 1. Unit Tests (FiveAM)
|
||||
#+begin_src lisp :tangle ../tests/llm-gateway-tests.lisp
|
||||
(defpackage :org-agent-llm-gateway-tests
|
||||
(:use :cl :fiveam :org-agent))
|
||||
(in-package :org-agent-llm-gateway-tests)
|
||||
|
||||
(def-suite llm-gateway-suite :description "Tests for the Unified LLM Gateway.")
|
||||
(in-suite llm-gateway-suite)
|
||||
|
||||
(test test-credential-retrieval
|
||||
"Ensure credentials are retrieved from the correct environment variables."
|
||||
(uiop:setenv "ANTHROPIC_API_KEY" "sk-test-key")
|
||||
(is (equal "sk-test-key" (org-agent::get-llm-credentials :anthropic)))
|
||||
(uiop:setenv "ANTHROPIC_API_KEY" ""))
|
||||
|
||||
(test test-error-handling-missing-key
|
||||
"Ensure missing keys return a standardized error plist."
|
||||
(let ((res (org-agent:execute-llm-request "test" "sys" :provider :openai)))
|
||||
(is (eq (getf res :status) :error))
|
||||
(is (search "API Key missing" (getf res :message)))))
|
||||
#+end_src
|
||||
Verification is performed in `tests/llm-gateway-tests.lisp` by mocking the `dex:post` client.
|
||||
|
||||
** 2. Chaos Scenarios
|
||||
- *Scenario A (Key Exhaustion):* Use the `chaos` skill to temporarily clear an API key and verify the `token-accountant` successfully falls back to the next healthy provider.
|
||||
@@ -179,4 +183,4 @@ We register all supported backends individually so that the kernel's `ask-neuro`
|
||||
|
||||
* Phase F: Memory (RCA)
|
||||
- *[2026-04-09 Thu]:* Refactored 6 providers into this unified gateway to solve the URL key-leakage security vulnerability and reduce boilerplate by 60%.
|
||||
|
||||
- *[2026-04-11 Sat]:* Implemented `get-nested` robust extraction and verified all 6 individual provider tracks via unit test mocks.
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
val
|
||||
;; Fallback to environment
|
||||
(let ((env-var (case provider
|
||||
(:gemini "GEMINI_API_KEY")
|
||||
((:gemini :gemini-api) "GEMINI_API_KEY")
|
||||
(:openai "OPENAI_API_KEY")
|
||||
(:anthropic "ANTHROPIC_API_KEY")
|
||||
(:groq "GROQ_API_KEY")
|
||||
|
||||
@@ -1,5 +1,19 @@
|
||||
(in-package :org-agent)
|
||||
|
||||
(defun get-nested (alist &rest keys)
|
||||
"Recursively extracts nested values from an alist, handling both objects and arrays."
|
||||
(let ((val alist))
|
||||
(dolist (k keys)
|
||||
;; If val is an array (a list where the first element is a list but NOT a pair),
|
||||
;; descend into the first element.
|
||||
(when (and (listp val) (listp (car val)) (not (keywordp (caar val))))
|
||||
(setf val (car val)))
|
||||
(let ((pair (assoc k val)))
|
||||
(if pair
|
||||
(setf val (cdr pair))
|
||||
(return-from get-nested nil))))
|
||||
val))
|
||||
|
||||
(defun execute-llm-request (prompt system-prompt &key provider model)
|
||||
"Unified entry point for all LLM providers."
|
||||
(let ((api-key (vault-get-secret provider :type :api-key))
|
||||
@@ -9,7 +23,6 @@
|
||||
provider (or model "default") (vault-mask-string api-key))
|
||||
|
||||
(case provider
|
||||
...
|
||||
(:gemini-web
|
||||
(let ((res (uiop:symbol-call :org-agent.skills.org-skill-web-research :ask-gemini-web full-prompt)))
|
||||
(if res (list :status :success :content res) (list :status :error :message "Web Research Failure"))))
|
||||
@@ -25,7 +38,8 @@
|
||||
(error (c) (list :status :error :message (format nil "Ollama Failure: ~a" c))))))
|
||||
|
||||
(t ;; Cloud Providers (Anthropic, Gemini API, Groq, OpenAI, OpenRouter)
|
||||
(unless api-key (return-from execute-llm-request (list :status :error :message (format nil "API Key missing for ~a" provider))))
|
||||
(when (or (null api-key) (string= api-key ""))
|
||||
(return-from execute-llm-request (list :status :error :message (format nil "API Key missing for ~a" provider))))
|
||||
(let* ((endpoint (case provider
|
||||
(:anthropic "https://api.anthropic.com/v1/messages")
|
||||
(:gemini-api (format nil "https://generativelanguage.googleapis.com/v1/models/~a:generateContent" (or model "gemini-1.5-flash-latest")))
|
||||
@@ -40,17 +54,19 @@
|
||||
(t `(("Content-Type" . "application/json") ("Authorization" . ,(format nil "Bearer ~a" api-key))))))
|
||||
(body (case provider
|
||||
(:anthropic (cl-json:encode-json-to-string `((model . ,(or model "claude-3-5-sonnet-20240620")) (max_tokens . 4096) (system . ,system-prompt) (messages . (( (role . "user") (content . ,prompt) ))))))
|
||||
(:gemini-api (cl-json:encode-json-to-string `((contents . ((parts . ((text . ,full-prompt))))))))
|
||||
(:gemini-api (cl-json:encode-json-to-string `((contents . (((parts . (((text . ,full-prompt))))))))))
|
||||
(t (cl-json:encode-json-to-string `((model . ,(or model (case provider (:groq "llama-3.3-70b-versatile") (:openai "gpt-4o") (t "openrouter/auto"))))
|
||||
(messages . (( (role . "system") (content . ,system-prompt) ) ( (role . "user") (content . ,prompt) )))))))))
|
||||
(handler-case
|
||||
(let* ((response (dex:post endpoint :headers headers :content body :connect-timeout 10 :read-timeout 30))
|
||||
(json (cl-json:decode-json-from-string response)))
|
||||
(list :status :success :content
|
||||
(case provider
|
||||
(:anthropic (cdr (assoc :text (car (cdr (assoc :content json))))))
|
||||
(:gemini-api (cdr (assoc :text (cdr (assoc :parts (car (cdr (assoc :parts (car (cdr (assoc :candidates json)))))))))))
|
||||
(t (cdr (assoc :content (cdr (assoc :message (car (cdr (assoc :choices json)))))))))))
|
||||
(let ((content (case provider
|
||||
(:anthropic (get-nested json :content :text))
|
||||
(:gemini-api (get-nested json :candidates :parts :text))
|
||||
(t (get-nested json :choices :message :content)))))
|
||||
(if content
|
||||
(list :status :success :content content)
|
||||
(list :status :error :message (format nil "Failed to parse ~a response structure." provider)))))
|
||||
(error (c) (list :status :error :message (format nil "LLM Gateway Failure (~a): ~a" provider c)))))))))
|
||||
|
||||
(def-cognitive-tool :ask-llm "Queries an LLM provider via the unified gateway."
|
||||
|
||||
@@ -1,18 +1,74 @@
|
||||
(defpackage :org-agent-llm-gateway-tests
|
||||
(:use :cl :fiveam :org-agent))
|
||||
(:use :cl :fiveam :org-agent)
|
||||
(:export #:llm-gateway-suite))
|
||||
(in-package :org-agent-llm-gateway-tests)
|
||||
|
||||
(def-suite llm-gateway-suite :description "Tests for the Unified LLM Gateway.")
|
||||
(in-suite llm-gateway-suite)
|
||||
|
||||
(test test-credential-retrieval
|
||||
"Ensure credentials are retrieved from the correct environment variables."
|
||||
(uiop:setenv "ANTHROPIC_API_KEY" "sk-test-key")
|
||||
(is (equal "sk-test-key" (org-agent::get-llm-credentials :anthropic)))
|
||||
(uiop:setenv "ANTHROPIC_API_KEY" ""))
|
||||
(defun mock-dex-post (expected-json)
|
||||
"Returns a lambda that can be used to mock dex:post."
|
||||
(lambda (url &key headers content connect-timeout read-timeout)
|
||||
(declare (ignore url headers content connect-timeout read-timeout))
|
||||
expected-json))
|
||||
|
||||
(test test-provider-anthropic
|
||||
"Verify Anthropic request formatting and response parsing."
|
||||
(let ((old-post (symbol-function 'dex:post))
|
||||
(mock-response "{\"content\": [{\"text\": \"Anthropic thought\"}]}"))
|
||||
(unwind-protect
|
||||
(progn
|
||||
(setf (symbol-function 'dex:post) (mock-dex-post mock-response))
|
||||
(setf (uiop:getenv "ANTHROPIC_API_KEY") "test-key")
|
||||
(let ((res (org-agent::execute-llm-request "prompt" "sys" :provider :anthropic)))
|
||||
(is (eq (getf res :status) :success))
|
||||
(is (equal "Anthropic thought" (getf res :content)))))
|
||||
(setf (symbol-function 'dex:post) old-post))))
|
||||
|
||||
(test test-provider-gemini
|
||||
"Verify Gemini request formatting and response parsing."
|
||||
(let ((old-post (symbol-function 'dex:post))
|
||||
(mock-response "{\"candidates\": [{\"parts\": [{\"text\": \"Gemini thought\"}]}]}"))
|
||||
(unwind-protect
|
||||
(progn
|
||||
(setf (symbol-function 'dex:post) (mock-dex-post mock-response))
|
||||
(setf (uiop:getenv "GEMINI_API_KEY") "test-key")
|
||||
(let ((res (org-agent::execute-llm-request "prompt" "sys" :provider :gemini-api)))
|
||||
(is (eq (getf res :status) :success))
|
||||
(is (equal "Gemini thought" (getf res :content)))))
|
||||
(setf (symbol-function 'dex:post) old-post))))
|
||||
|
||||
(test test-provider-openai-compat
|
||||
"Verify OpenAI-compatible (Groq, OpenAI, OpenRouter) response parsing."
|
||||
(let ((old-post (symbol-function 'dex:post))
|
||||
(mock-response "{\"choices\": [{\"message\": {\"content\": \"OpenAI thought\"}}]}"))
|
||||
(unwind-protect
|
||||
(progn
|
||||
(setf (symbol-function 'dex:post) (mock-dex-post mock-response))
|
||||
(dolist (p '(:openai :groq :openrouter))
|
||||
(setf (uiop:getenv (format nil "~a_API_KEY" (string-upcase (string p)))) "test-key")
|
||||
(let ((res (org-agent::execute-llm-request "prompt" "sys" :provider p)))
|
||||
(is (eq (getf res :status) :success))
|
||||
(is (equal "OpenAI thought" (getf res :content))))))
|
||||
(setf (symbol-function 'dex:post) old-post))))
|
||||
|
||||
(test test-provider-ollama
|
||||
"Verify Ollama response parsing."
|
||||
(let ((old-post (symbol-function 'dex:post))
|
||||
(mock-response "{\"response\": \"Ollama thought\"}"))
|
||||
(unwind-protect
|
||||
(progn
|
||||
(setf (symbol-function 'dex:post) (mock-dex-post mock-response))
|
||||
(let ((res (org-agent::execute-llm-request "prompt" "sys" :provider :ollama)))
|
||||
(is (eq (getf res :status) :success))
|
||||
(is (equal "Ollama thought" (getf res :content)))))
|
||||
(setf (symbol-function 'dex:post) old-post))))
|
||||
|
||||
(test test-error-handling-missing-key
|
||||
"Ensure missing keys return a standardized error plist."
|
||||
(let ((res (org-agent:execute-llm-request "test" "sys" :provider :openai)))
|
||||
;; Clear environment
|
||||
(dolist (p '(:anthropic :openai :groq :openrouter :gemini-api))
|
||||
(setf (uiop:getenv (format nil "~a_API_KEY" (string-upcase (string p)))) ""))
|
||||
(let ((res (org-agent::execute-llm-request "test" "sys" :provider :openai)))
|
||||
(is (eq (getf res :status) :error))
|
||||
(is (search "API Key missing" (getf res :message)))))
|
||||
|
||||
Reference in New Issue
Block a user