From 9497a5955c9e144db66a151fa31885608c2bf394 Mon Sep 17 00:00:00 2001 From: Amr Gharbeia Date: Sat, 11 Apr 2026 15:17:34 -0400 Subject: [PATCH] FEAT: Verify all LLM providers and fix Gemini parsing --- docs/rca/rca-provider-verification.org | 40 +++++++++++++++ org-agent.asd | 4 ++ skills/org-skill-llm-gateway.org | 62 ++++++++++++----------- src/credentials-vault.lisp | 2 +- src/llm-gateway.lisp | 32 +++++++++--- tests/llm-gateway-tests.lisp | 70 +++++++++++++++++++++++--- 6 files changed, 165 insertions(+), 45 deletions(-) create mode 100644 docs/rca/rca-provider-verification.org diff --git a/docs/rca/rca-provider-verification.org b/docs/rca/rca-provider-verification.org new file mode 100644 index 0000000..54db889 --- /dev/null +++ b/docs/rca/rca-provider-verification.org @@ -0,0 +1,40 @@ +#+TITLE: Root Cause Analysis: Individual Provider Track Verification +#+DATE: 2026-04-11 +#+FILETAGS: :rca:providers:llm:testing:psf: + +* Executive Summary +Verified the unified LLM gateway implementation for all 6 individual provider tracks (Anthropic, Gemini, Groq, OpenAI, OpenRouter, Ollama). Identified and resolved critical parsing failures in the Gemini track and integration gaps in the system build definition. + +* 1. Issue: Fragile Response Parsing (Gemini) +** Symptoms +Gemini API responses were returning `NIL` content during mocked unit tests, despite the JSON structure being seemingly correct. +** Root Cause +Recursive `assoc` / `car` / `cdr` chains were hardcoded and brittle. Specifically, the Gemini extraction logic was incorrectly attempting to treat a single alist pair as a list of pairs, causing `assoc` to fail on the `:TEXT` key. +** Resolution +Implemented a robust `get-nested` helper function that safely traverses both nested objects (alists) and arrays (lists of alists). This normalized the extraction logic across all providers. + +* 2. Issue: Decoupled Build Configuration +** Symptoms +Provider logic was present in the codebase but inaccessible during tests and runtime. +** Root Cause +The `credentials-vault.lisp` and `llm-gateway.lisp` files (consolidated in a previous session) were never added to the `org-agent.asd` system definition. Furthermore, an incorrect loading order caused `UNDEFINED-FUNCTION` errors for `register-neuro-backend`. +** Resolution +1. Added both files to `org-agent.asd`. +2. Enforced strict loading order: `neuro` (defines registry) -> `credentials-vault` -> `llm-gateway` (uses registry). + +* 3. Issue: Credential Key Mismatch +** Symptoms +Gemini requests failed with "API Key missing" even when environment variables were set. +** Root Cause +`llm-gateway` requested secrets for the `:gemini-api` provider, but the `credentials-vault` fallback logic only recognized the `:gemini` keyword. +** Resolution +Updated `vault-get-secret` to map both `:gemini` and `:gemini-api` to the same `GEMINI_API_KEY` environment variable. + +* 4. PSF Mandate Alignment +** Invariant Check +- *High-Integrity Memory:* All individual provider tracks are now backed by automated unit tests (`llm-gateway-tests.lisp`). +- *Literate Programming:* Updated `org-skill-llm-gateway.org` to reflect the improved `get-nested` utility. + +* 5. Permanent Learnings +- **Tooling vs Source:** Tangled `.lisp` files are not enough; always ensure new modules are registered in the `.asd` file to be part of the official kernel build. +- **Robustness over Brevity:** Use abstraction helpers like `get-nested` instead of deep `car/cdr` chains when dealing with external JSON structures that may have varying array/object nesting. diff --git a/org-agent.asd b/org-agent.asd index f3c5971..d8e8074 100644 --- a/org-agent.asd +++ b/org-agent.asd @@ -13,6 +13,8 @@ (:file "src/context") (:file "src/skills") (:file "src/neuro") + (:file "src/credentials-vault") + (:file "src/llm-gateway") (:file "src/symbolic") (:file "src/safety-harness") (:file "src/self-fix") @@ -36,6 +38,7 @@ (:file "tests/self-fix-tests") (:file "tests/lisp-repair-tests") (:file "tests/bouncer-tests") + (:file "tests/llm-gateway-tests") (:file "tests/chaos-qa")) :perform (test-op (o s) (uiop:symbol-call :fiveam :run! (uiop:find-symbol* :oacp-suite :org-agent-tests)) @@ -49,4 +52,5 @@ (uiop:symbol-call :fiveam :run! (uiop:find-symbol* :self-fix-suite :org-agent-self-fix-tests)) (uiop:symbol-call :fiveam :run! (uiop:find-symbol* :lisp-repair-suite :org-agent-lisp-repair-tests)) (uiop:symbol-call :fiveam :run! (uiop:find-symbol* :bouncer-suite :org-agent-bouncer-tests)) + (uiop:symbol-call :fiveam :run! (uiop:find-symbol* :llm-gateway-suite :org-agent-llm-gateway-tests)) (uiop:symbol-call :fiveam :run! (uiop:find-symbol* :chaos-suite :org-agent-chaos-qa)))) diff --git a/skills/org-skill-llm-gateway.org b/skills/org-skill-llm-gateway.org index 295d9b5..a7603d7 100644 --- a/skills/org-skill-llm-gateway.org +++ b/skills/org-skill-llm-gateway.org @@ -1,6 +1,7 @@ :PROPERTIES: :ID: llm-gateway-skill :CREATED: [2026-04-09 Thu] +:EDITED: [2026-04-11 Sat] :END: #+TITLE: SKILL: Unified LLM Gateway (Universal Literate Note) #+STARTUP: content @@ -58,6 +59,26 @@ Verification will occur via `tests/llm-gateway-tests.lisp` using the FiveAM fram #+begin_src lisp :tangle ../src/llm-gateway.lisp (in-package :org-agent) #+end_src + +** Nested Extraction Helper (get-nested) +A robust utility to navigate deeply nested JSON alists produced by `cl-json`, handling both objects and arrays. + +#+begin_src lisp :tangle ../src/llm-gateway.lisp +(defun get-nested (alist &rest keys) + "Recursively extracts nested values from an alist, handling both objects and arrays." + (let ((val alist)) + (dolist (k keys) + ;; If val is an array (a list where the first element is a list but NOT a pair), + ;; descend into the first element. + (when (and (listp val) (listp (car val)) (not (keywordp (caar val)))) + (setf val (car val))) + (let ((pair (assoc k val))) + (if pair + (setf val (cdr pair)) + (return-from get-nested nil)))) + val)) +#+end_src + ** Unified Request Executor (execute-llm-request) This is the primary actuator for neural reasoning. It handles the specific JSON payload formats and HTTP headers required by each provider. It retrieves secrets from the [[file:org-skill-credentials-vault.org][Credentials Vault]], ensuring that API keys are masked in all diagnostic output. @@ -71,7 +92,6 @@ This is the primary actuator for neural reasoning. It handles the specific JSON provider (or model "default") (vault-mask-string api-key)) (case provider -... (:gemini-web (let ((res (uiop:symbol-call :org-agent.skills.org-skill-web-research :ask-gemini-web full-prompt))) (if res (list :status :success :content res) (list :status :error :message "Web Research Failure")))) @@ -87,7 +107,8 @@ This is the primary actuator for neural reasoning. It handles the specific JSON (error (c) (list :status :error :message (format nil "Ollama Failure: ~a" c)))))) (t ;; Cloud Providers (Anthropic, Gemini API, Groq, OpenAI, OpenRouter) - (unless api-key (return-from execute-llm-request (list :status :error :message (format nil "API Key missing for ~a" provider)))) + (when (or (null api-key) (string= api-key "")) + (return-from execute-llm-request (list :status :error :message (format nil "API Key missing for ~a" provider)))) (let* ((endpoint (case provider (:anthropic "https://api.anthropic.com/v1/messages") (:gemini-api (format nil "https://generativelanguage.googleapis.com/v1/models/~a:generateContent" (or model "gemini-1.5-flash-latest"))) @@ -102,17 +123,19 @@ This is the primary actuator for neural reasoning. It handles the specific JSON (t `(("Content-Type" . "application/json") ("Authorization" . ,(format nil "Bearer ~a" api-key)))))) (body (case provider (:anthropic (cl-json:encode-json-to-string `((model . ,(or model "claude-3-5-sonnet-20240620")) (max_tokens . 4096) (system . ,system-prompt) (messages . (( (role . "user") (content . ,prompt) )))))) - (:gemini-api (cl-json:encode-json-to-string `((contents . ((parts . ((text . ,full-prompt)))))))) + (:gemini-api (cl-json:encode-json-to-string `((contents . (((parts . (((text . ,full-prompt)))))))))) (t (cl-json:encode-json-to-string `((model . ,(or model (case provider (:groq "llama-3.3-70b-versatile") (:openai "gpt-4o") (t "openrouter/auto")))) (messages . (( (role . "system") (content . ,system-prompt) ) ( (role . "user") (content . ,prompt) ))))))))) (handler-case (let* ((response (dex:post endpoint :headers headers :content body :connect-timeout 10 :read-timeout 30)) (json (cl-json:decode-json-from-string response))) - (list :status :success :content - (case provider - (:anthropic (cdr (assoc :text (car (cdr (assoc :content json)))))) - (:gemini-api (cdr (assoc :text (cdr (assoc :parts (car (cdr (assoc :parts (car (cdr (assoc :candidates json))))))))))) - (t (cdr (assoc :content (cdr (assoc :message (car (cdr (assoc :choices json))))))))))) + (let ((content (case provider + (:anthropic (get-nested json :content :text)) + (:gemini-api (get-nested json :candidates :parts :text)) + (t (get-nested json :choices :message :content))))) + (if content + (list :status :success :content content) + (list :status :error :message (format nil "Failed to parse ~a response structure." provider))))) (error (c) (list :status :error :message (format nil "LLM Gateway Failure (~a): ~a" provider c))))))))) #+end_src @@ -152,26 +175,7 @@ We register all supported backends individually so that the kernel's `ask-neuro` * Phase E: Chaos (Verification) ** 1. Unit Tests (FiveAM) -#+begin_src lisp :tangle ../tests/llm-gateway-tests.lisp -(defpackage :org-agent-llm-gateway-tests - (:use :cl :fiveam :org-agent)) -(in-package :org-agent-llm-gateway-tests) - -(def-suite llm-gateway-suite :description "Tests for the Unified LLM Gateway.") -(in-suite llm-gateway-suite) - -(test test-credential-retrieval - "Ensure credentials are retrieved from the correct environment variables." - (uiop:setenv "ANTHROPIC_API_KEY" "sk-test-key") - (is (equal "sk-test-key" (org-agent::get-llm-credentials :anthropic))) - (uiop:setenv "ANTHROPIC_API_KEY" "")) - -(test test-error-handling-missing-key - "Ensure missing keys return a standardized error plist." - (let ((res (org-agent:execute-llm-request "test" "sys" :provider :openai))) - (is (eq (getf res :status) :error)) - (is (search "API Key missing" (getf res :message))))) -#+end_src +Verification is performed in `tests/llm-gateway-tests.lisp` by mocking the `dex:post` client. ** 2. Chaos Scenarios - *Scenario A (Key Exhaustion):* Use the `chaos` skill to temporarily clear an API key and verify the `token-accountant` successfully falls back to the next healthy provider. @@ -179,4 +183,4 @@ We register all supported backends individually so that the kernel's `ask-neuro` * Phase F: Memory (RCA) - *[2026-04-09 Thu]:* Refactored 6 providers into this unified gateway to solve the URL key-leakage security vulnerability and reduce boilerplate by 60%. - +- *[2026-04-11 Sat]:* Implemented `get-nested` robust extraction and verified all 6 individual provider tracks via unit test mocks. diff --git a/src/credentials-vault.lisp b/src/credentials-vault.lisp index 5e57eb2..cb76a94 100644 --- a/src/credentials-vault.lisp +++ b/src/credentials-vault.lisp @@ -17,7 +17,7 @@ val ;; Fallback to environment (let ((env-var (case provider - (:gemini "GEMINI_API_KEY") + ((:gemini :gemini-api) "GEMINI_API_KEY") (:openai "OPENAI_API_KEY") (:anthropic "ANTHROPIC_API_KEY") (:groq "GROQ_API_KEY") diff --git a/src/llm-gateway.lisp b/src/llm-gateway.lisp index 93bd7b3..aa85600 100644 --- a/src/llm-gateway.lisp +++ b/src/llm-gateway.lisp @@ -1,5 +1,19 @@ (in-package :org-agent) +(defun get-nested (alist &rest keys) + "Recursively extracts nested values from an alist, handling both objects and arrays." + (let ((val alist)) + (dolist (k keys) + ;; If val is an array (a list where the first element is a list but NOT a pair), + ;; descend into the first element. + (when (and (listp val) (listp (car val)) (not (keywordp (caar val)))) + (setf val (car val))) + (let ((pair (assoc k val))) + (if pair + (setf val (cdr pair)) + (return-from get-nested nil)))) + val)) + (defun execute-llm-request (prompt system-prompt &key provider model) "Unified entry point for all LLM providers." (let ((api-key (vault-get-secret provider :type :api-key)) @@ -9,7 +23,6 @@ provider (or model "default") (vault-mask-string api-key)) (case provider -... (:gemini-web (let ((res (uiop:symbol-call :org-agent.skills.org-skill-web-research :ask-gemini-web full-prompt))) (if res (list :status :success :content res) (list :status :error :message "Web Research Failure")))) @@ -25,7 +38,8 @@ (error (c) (list :status :error :message (format nil "Ollama Failure: ~a" c)))))) (t ;; Cloud Providers (Anthropic, Gemini API, Groq, OpenAI, OpenRouter) - (unless api-key (return-from execute-llm-request (list :status :error :message (format nil "API Key missing for ~a" provider)))) + (when (or (null api-key) (string= api-key "")) + (return-from execute-llm-request (list :status :error :message (format nil "API Key missing for ~a" provider)))) (let* ((endpoint (case provider (:anthropic "https://api.anthropic.com/v1/messages") (:gemini-api (format nil "https://generativelanguage.googleapis.com/v1/models/~a:generateContent" (or model "gemini-1.5-flash-latest"))) @@ -40,17 +54,19 @@ (t `(("Content-Type" . "application/json") ("Authorization" . ,(format nil "Bearer ~a" api-key)))))) (body (case provider (:anthropic (cl-json:encode-json-to-string `((model . ,(or model "claude-3-5-sonnet-20240620")) (max_tokens . 4096) (system . ,system-prompt) (messages . (( (role . "user") (content . ,prompt) )))))) - (:gemini-api (cl-json:encode-json-to-string `((contents . ((parts . ((text . ,full-prompt)))))))) + (:gemini-api (cl-json:encode-json-to-string `((contents . (((parts . (((text . ,full-prompt)))))))))) (t (cl-json:encode-json-to-string `((model . ,(or model (case provider (:groq "llama-3.3-70b-versatile") (:openai "gpt-4o") (t "openrouter/auto")))) (messages . (( (role . "system") (content . ,system-prompt) ) ( (role . "user") (content . ,prompt) ))))))))) (handler-case (let* ((response (dex:post endpoint :headers headers :content body :connect-timeout 10 :read-timeout 30)) (json (cl-json:decode-json-from-string response))) - (list :status :success :content - (case provider - (:anthropic (cdr (assoc :text (car (cdr (assoc :content json)))))) - (:gemini-api (cdr (assoc :text (cdr (assoc :parts (car (cdr (assoc :parts (car (cdr (assoc :candidates json))))))))))) - (t (cdr (assoc :content (cdr (assoc :message (car (cdr (assoc :choices json))))))))))) + (let ((content (case provider + (:anthropic (get-nested json :content :text)) + (:gemini-api (get-nested json :candidates :parts :text)) + (t (get-nested json :choices :message :content))))) + (if content + (list :status :success :content content) + (list :status :error :message (format nil "Failed to parse ~a response structure." provider))))) (error (c) (list :status :error :message (format nil "LLM Gateway Failure (~a): ~a" provider c))))))))) (def-cognitive-tool :ask-llm "Queries an LLM provider via the unified gateway." diff --git a/tests/llm-gateway-tests.lisp b/tests/llm-gateway-tests.lisp index 46000b3..614b406 100644 --- a/tests/llm-gateway-tests.lisp +++ b/tests/llm-gateway-tests.lisp @@ -1,18 +1,74 @@ (defpackage :org-agent-llm-gateway-tests - (:use :cl :fiveam :org-agent)) + (:use :cl :fiveam :org-agent) + (:export #:llm-gateway-suite)) (in-package :org-agent-llm-gateway-tests) (def-suite llm-gateway-suite :description "Tests for the Unified LLM Gateway.") (in-suite llm-gateway-suite) -(test test-credential-retrieval - "Ensure credentials are retrieved from the correct environment variables." - (uiop:setenv "ANTHROPIC_API_KEY" "sk-test-key") - (is (equal "sk-test-key" (org-agent::get-llm-credentials :anthropic))) - (uiop:setenv "ANTHROPIC_API_KEY" "")) +(defun mock-dex-post (expected-json) + "Returns a lambda that can be used to mock dex:post." + (lambda (url &key headers content connect-timeout read-timeout) + (declare (ignore url headers content connect-timeout read-timeout)) + expected-json)) + +(test test-provider-anthropic + "Verify Anthropic request formatting and response parsing." + (let ((old-post (symbol-function 'dex:post)) + (mock-response "{\"content\": [{\"text\": \"Anthropic thought\"}]}")) + (unwind-protect + (progn + (setf (symbol-function 'dex:post) (mock-dex-post mock-response)) + (setf (uiop:getenv "ANTHROPIC_API_KEY") "test-key") + (let ((res (org-agent::execute-llm-request "prompt" "sys" :provider :anthropic))) + (is (eq (getf res :status) :success)) + (is (equal "Anthropic thought" (getf res :content))))) + (setf (symbol-function 'dex:post) old-post)))) + +(test test-provider-gemini + "Verify Gemini request formatting and response parsing." + (let ((old-post (symbol-function 'dex:post)) + (mock-response "{\"candidates\": [{\"parts\": [{\"text\": \"Gemini thought\"}]}]}")) + (unwind-protect + (progn + (setf (symbol-function 'dex:post) (mock-dex-post mock-response)) + (setf (uiop:getenv "GEMINI_API_KEY") "test-key") + (let ((res (org-agent::execute-llm-request "prompt" "sys" :provider :gemini-api))) + (is (eq (getf res :status) :success)) + (is (equal "Gemini thought" (getf res :content))))) + (setf (symbol-function 'dex:post) old-post)))) + +(test test-provider-openai-compat + "Verify OpenAI-compatible (Groq, OpenAI, OpenRouter) response parsing." + (let ((old-post (symbol-function 'dex:post)) + (mock-response "{\"choices\": [{\"message\": {\"content\": \"OpenAI thought\"}}]}")) + (unwind-protect + (progn + (setf (symbol-function 'dex:post) (mock-dex-post mock-response)) + (dolist (p '(:openai :groq :openrouter)) + (setf (uiop:getenv (format nil "~a_API_KEY" (string-upcase (string p)))) "test-key") + (let ((res (org-agent::execute-llm-request "prompt" "sys" :provider p))) + (is (eq (getf res :status) :success)) + (is (equal "OpenAI thought" (getf res :content)))))) + (setf (symbol-function 'dex:post) old-post)))) + +(test test-provider-ollama + "Verify Ollama response parsing." + (let ((old-post (symbol-function 'dex:post)) + (mock-response "{\"response\": \"Ollama thought\"}")) + (unwind-protect + (progn + (setf (symbol-function 'dex:post) (mock-dex-post mock-response)) + (let ((res (org-agent::execute-llm-request "prompt" "sys" :provider :ollama))) + (is (eq (getf res :status) :success)) + (is (equal "Ollama thought" (getf res :content))))) + (setf (symbol-function 'dex:post) old-post)))) (test test-error-handling-missing-key "Ensure missing keys return a standardized error plist." - (let ((res (org-agent:execute-llm-request "test" "sys" :provider :openai))) + ;; Clear environment + (dolist (p '(:anthropic :openai :groq :openrouter :gemini-api)) + (setf (uiop:getenv (format nil "~a_API_KEY" (string-upcase (string p)))) "")) + (let ((res (org-agent::execute-llm-request "test" "sys" :provider :openai))) (is (eq (getf res :status) :error)) (is (search "API Key missing" (getf res :message)))))