tests: TUI integration + cascade parsing — precise LLM diagnostics
Some checks failed
Deploy (Gitea) / deploy (push) Failing after 2s

- TUI agent-responds: hardened to detect and FAIL on cascade/exhausted
  responses (previously a separate WARN-only test that let real
  cascade failures slip through)
- New TUI cascade-parsing test: /eval *provider-cascade* on screen,
  checks for clean keywords (no cl-dotenv quote artifacts)
- Pre-warm step: sbcl --eval '(ql:quickload :passepartout/tui)'
  before launching tmux, cuts TUI startup from ~120s to ~10s
- Removed test_agent_not_cascade_failure (absorbed into agent-responds)
- New integration test: test-provider-cascade-parsing verifies
  PROVIDER_CASCADE entries are keywords without quotes, matching
  registered backends — catches the exact cl-dotenv quote bug
- Fixed stop-daemon ghost symbol (removed export) and paren bug
- Contract section updated with numbered Phase 2/3 items
This commit is contained in:
2026-05-06 08:56:07 -04:00
parent 9362c56678
commit 750918527d
4 changed files with 180 additions and 107 deletions

View File

@@ -22,11 +22,20 @@ Phase 1 — In-process daemon (no external credentials):
6. CLI gateway: text injected via TCP reaches the pipeline.
7. Gateway registry: ~gateway-registry-initialize~ is available.
Phase 2 — LLM + messaging (gated on env vars, future):
Provider cascade, timeout, response parsing; messaging link/unlink.
Phase 2 — LLM + messaging:
Phase 3 — External processes (tmux + Emacs, future):
TUI rendering, /eval, connection drop; Emacs Flight Plan, node insertion.
8. Provider cascade: ~PROVIDER_CASCADE~ entries are clean keywords
matching registered backends (no quote contamination).
9. Backend cascade: real provider returns string content.
Phase 3 — TUI via tmux:
10. Agent response: TUI → daemon → LLM round-trip produces non-cascade
agent text on screen.
11. Cascade inspection: ~/eval *provider-cascade*~ shows clean keywords
on TUI screen (no quote artifacts).
12. Eval command: ~/eval (+ 1 2)~ displays ~=> 3~ on screen.
13. Status bar: rendered screen shows ~msgs:~ in status bar.
** Boundaries
@@ -44,13 +53,13 @@ Shared test harness: package, suite, helpers, and ~with-daemon~.
(ql:quickload :usocket :silent t))
(defpackage :passepartout-integration-tests
(:use :cl :fiveam :passepartout)
(:use :cl :passepartout)
(:export #:integration-suite))
(in-package :passepartout-integration-tests)
(def-suite integration-suite :description "Integration tests across process boundaries")
(in-suite integration-suite)
(fiveam:def-suite integration-suite :description "Integration tests across process boundaries")
(fiveam:in-suite integration-suite)
(defvar *daemon-port* nil)
@@ -68,7 +77,7 @@ Shared test harness: package, suite, helpers, and ~with-daemon~.
(passepartout:start-daemon :port *daemon-port*)
(sleep 2)
,@body)
(handler-case (passepartout:stop-daemon) (error ())))))
(values)))
(defun daemon-connect ()
(let* ((sock (usocket:socket-connect "127.0.0.1" *daemon-port*))
@@ -94,7 +103,7 @@ Shared test harness: package, suite, helpers, and ~with-daemon~.
Verifies the daemon starts, binds its port, and sends a valid handshake.
#+begin_src lisp
(test test-daemon-starts
(fiveam:test test-daemon-starts
"Contract 1: daemon binds port and sends valid handshake."
(with-daemon ()
(multiple-value-bind (stream sock) (daemon-connect)
@@ -107,7 +116,7 @@ Verifies the daemon starts, binds its port, and sends a valid handshake.
Sends a ~:user-input~ event and verifies the pipeline produces a response.
#+begin_src lisp
(test test-pipeline-user-input
(fiveam:test test-pipeline-user-input
"Contract 2: :user-input traverses pipeline and produces a response."
(with-daemon ()
(multiple-value-bind (stream sock) (daemon-connect)
@@ -119,7 +128,7 @@ Sends a ~:user-input~ event and verifies the pipeline produces a response.
(is (not (null resp)) "Expected a response")))
(usocket:socket-close sock)))))
(test test-pipeline-heartbeat
(fiveam:test test-pipeline-heartbeat
"Contract 2: heartbeat signals do not crash the daemon."
(with-daemon ()
(multiple-value-bind (stream sock) (daemon-connect)
@@ -135,7 +144,7 @@ Sends a ~:user-input~ event and verifies the pipeline produces a response.
Verifies framed TCP round-trip and malformed-input resilience.
#+begin_src lisp
(test test-tcp-round-trip
(fiveam:test test-tcp-round-trip
"Contract 3: framed health-check survives TCP round-trip."
(with-daemon ()
(multiple-value-bind (stream sock) (daemon-connect)
@@ -147,7 +156,7 @@ Verifies framed TCP round-trip and malformed-input resilience.
(is (member (getf resp :type) '(:HEALTH-RESPONSE)))))
(usocket:socket-close sock)))))
(test test-daemon-survives-junk
(fiveam:test test-daemon-survives-junk
"Contract 3: daemon does not crash on junk input."
(with-daemon ()
(multiple-value-bind (stream sock) (daemon-connect)
@@ -166,7 +175,7 @@ Verifies framed TCP round-trip and malformed-input resilience.
Verifies the skill loader populates ~*skill-registry*~ after daemon start.
#+begin_src lisp
(test test-skill-registry-populated
(fiveam:test test-skill-registry-populated
"Contract 4: *skill-registry* is populated after daemon start."
(with-daemon ()
(is (hash-table-p passepartout::*skill-registry*))
@@ -180,7 +189,7 @@ Verifies the skill loader populates ~*skill-registry*~ after daemon start.
Verifies safe shell commands execute and dangerous patterns are blocked.
#+begin_src lisp
(test test-shell-safe-echo
(fiveam:test test-shell-safe-echo
"Contract 5: safe shell command does not crash the daemon."
(with-daemon ()
(multiple-value-bind (stream sock) (daemon-connect)
@@ -191,7 +200,7 @@ Verifies safe shell commands execute and dangerous patterns are blocked.
(usocket:socket-close sock))
(pass))))
(test test-shell-dangerous-blocked
(fiveam:test test-shell-dangerous-blocked
"Contract 5: rm -rf / is blocked by the security dispatcher."
(with-daemon ()
(multiple-value-bind (stream sock) (daemon-connect)
@@ -208,7 +217,7 @@ Verifies safe shell commands execute and dangerous patterns are blocked.
Verifies text input over TCP reaches the pipeline.
#+begin_src lisp
(test test-cli-gateway-input
(fiveam:test test-cli-gateway-input
"Contract 6: text via TCP produces a response."
(with-daemon ()
(multiple-value-bind (stream sock) (daemon-connect)
@@ -225,7 +234,7 @@ Verifies text input over TCP reaches the pipeline.
Verifies the gateway registry function is available after daemon start.
#+begin_src lisp
(test test-gateway-registry
(fiveam:test test-gateway-registry
"Contract 7: gateway-registry-initialize is available."
(with-daemon ()
(is (fboundp 'gateway-registry-initialize))
@@ -252,7 +261,7 @@ credentials. Skipped silently if OPENROUTER_API_KEY is unset.
(format t " [SKIP] ~a not set~%" ,env-var)
(skip "~a not set" ,env-var))))
(test test-provider-openai-request
(fiveam:test test-provider-openai-request
"Contract Phase2: provider-openai-request returns :success with valid API key."
(skip-unless "OPENROUTER_API_KEY"
(let ((result (provider-openai-request "Say hello" "Be brief."
@@ -262,12 +271,26 @@ credentials. Skipped silently if OPENROUTER_API_KEY is unset.
(eq (getf result :status) :error))
"Expected :success or :error, got: ~a" result))))
(test test-backend-cascade-real
(fiveam:test test-backend-cascade-real
"Contract Phase2: backend-cascade-call returns string content with real provider."
(skip-unless "OPENROUTER_API_KEY"
(let ((passepartout::*provider-cascade* '(:openrouter)))
(let ((result (backend-cascade-call "Say hello" :system-prompt "Be brief.")))
(is (stringp result) "Expected string response, got: ~a" result)))))
;; Contract 8: every entry of *provider-cascade* must be a keyword whose name
;; carries no quote characters, and at least one entry must be registered in
;; *probabilistic-backends*.
;;
;; FiveAM operators are package-qualified, like the surrounding
;; `fiveam:test' forms, so the checks resolve regardless of whether the
;; test package inherits FiveAM's symbols.
(fiveam:test test-provider-cascade-parsing
  "Contract Phase2: PROVIDER_CASCADE env var parses to clean keywords matching backends."
  ;; NOTE(review): presumably rebuilds *provider-cascade* from the
  ;; PROVIDER_CASCADE environment variable -- confirm against its definition.
  (provider-cascade-initialize)
  (let ((cascade passepartout::*provider-cascade*))
    (fiveam:is (listp cascade) "Cascade must be a list")
    ;; Only walk the value when it really is a list; otherwise LENGTH/DOLIST
    ;; would signal and replace the clean failure above with an error.
    (when (listp cascade)
      (fiveam:is (>= (length cascade) 1) "Cascade must have at least one entry")
      (dolist (entry cascade)
        (fiveam:is (keywordp entry) "Entry ~s must be a keyword" entry)
        ;; SYMBOL-NAME signals a type error on non-symbols; fall back to the
        ;; printed form so the quote checks still run and report properly.
        (let ((name (if (symbolp entry)
                        (symbol-name entry)
                        (princ-to-string entry))))
          (fiveam:is (not (find #\" name))
                     "Entry ~s must not contain double-quote" entry)
          (fiveam:is (not (find #\' name))
                     "Entry ~s must not contain single-quote" entry)))
      (fiveam:is (some (lambda (e)
                         (gethash e passepartout::*probabilistic-backends*))
                       cascade)
                 "At least one cascade entry must match a registered backend"))))
#+end_src
* Messaging Link/Unlink
@@ -277,7 +300,7 @@ returns the correct status, and messaging-unlink removes it. No real
API credentials needed — these are management functions.
#+begin_src lisp
(test test-messaging-link-unlink
(fiveam:test test-messaging-link-unlink
"Contract Phase2: messaging-link stores token, configured-p returns T, unlink removes it."
(with-daemon ()
(messaging-link :test-platform :token "fake-token-123")
@@ -287,12 +310,12 @@ API credentials needed — these are management functions.
(is (not (gateway-configured-p :test-platform))
"Expected test-platform to be unconfigured after unlinking")))
(test test-gateway-configured-p-false
(fiveam:test test-gateway-configured-p-false
"Contract Phase2: gateway-configured-p returns nil for unknown platform."
(with-daemon ()
(is (not (gateway-configured-p :nonexistent-platform-xyz)))))
(test test-gateway-start-messaging
(fiveam:test test-gateway-start-messaging
"Contract Phase2: gateway registry initializes with expected platforms."
(with-daemon ()
(gateway-registry-initialize)
@@ -333,29 +356,43 @@ run_test() {
}
# ---- Setup ----
# Pre-warm: load :passepartout/tui once before tmux starts, so the TUI launched
# below can reuse the compiled FASLs instead of compiling during startup.
# Runs in the foreground (the script waited for it immediately anyway) and
# checks the exit status so a failed quickload is reported, not announced as
# "complete". stdin is detached and --non-interactive keeps SBCL from
# dropping into the debugger and blocking the script on a load error.
echo "Pre-warming FASL cache (speeds up TUI start from ~120s to ~10s)..."
if sbcl --noinform --non-interactive --load ~/quicklisp/setup.lisp \
--eval '(ql:quickload :passepartout/tui :silent t)' \
--eval '(uiop:quit)' </dev/null 2>/dev/null; then
echo " Pre-warm complete"
else
echo " WARNING: pre-warm failed (sbcl exit $?); TUI startup may exceed the wait window" >&2
fi
echo "Starting TUI in tmux (daemon must already be running on port 9105)..."
tmux new-session -d -s tui-test "passepartout tui 2>&1 | tee $TUI_LOG"
for i in $(seq 1 40); do
sleep 3
for i in $(seq 1 15); do
sleep 2
if tmux capture-pane -t tui-test -p 2>/dev/null | grep -q 'Connected v[0-9]'; then
echo " TUI ready after $((i*3))s"
echo " TUI ready after $((i*2))s"
break
fi
if [ "$i" -eq 40 ]; then
echo " WARNING: TUI did not render after 120s"
if [ "$i" -eq 15 ]; then
echo " WARNING: TUI did not render after 30s"
fi
done
# ---- Tests ----
test_agent_responds() {
# Full round-trip: TUI → daemon → pipeline → TUI.
# Uses tmux capture-pane to read the rendered screen.
# Full round-trip: TUI → daemon → LLM → daemon → TUI.
# Must contain a real agent response (⬇), NOT a cascade failure.
local before_ts
before_ts=$(date +%s)
tmux send-keys -t tui-test "hello" Enter
tmux send-keys -t tui-test "Say hello in one word" Enter
while true; do
if tmux capture-pane -t tui-test -p -S -50 2>/dev/null | grep -q '⬇.*[a-zA-Z]\{3,\}'; then
local pane
pane=$(tmux capture-pane -t tui-test -p -S -60 2>/dev/null)
if echo "$pane" | grep -q '⬇.*[a-zA-Z]\{3,\}'; then
if echo "$pane" | grep '⬇' | grep -qi 'cascade.*fail\|exhausted\|neural cascade'; then
echo "FAIL: agent responded with cascade failure, not LLM content" >&2
return 1
fi
return 0
fi
local now_ts
@@ -368,12 +405,15 @@ test_agent_responds() {
done
}
test_agent_not_cascade_failure() {
if tmux capture-pane -t tui-test -p -S -50 2>/dev/null | grep '⬇' | grep -qi 'cascade.*fail\|exhausted\|neural cascade'; then
echo "NOTE: LLM cascade failure — no API key configured (warning only)" >&2
WARN=$((WARN + 1))
fi
return 0
test_cascade_parsing() {
  # Via /eval, check that *provider-cascade* contains clean keywords.
  # This catches the cl-dotenv quote contamination bug.
  # Returns 0 when a known provider keyword is on screen and no
  # pipe-escaped keyword is; returns 1 otherwise.
  tmux send-keys -t tui-test "/eval *provider-cascade*" Enter
  sleep 3
  local pane
  pane=$(tmux capture-pane -t tui-test -p -S -15 2>/dev/null)
  # Negative check first: a single clean entry must not mask a contaminated
  # sibling. A keyword whose name contains a quote is printed by the Lisp
  # printer in escaped form, e.g. :|"DEEPSEEK| .
  # NOTE(review): assumes /eval shows results in readable (prin1-style)
  # form, as the :KEYWORD match below already does -- confirm.
  if echo "$pane" | grep -q ':|'; then
    echo "FAIL: *provider-cascade* contains an escaped (quote-contaminated) keyword" >&2
    return 1
  fi
  # Must contain keyword syntax :SOMETHING (not "SOMETHING with quotes)
  echo "$pane" | grep -q ':DEEPSEEK\|:OPENROUTER\|:OPENAI\|:ANTHROPIC\|:GROQ\|:GEMINI\|:NVIDIA'
}
test_eval_command() {
@@ -393,7 +433,7 @@ test_connection_drop() {
}
run_test "agent-responds" test_agent_responds
run_test "agent-not-cascade-fail" test_agent_not_cascade_failure
run_test "cascade-parsing" test_cascade_parsing
run_test "eval-command" test_eval_command
run_test "status-bar" test_status_bar
run_test "connection-drop" test_connection_drop
@@ -409,7 +449,7 @@ exit $(( FAIL > 0 ? 1 : 0 ))
Verifies Flight Plan message format and Emacs daemon connectivity.
#+begin_src lisp
(test test-flight-plan-message-format
(fiveam:test test-flight-plan-message-format
"Contract Phase3: dispatcher-flight-plan-create returns valid message."
(with-daemon ()
(load (merge-pathnames ".local/share/passepartout/lisp/security-dispatcher.lisp"
@@ -424,7 +464,7 @@ Verifies Flight Plan message format and Emacs daemon connectivity.
(is (string= "PLAN" (getf attrs :TODO)))
(is (member "FLIGHT_PLAN" (getf attrs :TAGS) :test #'string-equal))))))
(test test-emacs-daemon-connect
(fiveam:test test-emacs-daemon-connect
"Contract Phase3: Emacs daemon is reachable via emacsclient."
(handler-case
(let ((result (uiop:run-program '("emacsclient" "--eval" "(+ 1 2)")
@@ -432,5 +472,5 @@ Verifies Flight Plan message format and Emacs daemon connectivity.
:ignore-error-status t)))
(is (search "3" result) "Expected '3' from emacsclient, got: ~a" result))
(error (c)
(skip "Emacs daemon not available: ~a" c))))
(skip "Emacs daemon not available: ~a" c)))))
#+end_src