fix: pre-warm in setup, TUI rendering diagnostics

- passepartout setup: add a pre-compile step for :passepartout + :passepartout/tui, so that the first daemon/TUI start is fast (~10s instead of ~120s). - TUI test: remove pre-warm (now in setup). Add 3 rendering diagnostics: * add-msg-render: /eval injects agent msg, verify text on screen (isolates TUI rendering from daemon) * daemon-msg-roundtrip: wait for LLM, check via /eval that :agent entry exists in :messages list (isolates daemon→TUI comm) * agent-response-renders: full E2E — LLM response text on screen (confirms complete TUI→daemon→LLM→TUI pipeline) - Fix missing #+end_src in shell block (was preventing tangle) - Update Contract section with new Phase 3 diagnostic items - Test: 7/7 pass (was 5/5)
2026-05-06 09:20:42 -04:00
parent 7c9cc629a1
commit 7d7a4be668
3 changed files with 149 additions and 91 deletions
--- a/org/system-integration-tests.org
+++ b/org/system-integration-tests.org
@@ -28,14 +28,18 @@ Phase 2 — LLM + messaging:
     matching registered backends (no quote contamination).
  9. Backend cascade: real provider returns string content.
-Phase 3 — TUI via tmux:
+Phase 3 — TUI via tmux (rendering diagnostics):
-  10. Agent response: TUI ↛ daemon ↛ LLM round-trip produces non-cascade
+  10. Cascade inspection: ~/eval *provider-cascade*~ shows clean keywords
-      agent text on screen.
+      on TUI screen (no quote artifacts from cl-dotenv).
-  11. Cascade inspection: ~/eval *provider-cascade*~ shows clean keywords
+  11. Eval command: ~/eval (+ 1 2)~ displays ~~=> 3~~ on screen.
-      on TUI screen (no quote artifacts).
+  12. Status bar: rendered screen shows ~~msgs:~~ in status bar.
-  12. Eval command: ~/eval (+ 1 2)~ displays ~~=> 3~~ on screen.
+  13. Direct render: ~/eval (add-msg :agent ...)~ renders text on screen
-  13. Status bar: rendered screen shows ~~msgs:~~ in status bar.
+      independent of daemon — isolates TUI rendering from pipeline.
  14. Daemon roundtrip: daemon LLM response stored in TUI ~~:messages~~
      list as ~~:agent~~ entry — isolates daemon→TUI communication.
  15. Full render: agent response text appears on rendered screen
      after LLM roundtrip — tests complete TUI→daemon→LLM→TUI pipeline.
 ** Boundaries
@@ -356,14 +360,6 @@ run_test() {
 }
 # ---- Setup ----
 echo "Pre-warming FASL cache (speeds up TUI start from ~120s to ~10s)..."
 # Quickload :passepartout/tui in a background SBCL; its stderr is discarded.
 sbcl --noinform --load ~/quicklisp/setup.lisp \
     --eval '(ql:quickload :passepartout/tui :silent t)' \
     --eval '(uiop:quit)' 2>/dev/null &
 WARM_PID=$!
 # wait yields the SBCL exit status: only claim success when it really
 # succeeded, otherwise warn and carry on (pre-warming is best-effort).
 if wait "$WARM_PID" 2>/dev/null; then
   echo "  Pre-warm complete"
 else
   echo "  Pre-warm failed (TUI start may be slow)" >&2
 fi
 echo "Starting TUI in tmux (daemon must already be running on port 9105)..."
 tmux new-session -d -s tui-test "passepartout tui 2>&1 | tee $TUI_LOG"
 for i in $(seq 1 20); do
@@ -379,41 +375,12 @@ done
 # ---- Tests ----
 test_agent_responds() {
  # Full round-trip: TUI → daemon → LLM → daemon → TUI.
  # Types a prompt into the tui-test tmux pane, then re-captures the pane
  # (sleeping 3s between attempts) until 90s have elapsed.
  # Returns 0 once greeting-like text appears outside the echoed prompt;
  # returns 1 on a cascade-failure message or on timeout.
  local before_ts now_ts pane visible
  before_ts=$(date +%s)
  tmux send-keys -t tui-test "Say hello in one word" Enter
  while true; do
    pane=$(tmux capture-pane -t tui-test -p -S -60 2>/dev/null)
    # Fail fast: a cascade failure is checked before anything else so it
    # is reported immediately instead of after the full timeout.
    if grep -qi 'cascade.*fail\|exhausted\|neural cascade' <<<"$pane"; then
      echo "FAIL: agent responded with cascade failure, not LLM content" >&2
      return 1
    fi
    # The prompt itself contains "hello"; drop lines that echo it so the
    # typed input cannot satisfy the greeting match on its own.
    visible=$(grep -vi 'say hello' <<<"$pane" || true)
    if grep -qi 'hello\|hi there\|greeting\|hi[.!?]\|hey[.!?]' <<<"$visible"; then
      return 0
    fi
    now_ts=$(date +%s)
    if (( now_ts - before_ts > 90 )); then
      echo "TIMEOUT: no agent response after 90s" >&2
      return 1
    fi
    sleep 3
  done
 }
 test_cascade_parsing() {
  # Evaluates *provider-cascade* through the TUI's /eval command and passes
  # when the captured pane shows at least one provider in keyword syntax
  # (:NAME). Guards against the cl-dotenv quote contamination bug, where
  # providers would show up as quoted strings instead.
  local -r keyword_re=':DEEPSEEK\|:OPENROUTER\|:OPENAI\|:ANTHROPIC\|:GROQ\|:GEMINI\|:NVIDIA'
  local screen
  tmux send-keys -t tui-test "/eval *provider-cascade*" Enter
  sleep 3
  screen=$(tmux capture-pane -t tui-test -p -S -15 2>/dev/null)
  # The function's status is grep's status.
  grep -q "$keyword_re" <<<"$screen"
 }
@@ -427,6 +394,64 @@ test_status_bar() {
  tmux capture-pane -t tui-test -p -S -20 2>/dev/null | grep -q 'msgs:'
 }
 # ---- Diagnostic: rendering pipeline isolation ----
 test_add_msg_render() {
  # Stage A: can the TUI render an agent message at all?
  # Inject a message directly via /eval — bypasses daemon entirely.
  # The marker text is assembled by FORMAT inside the Lisp image, so the
  # literal RENDER-TEST-OK never occurs in the typed command; an on-screen
  # echo of the command line therefore cannot satisfy the grep by itself.
  # Returns grep's status: 0 when the marker is found in the captured pane.
  tmux send-keys -t tui-test "/eval (passepartout.gateway-tui:add-msg :agent (format nil \"RENDER-~A-OK\" \"TEST\"))" Enter
  sleep 2
  tmux capture-pane -t tui-test -p -S -10 2>/dev/null | grep -q 'RENDER-TEST-OK'
 }
 # Helper: ask the TUI (via /eval) how many :agent entries its message list
 # holds and print that number. The result is tagged AGENT-COUNT-<n> so an
 # older "=> ..." line still on screen cannot be mistaken for it; the typed
 # command contains "~D" after the tag and so never matches the digit pattern.
 # Prints nothing when no tagged result is visible in the captured pane.
 _tui_agent_msg_count() {
  local tagged
  tmux send-keys -t tui-test "/eval (format nil \"AGENT-COUNT-~D\" (count :agent (passepartout.gateway-tui:st :messages) :key (lambda (m) (getf m :role))))" Enter 2>/dev/null
  sleep 3
  tagged=$(tmux capture-pane -t tui-test -p -S -15 2>/dev/null | grep -o 'AGENT-COUNT-[0-9][0-9]*' | tail -1)
  printf '%s\n' "${tagged#AGENT-COUNT-}"
 }
 test_daemon_msg_roundtrip() {
  # Stage B: does the daemon's LLM response reach the TUI's message list?
  # Records the number of :agent entries before sending, sends a message,
  # then polls until that number grows (90s timeout).
  # Comparing against a baseline matters because an :agent entry may already
  # exist before the daemon answers — presumably the one injected by the
  # add-msg render test that runs just before this one (TODO confirm add-msg
  # stores into :messages).
  # Returns 0 when a new :agent entry appears, 1 on timeout or when the
  # baseline cannot be read.
  local baseline current before_ts now_ts
  baseline=$(_tui_agent_msg_count)
  if [[ -z "$baseline" ]]; then
    echo "FAIL: could not read :agent msg count from TUI via /eval" >&2
    return 1
  fi
  tmux send-keys -t tui-test "Say hello" Enter
  before_ts=$(date +%s)
  while true; do
    current=$(_tui_agent_msg_count)
    # 10# forces base 10 so a value can never be parsed as octal.
    if [[ -n "$current" ]] && (( 10#$current > 10#$baseline )); then
      return 0
    fi
    now_ts=$(date +%s)
    if (( now_ts - before_ts > 90 )); then
      echo "TIMEOUT: no :agent msg in message list after 90s" >&2
      return 1
    fi
    sleep 3
  done
 }
 test_agent_response_renders() {
  # Stage C: full end-to-end — LLM response appears on the rendered screen.
  # Types a prompt into the tui-test tmux pane, then re-captures the pane
  # (sleeping 3s between attempts) until 90s have elapsed.
  # Returns 0 once greeting-like text appears outside the echoed prompt;
  # returns 1 on a cascade-failure message or on timeout.
  local before_ts now_ts pane visible
  before_ts=$(date +%s)
  tmux send-keys -t tui-test "Say hello in one word" Enter
  while true; do
    pane=$(tmux capture-pane -t tui-test -p -S -60 2>/dev/null)
    # Fail fast: a cascade failure is checked before anything else so it
    # is reported immediately instead of after the full timeout.
    if grep -qi 'cascade.*fail\|exhausted\|neural cascade' <<<"$pane"; then
      echo "FAIL: agent responded with cascade failure, not LLM content" >&2
      return 1
    fi
    # The prompt itself contains "hello"; drop lines that echo it so the
    # typed input cannot satisfy the greeting match on its own.
    visible=$(grep -vi 'say hello' <<<"$pane" || true)
    if grep -qi 'hello\|hi there\|greeting\|hi[.!?]\|hey[.!?]' <<<"$visible"; then
      return 0
    fi
    now_ts=$(date +%s)
    if (( now_ts - before_ts > 90 )); then
      echo "TIMEOUT: no agent response on screen after 90s" >&2
      return 1
    fi
    sleep 3
  done
 }
 test_connection_drop() {
  sleep 1
  tmux capture-pane -t tui-test -p -S -10 2>/dev/null | grep -qi 'connection.*lost\|ERROR.*Connection\|error.*connect' || true
@@ -436,7 +461,9 @@ test_connection_drop() {
 run_test "cascade-parsing"         test_cascade_parsing
 run_test "eval-command"            test_eval_command
 run_test "status-bar"              test_status_bar
-run_test "agent-responds"       test_agent_responds
+run_test "add-msg-render"          test_add_msg_render
 run_test "daemon-msg-roundtrip"    test_daemon_msg_roundtrip
 run_test "agent-response-renders"   test_agent_response_renders
 run_test "connection-drop"         test_connection_drop
 # ---- Summary ----
--- a/8
+++ b/8
@@ -124,6 +124,14 @@ setup_system() {
        esac
    fi
    # Pre-compile core + TUI so first daemon/TUI start is fast
    echo -e "${YELLOW}--- Pre-compiling core system ---${NC}"
    sbcl --noinform --load "$HOME/quicklisp/setup.lisp" \
         --eval "(push (truename \"$PASSEPARTOUT_DATA_DIR/\") asdf:*central-registry*)" \
         --eval '(ql:quickload :passepartout)' \
         --eval '(ql:quickload :passepartout/tui :silent t)' \
         --eval '(uiop:quit)' 2>&1 | grep -v '^;' || true
    if [ "$NON_INTERACTIVE" = true ]; then
        echo "Configure complete."
        exit 0
--- a/test/integration-tui.sh
+++ b/test/integration-tui.sh
@@ -25,14 +25,6 @@ run_test() {
 }
 # ---- Setup ----
 echo "Pre-warming FASL cache (speeds up TUI start from ~120s to ~10s)..."
 # Quickload :passepartout/tui in a background SBCL; its stderr is discarded.
 sbcl --noinform --load ~/quicklisp/setup.lisp \
     --eval '(ql:quickload :passepartout/tui :silent t)' \
     --eval '(uiop:quit)' 2>/dev/null &
 WARM_PID=$!
 # wait yields the SBCL exit status: only claim success when it really
 # succeeded, otherwise warn and carry on (pre-warming is best-effort).
 if wait "$WARM_PID" 2>/dev/null; then
   echo "  Pre-warm complete"
 else
   echo "  Pre-warm failed (TUI start may be slow)" >&2
 fi
 echo "Starting TUI in tmux (daemon must already be running on port 9105)..."
 tmux new-session -d -s tui-test "passepartout tui 2>&1 | tee $TUI_LOG"
 for i in $(seq 1 20); do
@@ -48,41 +40,12 @@ done
 # ---- Tests ----
 test_agent_responds() {
  # Full round-trip: TUI → daemon → LLM → daemon → TUI.
  # Types a prompt into the tui-test tmux pane, then re-captures the pane
  # (sleeping 3s between attempts) until 90s have elapsed.
  # Returns 0 once greeting-like text appears outside the echoed prompt;
  # returns 1 on a cascade-failure message or on timeout.
  local before_ts now_ts pane visible
  before_ts=$(date +%s)
  tmux send-keys -t tui-test "Say hello in one word" Enter
  while true; do
    pane=$(tmux capture-pane -t tui-test -p -S -60 2>/dev/null)
    # Fail fast: a cascade failure is checked before anything else so it
    # is reported immediately instead of after the full timeout.
    if grep -qi 'cascade.*fail\|exhausted\|neural cascade' <<<"$pane"; then
      echo "FAIL: agent responded with cascade failure, not LLM content" >&2
      return 1
    fi
    # The prompt itself contains "hello"; drop lines that echo it so the
    # typed input cannot satisfy the greeting match on its own.
    visible=$(grep -vi 'say hello' <<<"$pane" || true)
    if grep -qi 'hello\|hi there\|greeting\|hi[.!?]\|hey[.!?]' <<<"$visible"; then
      return 0
    fi
    now_ts=$(date +%s)
    if (( now_ts - before_ts > 90 )); then
      echo "TIMEOUT: no agent response after 90s" >&2
      return 1
    fi
    sleep 3
  done
 }
 test_cascade_parsing() {
  # Evaluates *provider-cascade* through the TUI's /eval command and passes
  # when the captured pane shows at least one provider in keyword syntax
  # (:NAME). Guards against the cl-dotenv quote contamination bug, where
  # providers would show up as quoted strings instead.
  local -r keyword_re=':DEEPSEEK\|:OPENROUTER\|:OPENAI\|:ANTHROPIC\|:GROQ\|:GEMINI\|:NVIDIA'
  local screen
  tmux send-keys -t tui-test "/eval *provider-cascade*" Enter
  sleep 3
  screen=$(tmux capture-pane -t tui-test -p -S -15 2>/dev/null)
  # The function's status is grep's status.
  grep -q "$keyword_re" <<<"$screen"
 }
@@ -96,6 +59,64 @@ test_status_bar() {
  tmux capture-pane -t tui-test -p -S -20 2>/dev/null | grep -q 'msgs:'
 }
 # ---- Diagnostic: rendering pipeline isolation ----
 test_add_msg_render() {
  # Stage A: can the TUI render an agent message at all?
  # Inject a message directly via /eval — bypasses daemon entirely.
  # The marker text is assembled by FORMAT inside the Lisp image, so the
  # literal RENDER-TEST-OK never occurs in the typed command; an on-screen
  # echo of the command line therefore cannot satisfy the grep by itself.
  # Returns grep's status: 0 when the marker is found in the captured pane.
  tmux send-keys -t tui-test "/eval (passepartout.gateway-tui:add-msg :agent (format nil \"RENDER-~A-OK\" \"TEST\"))" Enter
  sleep 2
  tmux capture-pane -t tui-test -p -S -10 2>/dev/null | grep -q 'RENDER-TEST-OK'
 }
 # Helper: ask the TUI (via /eval) how many :agent entries its message list
 # holds and print that number. The result is tagged AGENT-COUNT-<n> so an
 # older "=> ..." line still on screen cannot be mistaken for it; the typed
 # command contains "~D" after the tag and so never matches the digit pattern.
 # Prints nothing when no tagged result is visible in the captured pane.
 _tui_agent_msg_count() {
  local tagged
  tmux send-keys -t tui-test "/eval (format nil \"AGENT-COUNT-~D\" (count :agent (passepartout.gateway-tui:st :messages) :key (lambda (m) (getf m :role))))" Enter 2>/dev/null
  sleep 3
  tagged=$(tmux capture-pane -t tui-test -p -S -15 2>/dev/null | grep -o 'AGENT-COUNT-[0-9][0-9]*' | tail -1)
  printf '%s\n' "${tagged#AGENT-COUNT-}"
 }
 test_daemon_msg_roundtrip() {
  # Stage B: does the daemon's LLM response reach the TUI's message list?
  # Records the number of :agent entries before sending, sends a message,
  # then polls until that number grows (90s timeout).
  # Comparing against a baseline matters because an :agent entry may already
  # exist before the daemon answers — presumably the one injected by the
  # add-msg render test that runs just before this one (TODO confirm add-msg
  # stores into :messages).
  # Returns 0 when a new :agent entry appears, 1 on timeout or when the
  # baseline cannot be read.
  local baseline current before_ts now_ts
  baseline=$(_tui_agent_msg_count)
  if [[ -z "$baseline" ]]; then
    echo "FAIL: could not read :agent msg count from TUI via /eval" >&2
    return 1
  fi
  tmux send-keys -t tui-test "Say hello" Enter
  before_ts=$(date +%s)
  while true; do
    current=$(_tui_agent_msg_count)
    # 10# forces base 10 so a value can never be parsed as octal.
    if [[ -n "$current" ]] && (( 10#$current > 10#$baseline )); then
      return 0
    fi
    now_ts=$(date +%s)
    if (( now_ts - before_ts > 90 )); then
      echo "TIMEOUT: no :agent msg in message list after 90s" >&2
      return 1
    fi
    sleep 3
  done
 }
 test_agent_response_renders() {
  # Stage C: full end-to-end — LLM response appears on the rendered screen.
  # Types a prompt into the tui-test tmux pane, then re-captures the pane
  # (sleeping 3s between attempts) until 90s have elapsed.
  # Returns 0 once greeting-like text appears outside the echoed prompt;
  # returns 1 on a cascade-failure message or on timeout.
  local before_ts now_ts pane visible
  before_ts=$(date +%s)
  tmux send-keys -t tui-test "Say hello in one word" Enter
  while true; do
    pane=$(tmux capture-pane -t tui-test -p -S -60 2>/dev/null)
    # Fail fast: a cascade failure is checked before anything else so it
    # is reported immediately instead of after the full timeout.
    if grep -qi 'cascade.*fail\|exhausted\|neural cascade' <<<"$pane"; then
      echo "FAIL: agent responded with cascade failure, not LLM content" >&2
      return 1
    fi
    # The prompt itself contains "hello"; drop lines that echo it so the
    # typed input cannot satisfy the greeting match on its own.
    visible=$(grep -vi 'say hello' <<<"$pane" || true)
    if grep -qi 'hello\|hi there\|greeting\|hi[.!?]\|hey[.!?]' <<<"$visible"; then
      return 0
    fi
    now_ts=$(date +%s)
    if (( now_ts - before_ts > 90 )); then
      echo "TIMEOUT: no agent response on screen after 90s" >&2
      return 1
    fi
    sleep 3
  done
 }
 test_connection_drop() {
  sleep 1
  tmux capture-pane -t tui-test -p -S -10 2>/dev/null | grep -qi 'connection.*lost\|ERROR.*Connection\|error.*connect' || true
@@ -105,7 +126,9 @@ test_connection_drop() {
 run_test "cascade-parsing"         test_cascade_parsing
 run_test "eval-command"            test_eval_command
 run_test "status-bar"              test_status_bar
-run_test "agent-responds"       test_agent_responds
+run_test "add-msg-render"          test_add_msg_render
 run_test "daemon-msg-roundtrip"    test_daemon_msg_roundtrip
 run_test "agent-response-renders"   test_agent_response_renders
 run_test "connection-drop"         test_connection_drop
 # ---- Summary ----