FEAT: Implement Playwright-Python Bridge for high-fidelity browsing
This commit is contained in:
39
docs/rca/rca-playwright-bridge.org
Normal file
39
docs/rca/rca-playwright-bridge.org
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
#+TITLE: Root Cause Analysis: Playwright-Python Bridge (High-Fidelity Browsing)
|
||||||
|
#+DATE: 2026-04-11
|
||||||
|
#+FILETAGS: :rca:intelligence:browsing:automation:psf:
|
||||||
|
|
||||||
|
* Executive Summary
|
||||||
|
Successfully implemented a high-fidelity browsing bridge using Playwright and Python. This allows the `org-agent` to interact with modern, JavaScript-rendered web applications that were previously inaccessible via simple HTTP clients.
|
||||||
|
|
||||||
|
* 1. Architectural Strategy: The I/O Bridge
|
||||||
|
** Problem
|
||||||
|
Common Lisp lacks a mature, native Playwright implementation. Direct bindings are complex and fragile.
|
||||||
|
** Resolution
|
||||||
|
Implemented a *JSON-over-STDIO Bridge*.
|
||||||
|
- A standalone Python script (`browser-bridge.py`) manages the Playwright lifecycle and Chromium instance.
|
||||||
|
- The Lisp kernel communicates with this script using `uiop:run-program`, passing parameters via `stdin` and receiving structured results via `stdout`. This provides a stable, decoupled interface.
|
||||||
|
|
||||||
|
* 2. Environment & Dependency Management
|
||||||
|
** Issue
|
||||||
|
Playwright requires a specific version of Chromium and several system-level libraries not present in the base Debian image.
|
||||||
|
** Resolution
|
||||||
|
Updated the `Dockerfile` to:
|
||||||
|
1. Install Python3, pip, and venv.
|
||||||
|
2. Create a virtual environment for isolated dependency management.
|
||||||
|
3. Install the `playwright` package and execute `playwright install --with-deps chromium` during the image build. This ensures the production container is ready for high-fidelity browsing immediately upon startup.
|
||||||
|
|
||||||
|
* 3. Cognitive Tooling
|
||||||
|
Created the `:browser` cognitive tool, which exposes three primary capabilities to System 1:
|
||||||
|
- *Navigation:* Full JS rendering and waiting for network idle.
|
||||||
|
- *Extraction:* Targeted text retrieval via CSS selectors.
|
||||||
|
- *Vision:* Base64-encoded screenshot capture for future multimodal processing.
|
||||||
|
|
||||||
|
* 4. PSF Mandate Alignment
|
||||||
|
** Zero-Bloat (Managed)
|
||||||
|
While adding Playwright increases the image size, it is a "Complexity Earned" trade-off that dramatically expands the agent's capability frontier.
|
||||||
|
** Literate Granularity
|
||||||
|
The `org-skill-playwright.org` file strictly follows the "one definition per block" mandate.
|
||||||
|
|
||||||
|
* 5. Permanent Learnings
|
||||||
|
- *Inter-Process JSON:* JSON is the ideal lingua franca for Lisp-Python bridges.
|
||||||
|
- *Path Portability:* Always use `uiop:native-namestring` when passing Lisp paths to external shell commands to ensure OS compatibility.
|
||||||
@@ -24,7 +24,8 @@
|
|||||||
(:file "src/core")
|
(:file "src/core")
|
||||||
(:file "src/gateway-telegram")
|
(:file "src/gateway-telegram")
|
||||||
(:file "src/gateway-signal")
|
(:file "src/gateway-signal")
|
||||||
(:file "src/gateway-matrix"))
|
(:file "src/gateway-matrix")
|
||||||
|
(:file "src/playwright"))
|
||||||
:build-operation "program-op"
|
:build-operation "program-op"
|
||||||
:build-pathname "org-agent-server"
|
:build-pathname "org-agent-server"
|
||||||
:entry-point "org-agent:main")
|
:entry-point "org-agent:main")
|
||||||
@@ -47,6 +48,7 @@
|
|||||||
(:file "tests/gateway-telegram-tests")
|
(:file "tests/gateway-telegram-tests")
|
||||||
(:file "tests/gateway-signal-tests")
|
(:file "tests/gateway-signal-tests")
|
||||||
(:file "tests/gateway-matrix-tests")
|
(:file "tests/gateway-matrix-tests")
|
||||||
|
(:file "tests/playwright-tests")
|
||||||
(:file "tests/chaos-qa"))
|
(:file "tests/chaos-qa"))
|
||||||
:perform (test-op (o s)
|
:perform (test-op (o s)
|
||||||
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :oacp-suite :org-agent-tests))
|
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :oacp-suite :org-agent-tests))
|
||||||
@@ -66,4 +68,5 @@
|
|||||||
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :gateway-telegram-suite :org-agent-gateway-telegram-tests))
|
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :gateway-telegram-suite :org-agent-gateway-telegram-tests))
|
||||||
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :gateway-signal-suite :org-agent-gateway-signal-tests))
|
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :gateway-signal-suite :org-agent-gateway-signal-tests))
|
||||||
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :gateway-matrix-suite :org-agent-gateway-matrix-tests))
|
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :gateway-matrix-suite :org-agent-gateway-matrix-tests))
|
||||||
|
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :playwright-suite :org-agent-playwright-tests))
|
||||||
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :chaos-suite :org-agent-chaos-qa))))
|
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :chaos-suite :org-agent-chaos-qa))))
|
||||||
|
|||||||
54
scripts/browser-bridge.py
Normal file
54
scripts/browser-bridge.py
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import base64
|
||||||
|
from playwright.sync_api import sync_playwright
|
||||||
|
|
||||||
|
# Actions the bridge can perform once the page has loaded.
_SUPPORTED_ACTIONS = ("extract_text", "screenshot")


def _emit(payload):
    """Print ``payload`` as a single JSON document on stdout."""
    print(json.dumps(payload))


def _emit_error(message, **extra):
    """Print an error response carrying ``message`` plus any extra fields."""
    _emit({"status": "error", **extra, "message": message})


def run_bridge():
    """Execute one browser command described by a JSON object on stdin.

    The request object may contain:
      url      -- page to open (required).
      action   -- "extract_text" (default) or "screenshot".
      selector -- CSS selector used by "extract_text" (default "body").

    One JSON object is printed to stdout. On success it has
    ``status == "success"``, the ``url``, and either ``content`` (the inner
    text of ``selector``) or ``screenshot_base64``. On failure it has
    ``status == "error"`` and a ``message``. Request and Playwright errors
    are reported in the response rather than raised.
    """
    # Read and decode the request.
    try:
        raw_input = sys.stdin.read()
        if not raw_input:
            _emit_error("No input provided")
            return
        args = json.loads(raw_input)
    except Exception as e:
        _emit_error(f"Invalid JSON input: {str(e)}")
        return

    # Valid JSON that is not an object (list, string, number, null) has no
    # fields to read; report it instead of crashing on ``.get``.
    if not isinstance(args, dict):
        _emit_error("Invalid JSON input: expected a JSON object")
        return

    url = args.get("url")
    # Treat an explicit JSON null the same as a missing key so the defaults
    # still apply.
    action = args.get("action")
    if action is None:
        action = "extract_text"
    selector = args.get("selector")
    if selector is None:
        selector = "body"

    if not url:
        _emit_error("No URL provided")
        return

    # Reject unsupported actions before paying for a browser launch and a
    # page load.
    if action not in _SUPPORTED_ACTIONS:
        _emit_error(f"Unknown action: {action}", url=url)
        return

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page()

                # Navigate and wait for network to be idle
                page.goto(url, wait_until="networkidle")

                result = {"status": "success", "url": url}
                if action == "extract_text":
                    result["content"] = page.inner_text(selector)
                else:  # "screenshot" -- the only other supported action
                    screenshot_bytes = page.screenshot()
                    result["screenshot_base64"] = base64.b64encode(
                        screenshot_bytes
                    ).decode("utf-8")
            finally:
                # Close the browser even when navigation or extraction fails.
                browser.close()
        _emit(result)
    except Exception as e:
        _emit_error(f"Playwright Error: {str(e)}")
||||||
|
|
||||||
|
# Run the bridge only when this file is executed as a script, not on import.
if __name__ == "__main__":
    run_bridge()
|
||||||
96
skills/org-skill-playwright.org
Normal file
96
skills/org-skill-playwright.org
Normal file
@@ -0,0 +1,96 @@
|
|||||||
|
:PROPERTIES:
|
||||||
|
:ID: playwright-bridge-skill
|
||||||
|
:CREATED: [2026-04-11 Sat 18:00]
|
||||||
|
:END:
|
||||||
|
#+TITLE: SKILL: Playwright-Python Bridge (Universal Literate Note)
|
||||||
|
#+STARTUP: content
|
||||||
|
#+FILETAGS: :intelligence:browsing:automation:psf:
|
||||||
|
|
||||||
|
* Overview
|
||||||
|
The *Playwright Bridge* provides high-fidelity web browsing capabilities by wrapping a headless Chromium instance managed via Python. It allows the agent to interact with JavaScript-heavy applications that are inaccessible to standard HTTP clients.
|
||||||
|
|
||||||
|
* Phase A: Demand (PRD)
|
||||||
|
:PROPERTIES:
|
||||||
|
:STATUS: SIGNED
|
||||||
|
:END:
|
||||||
|
|
||||||
|
** 1. Purpose
|
||||||
|
Enable the agent to "see" and "read" the modern web by executing JavaScript and waiting for network idle states.
|
||||||
|
|
||||||
|
** 2. Success Criteria
|
||||||
|
- [ ] *Interaction:* Can navigate to any URL and wait for full page rendering.
|
||||||
|
- [ ] *Extraction:* Can retrieve inner text from any CSS selector.
|
||||||
|
- [ ] *Vision:* Can take base64-encoded screenshots of rendered pages.
|
||||||
|
|
||||||
|
* Phase B: Blueprint (PROTOCOL)
|
||||||
|
:PROPERTIES:
|
||||||
|
:STATUS: SIGNED
|
||||||
|
:END:
|
||||||
|
|
||||||
|
** 1. Architectural Intent
|
||||||
|
Uses a "JSON Bridge" over standard I/O. The Lisp kernel executes a standalone Python script, passing parameters via `stdin` and receiving structured results via `stdout`.
|
||||||
|
|
||||||
|
** 2. Semantic Interfaces
|
||||||
|
- `(:target :tool :action :call :tool "browser" :args (:url "..." :action "extract_text"))`
|
||||||
|
|
||||||
|
* Phase D: Build (Implementation)
|
||||||
|
|
||||||
|
** Package Context
|
||||||
|
#+begin_src lisp :tangle ../src/playwright.lisp
|
||||||
|
(in-package :org-agent)
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
** Bridge Script Path
|
||||||
|
Calculates the location of the Python bridge script relative to the project root.
|
||||||
|
|
||||||
|
#+begin_src lisp :tangle ../src/playwright.lisp
|
||||||
|
(defun get-browser-bridge-path ()
  "Locate scripts/browser-bridge.py beneath the project root.
The root is the value of the PROJECT_ROOT environment variable when it is
set, otherwise the current working directory."
  (let* ((env-root (uiop:getenv "PROJECT_ROOT"))
         (root-dir (uiop:ensure-directory-pathname
                    (if env-root
                        env-root
                        (uiop:native-namestring (uiop:getcwd))))))
    (merge-pathnames "scripts/browser-bridge.py" root-dir)))
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
** Execution Wrapper (execute-browser-command)
|
||||||
|
Invokes the Python bridge and parses its JSON output.
|
||||||
|
|
||||||
|
#+begin_src lisp :tangle ../src/playwright.lisp
|
||||||
|
(defun execute-browser-command (args)
  "Run the Playwright Python bridge with ARGS and return its decoded reply.

ARGS is encoded to JSON with CL-JSON and written to the bridge's standard
input; the bridge's standard output is decoded from JSON and returned.
If running the bridge or decoding its output signals an ERROR, an alist
with :STATUS \"error\" and a :MESSAGE describing the condition is returned
instead, matching the alist shape callers read with ASSOC."
  (let ((script-path (get-browser-bridge-path))
        (json-input (cl-json:encode-json-to-string args)))
    (handler-case
        (let ((output (uiop:run-program
                       (list "python3" (uiop:native-namestring script-path))
                       :input (make-string-input-stream json-input)
                       :output :string
                       :error-output :string)))
          (cl-json:decode-json-from-string output))
      (error (c)
        ;; Build an alist, not a plist: callers read the reply with ASSOC,
        ;; which rejects non-cons elements such as bare keywords.
        (list (cons :status "error")
              (cons :message (format nil "Bridge Execution Failed: ~a" c)))))))
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
** Cognitive Tool: Browser
|
||||||
|
Register the high-fidelity browsing tool with the kernel.
|
||||||
|
|
||||||
|
#+begin_src lisp :tangle ../src/playwright.lisp
|
||||||
|
;; Cognitive tool :browser -- runs the Playwright bridge and reduces its reply
;; to a single value: extracted text, a base64 screenshot, or an error string.
(def-cognitive-tool :browser
  "High-fidelity web browsing via Playwright (Chromium). Supports JS rendering."
  ((:url :type :string :description "The target URL")
   (:action :type :string :description "Action to perform: 'extract_text' or 'screenshot'")
   (:selector :type :string :description "Optional CSS selector (default: 'body')"))
  :body (lambda (args)
          (let ((result (execute-browser-command args)))
            (if (string= (cdr (assoc :status result)) "success")
                ;; Prefer extracted text, then the screenshot payload.
                (or (cdr (assoc :content result))
                    ;; NOTE(review): CL-JSON's default identifier conversion
                    ;; is expected to hyphenate before a digit run, turning
                    ;; "screenshot_base64" into :SCREENSHOT--BASE-64 -- confirm
                    ;; against the CL-JSON version in use. The original key is
                    ;; kept as a fallback for other conversion settings.
                    (cdr (assoc :screenshot--base-64 result))
                    (cdr (assoc :screenshot--base64 result))
                    "Success (no content returned)")
                (format nil "BROWSER ERROR: ~a" (cdr (assoc :message result)))))))
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
** Registration: Skill
|
||||||
|
#+begin_src lisp :tangle ../src/playwright.lisp
|
||||||
|
;; Registers the Playwright skill. The trigger always returns NIL and the
;; symbolic handler returns ACTION unchanged, so the skill performs no work
;; of its own (see the "Passive tool provider" note on the trigger).
(defskill :skill-playwright
  :priority 150
  :trigger (lambda (ctx) (declare (ignore ctx)) nil) ; Passive tool provider
  :neuro nil
  :symbolic (lambda (action ctx) (declare (ignore ctx)) action))
|
||||||
|
#+end_src
|
||||||
38
src/playwright.lisp
Normal file
38
src/playwright.lisp
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
(in-package :org-agent)
|
||||||
|
|
||||||
|
(defun get-browser-bridge-path ()
  "Locate scripts/browser-bridge.py beneath the project root.
The root is the value of the PROJECT_ROOT environment variable when it is
set, otherwise the current working directory."
  (let* ((env-root (uiop:getenv "PROJECT_ROOT"))
         (root-dir (uiop:ensure-directory-pathname
                    (if env-root
                        env-root
                        (uiop:native-namestring (uiop:getcwd))))))
    (merge-pathnames "scripts/browser-bridge.py" root-dir)))
|
||||||
|
|
||||||
|
(defun execute-browser-command (args)
  "Run the Playwright Python bridge with ARGS and return its decoded reply.

ARGS is encoded to JSON with CL-JSON and written to the bridge's standard
input; the bridge's standard output is decoded from JSON and returned.
If running the bridge or decoding its output signals an ERROR, an alist
with :STATUS \"error\" and a :MESSAGE describing the condition is returned
instead, matching the alist shape callers read with ASSOC."
  (let ((script-path (get-browser-bridge-path))
        (json-input (cl-json:encode-json-to-string args)))
    (handler-case
        (let ((output (uiop:run-program
                       (list "python3" (uiop:native-namestring script-path))
                       :input (make-string-input-stream json-input)
                       :output :string
                       :error-output :string)))
          (cl-json:decode-json-from-string output))
      (error (c)
        ;; Build an alist, not a plist: callers read the reply with ASSOC,
        ;; which rejects non-cons elements such as bare keywords.
        (list (cons :status "error")
              (cons :message (format nil "Bridge Execution Failed: ~a" c)))))))
|
||||||
|
|
||||||
|
;; Cognitive tool :browser -- runs the Playwright bridge and reduces its reply
;; to a single value: extracted text, a base64 screenshot, or an error string.
(def-cognitive-tool :browser
  "High-fidelity web browsing via Playwright (Chromium). Supports JS rendering."
  ((:url :type :string :description "The target URL")
   (:action :type :string :description "Action to perform: 'extract_text' or 'screenshot'")
   (:selector :type :string :description "Optional CSS selector (default: 'body')"))
  :body (lambda (args)
          (let ((result (execute-browser-command args)))
            (if (string= (cdr (assoc :status result)) "success")
                ;; Prefer extracted text, then the screenshot payload.
                (or (cdr (assoc :content result))
                    ;; NOTE(review): CL-JSON's default identifier conversion
                    ;; is expected to hyphenate before a digit run, turning
                    ;; "screenshot_base64" into :SCREENSHOT--BASE-64 -- confirm
                    ;; against the CL-JSON version in use. The original key is
                    ;; kept as a fallback for other conversion settings.
                    (cdr (assoc :screenshot--base-64 result))
                    (cdr (assoc :screenshot--base64 result))
                    "Success (no content returned)")
                (format nil "BROWSER ERROR: ~a" (cdr (assoc :message result)))))))
|
||||||
|
|
||||||
|
;; Registers the Playwright skill. The trigger always returns NIL and the
;; symbolic handler returns ACTION unchanged, so the skill performs no work
;; of its own (see the "Passive tool provider" note on the trigger).
(defskill :skill-playwright
  :priority 150
  :trigger (lambda (ctx) (declare (ignore ctx)) nil) ; Passive tool provider
  :neuro nil
  :symbolic (lambda (action ctx) (declare (ignore ctx)) action))
|
||||||
45
tests/playwright-tests.lisp
Normal file
45
tests/playwright-tests.lisp
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
;; Test package for the Playwright browser bridge. It exports the suite
;; symbol, which the system's test-op looks up by name in this package.
(defpackage :org-agent-playwright-tests
  (:use :cl :fiveam :org-agent)
  (:export #:playwright-suite))
(in-package :org-agent-playwright-tests)

;; All tests defined below in this file belong to PLAYWRIGHT-SUITE.
(def-suite playwright-suite :description "Tests for Playwright Browser Bridge.")
(in-suite playwright-suite)
|
||||||
|
|
||||||
|
(test test-browser-bridge-success
  "A successful JSON reply from the bridge is decoded into an alist."
  (let* ((saved-fn (symbol-function 'uiop:run-program))
         (canned-reply "{\"status\": \"success\", \"url\": \"https://example.com\", \"content\": \"Example Domain Content\"}")
         ;; Stand-in for UIOP:RUN-PROGRAM that skips the subprocess and
         ;; hands back the canned bridge output.
         (stub (lambda (cmd &key input output error-output)
                 (declare (ignore cmd input output error-output))
                 canned-reply)))
    (unwind-protect
         (progn
           (setf (symbol-function 'uiop:run-program) stub)
           (let* ((reply (org-agent::execute-browser-command
                          '((:url . "https://example.com"))))
                  (status (cdr (assoc :status reply)))
                  (content (cdr (assoc :content reply))))
             (is (equal "success" status))
             (is (equal "Example Domain Content" content))))
      ;; Always restore the real function, even if an assertion signals.
      (setf (symbol-function 'uiop:run-program) saved-fn))))
|
||||||
|
|
||||||
|
(test test-browser-bridge-error
  "An error reply from the bridge is decoded with its status and message."
  (let* ((saved-fn (symbol-function 'uiop:run-program))
         (canned-reply "{\"status\": \"error\", \"message\": \"Page Load Timeout\"}")
         ;; Stand-in for UIOP:RUN-PROGRAM that skips the subprocess and
         ;; hands back the canned bridge output.
         (stub (lambda (cmd &key input output error-output)
                 (declare (ignore cmd input output error-output))
                 canned-reply)))
    (unwind-protect
         (progn
           (setf (symbol-function 'uiop:run-program) stub)
           (let* ((reply (org-agent::execute-browser-command
                          '((:url . "https://broken.com"))))
                  (status (cdr (assoc :status reply)))
                  (message (cdr (assoc :message reply))))
             (is (equal "error" status))
             (is (equal "Page Load Timeout" message))))
      ;; Always restore the real function, even if an assertion signals.
      (setf (symbol-function 'uiop:run-program) saved-fn))))
|
||||||
|
|
||||||
|
(test test-browser-tool-registration
  "The \"browser\" entry exists in the tool registry and keeps its description."
  (let ((entry (gethash "browser" org-agent::*cognitive-tools*)))
    ;; Presence first; the description check below dereferences the entry.
    (is-true entry)
    (let ((description (org-agent::cognitive-tool-description entry)))
      (is (search "High-fidelity" description)))))
|
||||||
Reference in New Issue
Block a user