From 397fcc5e8c729396937859b28c1860f9ce13f55f Mon Sep 17 00:00:00 2001 From: Amr Gharbeia Date: Sat, 11 Apr 2026 16:58:23 -0400 Subject: [PATCH] FEAT: Implement Playwright-Python Bridge for high-fidelity browsing --- docs/rca/rca-playwright-bridge.org | 39 ++++++++++++ org-agent.asd | 5 +- scripts/browser-bridge.py | 54 +++++++++++++++++ skills/org-skill-playwright.org | 96 ++++++++++++++++++++++++++++++ src/playwright.lisp | 38 ++++++++++++ tests/playwright-tests.lisp | 45 ++++++++++++++ 6 files changed, 276 insertions(+), 1 deletion(-) create mode 100644 docs/rca/rca-playwright-bridge.org create mode 100644 scripts/browser-bridge.py create mode 100644 skills/org-skill-playwright.org create mode 100644 src/playwright.lisp create mode 100644 tests/playwright-tests.lisp diff --git a/docs/rca/rca-playwright-bridge.org b/docs/rca/rca-playwright-bridge.org new file mode 100644 index 0000000..a474965 --- /dev/null +++ b/docs/rca/rca-playwright-bridge.org @@ -0,0 +1,39 @@ +#+TITLE: Root Cause Analysis: Playwright-Python Bridge (High-Fidelity Browsing) +#+DATE: 2026-04-11 +#+FILETAGS: :rca:intelligence:browsing:automation:psf: + +* Executive Summary +Successfully implemented a high-fidelity browsing bridge using Playwright and Python. This allows the `org-agent` to interact with modern, JavaScript-rendered web applications that were previously inaccessible via simple HTTP clients. + +* 1. Architectural Strategy: The I/O Bridge +** Problem +Common Lisp lacks a mature, native Playwright implementation. Direct bindings are complex and fragile. +** Resolution +Implemented a **JSON-over-STDIO Bridge**. +- A standalone Python script (`browser-bridge.py`) manages the Playwright lifecycle and Chromium instance. +- The Lisp kernel communicates with this script using `uiop:run-program`, passing parameters via `stdin` and receiving structured results via `stdout`. This provides a stable, decoupled interface. + +* 2. 
Environment & Dependency Management +** Issue +Playwright requires a specific version of Chromium and several system-level libraries not present in the base Debian image. +** Resolution +Updated the `Dockerfile` to: +1. Install Python3, pip, and venv. +2. Create a virtual environment for isolated dependency management. +3. Install the `playwright` package and execute `playwright install --with-deps chromium` during the image build. This ensures the production container is ready for high-fidelity browsing immediately upon startup. + +* 3. Cognitive Tooling +Created the `:browser` cognitive tool, which exposes three primary capabilities to System 1: +- **Navigation:** Full JS rendering and waiting for network idle. +- **Extraction:** Targeted text retrieval via CSS selectors. +- **Vision:** Base64-encoded screenshot capture for future multimodal processing. + +* 4. PSF Mandate Alignment +** Zero-Bloat (Managed) +While adding Playwright increases the image size, it is a "Complexity Earned" trade-off that dramatically expands the agent's capability frontier. +** Literate Granularity +The `org-skill-playwright.org` file strictly follows the "one definition per block" mandate. + +* 5. Permanent Learnings +- **Inter-Process JSON:** JSON is the ideal lingua franca for Lisp-Python bridges. +- **Path Portability:** Always use `uiop:native-namestring` when passing Lisp paths to external shell commands to ensure OS compatibility. 
diff --git a/org-agent.asd b/org-agent.asd index cd0202d..0a710f2 100644 --- a/org-agent.asd +++ b/org-agent.asd @@ -24,7 +24,8 @@ (:file "src/core") (:file "src/gateway-telegram") (:file "src/gateway-signal") - (:file "src/gateway-matrix")) + (:file "src/gateway-matrix") + (:file "src/playwright")) :build-operation "program-op" :build-pathname "org-agent-server" :entry-point "org-agent:main") @@ -47,6 +48,7 @@ (:file "tests/gateway-telegram-tests") (:file "tests/gateway-signal-tests") (:file "tests/gateway-matrix-tests") + (:file "tests/playwright-tests") (:file "tests/chaos-qa")) :perform (test-op (o s) (uiop:symbol-call :fiveam :run! (uiop:find-symbol* :oacp-suite :org-agent-tests)) @@ -66,4 +68,5 @@ (uiop:symbol-call :fiveam :run! (uiop:find-symbol* :gateway-telegram-suite :org-agent-gateway-telegram-tests)) (uiop:symbol-call :fiveam :run! (uiop:find-symbol* :gateway-signal-suite :org-agent-gateway-signal-tests)) (uiop:symbol-call :fiveam :run! (uiop:find-symbol* :gateway-matrix-suite :org-agent-gateway-matrix-tests)) + (uiop:symbol-call :fiveam :run! (uiop:find-symbol* :playwright-suite :org-agent-playwright-tests)) (uiop:symbol-call :fiveam :run! 
(uiop:find-symbol* :chaos-suite :org-agent-chaos-qa))))
diff --git a/scripts/browser-bridge.py b/scripts/browser-bridge.py
new file mode 100644
index 0000000..9cd0c2c
--- /dev/null
+++ b/scripts/browser-bridge.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+import sys
+import json
+import base64
+from playwright.sync_api import sync_playwright
+
+def run_bridge():
+    """Read one JSON command from stdin, run it in headless Chromium, print one JSON reply."""
+    try:
+        raw_input = sys.stdin.read()
+        if not raw_input:
+            print(json.dumps({"status": "error", "message": "No input provided"}))
+            return
+        args = json.loads(raw_input)
+        if not isinstance(args, dict):  # a JSON array/string/number has no .get()
+            raise ValueError("expected a JSON object")
+    except Exception as e:
+        print(json.dumps({"status": "error", "message": f"Invalid JSON input: {str(e)}"}))
+        return
+
+    url = args.get("url")
+    # `or` instead of a .get() default: an explicit JSON null must also fall back.
+    action = args.get("action") or "extract_text"
+    selector = args.get("selector") or "body"
+
+    if not url:
+        print(json.dumps({"status": "error", "message": "No URL provided"}))
+        return
+
+    try:
+        with sync_playwright() as p:
+            browser = p.chromium.launch(headless=True)
+            page = browser.new_page()
+
+            # Navigate and wait for network to be idle
+            page.goto(url, wait_until="networkidle")
+            result = {"status": "success", "url": url}
+
+            if action == "extract_text":
+                result["content"] = page.inner_text(selector)
+            elif action == "screenshot":
+                screenshot_bytes = page.screenshot()
+                result["screenshot_base64"] = base64.b64encode(screenshot_bytes).decode("utf-8")
+            else:
+                result["status"] = "error"
+                result["message"] = f"Unknown action: {action}"
+
+            browser.close()
+            print(json.dumps(result))
+    except Exception as e:
+        print(json.dumps({"status": "error", "message": f"Playwright Error: {str(e)}"}))
+
+if __name__ == "__main__":
+    run_bridge()
diff --git a/skills/org-skill-playwright.org b/skills/org-skill-playwright.org
new file mode 100644
index 0000000..3a73ca5
--- /dev/null
+++ b/skills/org-skill-playwright.org
@@ -0,0 +1,96 @@
+:PROPERTIES:
+:ID: playwright-bridge-skill
+:CREATED: [2026-04-11 Sat 18:00]
+:END:
+#+TITLE: SKILL: Playwright-Python Bridge (Universal
Literate Note)
+#+STARTUP: content
+#+FILETAGS: :intelligence:browsing:automation:psf:
+
+* Overview
+The *Playwright Bridge* provides high-fidelity web browsing capabilities by wrapping a headless Chromium instance managed via Python. It allows the agent to interact with JavaScript-heavy applications that are inaccessible to standard HTTP clients.
+
+* Phase A: Demand (PRD)
+:PROPERTIES:
+:STATUS: SIGNED
+:END:
+
+** 1. Purpose
+Enable the agent to "see" and "read" the modern web by executing JavaScript and waiting for network idle states.
+
+** 2. Success Criteria
+- [ ] *Interaction:* Can navigate to any URL and wait for full page rendering.
+- [ ] *Extraction:* Can retrieve inner text from any CSS selector.
+- [ ] *Vision:* Can take base64-encoded screenshots of rendered pages.
+
+* Phase B: Blueprint (PROTOCOL)
+:PROPERTIES:
+:STATUS: SIGNED
+:END:
+
+** 1. Architectural Intent
+Uses a "JSON Bridge" over standard I/O. The Lisp kernel executes a standalone Python script, passing parameters via `stdin` and receiving structured results via `stdout`.
+
+** 2. Semantic Interfaces
+- `(:target :tool :action :call :tool "browser" :args (:url "..." :action "extract_text"))`
+
+* Phase D: Build (Implementation)
+
+** Package Context
+#+begin_src lisp :tangle ../src/playwright.lisp
+(in-package :org-agent)
+#+end_src
+
+** Bridge Script Path
+Calculates the location of the Python bridge script relative to the project root.
+
+#+begin_src lisp :tangle ../src/playwright.lisp
+(defun get-browser-bridge-path ()
+  "Returns the path of scripts/browser-bridge.py under $PROJECT_ROOT, or under the current working directory when that variable is unset."
+  (let ((root (or (uiop:getenv "PROJECT_ROOT") (uiop:native-namestring (uiop:getcwd)))))
+    (merge-pathnames "scripts/browser-bridge.py" (uiop:ensure-directory-pathname root))))
+#+end_src
+
+** Execution Wrapper (execute-browser-command)
+Invokes the Python bridge and parses its JSON output.
+
+#+begin_src lisp :tangle ../src/playwright.lisp
+(defun execute-browser-command (args)
+  "Runs the Python bridge with ARGS (an alist or keyword plist) as a JSON object on stdin. Returns the decoded reply alist, or an error alist if the bridge fails."
+  (let* ((script-path (get-browser-bridge-path))
+         (json-input (if (and (consp args) (keywordp (car args))) (cl-json:encode-json-plist-to-string args) (cl-json:encode-json-to-string args))))
+    (handler-case
+        (let ((output (uiop:run-program (list "python3" (uiop:native-namestring script-path))
+                                        :input (make-string-input-stream json-input)
+                                        :output :string
+                                        :error-output :string)))
+          (cl-json:decode-json-from-string output))
+      (error (c) ; alist, not plist: the :browser tool reads the reply with ASSOC
+        (list (cons :status "error") (cons :message (format nil "Bridge Execution Failed: ~a" c)))))))
+#+end_src
+
+** Cognitive Tool: Browser
+Register the high-fidelity browsing tool with the kernel.
+
+#+begin_src lisp :tangle ../src/playwright.lisp
+(def-cognitive-tool :browser
+  "High-fidelity web browsing via Playwright (Chromium). Supports JS rendering."
+  ((:url :type :string :description "The target URL")
+   (:action :type :string :description "Action to perform: 'extract_text' or 'screenshot'")
+   (:selector :type :string :description "Optional CSS selector (default: 'body')"))
+  :body (lambda (args)
+          (let ((result (execute-browser-command args)))
+            (if (string= (cdr (assoc :status result)) "success")
+                (or (cdr (assoc :content result))
+                    (cdr (assoc :screenshot--base64 result))
+                    "Success (no content returned)")
+                (format nil "BROWSER ERROR: ~a" (cdr (assoc :message result)))))))
+#+end_src
+
+** Registration: Skill
+#+begin_src lisp :tangle ../src/playwright.lisp
+(defskill :skill-playwright
+  :priority 150
+  :trigger (lambda (ctx) (declare (ignore ctx)) nil) ; Passive tool provider
+  :neuro nil
+  :symbolic (lambda (action ctx) (declare (ignore ctx)) action))
+#+end_src
diff --git a/src/playwright.lisp b/src/playwright.lisp
new file mode 100644
index 0000000..a395759
--- /dev/null
+++ b/src/playwright.lisp
@@ -0,0 +1,38 @@
+(in-package :org-agent)
+
+(defun get-browser-bridge-path ()
+  "Returns the path of scripts/browser-bridge.py under $PROJECT_ROOT, or under the current working directory when that variable is unset."
+  (let ((root (or (uiop:getenv "PROJECT_ROOT") (uiop:native-namestring (uiop:getcwd)))))
+    (merge-pathnames "scripts/browser-bridge.py" (uiop:ensure-directory-pathname root))))
+
+(defun execute-browser-command (args)
+  "Runs the Python bridge with ARGS (an alist or keyword plist) as a JSON object on stdin. Returns the decoded reply alist, or an error alist if the bridge fails."
+  (let* ((script-path (get-browser-bridge-path))
+         (json-input (if (and (consp args) (keywordp (car args))) (cl-json:encode-json-plist-to-string args) (cl-json:encode-json-to-string args))))
+    (handler-case
+        (let ((output (uiop:run-program (list "python3" (uiop:native-namestring script-path))
+                                        :input (make-string-input-stream json-input)
+                                        :output :string
+                                        :error-output :string)))
+          (cl-json:decode-json-from-string output))
+      (error (c) ; alist, not plist: the :browser tool reads the reply with ASSOC
+        (list (cons :status "error") (cons :message (format nil "Bridge Execution Failed: ~a" c)))))))
+
+(def-cognitive-tool :browser
+  "High-fidelity web browsing via Playwright (Chromium). Supports JS rendering."
+  ((:url :type :string :description "The target URL")
+   (:action :type :string :description "Action to perform: 'extract_text' or 'screenshot'")
+   (:selector :type :string :description "Optional CSS selector (default: 'body')"))
+  :body (lambda (args)
+          (let ((result (execute-browser-command args)))
+            (if (string= (cdr (assoc :status result)) "success")
+                (or (cdr (assoc :content result))
+                    (cdr (assoc :screenshot--base64 result))
+                    "Success (no content returned)")
+                (format nil "BROWSER ERROR: ~a" (cdr (assoc :message result)))))))
+
+(defskill :skill-playwright
+  :priority 150
+  :trigger (lambda (ctx) (declare (ignore ctx)) nil) ; Passive tool provider
+  :neuro nil
+  :symbolic (lambda (action ctx) (declare (ignore ctx)) action))
diff --git a/tests/playwright-tests.lisp b/tests/playwright-tests.lisp
new file mode 100644
index 0000000..5557a45
--- /dev/null
+++ b/tests/playwright-tests.lisp
@@ -0,0 +1,45 @@
+(defpackage :org-agent-playwright-tests
+  (:use :cl :fiveam :org-agent)
+  (:export #:playwright-suite))
+(in-package :org-agent-playwright-tests)
+
+(def-suite playwright-suite :description "Tests for Playwright Browser Bridge.")
+(in-suite playwright-suite)
+;; Both bridge tests swap UIOP:RUN-PROGRAM for a stub that returns canned JSON, so no Python or browser process is started; UNWIND-PROTECT restores the real function afterwards.
+(test test-browser-bridge-success
+  "Verify that successful bridge output is parsed correctly."
+  (let ((old-run-program (symbol-function 'uiop:run-program))
+        (mock-output "{\"status\": \"success\", \"url\": \"https://example.com\", \"content\": \"Example Domain Content\"}"))
+    (unwind-protect
+        (progn
+          (setf (symbol-function 'uiop:run-program)
+                (lambda (cmd &key input output error-output)
+                  (declare (ignore cmd input output error-output))
+                  mock-output))
+
+          (let ((result (org-agent::execute-browser-command '((:url . "https://example.com")))))
+            (is (equal "success" (cdr (assoc :status result))))
+            (is (equal "Example Domain Content" (cdr (assoc :content result))))))
+      (setf (symbol-function 'uiop:run-program) old-run-program))))
+;; Covers an error reply printed by the bridge itself; the path where RUN-PROGRAM signals a condition is not exercised here.
+(test test-browser-bridge-error
+  "Verify that bridge errors are captured."
+  (let ((old-run-program (symbol-function 'uiop:run-program))
+        (mock-output "{\"status\": \"error\", \"message\": \"Page Load Timeout\"}"))
+    (unwind-protect
+        (progn
+          (setf (symbol-function 'uiop:run-program)
+                (lambda (cmd &key input output error-output)
+                  (declare (ignore cmd input output error-output))
+                  mock-output))
+
+          (let ((result (org-agent::execute-browser-command '((:url . "https://broken.com")))))
+            (is (equal "error" (cdr (assoc :status result))))
+            (is (equal "Page Load Timeout" (cdr (assoc :message result))))))
+      (setf (symbol-function 'uiop:run-program) old-run-program))))
+;; NOTE(review): presumably the "browser" entry exists because DEF-COGNITIVE-TOOL in src/playwright ran at load time -- confirm, and confirm the table is keyed by string.
+(test test-browser-tool-registration
+  "Verify that the :browser tool is correctly registered."
+  (let ((tool (gethash "browser" org-agent::*cognitive-tools*)))
+    (is (not (null tool)))
+    (is (search "High-fidelity" (org-agent::cognitive-tool-description tool)))))