#!/usr/bin/env python3
"""Headless data-extraction bridge.

Reads a single JSON command from stdin, drives a headless Chromium page
through Playwright, and prints exactly one JSON reply on stdout.
"""
import base64
import json
import sys

# Actions the bridge knows how to perform once the page has loaded.
_ACTIONS = ("extract_text", "screenshot")


def _emit(payload):
    """Print *payload* to stdout as one JSON document."""
    print(json.dumps(payload))


def _error(message):
    """Print the standard error reply carrying *message*."""
    _emit({"status": "error", "message": message})


def run_bridge():
    """Run one browser command read from stdin and print a JSON reply.

    The request is a JSON object with these keys:

    * ``url`` (required): page to open.
    * ``action`` (optional, default ``"extract_text"``): either
      ``"extract_text"`` or ``"screenshot"``.
    * ``selector`` (optional, default ``"body"``): element whose inner text
      is returned by ``extract_text``.

    On success the reply is ``{"status": "success", "url": ...}`` plus
    ``content`` (for ``extract_text``) or ``screenshot_base64`` (for
    ``screenshot``). Every failure is reported as
    ``{"status": "error", "message": ...}``; nothing is raised to the caller.
    """
    # This is the process boundary: any failure while reading or parsing the
    # request must surface as a JSON reply, hence the broad handler.
    try:
        raw = sys.stdin.read()
        if not raw:
            _error("No input provided")
            return
        args = json.loads(raw)
    except Exception as exc:
        _error(f"Invalid JSON input: {exc}")
        return

    # json.loads accepts any JSON value; only an object supports the key
    # lookups below, so reject lists, strings, numbers, etc. explicitly.
    if not isinstance(args, dict):
        _error("Invalid JSON input: expected a JSON object")
        return

    url = args.get("url")
    action = args.get("action", "extract_text")
    selector = args.get("selector", "body")

    if not url:
        _error("No URL provided")
        return

    # Reject unsupported actions before paying for a browser launch and a
    # page load. The reply keeps the "url" key alongside the message.
    if action not in _ACTIONS:
        _emit(
            {
                "status": "error",
                "url": url,
                "message": f"Unknown action: {action}",
            }
        )
        return

    try:
        # Imported here so that a missing Playwright installation is reported
        # as a JSON error instead of a traceback at module import time.
        from playwright.sync_api import sync_playwright

        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page()

                # Navigate and wait for network to be idle
                page.goto(url, wait_until="networkidle")

                result = {"status": "success", "url": url}
                if action == "extract_text":
                    result["content"] = page.inner_text(selector)
                else:  # "screenshot" -- the only other validated action
                    screenshot_bytes = page.screenshot()
                    result["screenshot_base64"] = base64.b64encode(
                        screenshot_bytes
                    ).decode("utf-8")
            finally:
                # Close the browser even when navigation/extraction fails.
                browser.close()

        _emit(result)
    except Exception as exc:
        _error(f"Playwright Error: {exc}")


if __name__ == "__main__":
    run_bridge()