build: embed Python bridge inside web-research skill
- In accordance with the strict literate programming mandate, the standalone browser-bridge.py script has been embedded as a :tangle block directly inside the org-skill-web-research.org file.
- It is tangled dynamically into the skills directory under INSTALL_DIR, alongside its parent Lisp logic.
This commit is contained in:
@@ -130,3 +130,61 @@ loginGemini().catch(err => {
|
|||||||
:probabilistic (lambda (context) nil)
|
:probabilistic (lambda (context) nil)
|
||||||
:deterministic (lambda (action context) (ask-gemini-web (getf (getf action :payload) :prompt))))
|
:deterministic (lambda (action context) (ask-gemini-web (getf (getf action :payload) :prompt))))
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
|
*** Headless Data Extraction Bridge
|
||||||
|
#+begin_src python :tangle (expand-file-name "browser-bridge.py" (concat (or (getenv "INSTALL_DIR") ".") "/skills"))
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import base64
|
||||||
|
from playwright.sync_api import sync_playwright
|
||||||
|
|
||||||
|
def run_bridge():
    """Run one browser command read from stdin and print a JSON result.

    Stdin must contain a single JSON object with these keys:

    * ``url`` -- page to load (required).
    * ``action`` -- ``"extract_text"`` (default) or ``"screenshot"``.
    * ``selector`` -- selector handed to ``page.inner_text`` for
      ``"extract_text"``; defaults to ``"body"``.

    Exactly one JSON document is written to stdout. Its ``status`` field is
    ``"success"`` or ``"error"``. Successful results carry ``url`` plus
    either ``content`` (the extracted text) or ``screenshot_base64`` (the
    base64-encoded bytes returned by ``page.screenshot()``). Error results
    carry a ``message``.

    Input problems and Playwright failures are reported through that JSON
    document rather than raised, so the caller can always parse stdout.
    """
    known_actions = ("extract_text", "screenshot")

    # Read command from stdin
    try:
        raw_input = sys.stdin.read()
        if not raw_input:
            print(json.dumps({"status": "error", "message": "No input provided"}))
            return

        args = json.loads(raw_input)
    except Exception as e:
        # Boundary of the bridge: any read/parse failure becomes a JSON error.
        print(json.dumps({"status": "error", "message": f"Invalid JSON input: {str(e)}"}))
        return

    # Valid JSON that is not an object (list, string, number, null) has no
    # .get(); reject it here instead of crashing with AttributeError below.
    if not isinstance(args, dict):
        print(json.dumps({
            "status": "error",
            "message": "Invalid JSON input: expected a JSON object",
        }))
        return

    url = args.get("url")
    action = args.get("action", "extract_text")
    selector = args.get("selector", "body")

    if not url:
        print(json.dumps({"status": "error", "message": "No URL provided"}))
        return

    # Reject unsupported actions before paying for a browser launch and a
    # page load. The payload keeps the same keys as before (status, url,
    # message).
    if action not in known_actions:
        print(json.dumps({
            "status": "error",
            "url": url,
            "message": f"Unknown action: {action}",
        }))
        return

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page()

                # Navigate and wait for network to be idle
                page.goto(url, wait_until="networkidle")

                result = {"status": "success", "url": url}

                if action == "extract_text":
                    result["content"] = page.inner_text(selector)
                else:  # action == "screenshot"
                    screenshot_bytes = page.screenshot()
                    result["screenshot_base64"] = base64.b64encode(screenshot_bytes).decode("utf-8")
            finally:
                # Close the browser even when navigation or extraction fails.
                browser.close()

        print(json.dumps(result))

    except Exception as e:
        print(json.dumps({"status": "error", "message": f"Playwright Error: {str(e)}"}))
|
||||||
|
|
||||||
|
# Script entry point: process a single bridge command when this file is
# executed directly rather than imported.
if __name__ == "__main__":
    run_bridge()
|
||||||
|
#+end_src
|
||||||
|
|||||||
Reference in New Issue
Block a user