Files
org-agent-contrib/org-skill-web-research.org
Amr Gharbeia abda622145 build: embed Python bridge inside web-research skill
- In accordance with the strict literate programming mandate, the standalone browser-bridge.py script has been embedded as a :tangle block directly inside the org-skill-web-research.org file.
- It dynamically tangles to the INSTALL_DIR's skills directory alongside its parent Lisp logic.
2026-04-27 13:16:14 -04:00

6.3 KiB

SKILL: Web Research Agent (Universal Literate Note)

Overview

The Web Research Agent provides high-fidelity information retrieval. It also serves as a bridge to web interfaces that expose no API (such as Gemini Advanced), so the agent can make use of the user's existing subscriptions.

Phase A: Demand (PRD)

1. Purpose

Automate web-based information retrieval and subscription-tier AI access.

Phase B: Blueprint (PROTOCOL)

1. Architectural Intent

Implement a Lisp-to-Node bridge that uses Playwright for high-fidelity web interaction, alongside a standalone Python Playwright bridge for headless data extraction.

2. Semantic Interfaces

`fetch-url`

:signature `(fetch-url url &key (engine :browser)) :string`

`ask-gemini-web`

:signature `(ask-gemini-web prompt) :string`

Phase D: Build (Implementation)

Browser Logic

Headless Query Script

// Chromium launcher as exported by playwright-extra, which accepts plugins via `.use()`.
const { chromium } = require('playwright-extra');
// The stealth module exports a factory; calling it yields the plugin instance.
// NOTE(review): presumably used to reduce automation fingerprinting — confirm against the plugin docs.
const stealth = require('puppeteer-extra-plugin-stealth')();
// Register the plugin on the launcher before any browser context is started.
chromium.use(stealth);

/**
 * Submits a prompt to the Gemini web UI using the persistent browser profile
 * and prints the text of the matched response element to stdout.
 *
 * @param {string} prompt - Text to submit; must be a non-empty string.
 * @returns {Promise<void>} Resolves after the response has been printed and
 *   the browser context has been closed.
 * @throws {TypeError} If `prompt` is missing, not a string, or blank.
 * @throws {Error} Any Playwright failure, rethrown after the current page URL
 *   has been logged to stderr.
 */
async function askGemini(prompt) {
    // Fail fast: without this check a missing CLI argument is only detected
    // after the browser has launched and the page has loaded.
    if (typeof prompt !== 'string' || prompt.trim() === '') {
        throw new TypeError('askGemini requires a non-empty prompt string');
    }

    const browser = await chromium.launchPersistentContext('/home/user/.local/share/opencortex/browser-profile', {
        headless: true,
        args: ['--disable-blink-features=AutomationControlled']
    });

    // Everything after a successful launch runs under `finally`, so the
    // context is released even when opening the page itself fails.
    try {
        const page = await browser.newPage();
        try {
            await page.goto('https://gemini.google.com/app', { waitUntil: 'networkidle', timeout: 60000 });

            const inputSelector = 'div[role="textbox"], textarea[aria-label="Prompt"], .input-area';
            await page.waitForSelector(inputSelector, { timeout: 15000 });

            await page.fill(inputSelector, prompt);
            await page.keyboard.press('Enter');

            // Wait for a response element to become visible, then read its text.
            const responseSelector = '.model-response-text:last-child, message-content:last-child';
            await page.waitForSelector(responseSelector, { state: 'visible', timeout: 60000 });
            const response = await page.innerText(responseSelector);
            console.log(response);
        } catch (err) {
            // Record where the flow stopped (e.g. a login redirect) before propagating.
            console.error(`FAILED at ${page.url()}`);
            throw err;
        }
    } finally {
        await browser.close();
    }
}

// CLI entry point: the first command-line argument is the prompt text.
const prompt = process.argv[2];

// Run the query; on any failure print the error and exit with status 1.
(async () => {
    try {
        await askGemini(prompt);
    } catch (failure) {
        console.error(failure);
        process.exit(1);
    }
})();

Human-in-the-Loop Login Script

// Chromium launcher as exported by playwright-extra, which accepts plugins via `.use()`.
const { chromium } = require('playwright-extra');
// The stealth module exports a factory; calling it yields the plugin instance.
// NOTE(review): presumably used to reduce automation fingerprinting — confirm against the plugin docs.
const stealth = require('puppeteer-extra-plugin-stealth')();
// Register the plugin on the launcher before any browser context is started.
chromium.use(stealth);

/**
 * Opens a visible browser window on the persistent profile so the user can
 * log in to Google/Gemini by hand, and waits until that browser context is
 * closed.
 *
 * @returns {Promise<void>} Resolves once the browser context has closed.
 * @throws {Error} If the page cannot be opened or navigation fails; the
 *   browser context is closed before the error is rethrown.
 */
async function loginGemini() {
    console.log("Opening browser for manual Google login...");
    console.log("Please log in, pass any captchas, wait for the Gemini chat interface to load, and then close the browser window.");

    const browser = await chromium.launchPersistentContext('/home/user/.local/share/opencortex/browser-profile', {
        headless: false,
        args: ['--disable-blink-features=AutomationControlled']
    });

    try {
        const page = await browser.newPage();
        await page.goto('https://gemini.google.com/app');

        // Explicitly wait for the context to close instead of relying on the
        // open browser connection to keep the process alive. `timeout: 0`
        // disables the default timeout, since a manual login can take
        // arbitrarily long.
        await browser.waitForEvent('close', { timeout: 0 });
    } catch (err) {
        // Do not leave a stray headed window behind on failure. A failure of
        // close() itself is ignored on purpose: the original error is the one
        // worth reporting to the caller.
        await browser.close().catch(() => {});
        throw err;
    }
}

loginGemini().catch(err => {
    console.error(err);
    process.exit(1);
});
(in-package :opencortex)

(defun ask-gemini-web (prompt)
  "Send PROMPT to the Gemini web UI through the Node/Playwright bridge script.

Runs `node src/gemini-web.js PROMPT`, with the script path resolved against
the source directory of the :org-skill-web-research system, and returns a
string: the script's standard output when it exits with status 0, otherwise
the printed form of a plist shaped like
  (:type :LOG :payload (:text \"Node Error (CODE): STDERR\"))."
  (let ((script-path (namestring
                      (merge-pathnames "src/gemini-web.js"
                                       (asdf:system-source-directory :org-skill-web-research)))))
    (multiple-value-bind (output error-output exit-code)
        ;; :ignore-error-status makes a non-zero exit come back as a value
        ;; instead of signalling, so it can be reported below.
        (uiop:run-program (list "node" script-path prompt)
                          :output :string
                          :error-output :string
                          :ignore-error-status t)
      ;; EQL rather than = so an unexpected non-integer exit code falls
      ;; through to the error branch instead of signalling a type error.
      (if (eql exit-code 0)
          output
          ;; ~s prints the message as a string literal, escaping any double
          ;; quotes or backslashes in stderr so the result stays well-formed.
          (format nil "(:type :LOG :payload (:text ~s))"
                  (format nil "Node Error (~a): ~a" exit-code error-output))))))

Registration

;; Registers the :skill-web-research skill via the project's DEFSKILL macro.
;; NOTE(review): the meaning of :priority 60 relative to other skills is not
;; visible here — confirm against the DEFSKILL definition.
(defskill :skill-web-research
  :priority 60
  ;; True when the :sensor entry of the context's :payload plist is :web-search.
  :trigger (lambda (context) (eq (getf (getf context :payload) :sensor) :web-search))
  ;; Always returns NIL regardless of CONTEXT.
  ;; NOTE(review): presumably means this skill has no probabilistic step — confirm.
  :probabilistic (lambda (context) nil)
  ;; Passes the :prompt entry of the action's :payload plist to ASK-GEMINI-WEB
  ;; and returns its result; CONTEXT is not used.
  :deterministic (lambda (action context) (ask-gemini-web (getf (getf action :payload) :prompt))))

Headless Data Extraction Bridge

#!/usr/bin/env python3
import sys
import json
import base64
from playwright.sync_api import sync_playwright

def run_bridge() -> None:
    """Run one browser command read as JSON from stdin; print a JSON result.

    The input must be a JSON object with these keys:
      - "url" (required): page to load.
      - "action" (optional, default "extract_text"): "extract_text" or
        "screenshot".
      - "selector" (optional, default "body"): element whose inner text is
        returned by "extract_text".

    Exactly one JSON document is written to stdout. On success it has
    "status": "success" and "url", plus "content" (extract_text) or
    "screenshot_base64" (screenshot). On failure it has "status": "error"
    and a "message". The function returns None and does not raise for
    invalid input or Playwright errors.
    """
    # Read and decode the command from stdin.
    try:
        raw_input = sys.stdin.read()
        if not raw_input:
            print(json.dumps({"status": "error", "message": "No input provided"}))
            return

        args = json.loads(raw_input)
    except Exception as e:
        print(json.dumps({"status": "error", "message": f"Invalid JSON input: {str(e)}"}))
        return

    # Valid JSON that is not an object (e.g. a list or a number) has no
    # .get(); report it as an error instead of crashing with AttributeError.
    if not isinstance(args, dict):
        print(json.dumps({"status": "error", "message": "Invalid JSON input: expected a JSON object"}))
        return

    url = args.get("url")
    action = args.get("action", "extract_text")
    selector = args.get("selector", "body")

    if not url:
        print(json.dumps({"status": "error", "message": "No URL provided"}))
        return

    # Reject unsupported actions before paying for a browser launch and a
    # page load; the error payload keeps the same keys as before.
    if action not in ("extract_text", "screenshot"):
        print(json.dumps({"status": "error", "url": url, "message": f"Unknown action: {action}"}))
        return

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page()

                # Navigate and wait for network to be idle
                page.goto(url, wait_until="networkidle")

                result = {"status": "success", "url": url}

                if action == "extract_text":
                    result["content"] = page.inner_text(selector)
                else:
                    screenshot_bytes = page.screenshot()
                    result["screenshot_base64"] = base64.b64encode(screenshot_bytes).decode("utf-8")
            finally:
                # Close the browser even when navigation or extraction fails.
                browser.close()

        # Print only after Playwright has shut down, so a failure during
        # shutdown cannot produce a second JSON document on stdout.
        print(json.dumps(result))

    except Exception as e:
        print(json.dumps({"status": "error", "message": f"Playwright Error: {str(e)}"}))

# Run the bridge only when this file is executed as a script, not on import.
if __name__ == "__main__":
    run_bridge()