Files
org-agent-contrib/org-skill-web-research.org

4.4 KiB

SKILL: Web Research Agent (Universal Literate Note)

Overview

The Web Research Agent provides high-fidelity information retrieval and serves as a bridge to web interfaces that have no API (such as Gemini Advanced), so the agent can make use of the user's existing subscriptions.

Phase A: Demand (PRD)

1. Purpose

Automate web-based information retrieval and subscription-tier AI access.

Phase B: Blueprint (PROTOCOL)

1. Architectural Intent

Implement a Lisp-to-Node bridge using Playwright for high-fidelity web interaction.

2. Semantic Interfaces

`fetch-url`

:signature `(fetch-url url &key (engine :browser)) :string`

`ask-gemini-web`

:signature `(ask-gemini-web prompt) :string`

Phase D: Build (Implementation)

Browser Logic

Headless Query Script

const { chromium } = require('playwright-extra');
const stealth = require('puppeteer-extra-plugin-stealth')();
chromium.use(stealth);

/**
 * Sends `prompt` to the Gemini web UI through a headless Chromium that uses
 * the persistent browser profile, then prints the reply text on stdout.
 *
 * Gemini renders its answer incrementally, so the reply element becomes
 * visible long before the text is complete. After the element appears, its
 * text is polled until it has stayed unchanged (and non-empty) for
 * `stablePolls` consecutive reads, or until `responseTimeoutMs` has elapsed,
 * in which case whatever text is present is printed as a best effort.
 *
 * @param {string} prompt - Text to submit; must be a non-empty string.
 * @param {object} [options]
 * @param {number} [options.pollIntervalMs=500] - Delay between reads of the reply text.
 * @param {number} [options.stablePolls=4] - Consecutive unchanged reads that count as "finished".
 * @param {number} [options.responseTimeoutMs=60000] - Budget for the reply to appear, and
 *   separately for it to settle once visible.
 * @returns {Promise<void>} Resolves after the reply has been written to stdout.
 * @throws {TypeError} If `prompt` is not a non-empty string (thrown before any browser starts).
 * @throws Any error raised by the browser automation; it is rethrown after the
 *   current page URL has been logged to stderr.
 */
async function askGemini(prompt, { pollIntervalMs = 500, stablePolls = 4, responseTimeoutMs = 60000 } = {}) {
    // Reject unusable prompts up front instead of after a full browser launch.
    if (typeof prompt !== 'string' || prompt.trim() === '') {
        throw new TypeError('askGemini: prompt must be a non-empty string');
    }

    const browser = await chromium.launchPersistentContext('/home/user/.local/share/org-agent/browser-profile', {
        headless: true,
        args: ['--disable-blink-features=AutomationControlled']
    });

    // Declared outside the try so the catch handler can report the URL, but
    // created inside it so a failing newPage() still reaches browser.close().
    let page;
    try {
        page = await browser.newPage();
        await page.goto('https://gemini.google.com/app', { waitUntil: 'networkidle', timeout: 60000 });

        const inputSelector = 'div[role="textbox"], textarea[aria-label="Prompt"], .input-area';
        await page.waitForSelector(inputSelector, { timeout: 15000 });

        await page.fill(inputSelector, prompt);
        await page.keyboard.press('Enter');

        // Wait for the reply element to show up at all.
        const responseSelector = '.model-response-text:last-child, message-content:last-child';
        await page.waitForSelector(responseSelector, { state: 'visible', timeout: responseTimeoutMs });

        // Then wait for the streamed text to stop changing before reading it.
        const deadline = Date.now() + responseTimeoutMs;
        let response = await page.innerText(responseSelector);
        let unchangedReads = 0;
        while (unchangedReads < stablePolls && Date.now() < deadline) {
            await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
            const latest = await page.innerText(responseSelector);
            const isSettled = latest === response && latest.trim() !== '';
            unchangedReads = isSettled ? unchangedReads + 1 : 0;
            response = latest;
        }

        console.log(response);
    } catch (err) {
        const url = page ? page.url() : '(no page opened)';
        console.error(`FAILED at ${url}`);
        throw err;
    } finally {
        await browser.close();
    }
}

// Command-line entry point: the prompt is the first positional argument.
const args = process.argv.slice(2);
const prompt = args[0];

// Fail fast on a missing or blank prompt rather than launching a browser
// only to have the automation reject it later.
if (typeof prompt !== 'string' || prompt.trim() === '') {
    console.error('Usage: node gemini-web.js "<prompt>"');
    process.exit(2);
}

// Any failure is reported on stderr and signalled with a non-zero exit code.
askGemini(prompt).catch(err => {
    console.error(err);
    process.exit(1);
});

Human-in-the-Loop Login Script

const { chromium } = require('playwright-extra');
const stealth = require('puppeteer-extra-plugin-stealth')();
chromium.use(stealth);

/**
 * Opens a visible (non-headless) Chromium window on the persistent browser
 * profile and navigates it to the Gemini app so the user can sign in by hand.
 *
 * Instructions for the user are printed to stdout before the browser starts.
 *
 * @returns {Promise<void>} Resolves once the navigation call has completed.
 */
async function loginGemini() {
    const instructions = [
        "Opening browser for manual Google login...",
        "Please log in, pass any captchas, wait for the Gemini chat interface to load, and then close the browser window."
    ];
    for (const line of instructions) {
        console.log(line);
    }

    const profileDir = '/home/user/.local/share/org-agent/browser-profile';
    const launchOptions = {
        headless: false,
        args: ['--disable-blink-features=AutomationControlled']
    };
    const context = await chromium.launchPersistentContext(profileDir, launchOptions);

    const tab = await context.newPage();
    await tab.goto('https://gemini.google.com/app');

    // NOTE(review): nothing here waits for the window to close; the script is
    // expected to stay alive until the user closes the browser manually —
    // confirm this holds for the Playwright version in use.
}

// Start the manual login flow; a failure is printed to stderr and the
// process exits with status 1.
loginGemini().catch((failure) => {
    console.error(failure);
    process.exit(1);
});
(in-package :org-agent)

(defun ask-gemini-web (prompt)
  "Run the Node/Playwright bridge script with PROMPT and return a string.

The script is src/gemini-web.js inside the :org-skill-web-research system
directory. When node exits with code 0 its standard output is returned
unchanged. Otherwise the printed form of a (:type :LOG :payload (:text ...))
plist is returned, whose text carries the exit code and the script's standard
error."
  (let ((script-path (namestring
                      (merge-pathnames
                       "src/gemini-web.js"
                       (asdf:system-source-directory :org-skill-web-research)))))
    ;; The command is given as a list, so PROMPT is handed to node as one
    ;; argument rather than being spliced into a command string.
    (multiple-value-bind (output error-output exit-code)
        (uiop:run-program (list "node" script-path prompt)
                          :output :string
                          :error-output :string
                          :ignore-error-status t)
      ;; EQL rather than = so that a non-integer exit code falls through to
      ;; the error branch instead of signalling a type error.
      (if (eql exit-code 0)
          output
          ;; ~S prints the message as a Lisp string literal, escaping any
          ;; double quotes or backslashes in the Node error text (browser
          ;; automation errors routinely quote selectors), so the returned
          ;; form stays well-formed.
          (format nil "(:type :LOG :payload (:text ~s))"
                  (format nil "Node Error (~a): ~a" exit-code error-output))))))

Registration

;; Registers the :skill-web-research skill via the project's DEFSKILL form.
;; NOTE(review): the exact meaning of each keyword is defined by DEFSKILL,
;; which is not visible here; the comments below describe only what the
;; supplied forms themselves do.
(defskill :skill-web-research
  :priority 60
  ;; True when the :sensor entry of CONTEXT's :payload plist is :web-search.
  :trigger (lambda (context) (eq (getf (getf context :payload) :sensor) :web-search))
  ;; Always returns NIL; CONTEXT is not used.
  :neuro (lambda (context) nil)
  ;; Passes the :prompt entry of ACTION's :payload plist to ASK-GEMINI-WEB and
  ;; returns its result; CONTEXT is not used.
  :symbolic (lambda (action context) (ask-gemini-web (getf (getf action :payload) :prompt))))