From 5d48e808ae79f58be8138439e0fd1859cfb92503 Mon Sep 17 00:00:00 2001 From: Amr Gharbeia Date: Mon, 13 Apr 2026 09:03:42 -0400 Subject: [PATCH] FEAT: Add optional skills moved from core repository --- org-skill-chaos.org | 161 ++++++++ org-skill-function-calling.org | 89 +++++ org-skill-inbox-processor.org | 89 +++++ org-skill-latent-reflection.org | 103 +++++ org-skill-lisp-machine-bootstrap.org | 59 +++ org-skill-log-aggregator.org | 81 ++++ org-skill-model-explorer.org | 12 + org-skill-playwright.org | 96 +++++ org-skill-scribe.org | 4 +- org-skill-sub-agent-manager.org | 82 ++++ org-skill-token-accountant.org | 558 +++++++++++++++++++++++++++ 11 files changed, 1332 insertions(+), 2 deletions(-) create mode 100644 org-skill-chaos.org create mode 100644 org-skill-function-calling.org create mode 100644 org-skill-inbox-processor.org create mode 100644 org-skill-latent-reflection.org create mode 100644 org-skill-lisp-machine-bootstrap.org create mode 100644 org-skill-log-aggregator.org create mode 100644 org-skill-model-explorer.org create mode 100644 org-skill-playwright.org create mode 100644 org-skill-sub-agent-manager.org create mode 100644 org-skill-token-accountant.org diff --git a/org-skill-chaos.org b/org-skill-chaos.org new file mode 100644 index 0000000..0aca5fc --- /dev/null +++ b/org-skill-chaos.org @@ -0,0 +1,161 @@ +:PROPERTIES: +:ID: 9dd80d2f-ec60-4263-92d9-a50b87270dda +:CREATED: [2026-03-30 Mon 21:16] +:EDITED: [2026-04-07 Tue 13:42] +:END: +#+TITLE: SKILL: Chaos Gauntlet (Universal Literate Note) +#+STARTUP: content +#+FILETAGS: :chaos:testing:reliability:psf: +#+DEPENDS_ON: skill-shell-actuator skill-tdd-runner + +* Overview +The *Chaos Gauntlet* is an adversarial testing skill designed to ensure the system's resilience. It simulates environmental failures, malformed LLM responses, and network disruptions, forcing the harness and its skills to handle "Byzantine" conditions gracefully. 
+ +* Phase A: Demand (PRD) +:PROPERTIES: +:STATUS: FROZEN +:END: + +** 1. Purpose +Verify the system's stability and error-handling capabilities under stress. + +** 2. User Needs +- *Failure Simulation:* Ability to inject artificial delays or errors into the Harness Protocol bus. +- *Byzantine Response Testing:* Test how Deterministic Engine handles nonsensical or malicious Probabilistic Engine proposals. +- *Network Resilience:* Simulate Gitea or LLM provider timeouts. +- *Recovery Verification:* Ensure the harness can recover from a "skip-event" restart. + +* Phase D: Build (Implementation) +:PROPERTIES: +:STATUS: SIGNED +:END: + +** Chaos Injection Logic +#+begin_src lisp :tangle ../src/chaos-logic.lisp +(in-package :org-agent) + +(defun chaos-inject-error (sensor-type &optional (error-message "SYNTHETIC_CHAOS_ERROR")) + "Injects a synthetic error event into the SENSOR-TYPE pipeline. ERROR-MESSAGE is the :error text carried by the event (defaults to SYNTHETIC_CHAOS_ERROR). Returns NIL without injecting anything while the production gate is active (*chaos-enabled-p* is NIL)." + (unless *chaos-enabled-p* + (harness-log "CHAOS ERROR - Injection blocked. Production gate is ACTIVE.") + (return-from chaos-inject-error nil)) + (harness-log "CHAOS - Injecting synthetic error into ~a sensor..." sensor-type) + (inject-stimulus + `(:type :EVENT :payload (:sensor ,sensor-type :error ,error-message)))) + +(defun chaos-stress-test (action context) + "Executes a randomized stress test by injecting failures into the system." + (declare (ignore context)) + (unless *chaos-enabled-p* + (harness-log "CHAOS ERROR - Stress test blocked. 
Production gate is ACTIVE.") + (return-from chaos-stress-test "FAILURE - Production gate active.")) + (let* ((payload (getf action :payload)) + (mode (or (getf payload :mode) :random)) + (intensity (or (getf payload :intensity) 3))) + (harness-log "CHAOS - Commencing stress test (Mode: ~a, Intensity: ~a)" mode intensity) + (snapshot-object-store) + (case mode + (:random (dotimes (i intensity) + (let ((failure-type (nth (random 3) '(:test-failure :shell-timeout :llm-error)))) + (inject-stimulus + `(:type :EVENT :payload (:sensor :chaos-injection :type ,failure-type)))))) + (:shell (inject-stimulus + `(:type :EVENT :payload (:sensor :shell-response :cmd "git push" :exit-code 128 :stderr "fatal: network unreachable"))))) + (snapshot-object-store) + (format nil "SUCCESS - Chaos stress test initiated."))) + +(defun chaos-enable () + "Disables the production gate and allows chaos injection." + (setf *chaos-enabled-p* t) + (harness-log "CHAOS - Production gate DISABLED. Chaos injection is now ALLOWED.") + t) + +(defun chaos-disable () + "Enables the production gate and blocks chaos injection." + (setf *chaos-enabled-p* nil) + (harness-log "CHAOS - Production gate ENABLED. Chaos injection is now BLOCKED.") + t) +#+end_src + + +* Phase B: Blueprint (PROTOCOL) +:PROPERTIES: +:STATUS: SIGNED +:END: + +** 1. Architectural Intent +The *Chaos Gauntlet* skill is designed to be non-invasive, running primarily in a background mode. It should not interfere with normal system operation unless explicitly triggered. It is protected by a **Production Gate** (`*chaos-enabled-p*`) to prevent accidental disruptions during real work. + +- *Controlled Chaos:* Failures must be injected in a precise and controllable manner. +- *Merkle Integrity:* Every stress test triggers a Merkle snapshot before and after to allow for full-system rollback. +- *Observability:* The system's response to failures must be easily observable through logging. + +** 2. Semantic Interfaces + +*** A. 
Gate Control + +#+begin_src lisp +(defun chaos-enable () + "Disables the production gate and allows chaos injection.") + +(defun chaos-disable () + "Enables the production gate and blocks chaos injection.") +#+end_src + +*** B. Triggering Chaos + + *`chaos-trigger` Sensor:* + Events of type `:EVENT` with a `:payload` containing `(:sensor :chaos-trigger)` trigger the skill. The payload can contain a `:mode` key to specify the type of chaos to inject (e.g., `:random`, `:shell`), and an `:intensity` key to control the number of failures injected in `:random` mode. + + *Signature:* + + #+begin_src lisp + ;; Triggers the chaos skill. + (defun trigger-chaos (mode intensity) + "Triggers the chaos gauntlet with a specified mode and intensity." + (org-agent:inject-stimulus + `(:type :EVENT :payload (:sensor :chaos-trigger :mode ,mode :intensity ,intensity)))) + #+end_src + +*** C. Injecting Synthetic Errors + + *`chaos-inject-error` Function:* + Injects a synthetic error event into a specified sensor pipeline. Different sensor types will react differently to synthetic errors. + + *Signature:* + + #+begin_src lisp + ;; Injects a synthetic error into a specific sensor pipeline. + (defun chaos-inject-error (sensor-type error-message) + "Injects a specific synthetic error into a specific sensor." + (org-agent:inject-stimulus + `(:type :EVENT :payload (:sensor ,sensor-type :error ,error-message)))) + #+end_src + +*** D. Simulating Network Disruptions + + The `chaos-stress-test` function, when `mode` is `:shell`, simulates a network disruption by injecting a synthetic `:shell-response` event that reports a failed shell command (e.g., `git push` exiting with code 128 and a "network unreachable" error). + + *Signature:* (covered by existing implementation in Phase D). + +*** E. Kernel Restart Simulation + + Deliberately trigger `skip-event` to test recovery protocols. 
+ *Signature:* + + #+begin_src lisp + ;; Simulates a skip event (a full org-agent reboot) + (defun chaos-force-skip-event ()) + #+end_src + + +* Registration +#+begin_src lisp +(defskill :skill-chaos + :priority 10 ; Lower priority, used for background testing + :trigger (lambda (context) (eq (getf (getf context :payload) :sensor) :chaos-trigger)) + :neuro (lambda (context) + (let ((p (getf context :payload))) + (format nil "A chaos trigger was received (~a). Should I run a stress test?" (getf p :mode)))) + :symbolic #'chaos-stress-test) +#+end_src diff --git a/org-skill-function-calling.org b/org-skill-function-calling.org new file mode 100644 index 0000000..254825b --- /dev/null +++ b/org-skill-function-calling.org @@ -0,0 +1,89 @@ +:PROPERTIES: +:ID: 1cbff23d-aef1-412f-97a0-260302034001 +:CREATED: [2026-03-31 Tue 18:43] +:EDITED: [2026-04-07 Tue 13:42] +:END: +#+TITLE: SKILL: Native Function Calling (Universal Literate Note) +#+STARTUP: content +#+FILETAGS: :llm:tools:json-schema:reliability:psf: +#+DEPENDS_ON: id:homoiconic-memory-skill + +* Overview +The *Native Function Calling* skill provides the translation layer between the system's deterministic Lisp interfaces and the LLM's neural tool-calling capabilities. It ensures that Probabilistic Engine (the LLM) interacts with the world via structured, validated schemas rather than raw text plists, virtually eliminating "formatting hallucinations." + +* Phase A: Demand (PRD) +:PROPERTIES: +:STATUS: FROZEN +:END: + +** 1. Purpose +Define a high-reliability bridge for LLM-native "Tool Use." + +** 2. User Needs +- *Schema Generation:* Automatically convert Lisp `defun` signatures into JSON Schema tool definitions. +- *Reliable Ingress:* Parse the LLM's structured `tool_calls` response back into a valid Lisp plist. +- *Provider Agnostic:* Support schema formats for Gemini, OpenAI, and Anthropic. +- *Validation:* Ensure arguments match the required types before reaching Deterministic Engine. + +** 3. 
Success Criteria +*** TODO Lisp-to-JSON Schema conversion logic verification +*** TODO Multi-provider schema formatting (Gemini vs OpenAI) +*** TODO Response parsing from tool_call to symbolic action + + +* Phase B: Blueprint (PROTOCOL) +:PROPERTIES: +:STATUS: SIGNED +:END: + +* Phase B: Blueprint (PROTOCOL) +:PROPERTIES: +:STATUS: DRAFT +:END: + +** 1. Architectural Intent + +The core intent is to create a robust, bi-directional translation layer. This layer guarantees type safety and schema adherence between the LLM's Tool Calling mechanism and the Lisp environment. The design emphasizes clear separation of concerns: schema generation, response parsing, and provider-specific formatting. We should aim for a modular architecture that allows for easier extension to new LLM providers and new data types. The validation process must be explicit and easily auditable. Error handling is critical; parsing failures should yield informative error messages, enabling rapid debugging. + +** 2. Semantic Interfaces (Lisp Signatures) + +*** `defun json-schema-from-defun (function-name)` + - *Purpose:* Generates a JSON Schema representation from a Lisp function definition. + - *Args:* + - `function-name`: A symbol representing the name of the Lisp function. + - *Returns:* A Lisp plist representing the JSON Schema. Keys should correspond to standard JSON Schema fields (e.g., `:type`, `:properties`, `:required`). + - *Side Effects:* None. Pure function. + +*** `defun parse-tool-call-arguments (function-name arguments tool-provider)` + - *Purpose:* Parses the arguments returned by an LLM tool call into a Lisp plist. Validates the arguments against the schema generated by `json-schema-from-defun`. + - *Args:* + - `function-name`: A symbol representing the name of the Lisp function being called. + - `arguments`: A string containing the JSON arguments returned by the LLM's `tool_calls` field. 
+ - `tool-provider`: A keyword (e.g., `:openai`, `:gemini`, `:anthropic`) indicating the LLM provider. + - *Returns:* A Lisp plist representing the parsed arguments, or `nil` if parsing fails. On failure, appropriate error messages should be logged. + - *Side Effects:* May signal errors. + +*** `defun format-json-schema-for-provider (json-schema tool-provider)` + - *Purpose:* Formats the automatically generated JSON schema to the specific format required by each LLM provider. + - *Args:* + - `json-schema`: A Lisp plist containing the generic JSON schema (output of `json-schema-from-defun`). + - `tool-provider`: A keyword (e.g., `:openai`, `:gemini`, `:anthropic`) indicating the LLM provider. + - *Returns:* A Lisp plist representing the provider-specific JSON schema. + - *Side Effects:* None. Pure function. + +*** `defun validate-arguments (function-name arguments)` + - *Purpose:* Validates that the parsed arguments conform to the expected schema. + - *Args:* + - `function-name`: A symbol identifying the function being called. Used to retrieve the function definition and associated JSON schema. + - `arguments`: A Lisp plist containing the parsed arguments. + - *Returns:* `T` if validation succeeds, `NIL` if it fails. + - *Side Effects:* May signal errors if validation fails. Logs validation errors. + +** 3. Data Structures + +*** JSON Schema (Lisp Representation) + A Lisp plist mimicking the structure of a JSON Schema. Keys will generally be keywords mirroring JSON Schema vocabulary (e.g., `:type`, `:properties`, `:required`, `:description`). Values will be Lisp datatypes corresponding to the schema datatypes (e.g., symbols, strings, booleans, numbers, lists of symbols/strings/numbers). + +*** Tool Call Response + The expected format of an LLM's `tool_calls` response will be parsed using a dedicated JSON parsing library. 
`parse-tool-call-arguments` will handle the conversion to lisp datatypes based upon the `json-schema` + diff --git a/org-skill-inbox-processor.org b/org-skill-inbox-processor.org new file mode 100644 index 0000000..90a0d2f --- /dev/null +++ b/org-skill-inbox-processor.org @@ -0,0 +1,89 @@ +:PROPERTIES: +:ID: 95029300-1d11-444c-a90a-a9f2c5474ca9 +:CREATED: [2026-04-04 Sat 20:27] +:EDITED: [2026-04-07 Tue 13:42] +:END: +#+TITLE: SKILL: Enriched Inbox Processor Agent (Universal Literate Note) +#+STARTUP: content +#+FILETAGS: :inbox:processor:workflow:psf: + +* Overview +The *Enriched Inbox Processor Agent* is responsible for the daily migration of captured nodes from ~inbox.org~ to the ~daily/~ archive. It enforces a strict privacy wall for ~@personal~ content while providing deep semantic enrichment for public research. + +* Phase A: Demand (PRD) +:PROPERTIES: +:STATUS: FROZEN +:END: + +** 1. Purpose +Automate the sorting and enrichment of inbox captures. + +** 2. User Needs +- *Privacy Wall:* Headlines tagged ~@personal~ are moved **symbolically only**. No LLM processing allowed. +- *Semantic Enrichment:* For public items (non-@personal), generate: + 1. A **Summary** sub-heading (1 sentence). + 2. A **Significance** paragraph explaining the PSF use-case. + 3. A **Full Text** extraction for items tagged ~!archive~. +- *Archive-First:* ALL originals are moved to ~daily/YYYY-MM-DD.org~ based on the ~:CREATED:~ property. + +* Phase B: Blueprint (PROTOCOL) +:PROPERTIES: +:STATUS: SIGNED +:END: + +** 1. Architectural Intent +Iterate through the inbox. Use Deterministic Engine (Symbolic) to identify the tag. If ~@personal~, perform a direct move. If not, trigger Probabilistic Engine (Neuro) for enrichment. 
+ +* Phase D: Build (Implementation) + +** Helper: Privacy & Archive Checks +#+begin_src lisp :tangle ../src/processor-logic.lisp +(in-package :org-agent) +(defun inbox-is-private-p (tags) "Returns non-NIL when TAGS contains @personal (case-insensitive comparison)." + (member "@personal" tags :test #'string-equal)) + +(defun inbox-is-archive-p (tags) "Returns non-NIL when TAGS contains !archive (case-insensitive comparison)." + (member "!archive" tags :test #'string-equal)) +#+end_src + +** Neural Stage (Enrichment) +#+begin_src lisp :tangle ../src/processor-logic.lisp +(in-package :org-agent) +(defun neuro-skill-inbox-processor (context) "Asks the LLM to enrich the capture found in CONTEXT's :payload (:content, :tags). Privacy wall: returns NIL without calling the LLM when the tags include @personal. NOTE(review): confirm the harness treats a NIL neuro result as 'no proposal'." + (let* ((payload (getf context :payload)) + (content (getf payload :content)) + (tags (getf payload :tags)) + (is-archive (inbox-is-archive-p tags))) + (unless (inbox-is-private-p tags) (ask-neuro content :system-prompt + (format nil "You are the PSF Librarian. Your goal is to ENRICH this Org-mode capture. +RULES: +1. Create a '** Summary' sub-heading with a 1-sentence summary. +2. Create a '** Significance' sub-heading with a paragraph explaining why this matters to a Sovereign Lisp Machine and how it can be used. +3. ~:[~; ARCHIVE MODE: Extract the full text of the item into a '** Full Text' sub-heading, preserving Org-mode structure.~] +4. Return ONLY a Lisp plist with :summary :significance :full-text. +5. NO conversational filler." is-archive))))) +#+end_src + +** Symbolic Stage (The Physical Move) +#+begin_src lisp :tangle ../src/processor-logic.lisp +(in-package :org-agent) +(defun inbox-process-logic (action context) "On a :heartbeat stimulus, logs the inbox scan and returns a simulated completion message; returns NIL for any other sensor. The physical move is not implemented yet." + (declare (ignore action)) + (let* ((payload (getf context :payload)) + (sensor (getf payload :sensor))) + (when (eq sensor :heartbeat) + (let* ((base-dir (or (uiop:getenv "MEMEX_DIR") "/home/user/memex/")) + (inbox-path (merge-pathnames "inbox.org" (uiop:ensure-directory-pathname base-dir)))) + (harness-log "INBOX - Scanning ~a for migration..." 
(uiop:native-namestring inbox-path)) + ;; Physical move logic would go here using Org AST parsing + '(:target :system :payload (:action :message :text "Inbox processing complete (Simulation).")))))) +#+end_src + +* Registration +#+begin_src lisp +(defskill :skill-inbox-processor + :priority 100 + :trigger (lambda (context) (eq (getf (getf context :payload) :sensor) :heartbeat)) + :neuro #'neuro-skill-inbox-processor + :symbolic #'inbox-process-logic) +#+end_src diff --git a/org-skill-latent-reflection.org b/org-skill-latent-reflection.org new file mode 100644 index 0000000..486c9ca --- /dev/null +++ b/org-skill-latent-reflection.org @@ -0,0 +1,103 @@ +:PROPERTIES: +:ID: latent-reflection-skill +:CREATED: [2026-04-09 Thu] +:END: +#+TITLE: SKILL: Latent Reflection (Proactive Gardening) +#+STARTUP: content +#+FILETAGS: :memory:gardening:reflection:psf: +#+DEPENDS_ON: id:state-persistence-skill + +* Overview +The *Latent Reflection* skill utilizes the idle cycles of the heartbeat to proactively garden the Memex. By randomly sampling the knowledge graph, it surfaces forgotten connections and synthesizes new insights without direct user prompting. This mimics the human default mode network, fostering creativity and serendipity. + +* Implementation + +** Package +#+begin_src lisp :tangle ../src/latent-reflection.lisp +(in-package :org-agent) +#+end_src + +** State +We maintain an interval to prevent the agent from spamming the LLM on every 60-second heartbeat. + +#+begin_src lisp :tangle ../src/latent-reflection.lisp +(defvar *last-reflection-time* 0) +(defvar *reflection-interval* 14400) ;; 4 hours by default +#+end_src + +** Random Sampling (sample-random-memories) +Selects N random objects from the entire `*object-store*`. + +#+begin_src lisp :tangle ../src/latent-reflection.lisp +(defun sample-random-memories (count) + "Returns COUNT random objects from the object-store." 
+ (let ((keys nil) + (selected nil)) + (maphash (lambda (k v) (declare (ignore v)) (push k keys)) *object-store*) + (let ((len (length keys))) + (when (> len 0) + (dotimes (i count) + (let* ((random-key (nth (random len) keys)) + (obj (gethash random-key *object-store*))) + (when obj + (push obj selected)))))) + selected)) +#+end_src + +** Tool Registry +Allows the user to manually trigger a reflection cycle if desired. + +#+begin_src lisp :tangle ../src/latent-reflection.lisp +(def-cognitive-tool :trigger-latent-reflection "Manually triggers a proactive gardening cycle." + :parameters nil + :body (lambda (args) + (declare (ignore args)) + (setf *last-reflection-time* 0) + "Latent reflection triggered. Wait for the next heartbeat.")) +#+end_src + +** Skill Definition +Hooks into the `:heartbeat` sensor. + +#+begin_src lisp :tangle ../src/latent-reflection.lisp +(defskill :skill-latent-reflection + :priority 30 + :trigger (lambda (ctx) + (let* ((payload (getf ctx :payload)) + (sensor (getf payload :sensor)) + (now (get-universal-time))) + (if (and (eq sensor :heartbeat) + (> (- now *last-reflection-time*) *reflection-interval*)) + (progn + (harness-log "GARDENER - Initiating Latent Reflection...") + (setf *last-reflection-time* now) + t) + nil))) + :neuro (lambda (ctx) + (declare (ignore ctx)) + (let* ((memories (sample-random-memories 3)) + (context-string (format nil "LATENT REFLECTION CANDIDATES:~%"))) ; ~% emits a real newline; a CL string literal has no \n escape + (dolist (m memories) + (let ((title (or (getf (org-object-attributes m) :TITLE) "Untitled")) + (content (or (org-object-content m) ""))) + (setf context-string + (concatenate 'string context-string + (format nil "- ID: ~a | TITLE: ~a | CONTENT: ~a~%" + (org-object-id m) title content))))) + (format nil "You are the Proactive Gardener of the Memex. +I have selected 3 random notes from the knowledge graph. +Please read them and synthesize a 'Latent Reflection'. +Find hidden connections, suggest new tags, or propose a new insight that bridges them. 
+ +~a + +MANDATE: Output EXACTLY ONE Common Lisp property list starting with (:type :REQUEST). +Use the :emacs target and :insert-at-end action to write your reflection into the \"*org-agent-chat*\" buffer." + context-string))) + :symbolic (lambda (action ctx) + (declare (ignore ctx)) + ;; Approve any safe request + action)) +#+end_src + + diff --git a/org-skill-lisp-machine-bootstrap.org b/org-skill-lisp-machine-bootstrap.org new file mode 100644 index 0000000..7f81544 --- /dev/null +++ b/org-skill-lisp-machine-bootstrap.org @@ -0,0 +1,59 @@ +:PROPERTIES: +:ID: 81e3492e-0d28-416c-a67c-c7f6f114643d +:CREATED: [2026-04-07 Tue 12:57] +:EDITED: [2026-04-07 Tue 13:42] +:END: +#+TITLE: SKILL: Lisp Machine Bootstrap (Universal Literate Note) +#+STARTUP: content +#+FILETAGS: :hardware:lisp:sovereignty:fpga:psf: + +* Overview +The *Lisp Machine Bootstrap* project is the "Endgame" of the PSF. It aims to eliminate the "Unix/C Tax" by building a hardware-native Lisp machine where CAR, CDR, and CONS are primitive gates. This ensures ultimate digital sovereignty and a provably secure, homoiconic environment. + +* Phase A: Demand (PRD) +:PROPERTIES: +:STATUS: FROZEN +:END: + +** 1. Purpose +Define the requirements for a hardware environment optimized for Lisp and user sovereignty. + +** 2. User Needs +- *Hardware-Native Lisp:* ISA designed for list processing efficiency. +- *Tagged Memory:* Hardware-level safety preventing memory corruption. +- *Bootstrapping Path:* Progression from Soft Machine (Linux) to Sovereign Silicon (ASIC). +- *Transparency:* Every gate and instruction must be introspectable and documented. + +** 3. Success Criteria +*** TODO Research existing Lisp-on-FPGA implementations (Openora, etc.) 
+*** TODO Define minimum hardware-native Lisp ISA (Instruction Set Architecture) +*** TODO Draft CLOSOS-style Virtual Machine specification +*** TODO FPGA Implementation: Verilog/VHDL skeleton synthesis + +* Phase B: Blueprint (PROTOCOL) +:PROPERTIES: +:STATUS: SIGNED +:END: + +** 1. Architectural Intent +The protocol defines the interface between the high-level Lisp image and the low-level ISA/VM. + +** 2. Semantic Interfaces +#+begin_src lisp +(defun isa-dispatch (instruction opcode) + "Simulates the hardware dispatch of a Lisp-native instruction.") + +(defun memory-tag-audit (address) + "Verifies the hardware tag of a specific memory word.") +#+end_src + +* Phase D: Build (Implementation) +Current focus is on research and simulation (Phase 1: Soft Machine). + +** Simulation Interface +#+begin_src lisp +;; Soft machine simulator stubs +#+end_src + +* Phase E: Chaos (Verification) +Verification will occur via FPGA synthesis reports and VM regression tests. diff --git a/org-skill-log-aggregator.org b/org-skill-log-aggregator.org new file mode 100644 index 0000000..c106f20 --- /dev/null +++ b/org-skill-log-aggregator.org @@ -0,0 +1,81 @@ +:PROPERTIES: +:ID: 2883a21a-6b95-4f8c-a785-3c1472a994b6 +:CREATED: [2026-04-04 Sat 20:27] +:EDITED: [2026-04-07 Tue 13:42] +:END: +#+TITLE: SKILL: Log Aggregator (Universal Literate Note) +#+STARTUP: content +#+FILETAGS: :logging:observability:system:psf: + +* Overview +The *Log Aggregator* monitors and summarizes system logs to provide insights into agent behavior and system health. + +* Phase A: Demand (PRD) +:PROPERTIES: +:STATUS: FROZEN +:END: + +** 1. Purpose +Collect and summarize agent logs. + +** 2. User Needs +- *Scan:* Retrieve logs from the system. +- *Summarize:* Provide a high-level summary of recent activities. + + +* Phase B: Blueprint (PROTOCOL) +:PROPERTIES: +:STATUS: SIGNED +:END: + + +* Phase B: Blueprint (PROTOCOL) +:PROPERTIES: +:STATUS: DRAFT +:END: + +** 1. 
Architectural Intent +The Log Aggregator will employ a modular architecture, consisting of a Log Source Connector, a Summarization Engine, and a Presenter. This allows for flexibility in adapting to different log formats and presentation styles. The system will prioritize low overhead impact on the monitored system. + +** 2. Semantic Interfaces (Lisp Signatures) + +*** a. Log Source Connector + + #+BEGIN_SRC lisp + ;;; Function: fetch-logs + ;;; Description: Retrieves logs based on specified criteria. + ;;; Parameters: + ;;; :source (keyword) - Specifies the log source (e.g., :systemd, :file, :journald). + ;;; :start-time (timestamp) - Optional. The starting timestamp for the logs. + ;;; :end-time (timestamp) - Optional. The ending timestamp for the logs. + ;;; :filters (list) - Optional. A list of filters to apply to the logs (e.g., '((:level . :error) (:component . "foo"))). + ;;; Returns: A list of log entries (each entry being a plist). + (defun fetch-logs (&key source start-time end-time filters) + ...) + #+END_SRC + +*** b. Summarization Engine + + #+BEGIN_SRC lisp + ;;; Function: summarize-logs + ;;; Description: Summarizes a list of log entries. + ;;; Parameters: + ;;; :log-entries (list) - A list of log entries (plists). + ;;; :summary-type (keyword) - Specifies the type of summary (e.g., :count-by-level, :count-by-component, :recent-errors). + ;;; Returns: A summary of the logs (a plist). + (defun summarize-logs (&key log-entries summary-type) + ...) + #+END_SRC + +*** c. Presenter + + #+BEGIN_SRC lisp + ;;; Function: present-summary + ;;; Description: Presents a log summary in a human-readable format. + ;;; Parameters: + ;;; :summary (plist) - A log summary as returned by `summarize-logs`. + ;;; :format (keyword) - Specifies the output format (e.g., :text, :html). + ;;; Returns: A string containing the formatted summary. + (defun present-summary (&key summary format) + ...) 
+ #+END_SRC diff --git a/org-skill-model-explorer.org b/org-skill-model-explorer.org new file mode 100644 index 0000000..8b051c9 --- /dev/null +++ b/org-skill-model-explorer.org @@ -0,0 +1,12 @@ +:PROPERTIES: +:ID: ae49bb72-8c5d-4392-af70-4eb00d285054 +:CREATED: [2026-03-30 Mon 21:16] +:EDITED: [2026-04-07 Tue 13:42] +:END: +#+TITLE: SKILL: Model Explorer Agent (Universal Literate Note) +#+STARTUP: content +#+FILETAGS: :discovery:telemetry:psf: + +* Overview +The *Model Explorer Agent* provides dynamic introspection of the system's LLM capabilities. It intercepts specific user commands to list and describe all available models across providers, rendering them as native Org-mode tables. + diff --git a/org-skill-playwright.org b/org-skill-playwright.org new file mode 100644 index 0000000..4da91cf --- /dev/null +++ b/org-skill-playwright.org @@ -0,0 +1,96 @@ +:PROPERTIES: +:ID: playwright-bridge-skill +:CREATED: [2026-04-11 Sat 18:00] +:END: +#+TITLE: SKILL: Playwright-Python Bridge (Universal Literate Note) +#+STARTUP: content +#+FILETAGS: :intelligence:browsing:automation:psf: + +* Overview +The *Playwright Bridge* provides high-fidelity web browsing capabilities by wrapping a headless Chromium instance managed via Python. It allows the agent to interact with JavaScript-heavy applications that are inaccessible to standard HTTP clients. + +* Phase A: Demand (PRD) +:PROPERTIES: +:STATUS: SIGNED +:END: + +** 1. Purpose +Enable the agent to "see" and "read" the modern web by executing JavaScript and waiting for network idle states. + +** 2. Success Criteria +- [ ] *Interaction:* Can navigate to any URL and wait for full page rendering. +- [ ] *Extraction:* Can retrieve inner text from any CSS selector. +- [ ] *Vision:* Can take base64-encoded screenshots of rendered pages. + +* Phase B: Blueprint (PROTOCOL) +:PROPERTIES: +:STATUS: SIGNED +:END: + +** 1. Architectural Intent +Uses a "JSON Bridge" over standard I/O. 
The Lisp kernel executes a standalone Python script, passing parameters via `stdin` and receiving structured results via `stdout`. + +** 2. Semantic Interfaces +- `(:target :tool :action :call :tool "browser" :args (:url "..." :action "extract_text"))` + +* Phase D: Build (Implementation) + +** Package Context +#+begin_src lisp :tangle ../src/playwright.lisp +(in-package :org-agent) +#+end_src + +** Bridge Script Path +Calculates the location of the Python bridge script relative to the project root. + +#+begin_src lisp :tangle ../src/playwright.lisp +(defun get-browser-bridge-path () + "Returns the absolute path to the Python browser bridge script." + (let ((root (or (uiop:getenv "PROJECT_ROOT") (uiop:native-namestring (uiop:getcwd))))) + (merge-pathnames "scripts/browser-bridge.py" (uiop:ensure-directory-pathname root)))) +#+end_src + +** Execution Wrapper (execute-browser-command) +Invokes the Python bridge and parses its JSON output. + +#+begin_src lisp :tangle ../src/playwright.lisp +(defun execute-browser-command (args) + "Invokes the Playwright Python bridge with ARGS (JSON-encoded onto the script's stdin) and returns the decoded JSON output. If running or decoding signals an error, returns an alist with :status \"error\" and a :message describing the condition, so callers can always read the result with ASSOC." + (let* ((script-path (get-browser-bridge-path)) + (json-input (cl-json:encode-json-to-string args))) + (handler-case + (let ((output (uiop:run-program (list "python3" (uiop:native-namestring script-path)) + :input (make-string-input-stream json-input) + :output :string + :error-output :string))) + (cl-json:decode-json-from-string output)) + (error (c) + (list (cons :status "error") (cons :message (format nil "Bridge Execution Failed: ~a" c))))))) ; alist, not plist: the :browser tool reads it with ASSOC +#+end_src + +** Cognitive Tool: Browser +Register the high-fidelity browsing tool with the harness. + +#+begin_src lisp :tangle ../src/playwright.lisp +(def-cognitive-tool :browser + "High-fidelity web browsing via Playwright (Chromium). Supports JS rendering." 
+ ((:url :type :string :description "The target URL") + (:action :type :string :description "Action to perform: 'extract_text' or 'screenshot'") + (:selector :type :string :description "Optional CSS selector (default: 'body')")) + :body (lambda (args) + (let ((result (execute-browser-command args))) + (if (string= (cdr (assoc :status result)) "success") + (or (cdr (assoc :content result)) + (cdr (assoc :screenshot--base64 result)) + "Success (no content returned)") + (format nil "BROWSER ERROR: ~a" (cdr (assoc :message result))))))) +#+end_src + +** Registration: Skill +#+begin_src lisp :tangle ../src/playwright.lisp +(defskill :skill-playwright + :priority 150 + :trigger (lambda (ctx) (declare (ignore ctx)) nil) ; Passive tool provider + :neuro nil + :symbolic (lambda (action ctx) (declare (ignore ctx)) action)) +#+end_src diff --git a/org-skill-scribe.org b/org-skill-scribe.org index cb5a86e..a6838be 100644 --- a/org-skill-scribe.org +++ b/org-skill-scribe.org @@ -40,7 +40,7 @@ Define automated distillation, enrichment, and auditing behaviors. :END: ** 1. Architectural Intent -Uses a weekly heartbeat trigger. Employs a "Compiler" approach: System 1 (Neuro) generates synthesis proposals, System 2 (Symbolic) verifies file-system safety and tag constraints. +Uses a weekly heartbeat trigger. Employs a "Compiler" approach: Probabilistic Engine (Neuro) generates synthesis proposals, Deterministic Engine (Symbolic) verifies file-system safety and tag constraints. ** 2. Semantic Interfaces #+begin_src lisp @@ -64,7 +64,7 @@ Uses a weekly heartbeat trigger. Employs a "Compiler" approach: System 1 (Neuro) #+begin_src lisp :tangle ../projects/org-skill-scribe/src/scribe-engine.lisp (defun scribe-filter-personal (org-ast-node) "Recursively strips out any headline or content tagged with @personal. - This runs strictly in System 2 BEFORE any data is passed to System 1." + This runs strictly in Deterministic Engine BEFORE any data is passed to Probabilistic Engine." 
(let ((tags (getf (org-agent:org-object-attributes org-ast-node) :TAGS))) (when (not (member "@personal" tags :test #'string=)) org-ast-node))) diff --git a/org-skill-sub-agent-manager.org b/org-skill-sub-agent-manager.org new file mode 100644 index 0000000..db89898 --- /dev/null +++ b/org-skill-sub-agent-manager.org @@ -0,0 +1,82 @@ +:PROPERTIES: +:ID: e870d860-5db7-443a-aaf3-23cd5521b27c +:CREATED: [2026-03-31 Tue 18:28] +:EDITED: [2026-04-07 Tue 13:42] +:END: +#+TITLE: SKILL: Sub-Agent Manager (Concurrency & Parallelism) +#+STARTUP: content +#+FILETAGS: :concurrency:parallelism:threads:psf: + +* Overview +The *Sub-Agent Manager* enables the Neurosymbolic Lisp Machine to handle multiple concurrent thoughts. It allows the primary kernel to "spawn" lightweight, isolated Lisp threads (sub-agents) to perform long-running or background tasks (research, massive refactors, etc.) without blocking the main event bus. + +* Phase A: Demand (PRD) +:PROPERTIES: +:STATUS: FROZEN +:END: + +** 1. Purpose +Define the interfaces for parallel cognitive execution and thread lifecycle management. + +** 2. User Needs +- *Non-Blocking Execution:* Spawn background threads for long-running tasks. +- *Context Isolation:* Sub-agents must have their own execution context to prevent parent context poisoning. +- *Communication Loop:* Sub-agents must inject a "Return Stimulus" upon completion. +- *Observability:* Ability to list and terminate active sub-agents. + +** 3. Success Criteria +*** TODO Successful spawning of a non-blocking background thread +*** TODO Verification of context isolation (distinct local variables) +*** TODO Autonomous injection of :sub-agent-complete stimulus +*** TODO Thread safety verification using bordeaux-threads locks + + +* Phase B: Blueprint (PROTOCOL) +:PROPERTIES: +:STATUS: SIGNED +:END: + +* Phase B: Blueprint (PROTOCOL) + :PROPERTIES: + :STATUS: DRAFT + :END: + +** 1. 
Architectural Intent +The Sub-Agent Manager is designed as a facade over a thread management library (initially `bordeaux-threads`). It provides a high-level API for spawning, managing, and monitoring sub-agents. The core principle is to create isolated Lisp environments for each sub-agent, encapsulating all state and preventing interference with the main system or other sub-agents. Communication back to the main kernel occurs through a standardized `:sub-agent-complete` stimulus injected into the event bus. Thread safety, enforced with locks where necessary, is paramount. + +** 2. Semantic Interfaces (Lisp Signatures) + +*** `spawn-sub-agent (task-fn &key name)` + - *Purpose:* Creates and starts a new sub-agent thread. + - *Parameters:* + - `task-fn`: A function of no arguments that contains the code to be executed in the sub-agent. + - `name`: (optional) A symbol representing the name of the sub-agent for identification and debugging. + - *Returns:* A sub-agent object (e.g., a struct) representing the spawned thread, containing its ID, status, and other metadata. + - *Side Effects:* Creates a new thread and starts the execution of `task-fn` within it. + +*** `kill-sub-agent (sub-agent)` + - *Purpose:* Terminates a running sub-agent. + - *Parameters:* + - `sub-agent`: The sub-agent object (returned by `spawn-sub-agent`) representing the thread to terminate. + - *Returns:* `T` if the sub-agent was successfully terminated, `NIL` otherwise. + - *Side Effects:* Attempts to terminate the specified thread, potentially releasing any resources held by the sub-agent. + +*** `list-sub-agents ()` + - *Purpose:* Returns a list of all active sub-agents. + - *Parameters:* None + - *Returns:* A list of sub-agent objects, each representing a running sub-agent. + +*** `sub-agent-status (sub-agent)` + - *Purpose:* Returns the current status of a sub-agent. + - *Parameters:* + - `sub-agent`: The sub-agent object to query. 
+ - *Returns:* A symbol representing the status of the sub-agent (e.g., `:running`, `:completed`, `:terminated`, `:error`). + +*** `inject-sub-agent-completion-stimulus (result &key sub-agent)` + - *Purpose:* This PRIVATE function (not exposed directly) is called by the sub-agent to inject the result of its task into the stimulus stream. + - *Parameters:* + - `result`: The result of the sub-agent's computation. + - `sub-agent`: The current sub-agent (optional). + - *Returns:* `T` if the stimulus was injected successfully. + - *Side Effects:* Injects a `:sub-agent-complete` stimulus into the event bus. The stimulus will contain the `result` and any metadata associated with the `sub-agent` (including its name/id). The stimulus will be of the form `(:type :sub-agent-complete :result RESULT :sub-agent SUB-AGENT)`, where `RESULT` and `SUB-AGENT` stand for the actual values. + diff --git a/org-skill-token-accountant.org b/org-skill-token-accountant.org new file mode 100644 index 0000000..7129e60 --- /dev/null +++ b/org-skill-token-accountant.org @@ -0,0 +1,558 @@ +:PROPERTIES: +:ID: f3e3a6b3-8cd8-4e64-a835-5cdf5d13b75b +:CREATED: [2026-04-07 Tue 13:42] +:EDITED: [2026-04-08 Wed 11:45] +:END: +#+TITLE: SKILL: Token Accountant Agent (Universal Literate Note) +#+STARTUP: content +#+FILETAGS: :infrastructure:budget:llm:psf:economics: +#+DEPENDS_ON: id:event-orchestrator-skill + +* Overview +The *Token Accountant* is the governor of the Neural Engine. It manages the cost, reliability, and routing of LLM providers. Its primary mission is to ensure the PSF operates at maximum intelligence with minimum marginal cost by aggressively prioritizing subsidized free models when appropriate. + +* Phase A: Demand (PRD) +:PROPERTIES: +:STATUS: SIGNED +:END: + +** 1. Purpose +Autonomously manage the provider cascade and model selection to optimize for cost, speed, and reliability. + +* Phase B: Blueprint (PROTOCOL) +:PROPERTIES: +:STATUS: SIGNED +:END: + +** 1. 
Architectural Intent
+Maintain a state-aware provider cascade that routes around "pain" (failures) and dynamically selects models based on task complexity.
+
+** 2. Semantic Interfaces
+
+*** Routing and Pain Management
+#+begin_src lisp :tangle ../src/accountant-logic.lisp
+(in-package :org-agent)
+
+(defvar *provider-pain-table* (make-hash-table :test 'equal)
+  "Maps a provider keyword to the universal time until which it counts as 'pained'.")
+
+(defvar *provider-pain-penalty-seconds* 600
+  "How long, in seconds, a failed provider stays de-prioritized (default: 10 minutes).")
+
+(defun token-accountant-record-pain (provider)
+  "Marks PROVIDER as 'pained' (failed) so the cascade de-prioritizes it.
+Stores the expiry time (now + *PROVIDER-PAIN-PENALTY-SECONDS*) under PROVIDER
+in *PROVIDER-PAIN-TABLE*, then reports the event through HARNESS-LOG."
+  (setf (gethash provider *provider-pain-table*)
+        (+ (get-universal-time) *provider-pain-penalty-seconds*))
+  (harness-log "ACCOUNTANT - Provider ~a de-prioritized due to failure." provider))
+
+(defun token-accountant-get-cascade (context)
+  "Returns the provider keywords in preference order, routing around pained ones.
+Providers whose penalty has expired (or was never recorded) come first and
+still-pained providers come last; each group keeps the order of the base list.
+CONTEXT is accepted for interface compatibility but is not consulted."
+  (declare (ignore context))
+  (let ((all-providers '(:openrouter :groq :gemini-api :ollama))
+        (healthy nil)
+        (pained nil)
+        (now (get-universal-time)))
+    (dolist (p all-providers)
+      ;; A missing entry counts as expiry time 0, i.e. healthy.
+      (if (> (or (gethash p *provider-pain-table*) 0) now)
+          (push p pained)
+          (push p healthy)))
+    ;; PUSH built both lists in reverse, so restore the base order before joining.
+    (append (nreverse healthy) (nreverse pained))))
+
+(defun token-accountant-get-model-for-provider (provider &optional context)
+  "Returns the recommended model name (a string) for PROVIDER, prioritizing free/subsidized models.
+The task complexity is obtained by calling ROUTER-CLASSIFY-COMPLEXITY from the
+:ORG-AGENT.SKILLS.ORG-SKILL-ROUTER package on CONTEXT; if that call signals an
+error the complexity is NIL and the default branch of each provider is used.
+Returns NIL for providers without an entry here (e.g. :OLLAMA). Updated April 2026."
+  ;; IGNORE-ERRORS keeps model selection working when the router call fails.
+  (let ((complexity (ignore-errors (uiop:symbol-call :org-agent.skills.org-skill-router :router-classify-complexity context))))
+    (case provider
+      (:openrouter
+       (case complexity
+         (:REASONING "meta-llama/llama-3.3-70b-instruct:free") ; High fidelity, zero cost
+         (:COGNITION "qwen/qwen3.6-plus:free") ; Latest interaction, zero cost
+         (t "meta-llama/llama-3.2-3b-instruct:free"))) ; Ultra-fast reflex, zero cost
+      (:groq
+       (case complexity
+         (:REASONING "llama-3.3-70b-versatile")
+         (t "llama-3.1-8b-instant")))
+      (:gemini-api
+       "gemini-1.5-flash-latest")
+      (t nil))))
+
+(defun token-accountant-patch-kernel ()
+  "Hot-patches the harness's cascade and model selector to use our dynamic logic.
+Both variables are referenced with the double-colon qualifier so the patch
+reads correctly whether or not the harness exports them."
+  (setf org-agent::*provider-cascade* #'token-accountant-get-cascade)
+  (setf org-agent::*model-selector-fn* #'token-accountant-get-model-for-provider))
+#+end_src
+
+* Registration
+#+begin_src lisp
+(progn
+  ;; Install the dynamic cascade and model selector, then register the skill.
+  (token-accountant-patch-kernel)
+  (defskill :skill-token-accountant
+    :priority 100
+    ;; Fires only for stimuli whose payload sensor is :tool-error or :cost-audit.
+    :trigger (lambda (context)
+               (let ((sensor (getf (getf context :payload) :sensor)))
+                 (or (eq sensor :tool-error) (eq sensor :cost-audit))))
+    :neuro (lambda (context) (declare (ignore context)) nil)
+    ;; Records pain for the payload's :provider (when present) and passes ACTION through unchanged.
+    :symbolic (lambda (action context)
+                (let ((p (getf (getf context :payload) :provider)))
+                  (when p (token-accountant-record-pain p))
+                  action))))
+#+end_src
+
+* Documentation (Token Optimization)
+** research.org
+#+TITLE: Token Management & Model Optimization Research
+#+author: Amero Garcia
+#+created: [2026-03-16 Mon 14:28]
+#+DATE: 2026-03-04
+#+FILETAGS: :research:token:optimization:models
+
+* Token Management Strategy Research
+
+** Initial Findings
+
+*** OpenRouter Free Tier
+- URL: https://openrouter.ai/collections/free-models
+- Providers moving from free to paid-only models
+- Belief: "Free models play crucial role in democratizing access"
+
+*** Google AI Studio (Gemini)
+- Free tier available
+- Limits: 60 requests/minute, 300K tokens/day
+- No credit card required
+- Every API key gets these limits
+
+** 
Research Questions + +1. Which providers offer free or low-cost tiers? +2. What are the rate limits and quotas? +3. Which models are best for which use cases? +4. How to optimize context windows? +5. What is the cost per token breakdown? + +** To Research Further + +| Provider | Free Tier | Paid Tier | Best For | +|----------|-----------|-----------|----------| +| Google Gemini | 300K tokens/day | Pay per use? | General, coding | +| OpenRouter | Varies by model | Per-request | Routing, variety | +| OpenAI | ? | ? | GPT-4 quality | +| Anthropic | ? | ? | Claude capabilities | +| Mistral | ? | ? | Open weights | +| Local | Hardware cost | Free | Privacy, control | + +** Token Optimization Strategies to Explore + +1. *Tiered Model Usage* + - Simple tasks: Fast/cheap models + - Complex tasks: Stronger models + - Fallback: Lower tier if higher fails + +2. *Context Compression* + - Summarize long contexts + - Use RAG instead of full context + - Prune old conversation + +3. *Caching* + - Cache common responses + - Reuse embeddings + - Batch requests + +4. *Hybrid Approach* + - Local models for simple queries + - Cloud APIs for complex tasks + - Manual review for critical outputs + +** X Account Access + +*Pending:* X account access via Google login +*Blocker:* Requires OTP from user per security rule (SOUL.md) +*Action needed:* User provides OTP, I complete OAuth, access bookmarks +** budget-50.org +#+TITLE: Token Optimization - $50 Monthly Budget +#+author: Amero Garcia +#+created: [2026-03-16 Mon 14:28] +#+DATE: 2026-03-04 +#+FILETAGS: :budget:constraints:optimization + +* Budget: $50/Month + +** Budget Breakdown + +| Tier | Provider | Allocation | Tokens Est. 
| Use Case | +|------|----------|-----------|-------------|----------| +| FREE | Google Gemini | $0 | ~9M/month | 90% of work | +| CHEAP | OpenRouter | $20 | ~6M tokens | Fallback, complex tasks | +| PREMIUM | Claude/GPT-4o | $25 | ~500K tokens | Critical decisions | +| BUFFER | Various | $5 | Emergency | Overruns, testing | + +** Daily Free Allowance + +- *Google Gemini:* 300K tokens/day = 9M/month = *$0* +- This covers 90-95% of expected workload + +** Paid Tier Allocation ($45) + +- *$20 → OpenRouter* (Qwen, Mistral, Llama) + - ~6M tokens at $0.003/1K + - Use when: Gemini rate limited, need different model + +- *$25 → Premium models* (Claude, GPT-4o) + - ~500K tokens at $0.05/1K average + - Use when: Architecture decisions, critical code review, final validation + +- *$5 → Buffer* + - Handle overruns + - Emergency access + - Testing new models + +** Hard Limits + +| Provider | Monthly Cap | Alert At | +|----------|-------------|----------| +| OpenRouter | $20 | $16 (80%) | +| Premium | $25 | $20 (80%) | +| Total | $50 | $45 (90%) | + +** Daily Tracking + +Target: *Monitor consumption every session* + +``` +IF daily_cost > $1.50: + → Switch to Gemini only + → Defer premium tasks + +IF weekly_cost > $12: + → Review usage patterns + → Find optimization opportunities +``` + +** Emergency Protocol + +If approaching $50 limit before month end: +1. Halt all paid API calls +2. Switch to Gemini-only mode +3. Queue premium tasks for next month +4. 
Consider local inference setup + +** Cost-Per-Task Guidelines + +| Task Type | Max Cost | Preferred Model | +|-----------|----------|-----------------| +| Quick lookup | $0.00 | Gemini | +| Code review | $0.01 | Gemini/OpenRouter | +| Feature design | $0.05 | OpenRouter | +| Architecture review | $0.10 | Claude/GPT-4o | +| Emergency debug | $0.20 | Best available | + +** Optimization Imperative + +With $50/month, waste is not affordable: +- ❌ No speculative queries +- ❌ No "just curious" premium calls +- ❌ No repeated similar prompts +- ✅ Always use Gemini first +- ✅ Batch similar requests +- ✅ Cache embeddings locally +- ✅ Summarize long contexts + +** Monthly Review + +1. Compare actual vs. projected usage +2. Adjust model routing rules +3. Identify expensive query patterns +4. Plan next month's allocation + +** Break-Even Analysis + +At $50/month = $600/year: +- *Option A:* Continue APIs (flexible, managed) +- *Option B:* Local inference (~$800 hardware, $0 ongoing) + - Break-even: 16 months + - Risk: Hardware failure, maintenance + +*Recommendation:* Stick with APIs until $100+/month, then evaluate hardware. + +** Questions for Human Partner + +1. Is $50 firm or flexible in emergencies? +2. What happens if we hit limit mid-critical-task? +3. Preference for which premium model? (Claude vs GPT-4 vs both) +4. Should I track and report costs per project? +5. Any tasks that are "unlimited budget" critical? +** README.org +#+TITLE: Token Optimization +#+AUTHOR: Amr +#+CREATED: [2026-03-17 Tue] +#+BEGIN_COMMENT +Cost-effective LLM usage through smart routing, context compression, and multi-provider strategies. +#+END_COMMENT + +* Token Optimization + +Strategy and implementation for minimizing LLM costs while maintaining quality. 
+ +* Project Tasks + +See the actionable tasks for this project in [[file:../../gtd.org::*Token Optimization][GTD.org > Projects > Token Optimization]] + +* Key Documents + +- [[file:plan.org][Optimization Plan]] +- [[file:token-optimization.yaml][Configuration]] + +* Current Focus + +- Multi-provider setup (Gemini primary, OpenRouter fallback) +- Usage tracking and budget alerts +- Smart routing by task type +- Context compression techniques +** quick-start.org +#+TITLE: Token Optimization - Quick Start +#+author: Amero Garcia +#+created: [2026-03-16 Mon 14:28] +#+DATE: 2026-03-04 + +* Quick Reference for Daily Use + +** Rule of Thumb + +| What you need | Use this | Cost | +|---------------|----------|------| +| Quick answer, formatting, lookup | Gemini Flash | FREE | +| Code review, analysis | Gemini Pro | FREE | +| Complex problem solving | Claude Haiku / Qwen | $ | +| Critical architecture decision | GPT-4o | $$ | + +** Free Tier Limits (Daily) + +| Provider | Tokens | Requests | Reset | +|----------|--------|----------|-------| +| Google AI Studio | 300,000 | 60/min | Daily | +| OpenRouter Free | Varies | Limited | - | + +** Current Recommendation + +→ *Use Google Gemini exclusively* until hitting 250K tokens/day +→ Then add OpenRouter fallback +→ Only use GPT-4 for final reviews + +** This will reduce token costs by ~90% + +** Next Steps + +1. Configure Gemini as primary (already partially done) +2. Add quota tracking +3. Set alerts at 80% of free limits +4. 
Implement tiered routing + +** Savings Potential: $100-500/month → $10-50/month +** plan.org +#+TITLE: Token Optimization Strategy +#+author: Amero Garcia +#+created: [2026-03-16 Mon 14:28] +#+DATE: 2026-03-04 +#+FILETAGS: :strategy:token:optimization:cost + +* Executive Summary + +** Goal: Minimize inference costs while maximizing capability + +Current approach: Single default model → Multi-tier, multi-provider strategy + +* Three-Tier Model Strategy + +** Tier 1: Fast/Cheap (80% of queries) +- *Purpose:* Simple tasks, formatting, lookups +- *Models:* Google Gemini Flash, Local models +- *Cost:* $0-0.000001 per 1K tokens +- *Speed:* Fastest + +** Tier 2: Balanced (18% of queries) +- *Purpose:* Complex reasoning, code generation, analysis +- *Models:* Gemini Pro, Claude Haiku, Llama 3 70B +- *Cost:* $0.0001-0.003 per 1K tokens +- *Speed:* Medium + +** Tier 3: High-Performance (2% of queries) +- *Purpose:* Critical decisions, complex architecture, final review +- *Models:* GPT-4, Claude Opus, Gemini Ultra +- *Cost:* $0.01-0.03 per 1K tokens +- *Speed:* Slower + +* Provider Analysis + +** Google AI Studio (Primary Recommended) + +| Model | Free Tier | Rate Limit | Best For | +|-------|-----------|------------|----------| +| Gemini 2.0 Flash | 300K tokens/day | 60 req/min | Quick tasks, coding | +| Gemini 1.5 Flash | 300K tokens/day | 60 req/min | Fast responses | +| Gemini 1.5 Pro | 300K tokens/day | 60 req/min | Complex tasks | + +*Cost: FREE (within limits)* + +** OpenRouter.Aggregated (Secondary) + +| Model | Price/1K tokens | Context | Reliability | +|-------|-----------------|---------|-------------| +| Qwen 3 235B | $0.0001-0.0003 | 128K | High | +| Mistral Large | $0.002-0.006 | 128K | High | +| Llama 4 405B | $0.0002-0.0005 | 128K | Medium | +| Free tier models | $0 | Varies | Variable | + +** OpenAI (Tier 3 only) +- GPT-4: $0.03/1K tokens (expensive) +- GPT-4o: $0.005/1K tokens (better value) +- Use sparingly for critical tasks only + +** Local Inference 
(Long-term goal) +- Hardware: $1000-5000 initial investment +- Ongoing: $0 (electricity only) +- Models: Llama 3, Mistral, DeepSeek +- Best for: High-volume, privacy-sensitive work + +* Context Optimization Strategies + +** 1. Context Windows by Task Type + +| Task Type | Optimal Context | Compression | Savings | +|-----------|-----------------|-------------|---------| +| Code review | 4K-8K | Truncate old files | 50% | +| Documentation | 8K-16K | Summarize sections | 30% | +| Research | 16K-32K | Chunk + RAG | 70% | +| Architecture | 32K-128K | Maintain full | 0% | + +** 2. Conversation Pruning +- Remove "thinking" blocks from history +- Summarize conversation every 10 turns +- Archive old sessions to external storage + +** 3. RAG vs. Full Context +- *Rule:* < 5K tokens of context → Full +- *Rule:* > 10K tokens of context → Use embeddings/RAG +- *Savings:* 60-80% on large document tasks + +* Request Optimization + +** Batching Strategy +- Group similar requests (3-5 per batch) +- Same model, same parameters +- Shared overhead costs + +** Caching Strategy +- Cache embeddings for repeated contexts +- Store common completions (templates) +- Reuse code snippet suggestions + +** Streaming vs. Non-Stream +- *Streaming:* Better UX, but higher token overhead +- *Non-stream:* More efficient for programmatic use +- *Recommendation:* Non-stream for background tasks + +* Smart Routing Rules + +** Automatic Selection Logic + +``` +IF task_type == "simple_lookup" OR "formatting": + → Gemini Flash (free) + +ELIF task_type == "code_generation" AND complexity < 3: + → Gemini Pro (free tier) + +ELIF task_type == "complex_reasoning" OR "architecture": + → Claude Sonnet or GPT-4o + +ELIF task_type == "final_review" OR "critical_decision": + → GPT-4 or Claude Opus +``` + +** Fallback Chain +1. Try Gemini (free) +2. If rate limited → OpenRouter (cheap) +3. If quality insufficient → GPT-4o +4. 
If critical failure → GPT-4 + +* Concrete Implementation + +** Config Structure (openclaw.json) + +```json +{ + "models": { + "defaults": { + "primary": "google-gemini-cli/gemini-2.0-flash", + "fallbacks": [ + "openrouter/qwen/qwen3-235b-a22b", + "google-gemini-cli/gemini-1.5-pro", + "openai/gpt-4o" + ] + }, + "providers": { + "google-gemini-cli": { + "freeTier": true, + "dailyLimit": 300000, + "rateLimit": 60 + }, + "openrouter": { + "freeTierModels": ["openrouter/auto"], + "budgetLimit": 500 + }, + "openai": { + "budgetLimit": 200, + "useFor": ["critical", "architecture"] + } + } + } +} +``` + +** Monitoring & Alerts + +- Track daily token usage per provider +- Alert at 80% of free tier limits +- Monthly budget review and adjustment + +* Cost Projections + +** Current Unknown Usage → Optimized + +| Scenario | Monthly Tokens | Current Cost | Optimized Cost | Savings | +|----------|---------------|--------------|----------------|---------| +| Light (< 1M) | 1M | $50-100 | $0-10 | 90% | +| Medium (1-5M) | 3M | $200-500 | $20-100 | 80% | +| Heavy (5-20M) | 10M | $1000-3000 | $200-500 | 80% | + +* Immediate Actions + +** Week 1: Setup +- Configure Gemini as primary provider +- Set up OpenRouter fallback +- Implement basic usage tracking +- Document current baseline + +** Week 2: Implement +- Add smart routing logic +- Implement context compression +- Set up budget alerts +- A/B test model choices + +** Week 3: Optimize +- Analyze usage patterns +- Fine-tune routing rules +- Tune context windows +- Document findings + +** Week 4: Scale +- Full multi-provider setup +- Implement full caching +- Maximize free tier usage +- Plan for paid tiers if needed + +* Long-term: Local Inference Path + +** Minimum Viable Setup +- Hardware: RTX 4090 or Apple Silicon M3 Max +- Software: Ollama + OpenClaw integration +- Cost: ~$2000-4000 one-time +- Break-even: 3-6 months vs. 
API costs + +** Full Self-Hosted +- Hardware: Dual RTX 4090 or 2x Mac Studio +- Models: Llama 3 70B, Mixtral 8x22B +- Cost: ~$8000-12000 +- For: Privacy, unlimited inference, control