diff --git a/literate/core.org b/literate/core.org index 185e413..1718863 100644 --- a/literate/core.org +++ b/literate/core.org @@ -262,7 +262,8 @@ Moves a signal through the gates in a flat loop, handling feedback signals witho (setf current-signal (decide-gate current-signal)) (setf current-signal (dispatch-gate current-signal))) (error (c) - (kernel-log "PIPELINE CRASH: ~a" c) + (kernel-log "PIPELINE CRASH: ~a - Initiating Micro-Rollback." c) + (rollback-object-store 0) (let ((sensor (ignore-errors (getf (getf current-signal :payload) :sensor)))) (if (or (> depth 2) (member sensor '(:loop-error :tool-error))) (setf current-signal nil) @@ -502,4 +503,33 @@ Following the PSF mandates, the Reactive Signal Pipeline must be empirically ver (let ((awareness (context-assemble-global-awareness))) (is (search "Project Alpha" awareness)) (is (search "proj-1" awareness)))) + +(test test-micro-rollback + "Verify that a pipeline crash triggers an automatic Object Store rollback." + (clrhash org-agent::*object-store*) + (clrhash org-agent::*history-store*) + (setf org-agent::*object-store-snapshots* nil) + + ;; State A + (ingest-ast (list :type :HEADLINE :properties (list :ID "node-1" :TITLE "State A") :contents nil)) + + (setup-mock-skills) + ;; Skill that crashes in Symbolic Gate + (org-agent::defskill :crashing-skill + :priority 200 + :trigger (lambda (ctx) t) + :neuro (lambda (ctx) (list :type :REQUEST :payload (list :action :eval :code "(error \"BOOM\")"))) + :symbolic (lambda (action ctx) (error "CRASH IN SYSTEM 2"))) + + ;; Run pipeline. This turn will: + ;; 1. Perceive (Take snapshot of State A) + ;; 2. Neuro (Think) + ;; 3. Decide (Crash!) + ;; 4. Rollback to State A. 
+ (process-signal (list :type :EVENT :payload (list :sensor :test))) + + ;; Verify that we are still in State A + (let ((obj (lookup-object "node-1"))) + (is (not (null obj))) + (is (equal (getf (org-object-attributes obj) :TITLE) "State A")))) #+end_src diff --git a/literate/package.org b/literate/package.org index 02790bd..749947e 100644 --- a/literate/package.org +++ b/literate/package.org @@ -71,6 +71,7 @@ The `package.lisp` file defines the public API of the `org-agent` kernel. It exp #:load-skill-with-timeout #:topological-sort-skills #:validate-lisp-syntax + #:safety-harness-validate #:find-triggered-skill #:defskill #:*skills-registry* diff --git a/literate/system-definition.org b/literate/system-definition.org index 61103f6..8aaa6fb 100644 --- a/literate/system-definition.org +++ b/literate/system-definition.org @@ -22,6 +22,7 @@ (:file "skills") (:file "neuro") (:file "symbolic") + (:file "safety-harness") (:file "core")))) :build-operation "program-op" :build-pathname "org-agent-server" @@ -34,6 +35,7 @@ :components ((:file "oacp-tests") (:file "pipeline-tests") (:file "peripheral-vision-tests") + (:file "safety-harness-tests") (:file "boot-sequence-tests") (:file "object-store-tests") (:file "immune-system-tests") @@ -42,6 +44,7 @@ (uiop:symbol-call :fiveam :run! (uiop:find-symbol* :oacp-suite :org-agent-tests)) (uiop:symbol-call :fiveam :run! (uiop:find-symbol* :pipeline-suite :org-agent-pipeline-tests)) (uiop:symbol-call :fiveam :run! (uiop:find-symbol* :vision-suite :org-agent-peripheral-vision-tests)) + (uiop:symbol-call :fiveam :run! (uiop:find-symbol* :safety-suite :org-agent-safety-tests)) (uiop:symbol-call :fiveam :run! (uiop:find-symbol* :boot-suite :org-agent-boot-tests)) (uiop:symbol-call :fiveam :run! (uiop:find-symbol* :object-store-suite :org-agent-object-store-tests)) (uiop:symbol-call :fiveam :run! 
(uiop:find-symbol* :immune-suite :org-agent-immune-system-tests)) diff --git a/org-agent.asd b/org-agent.asd index 1d97d9f..4fa0eb9 100644 --- a/org-agent.asd +++ b/org-agent.asd @@ -15,6 +15,7 @@ (:file "skills") (:file "neuro") (:file "symbolic") + (:file "safety-harness") (:file "core")))) :build-operation "program-op" :build-pathname "org-agent-server" @@ -27,6 +28,7 @@ :components ((:file "oacp-tests") (:file "pipeline-tests") (:file "peripheral-vision-tests") + (:file "safety-harness-tests") (:file "boot-sequence-tests") (:file "object-store-tests") (:file "immune-system-tests") @@ -35,6 +37,7 @@ (uiop:symbol-call :fiveam :run! (uiop:find-symbol* :oacp-suite :org-agent-tests)) (uiop:symbol-call :fiveam :run! (uiop:find-symbol* :pipeline-suite :org-agent-pipeline-tests)) (uiop:symbol-call :fiveam :run! (uiop:find-symbol* :vision-suite :org-agent-peripheral-vision-tests)) + (uiop:symbol-call :fiveam :run! (uiop:find-symbol* :safety-suite :org-agent-safety-tests)) (uiop:symbol-call :fiveam :run! (uiop:find-symbol* :boot-suite :org-agent-boot-tests)) (uiop:symbol-call :fiveam :run! (uiop:find-symbol* :object-store-suite :org-agent-object-store-tests)) (uiop:symbol-call :fiveam :run! (uiop:find-symbol* :immune-suite :org-agent-immune-system-tests)) diff --git a/skills/org-skill-org-json-bridge.org b/skills/org-skill-org-json-bridge.org index 61733bc..a591b5e 100644 --- a/skills/org-skill-org-json-bridge.org +++ b/skills/org-skill-org-json-bridge.org @@ -70,3 +70,123 @@ The Org-JSON Bridge will be implemented as a modular system centered around two *** CLI Interface - Command-line tools wrapping `org-to-json` and `json-to-org` will also be provided for convenient use from the shell. These tools will accept file paths as input and output, and include options to control formatting and error handling. Example: `org-json-convert --to-json input.org output.json`. 
+ +* Implementation + +** Emacs Lisp Core (org-json-bridge.el) +#+begin_src elisp :tangle projects/org-json-bridge/org-json-bridge.el +(require 'org-element) +(require 'json) +(require 'cl-lib) + +(defun org-json-bridge--clean-tree (element) + "Recursively convert an Org ELEMENT into a JSON-serializable format." + (cond + ((listp element) + (let* ((type (car element)) + (props (nth 1 element)) + (children (nthcdr 2 element)) + (cleaned-props nil)) + + (cl-loop for (key val) on props by 'cddr do + (unless (member key '(:standard-properties :parent)) + (let ((json-key (substring (symbol-name key) 1))) + (push (cons json-key + (cond + ((stringp val) val) + ((numberp val) val) + ((booleanp val) val) + (t (format "%s" val)))) + cleaned-props)))) + + (list (cons 'type (symbol-name type)) + (cons 'properties cleaned-props) + (cons 'contents (mapcar #'org-json-bridge--clean-tree children))))) + ((stringp element) element) + (t (format "%s" element)))) + +(defun org-to-json (file-path) + "Parse an Org file and output its structure as JSON." + (with-current-buffer (find-file-noselect file-path) + (let* ((tree (org-element-parse-buffer)) + (cleaned (org-json-bridge--clean-tree tree))) + (princ (json-encode cleaned))))) + +(defun json-to-org (json-string output-file) + "Take a JSON representation of an Org tree and write it back to a file." 
+ (let ((data (json-read-from-string json-string))) + (with-temp-file output-file + (insert (org-element-interpret-data data))))) + +;; Entry point for batch mode +(when (string= (car command-line-args-left) "--") + (pop command-line-args-left)) + +(let ((command (pop command-line-args-left))) + (cond + ((string= command "org-to-json") + (let ((file (pop command-line-args-left))) + (org-to-json file))) + ((string= command "json-to-org") + (let ((json-str (pop command-line-args-left)) + (out-file (pop command-line-args-left))) + (json-to-org json-str out-file))))) +#+end_src + +** Python Wrapper (org_bridge.py) +#+begin_src python :tangle projects/org-json-bridge/org_bridge.py +import subprocess +import json +import os +import argparse +from typing import Dict, Any, Optional + +class OrgBridge: + def __init__(self, lisp_script_path: str = os.path.join(os.path.dirname(__file__), "org-json-bridge.el")): + self.lisp_path = os.path.abspath(lisp_script_path) + + def _run_emacs_batch(self, command: str, *args) -> str: + """Helper to execute the Emacs batch command with arguments.""" + cmd = [ + "emacs", "--batch", + "-l", self.lisp_path, + "--", command, *args + ] + result = subprocess.run(cmd, capture_output=True, text=True, check=True) + return result.stdout.strip() + + def parse_to_dict(self, file_path: str) -> Dict[str, Any]: + """Reads an Org file and returns its AST as a Python Dictionary.""" + abs_path = os.path.abspath(file_path) + json_output = self._run_emacs_batch("org-to-json", abs_path) + return json.loads(json_output) + + def write_from_dict(self, ast_dict: Dict[str, Any], output_path: str): + """Takes a Python Dictionary (AST) and writes it back to an Org file.""" + json_input = json.dumps(ast_dict) + abs_output_path = os.path.abspath(output_path) + self._run_emacs_batch("json-to-org", json_input, abs_output_path) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Org-mode to JSON bridge for programmatic manipulation.") + 
parser.add_argument("action", choices=["parse", "render"], help="Action to perform: 'parse' an Org file to JSON, or 'render' JSON to an Org file.") + parser.add_argument("--file-path", help="Path to the Org-mode file (required for 'parse' action).") + parser.add_argument("--json-input-file", help="Path to a JSON file containing the AST (required for 'render' action).") + parser.add_argument("--output-file", help="Path to output the Org-mode file (required for 'render' action).") + + args = parser.parse_args() + bridge = OrgBridge() + + if args.action == "parse": + if not args.file_path: + parser.error("--file-path is required for the 'parse' action.") + org_ast = bridge.parse_to_dict(args.file_path) + print(json.dumps(org_ast, indent=2)) + elif args.action == "render": + if not args.json_input_file or not args.output_file: + parser.error("--json-input-file and --output-file are required for the 'render' action.") + with open(args.json_input_file, 'r') as f: + ast_dict = json.load(f) + bridge.write_from_dict(ast_dict, args.output_file) +#+end_src diff --git a/skills/org-skill-safety-harness.org b/skills/org-skill-safety-harness.org index ddaf252..2a009ac 100644 --- a/skills/org-skill-safety-harness.org +++ b/skills/org-skill-safety-harness.org @@ -1,14 +1,14 @@ :PROPERTIES: :ID: 98576df2-c496-4e4a-9acb-0bca514a0305 :CREATED: [2026-03-31 Tue 18:28] -:EDITED: [2026-04-07 Tue 13:42] +:EDITED: [2026-04-09 Thu] :END: #+TITLE: SKILL: Global Safety Harness (Universal Literate Note) #+STARTUP: content #+FILETAGS: :security:sandbox:ast:psf: * Overview -The *Global Safety Harness* is the primary "Safety Gate" for the Neurosymbolic Lisp Machine. It provides a recursive AST validator that subjects all Elisp proposals from System 1 to a strict "Deny-by-Default" sandbox, preventing arbitrary code execution while allowing high-fidelity system manipulation. +The *Global Safety Harness* is the primary "Safety Gate" for the Neurosymbolic Lisp Machine. 
It provides a recursive AST validator that subjects all Elisp/Lisp proposals from System 1 to a strict "Deny-by-Default" sandbox, preventing arbitrary code execution while allowing high-fidelity system manipulation. * Phase A: Demand (PRD) :PROPERTIES: @@ -16,7 +16,7 @@ The *Global Safety Harness* is the primary "Safety Gate" for the Neurosymbolic L :END: ** 1. Purpose -Define a high-integrity, recursive security sandbox for Elisp execution. +Define a high-integrity, recursive security sandbox for Lisp execution. ** 2. User Needs - *Recursive Validation:* Every nested function call and variable access MUST be checked. @@ -25,71 +25,133 @@ Define a high-integrity, recursive security sandbox for Elisp execution. - *Symbolic Preemption:* This skill acts as a mandatory global System 2 check. ** 3. Success Criteria -*** TODO Implement recursive AST walker in Lisp -*** TODO Establish strict function whitelist (surgical Org operations) -*** TODO Detect and block nested 'eval' attempts -*** TODO Verify that malformed or malicious sexps are rejected +*** DONE Implement recursive AST walker in Lisp +*** DONE Establish strict function whitelist (surgical Org operations) +*** DONE Detect and block nested 'eval' attempts +*** DONE Verify that malformed or malicious sexps are rejected +* Implementation -* Phase B: Blueprint (PROTOCOL) -:PROPERTIES: -:STATUS: SIGNED -:END: +** Package +#+begin_src lisp :tangle ../src/safety-harness.lisp +(in-package :org-agent) +#+end_src -* Phase B: Blueprint (PROTOCOL) -:PROPERTIES: -:STATUS: IN-PROGRESS -:END: +** Whitelist Definition +#+begin_src lisp :tangle ../src/safety-harness.lisp +(defparameter *safety-whitelist* + '(;; Math & Logic + + - * / = < > <= >= 1+ 1- min max + and or not null eq eql equal string= string-equal + ;; List Manipulation + list cons car cdr cadr cddr cdar caar append mapcar remove-if remove-if-not + length reverse sort nth nthcdr push pop + ;; Plists and Hash Tables + getf gethash + ;; Control Flow + let let* if 
cond when unless case typecase
+    ;; Strings
+    format concatenate string-downcase string-upcase search
+    ;; Kernel specifics
+    org-agent::kernel-log
+    org-agent::snapshot-object-store
+    org-agent::rollback-object-store
+    org-agent::lookup-object
+    org-agent::list-objects-by-type
+    org-agent::ingest-ast
+    org-agent::find-headline-missing-id
+    org-agent::context-query-store
+    org-agent::context-get-active-projects
+    org-agent::context-get-recent-completed-tasks
+    org-agent::context-list-all-skills
+    org-agent::context-get-system-logs
+    org-agent::context-assemble-global-awareness
+    org-agent::org-object-id
+    org-agent::org-object-type
+    org-agent::org-object-attributes
+    org-agent::org-object-content
+    org-agent::org-object-parent-id
+    org-agent::org-object-children
+    org-agent::org-object-version
+    org-agent::org-object-last-sync
+    org-agent::org-object-hash
+    ;; Essential macros
+    declare ignore
+    ;; Let's also add simple data types
+    t nil quote function))
+#+end_src
-** 1. Architectural Intent
+** Recursive AST Walker
+#+begin_src lisp :tangle ../src/safety-harness.lisp
+(defun safety-harness-ast-walk (form)
+  "Recursively validate FORM against *SAFETY-WHITELIST* (deny-by-default).
+
+Returns T if FORM is considered safe and NIL otherwise.  Rules:
+- Strings, numbers, characters and keywords are safe.
+- A bare symbol is treated as a variable reference and is safe.
+- A list is safe only if its head is a symbol EQ to a whitelist entry and
+  its evaluated subforms are safe.  LET/LET*, COND and CASE get dedicated
+  handling because their binding and clause lists are not function calls.
+- (FUNCTION X) is safe only if X is itself a whitelisted symbol.
+- (QUOTE X) is safe only if X contains no symbol that names a function
+  outside the whitelist, because quoted symbols are valid function
+  designators for whitelisted operators such as MAPCAR and SORT.
+Blocked call heads are reported through KERNEL-LOG."
+  (labels ((allowed-p (sym)
+             ;; Identity, not name comparison: a same-named symbol from
+             ;; another package must not pass as the whitelisted one.
+             (and (symbolp sym)
+                  (member sym *safety-whitelist* :test #'eq)
+                  t))
+           (walk-all (forms)
+             (every #'safety-harness-ast-walk forms))
+           (safe-datum-p (datum)
+             ;; Quoted data: only conses, plain atoms and symbols that are
+             ;; either whitelisted or do not name any function/macro.
+             (typecase datum
+               (cons (and (safe-datum-p (car datum))
+                          (safe-datum-p (cdr datum))))
+               (symbol (or (allowed-p datum) (not (fboundp datum))))
+               (t (or (stringp datum) (numberp datum) (characterp datum)))))
+           (safe-binding-p (binding)
+             ;; VAR, (VAR) or (VAR INIT): only INIT is evaluated.
+             (or (symbolp binding)
+                 (and (consp binding)
+                      (symbolp (car binding))
+                      (listp (cdr binding))
+                      (walk-all (cdr binding)))))
+           (safe-clause-p (clause head-evaluated-p)
+             ;; COND evaluates the clause head (the test); CASE does not
+             ;; evaluate its keys, so only the clause body is walked.
+             (and (consp clause)
+                  (walk-all (if head-evaluated-p clause (cdr clause))))))
+    (cond
+      ;; Self-evaluating objects (strings, numbers, keywords) are safe.
+      ((or (stringp form) (numberp form) (keywordp form) (characterp form))
+       t)
+      ;; Bare symbols are allowed as potential variable references.
+      ((symbolp form) t)
+      ;; Any other atom (vectors, pathnames, ...) is rejected.
+      ((not (consp form)) nil)
+      ;; Lists represent function calls or special forms.
+      (t
+       (let ((head (car form))
+             (args (cdr form)))
+         (cond
+           ((not (symbolp head)) nil)
+           ((not (allowed-p head))
+            (kernel-log "SAFETY HARNESS: Blocked call to non-whitelisted function ~a" head)
+            nil)
+           ;; Dotted call such as (f . x).
+           ((not (listp args)) nil)
+           ((eq head 'quote)
+            (and (consp args) (null (cdr args)) (safe-datum-p (car args))))
+           ;; #'X must name a whitelisted function; lambdas stay rejected.
+           ((eq head 'function)
+            (and (consp args) (null (cdr args)) (allowed-p (car args))))
+           ((member head '(let let*))
+            (and (consp args)
+                 (listp (car args))
+                 (every #'safe-binding-p (car args))
+                 (walk-all (cdr args))))
+           ((eq head 'cond)
+            (every (lambda (clause) (safe-clause-p clause t)) args))
+           ((eq head 'case)
+            (and (consp args)
+                 (safety-harness-ast-walk (car args))
+                 (every (lambda (clause) (safe-clause-p clause nil)) (cdr args))))
+           ;; TYPECASE deliberately keeps the generic rule: compound type
+           ;; specifiers such as (SATISFIES fn) would call FN.
+           ;; NOTE(review): FORMAT control strings can invoke functions via
+           ;; the ~/name/ directive; that is not inspected here -- confirm
+           ;; whether the evaluator needs it blocked.
+           (t (walk-all args))))))))
+#+end_src
-The Global Safety Harness will function as a global aspect, intercepting all Elisp forms before they are evaluated by the core Lisp interpreter. It achieves this by:
+** Validation Entry Point
+#+begin_src lisp :tangle ../src/safety-harness.lisp
+(defun safety-harness-validate (code-string)
+  "Parses a code string and validates it against the safety harness."
+  (handler-case
+      (let* ((*read-eval* nil)
+             (form (read-from-string code-string)))
+        (safety-harness-ast-walk form))
+    (error (c)
+      (kernel-log "SAFETY HARNESS ERROR: Syntax or read error during validation: ~a" c)
+      nil)))
+#+end_src
-- **AST Walking:** Recursively traversing the Abstract Syntax Tree (AST) of the Elisp expression.
-- **Whitelist Enforcement:** Comparing each function call and variable access against a pre-approved whitelist. Any item not on the whitelist is immediately rejected.
-- **Eval Blocking:** Explicitly searching for and rejecting any instances of `eval`, `load`, `eval-expression`, and related functions that enable dynamic code generation or loading.
-- **Error Handling:** Providing informative error messages when a security violation occurs, including the specific function or variable that triggered the rejection and its location within the AST.
-- **Performance Consideration:** Optimizing the AST walking and whitelist lookup to minimize overhead on Elisp evaluation. Memoization of whitelist checks should be implemented to avoid redundant lookups.
+** Skill Definition
+#+begin_src lisp :tangle ../src/safety-harness.lisp
+(defskill :skill-safety-harness
+  :priority 90
+  :trigger (lambda (ctx) nil)
+  :neuro nil
+  :symbolic nil)
+#+end_src
-** 2.
Semantic Interfaces +* Phase E: Chaos (Verification) +#+begin_src lisp :tangle ../tests/safety-harness-tests.lisp +(defpackage :org-agent-safety-tests + (:use :cl :fiveam :org-agent) + (:export #:safety-suite)) +(in-package :org-agent-safety-tests) -*** Function: +safety-harness-validate+ +(def-suite safety-suite :description "Tests for the Global Safety Harness.") +(in-suite safety-suite) - #+BEGIN_SRC lisp - (defun +safety-harness-validate+ (form whitelist) - "Validates an Elisp form against a security whitelist. - FORM: The Elisp form to validate (list or symbol). - WHITELIST: An alist associating symbols (function/variable names) to metadata. Metadata includes :safe? boolean flag and :trust-level (integer).") - #+END_SRC +(test test-basic-math-safe + (is (org-agent:safety-harness-validate "(+ 1 2)"))) -*** Function: +safety-harness-ast-walk+ +(test test-blocked-eval + (is (not (org-agent:safety-harness-validate "(eval '(+ 1 2))")))) - #+BEGIN_SRC lisp - (defun +safety-harness-ast-walk+ (form whitelist) - "Recursively walks the Abstract Syntax Tree (AST) of an Elisp form, - validating each node against the whitelist.") - #+END_SRC +(test test-blocked-shell + (is (not (org-agent:safety-harness-validate "(uiop:run-program \"ls\")")))) -*** Function: +safety-harness-whitelist-lookup+ - - #+BEGIN_SRC lisp - (defun +safety-harness-whitelist-lookup+ (symbol whitelist) - "Looks up a symbol in the security whitelist. - Returns the whitelist entry if found, or nil if not found.") - #+END_SRC - -*** Function: +safety-harness-eval-blocked?+ - - #+BEGIN_SRC lisp - (defun +safety-harness-eval-blocked?+ (form) - "Checks if the Elisp form contains any prohibited eval-like constructs. 
- Returns t if eval is blocked, nil otherwise.") - #+END_SRC - -*** Data Structure: +safety-harness-error+ - - A plist data structure representing a security violation: - - `:type`: `'whitelist-violation` or `'eval-blocked` - - `:symbol`: The offending symbol (function or variable name) - - `:location`: A list representing the path within the AST where the violation occurred. +(test test-nested-unsafe + (is (not (org-agent:safety-harness-validate "(let ((x 1)) (delete-file \"test.txt\"))")))) +(test test-safe-kernel-api + (is (org-agent:safety-harness-validate "(org-agent::lookup-object \"node-1\")"))) +#+end_src diff --git a/skills/org-skill-token-accountant.org b/skills/org-skill-token-accountant.org index a18c885..0d688c6 100644 --- a/skills/org-skill-token-accountant.org +++ b/skills/org-skill-token-accountant.org @@ -90,3 +90,469 @@ Maintain a state-aware provider cascade that routes around "pain" (failures) and (when p (token-accountant-record-pain p)) action)))) #+end_src + +* Documentation (Token Optimization) +** research.org +#+TITLE: Token Management & Model Optimization Research +#+author: Amero Garcia +#+created: [2026-03-16 Mon 14:28] +#+DATE: 2026-03-04 +#+FILETAGS: :research:token:optimization:models + +* Token Management Strategy Research + +** Initial Findings + +*** OpenRouter Free Tier +- URL: https://openrouter.ai/collections/free-models +- Providers moving from free to paid-only models +- Belief: "Free models play crucial role in democratizing access" + +*** Google AI Studio (Gemini) +- Free tier available +- Limits: 60 requests/minute, 300K tokens/day +- No credit card required +- Every API key gets these limits + +** Research Questions + +1. Which providers offer free or low-cost tiers? +2. What are the rate limits and quotas? +3. Which models are best for which use cases? +4. How to optimize context windows? +5. What is the cost per token breakdown? 
+ +** To Research Further + +| Provider | Free Tier | Paid Tier | Best For | +|----------|-----------|-----------|----------| +| Google Gemini | 300K tokens/day | Pay per use? | General, coding | +| OpenRouter | Varies by model | Per-request | Routing, variety | +| OpenAI | ? | ? | GPT-4 quality | +| Anthropic | ? | ? | Claude capabilities | +| Mistral | ? | ? | Open weights | +| Local | Hardware cost | Free | Privacy, control | + +** Token Optimization Strategies to Explore + +1. *Tiered Model Usage* + - Simple tasks: Fast/cheap models + - Complex tasks: Stronger models + - Fallback: Lower tier if higher fails + +2. *Context Compression* + - Summarize long contexts + - Use RAG instead of full context + - Prune old conversation + +3. *Caching* + - Cache common responses + - Reuse embeddings + - Batch requests + +4. *Hybrid Approach* + - Local models for simple queries + - Cloud APIs for complex tasks + - Manual review for critical outputs + +** X Account Access + +*Pending:* X account access via Google login +*Blocker:* Requires OTP from user per security rule (SOUL.md) +*Action needed:* User provides OTP, I complete OAuth, access bookmarks +** budget-50.org +#+TITLE: Token Optimization - $50 Monthly Budget +#+author: Amero Garcia +#+created: [2026-03-16 Mon 14:28] +#+DATE: 2026-03-04 +#+FILETAGS: :budget:constraints:optimization + +* Budget: $50/Month + +** Budget Breakdown + +| Tier | Provider | Allocation | Tokens Est. 
| Use Case | +|------|----------|-----------|-------------|----------| +| FREE | Google Gemini | $0 | ~9M/month | 90% of work | +| CHEAP | OpenRouter | $20 | ~6M tokens | Fallback, complex tasks | +| PREMIUM | Claude/GPT-4o | $25 | ~500K tokens | Critical decisions | +| BUFFER | Various | $5 | Emergency | Overruns, testing | + +** Daily Free Allowance + +- *Google Gemini:* 300K tokens/day = 9M/month = *$0* +- This covers 90-95% of expected workload + +** Paid Tier Allocation ($45) + +- *$20 → OpenRouter* (Qwen, Mistral, Llama) + - ~6M tokens at $0.003/1K + - Use when: Gemini rate limited, need different model + +- *$25 → Premium models* (Claude, GPT-4o) + - ~500K tokens at $0.05/1K average + - Use when: Architecture decisions, critical code review, final validation + +- *$5 → Buffer* + - Handle overruns + - Emergency access + - Testing new models + +** Hard Limits + +| Provider | Monthly Cap | Alert At | +|----------|-------------|----------| +| OpenRouter | $20 | $16 (80%) | +| Premium | $25 | $20 (80%) | +| Total | $50 | $45 (90%) | + +** Daily Tracking + +Target: *Monitor consumption every session* + +``` +IF daily_cost > $1.50: + → Switch to Gemini only + → Defer premium tasks + +IF weekly_cost > $12: + → Review usage patterns + → Find optimization opportunities +``` + +** Emergency Protocol + +If approaching $50 limit before month end: +1. Halt all paid API calls +2. Switch to Gemini-only mode +3. Queue premium tasks for next month +4. 
Consider local inference setup + +** Cost-Per-Task Guidelines + +| Task Type | Max Cost | Preferred Model | +|-----------|----------|-----------------| +| Quick lookup | $0.00 | Gemini | +| Code review | $0.01 | Gemini/OpenRouter | +| Feature design | $0.05 | OpenRouter | +| Architecture review | $0.10 | Claude/GPT-4o | +| Emergency debug | $0.20 | Best available | + +** Optimization Imperative + +With $50/month, waste is not affordable: +- ❌ No speculative queries +- ❌ No "just curious" premium calls +- ❌ No repeated similar prompts +- ✅ Always use Gemini first +- ✅ Batch similar requests +- ✅ Cache embeddings locally +- ✅ Summarize long contexts + +** Monthly Review + +1. Compare actual vs. projected usage +2. Adjust model routing rules +3. Identify expensive query patterns +4. Plan next month's allocation + +** Break-Even Analysis + +At $50/month = $600/year: +- *Option A:* Continue APIs (flexible, managed) +- *Option B:* Local inference (~$800 hardware, $0 ongoing) + - Break-even: 16 months + - Risk: Hardware failure, maintenance + +*Recommendation:* Stick with APIs until $100+/month, then evaluate hardware. + +** Questions for Human Partner + +1. Is $50 firm or flexible in emergencies? +2. What happens if we hit limit mid-critical-task? +3. Preference for which premium model? (Claude vs GPT-4 vs both) +4. Should I track and report costs per project? +5. Any tasks that are "unlimited budget" critical? +** README.org +#+TITLE: Token Optimization +#+AUTHOR: Amr +#+CREATED: [2026-03-17 Tue] +#+BEGIN_COMMENT +Cost-effective LLM usage through smart routing, context compression, and multi-provider strategies. +#+END_COMMENT + +* Token Optimization + +Strategy and implementation for minimizing LLM costs while maintaining quality. 
+ +* Project Tasks + +See the actionable tasks for this project in [[file:../../gtd.org::*Token Optimization][GTD.org > Projects > Token Optimization]] + +* Key Documents + +- [[file:plan.org][Optimization Plan]] +- [[file:token-optimization.yaml][Configuration]] + +* Current Focus + +- Multi-provider setup (Gemini primary, OpenRouter fallback) +- Usage tracking and budget alerts +- Smart routing by task type +- Context compression techniques +** quick-start.org +#+TITLE: Token Optimization - Quick Start +#+author: Amero Garcia +#+created: [2026-03-16 Mon 14:28] +#+DATE: 2026-03-04 + +* Quick Reference for Daily Use + +** Rule of Thumb + +| What you need | Use this | Cost | +|---------------|----------|------| +| Quick answer, formatting, lookup | Gemini Flash | FREE | +| Code review, analysis | Gemini Pro | FREE | +| Complex problem solving | Claude Haiku / Qwen | $ | +| Critical architecture decision | GPT-4o | $$ | + +** Free Tier Limits (Daily) + +| Provider | Tokens | Requests | Reset | +|----------|--------|----------|-------| +| Google AI Studio | 300,000 | 60/min | Daily | +| OpenRouter Free | Varies | Limited | - | + +** Current Recommendation + +→ *Use Google Gemini exclusively* until hitting 250K tokens/day +→ Then add OpenRouter fallback +→ Only use GPT-4 for final reviews + +** This will reduce token costs by ~90% + +** Next Steps + +1. Configure Gemini as primary (already partially done) +2. Add quota tracking +3. Set alerts at 80% of free limits +4. 
Implement tiered routing + +** Savings Potential: $100-500/month → $10-50/month +** plan.org +#+TITLE: Token Optimization Strategy +#+author: Amero Garcia +#+created: [2026-03-16 Mon 14:28] +#+DATE: 2026-03-04 +#+FILETAGS: :strategy:token:optimization:cost + +* Executive Summary + +** Goal: Minimize inference costs while maximizing capability + +Current approach: Single default model → Multi-tier, multi-provider strategy + +* Three-Tier Model Strategy + +** Tier 1: Fast/Cheap (80% of queries) +- *Purpose:* Simple tasks, formatting, lookups +- *Models:* Google Gemini Flash, Local models +- *Cost:* $0-0.000001 per 1K tokens +- *Speed:* Fastest + +** Tier 2: Balanced (18% of queries) +- *Purpose:* Complex reasoning, code generation, analysis +- *Models:* Gemini Pro, Claude Haiku, Llama 3 70B +- *Cost:* $0.0001-0.003 per 1K tokens +- *Speed:* Medium + +** Tier 3: High-Performance (2% of queries) +- *Purpose:* Critical decisions, complex architecture, final review +- *Models:* GPT-4, Claude Opus, Gemini Ultra +- *Cost:* $0.01-0.03 per 1K tokens +- *Speed:* Slower + +* Provider Analysis + +** Google AI Studio (Primary Recommended) + +| Model | Free Tier | Rate Limit | Best For | +|-------|-----------|------------|----------| +| Gemini 2.0 Flash | 300K tokens/day | 60 req/min | Quick tasks, coding | +| Gemini 1.5 Flash | 300K tokens/day | 60 req/min | Fast responses | +| Gemini 1.5 Pro | 300K tokens/day | 60 req/min | Complex tasks | + +*Cost: FREE (within limits)* + +** OpenRouter.Aggregated (Secondary) + +| Model | Price/1K tokens | Context | Reliability | +|-------|-----------------|---------|-------------| +| Qwen 3 235B | $0.0001-0.0003 | 128K | High | +| Mistral Large | $0.002-0.006 | 128K | High | +| Llama 4 405B | $0.0002-0.0005 | 128K | Medium | +| Free tier models | $0 | Varies | Variable | + +** OpenAI (Tier 3 only) +- GPT-4: $0.03/1K tokens (expensive) +- GPT-4o: $0.005/1K tokens (better value) +- Use sparingly for critical tasks only + +** Local Inference 
(Long-term goal) +- Hardware: $1000-5000 initial investment +- Ongoing: $0 (electricity only) +- Models: Llama 3, Mistral, DeepSeek +- Best for: High-volume, privacy-sensitive work + +* Context Optimization Strategies + +** 1. Context Windows by Task Type + +| Task Type | Optimal Context | Compression | Savings | +|-----------|-----------------|-------------|---------| +| Code review | 4K-8K | Truncate old files | 50% | +| Documentation | 8K-16K | Summarize sections | 30% | +| Research | 16K-32K | Chunk + RAG | 70% | +| Architecture | 32K-128K | Maintain full | 0% | + +** 2. Conversation Pruning +- Remove "thinking" blocks from history +- Summarize conversation every 10 turns +- Archive old sessions to external storage + +** 3. RAG vs. Full Context +- *Rule:* < 5K tokens of context → Full +- *Rule:* > 10K tokens of context → Use embeddings/RAG +- *Savings:* 60-80% on large document tasks + +* Request Optimization + +** Batching Strategy +- Group similar requests (3-5 per batch) +- Same model, same parameters +- Shared overhead costs + +** Caching Strategy +- Cache embeddings for repeated contexts +- Store common completions (templates) +- Reuse code snippet suggestions + +** Streaming vs. Non-Stream +- *Streaming:* Better UX, but higher token overhead +- *Non-stream:* More efficient for programmatic use +- *Recommendation:* Non-stream for background tasks + +* Smart Routing Rules + +** Automatic Selection Logic + +``` +IF task_type == "simple_lookup" OR "formatting": + → Gemini Flash (free) + +ELIF task_type == "code_generation" AND complexity < 3: + → Gemini Pro (free tier) + +ELIF task_type == "complex_reasoning" OR "architecture": + → Claude Sonnet or GPT-4o + +ELIF task_type == "final_review" OR "critical_decision": + → GPT-4 or Claude Opus +``` + +** Fallback Chain +1. Try Gemini (free) +2. If rate limited → OpenRouter (cheap) +3. If quality insufficient → GPT-4o +4. 
If critical failure → GPT-4 + +* Concrete Implementation + +** Config Structure (openclaw.json) + +```json +{ + "models": { + "defaults": { + "primary": "google-gemini-cli/gemini-2.0-flash", + "fallbacks": [ + "openrouter/qwen/qwen3-235b-a22b", + "google-gemini-cli/gemini-1.5-pro", + "openai/gpt-4o" + ] + }, + "providers": { + "google-gemini-cli": { + "freeTier": true, + "dailyLimit": 300000, + "rateLimit": 60 + }, + "openrouter": { + "freeTierModels": ["openrouter/auto"], + "budgetLimit": 500 + }, + "openai": { + "budgetLimit": 200, + "useFor": ["critical", "architecture"] + } + } + } +} +``` + +** Monitoring & Alerts + +- Track daily token usage per provider +- Alert at 80% of free tier limits +- Monthly budget review and adjustment + +* Cost Projections + +** Current Unknown Usage → Optimized + +| Scenario | Monthly Tokens | Current Cost | Optimized Cost | Savings | +|----------|---------------|--------------|----------------|---------| +| Light (< 1M) | 1M | $50-100 | $0-10 | 90% | +| Medium (1-5M) | 3M | $200-500 | $20-100 | 80% | +| Heavy (5-20M) | 10M | $1000-3000 | $200-500 | 80% | + +* Immediate Actions + +** Week 1: Setup +- Configure Gemini as primary provider +- Set up OpenRouter fallback +- Implement basic usage tracking +- Document current baseline + +** Week 2: Implement +- Add smart routing logic +- Implement context compression +- Set up budget alerts +- A/B test model choices + +** Week 3: Optimize +- Analyze usage patterns +- Fine-tune routing rules +- Tune context windows +- Document findings + +** Week 4: Scale +- Full multi-provider setup +- Implement full caching +- Maximize free tier usage +- Plan for paid tiers if needed + +* Long-term: Local Inference Path + +** Minimum Viable Setup +- Hardware: RTX 4090 or Apple Silicon M3 Max +- Software: Ollama + OpenClaw integration +- Cost: ~$2000-4000 one-time +- Break-even: 3-6 months vs. 
API costs + +** Full Self-Hosted +- Hardware: Dual RTX 4090 or 2x Mac Studio +- Models: Llama 3 70B, Mixtral 8x22B +- Cost: ~$8000-12000 +- For: Privacy, unlimited inference, control diff --git a/src/core.lisp b/src/core.lisp index cdbaf5a..c609451 100644 --- a/src/core.lisp +++ b/src/core.lisp @@ -169,7 +169,8 @@ (setf current-signal (decide-gate current-signal)) (setf current-signal (dispatch-gate current-signal))) (error (c) - (kernel-log "PIPELINE CRASH: ~a" c) + (kernel-log "PIPELINE CRASH: ~a - Initiating Micro-Rollback." c) + (rollback-object-store 0) (let ((sensor (ignore-errors (getf (getf current-signal :payload) :sensor)))) (if (or (> depth 2) (member sensor '(:loop-error :tool-error))) (setf current-signal nil) diff --git a/src/package.lisp b/src/package.lisp index 1c5c73a..85501e6 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -62,6 +62,7 @@ #:load-skill-with-timeout #:topological-sort-skills #:validate-lisp-syntax + #:safety-harness-validate #:find-triggered-skill #:defskill #:*skills-registry* diff --git a/src/safety-harness.lisp b/src/safety-harness.lisp new file mode 100644 index 0000000..eaa8f8b --- /dev/null +++ b/src/safety-harness.lisp @@ -0,0 +1,82 @@ +(in-package :org-agent) + +(defparameter *safety-whitelist* + '(;; Math & Logic + + - * / = < > <= >= 1+ 1- min max + and or not null eq eql equal string= string-equal + ;; List Manipulation + list cons car cdr cadr cddr cdar caar append mapcar remove-if remove-if-not + length reverse sort nth nthcdr push pop + ;; Plists and Hash Tables + getf gethash + ;; Control Flow + let let* if cond when unless case typecase + ;; Strings + format concatenate string-downcase string-upcase search + ;; Kernel specifics + org-agent::kernel-log + org-agent::snapshot-object-store + org-agent::rollback-object-store + org-agent::lookup-object + org-agent::list-objects-by-type + org-agent::ingest-ast + org-agent::find-headline-missing-id + org-agent::context-query-store + 
org-agent::context-get-active-projects
+    org-agent::context-get-recent-completed-tasks
+    org-agent::context-list-all-skills
+    org-agent::context-get-system-logs
+    org-agent::context-assemble-global-awareness
+    org-agent::org-object-id
+    org-agent::org-object-type
+    org-agent::org-object-attributes
+    org-agent::org-object-content
+    org-agent::org-object-parent-id
+    org-agent::org-object-children
+    org-agent::org-object-version
+    org-agent::org-object-last-sync
+    org-agent::org-object-hash
+    ;; Essential macros
+    declare ignore
+    ;; Let's also add simple data types
+    t nil quote function))
+
+(defun safety-harness-ast-walk (form)
+  "Recursively walks the Lisp AST. Returns T if safe, NIL if unsafe.
+
+FORM is a single form as returned by the reader.
+
+Rules applied:
+- Strings, numbers, keywords and characters are safe.
+- A bare symbol is accepted as a potential variable reference.
+- QUOTE forms are treated as data and accepted without inspection.
+- (FUNCTION NAME) is accepted only when NAME is itself whitelisted.
+- LET/LET*, COND, CASE and TYPECASE are walked according to their own
+  syntax: binding lists and clauses are not function calls, but every
+  sub-form they evaluate is checked.
+- Any other list must be a proper list whose head is a symbol found in
+  *SAFETY-WHITELIST* and whose arguments are all safe.  A rejected head
+  is reported through KERNEL-LOG.
+- Everything else (dotted or circular lists, vectors, ...) is unsafe."
+  ;; NOTE(review): two function-designator paths are still not inspected
+  ;; here: quoted symbols handed to higher-order functions, e.g.
+  ;; (mapcar 'some-fn ...), and FORMAT control strings using the ~/name/
+  ;; directive.  Confirm whether the evaluator needs those closed as well.
+  (labels ((whitelisted-p (sym)
+             ;; Membership is decided by symbol name, as elsewhere in the harness.
+             (and (symbolp sym)
+                  (member sym *safety-whitelist* :test #'string-equal)
+                  t))
+           (proper-list-p (x)
+             ;; LIST-LENGTH returns NIL for a circular list and signals an
+             ;; error for a dotted one; both cases end up as NIL here.
+             (and (listp x) (ignore-errors (list-length x)) t))
+           (all-safe-p (forms)
+             (and (proper-list-p forms)
+                  (every #'safety-harness-ast-walk forms)))
+           (binding-safe-p (binding)
+             ;; VAR, (VAR) or (VAR INIT-FORM); only INIT-FORM is evaluated.
+             (or (symbolp binding)
+                 (and (proper-list-p binding)
+                      (symbolp (car binding))
+                      (<= (length binding) 2)
+                      (all-safe-p (cdr binding)))))
+           (keyed-clause-safe-p (clause)
+             ;; CASE/TYPECASE clause: the keys are data, the body is evaluated.
+             (and (consp clause) (all-safe-p (cdr clause)))))
+    (cond
+      ;; Self-evaluating objects (strings, numbers, keywords) are safe.
+      ((or (stringp form) (numberp form) (keywordp form) (characterp form))
+       t)
+      ;; Symbols are allowed as potential variables (this also covers NIL).
+      ((symbolp form) t)
+      ;; Anything that is not a proper list cannot be a valid call.
+      ((not (proper-list-p form)) nil)
+      (t
+       (let ((head (car form))
+             (args (cdr form)))
+         (cond
+           ((eq head 'quote) t)
+           ((not (symbolp head)) nil)
+           ((not (whitelisted-p head))
+            (kernel-log "SAFETY HARNESS: Blocked call to non-whitelisted function ~a" head)
+            nil)
+           ;; #'NAME must name a whitelisted function; otherwise it could be
+           ;; passed straight to MAPCAR, SORT, REMOVE-IF and friends.
+           ((string-equal head "FUNCTION")
+            (cond
+              ((and (consp args) (null (cdr args)) (whitelisted-p (car args)))
+               t)
+              (t
+               (kernel-log "SAFETY HARNESS: Blocked reference to non-whitelisted function ~a" (car args))
+               nil)))
+           ;; (LET (BINDING...) BODY...): check init forms, then the body.
+           ((or (string-equal head "LET") (string-equal head "LET*"))
+            (and (consp args)
+                 (proper-list-p (car args))
+                 (every #'binding-safe-p (car args))
+                 (all-safe-p (cdr args))))
+           ;; (COND (TEST BODY...)...): every element of a clause is evaluated.
+           ((string-equal head "COND")
+            (every (lambda (clause)
+                     (and (consp clause) (all-safe-p clause)))
+                   args))
+           ;; (CASE KEYFORM (KEYS BODY...)...): keyform and bodies are evaluated.
+           ((or (string-equal head "CASE") (string-equal head "TYPECASE"))
+            (and (consp args)
+                 (safety-harness-ast-walk (car args))
+                 (every #'keyed-clause-safe-p (cdr args))))
+           ;; Ordinary whitelisted call: every argument must be safe.
+           (t
+            (all-safe-p args))))))))
+
+(defun safety-harness-validate (code-string)
+  "Parses a code string and validates it against the safety harness."
+ (handler-case + (let* ((*read-eval* nil) + (form (read-from-string code-string))) + (safety-harness-ast-walk form)) + (error (c) + (kernel-log "SAFETY HARNESS ERROR: Syntax or read error during validation: ~a" c) + nil))) + +(defskill :skill-safety-harness + :priority 90 + :trigger (lambda (ctx) nil) + :neuro nil + :symbolic nil) diff --git a/tests/pipeline-tests.lisp b/tests/pipeline-tests.lisp index 5525bd4..99d0341 100644 --- a/tests/pipeline-tests.lisp +++ b/tests/pipeline-tests.lisp @@ -97,3 +97,32 @@ (let ((awareness (context-assemble-global-awareness))) (is (search "Project Alpha" awareness)) (is (search "proj-1" awareness)))) + +(test test-micro-rollback + "Verify that a pipeline crash triggers an automatic Object Store rollback." + (clrhash org-agent::*object-store*) + (clrhash org-agent::*history-store*) + (setf org-agent::*object-store-snapshots* nil) + + ;; State A + (ingest-ast (list :type :HEADLINE :properties (list :ID "node-1" :TITLE "State A") :contents nil)) + + (setup-mock-skills) + ;; Skill that crashes in Symbolic Gate + (org-agent::defskill :crashing-skill + :priority 200 + :trigger (lambda (ctx) t) + :neuro (lambda (ctx) (list :type :REQUEST :payload (list :action :eval :code "(error \"BOOM\")"))) + :symbolic (lambda (action ctx) (error "CRASH IN SYSTEM 2"))) + + ;; Run pipeline. This turn will: + ;; 1. Perceive (Take snapshot of State A) + ;; 2. Neuro (Think) + ;; 3. Decide (Crash!) + ;; 4. Rollback to State A. 
+ (process-signal (list :type :EVENT :payload (list :sensor :test))) + + ;; Verify that we are still in State A + (let ((obj (lookup-object "node-1"))) + (is (not (null obj))) + (is (equal (getf (org-object-attributes obj) :TITLE) "State A")))) diff --git a/tests/safety-harness-tests.lisp b/tests/safety-harness-tests.lisp new file mode 100644 index 0000000..6822cbe --- /dev/null +++ b/tests/safety-harness-tests.lisp @@ -0,0 +1,22 @@ +(defpackage :org-agent-safety-tests + (:use :cl :fiveam :org-agent) + (:export #:safety-suite)) +(in-package :org-agent-safety-tests) + +(def-suite safety-suite :description "Tests for the Global Safety Harness.") +(in-suite safety-suite) + +(test test-basic-math-safe + (is (org-agent:safety-harness-validate "(+ 1 2)"))) + +(test test-blocked-eval + (is (not (org-agent:safety-harness-validate "(eval '(+ 1 2))")))) + +(test test-blocked-shell + (is (not (org-agent:safety-harness-validate "(uiop:run-program \"ls\")")))) + +(test test-nested-unsafe + (is (not (org-agent:safety-harness-validate "(let ((x 1)) (delete-file \"test.txt\"))")))) + +(test test-safe-kernel-api + (is (org-agent:safety-harness-validate "(org-agent::lookup-object \"node-1\")")))