feat: implement recursive AST-walker sandbox for Lisp evaluation

This commit is contained in:
2026-04-09 16:28:31 -04:00
parent ffc2088f6d
commit 2ed6555183
12 changed files with 880 additions and 60 deletions

View File

@@ -262,7 +262,8 @@ Moves a signal through the gates in a flat loop, handling feedback signals witho
(setf current-signal (decide-gate current-signal))
(setf current-signal (dispatch-gate current-signal)))
(error (c)
(kernel-log "PIPELINE CRASH: ~a" c)
(kernel-log "PIPELINE CRASH: ~a - Initiating Micro-Rollback." c)
(rollback-object-store 0)
(let ((sensor (ignore-errors (getf (getf current-signal :payload) :sensor))))
(if (or (> depth 2) (member sensor '(:loop-error :tool-error)))
(setf current-signal nil)
@@ -502,4 +503,33 @@ Following the PSF mandates, the Reactive Signal Pipeline must be empirically ver
(let ((awareness (context-assemble-global-awareness)))
(is (search "Project Alpha" awareness))
(is (search "proj-1" awareness))))
;; Regression test for the pipeline's crash handler: an error raised while a
;; signal is being processed must leave the previously ingested object intact.
(test test-micro-rollback
"Verify that a pipeline crash triggers an automatic Object Store rollback."
;; Start from empty stores and no snapshots so the result depends only on
;; what this test ingests.
(clrhash org-agent::*object-store*)
(clrhash org-agent::*history-store*)
(setf org-agent::*object-store-snapshots* nil)
;; State A: a single headline with ID "node-1" and TITLE "State A".
(ingest-ast (list :type :HEADLINE :properties (list :ID "node-1" :TITLE "State A") :contents nil))
(setup-mock-skills)
;; Skill whose :symbolic handler always signals an error.
;; NOTE(review): :crashing-skill is registered with priority 200 and an
;; always-true trigger and is never unregistered here -- presumably it stays
;; in the registry for later tests; confirm suite isolation.
(org-agent::defskill :crashing-skill
:priority 200
:trigger (lambda (ctx) t)
:neuro (lambda (ctx) (list :type :REQUEST :payload (list :action :eval :code "(error \"BOOM\")")))
:symbolic (lambda (action ctx) (error "CRASH IN SYSTEM 2")))
;; Run pipeline. The intended sequence for this turn is:
;; 1. Perceive (Take snapshot of State A)
;; 2. Neuro (Think)
;; 3. Decide (the :symbolic handler signals an error)
;; 4. Rollback to State A.
(process-signal (list :type :EVENT :payload (list :sensor :test)))
;; Verify that we are still in State A
(let ((obj (lookup-object "node-1")))
(is (not (null obj)))
(is (equal (getf (org-object-attributes obj) :TITLE) "State A"))))
#+end_src

View File

@@ -71,6 +71,7 @@ The `package.lisp` file defines the public API of the `org-agent` kernel. It exp
#:load-skill-with-timeout
#:topological-sort-skills
#:validate-lisp-syntax
#:safety-harness-validate
#:find-triggered-skill
#:defskill
#:*skills-registry*

View File

@@ -22,6 +22,7 @@
(:file "skills")
(:file "neuro")
(:file "symbolic")
(:file "safety-harness")
(:file "core"))))
:build-operation "program-op"
:build-pathname "org-agent-server"
@@ -34,6 +35,7 @@
:components ((:file "oacp-tests")
(:file "pipeline-tests")
(:file "peripheral-vision-tests")
(:file "safety-harness-tests")
(:file "boot-sequence-tests")
(:file "object-store-tests")
(:file "immune-system-tests")
@@ -42,6 +44,7 @@
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :oacp-suite :org-agent-tests))
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :pipeline-suite :org-agent-pipeline-tests))
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :vision-suite :org-agent-peripheral-vision-tests))
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :safety-suite :org-agent-safety-tests))
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :boot-suite :org-agent-boot-tests))
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :object-store-suite :org-agent-object-store-tests))
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :immune-suite :org-agent-immune-system-tests))

View File

@@ -15,6 +15,7 @@
(:file "skills")
(:file "neuro")
(:file "symbolic")
(:file "safety-harness")
(:file "core"))))
:build-operation "program-op"
:build-pathname "org-agent-server"
@@ -27,6 +28,7 @@
:components ((:file "oacp-tests")
(:file "pipeline-tests")
(:file "peripheral-vision-tests")
(:file "safety-harness-tests")
(:file "boot-sequence-tests")
(:file "object-store-tests")
(:file "immune-system-tests")
@@ -35,6 +37,7 @@
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :oacp-suite :org-agent-tests))
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :pipeline-suite :org-agent-pipeline-tests))
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :vision-suite :org-agent-peripheral-vision-tests))
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :safety-suite :org-agent-safety-tests))
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :boot-suite :org-agent-boot-tests))
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :object-store-suite :org-agent-object-store-tests))
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :immune-suite :org-agent-immune-system-tests))

View File

@@ -70,3 +70,123 @@ The Org-JSON Bridge will be implemented as a modular system centered around two
*** CLI Interface
- Command-line tools wrapping `org-to-json` and `json-to-org` will also be provided for convenient use from the shell. These tools will accept file paths as input and output, and include options to control formatting and error handling. Example: `org-json-convert --to-json input.org output.json`.
* Implementation
** Emacs Lisp Core (org-json-bridge.el)
#+begin_src elisp :tangle projects/org-json-bridge/org-json-bridge.el
(require 'org-element)
(require 'json)
(require 'cl-lib)
(defun org-json-bridge--clean-tree (element)
  "Return a JSON-serializable copy of the Org ELEMENT tree.
A list ELEMENT becomes an alist with the keys `type', `properties'
and `contents'.  Property names lose their leading colon; the
`:standard-properties' and `:parent' properties are dropped.
String, number and boolean property values are kept as they are,
any other value is rendered with `format' \"%s\".  Children are
converted recursively.  A string ELEMENT is returned unchanged and
every other atom is rendered with `format' \"%s\"."
  (if (not (listp element))
      (if (stringp element) element (format "%s" element))
    (let ((node-type (car element))
          (plist (nth 1 element))
          (kids (nthcdr 2 element))
          (alist nil))
      ;; Walk the property list two cells at a time.  Entries are consed
      ;; onto the front, so the result is in reverse plist order.
      (while (consp plist)
        (let ((key (car plist))
              (val (cadr plist)))
          (unless (memq key '(:standard-properties :parent))
            (setq alist
                  (cons (cons (substring (symbol-name key) 1)
                              (if (or (stringp val) (numberp val) (booleanp val))
                                  val
                                (format "%s" val)))
                        alist))))
        (setq plist (cddr plist)))
      `((type . ,(symbol-name node-type))
        (properties . ,alist)
        (contents . ,(mapcar #'org-json-bridge--clean-tree kids))))))
(defun org-to-json (file-path)
  "Print the Org syntax tree of FILE-PATH as JSON.
FILE-PATH is visited with `find-file-noselect', parsed with
`org-element-parse-buffer', reduced by `org-json-bridge--clean-tree'
and the encoded result is written with `princ'."
  (let ((buffer (find-file-noselect file-path)))
    (with-current-buffer buffer
      (princ (json-encode
              (org-json-bridge--clean-tree (org-element-parse-buffer)))))))
(defun org-json-bridge--restore-tree (node)
  "Rebuild an Org element from NODE, one decoded JSON tree node.
NODE is either a string (returned unchanged) or an alist with the
keys `type', `properties' and `contents' as produced by
`org-json-bridge--clean-tree'.  The result has the
\(TYPE PROPERTIES CHILD...) shape that `org-element-interpret-data'
expects: TYPE is interned back into a symbol and every property
name gets its leading colon back."
  (if (and (consp node) (assq 'type node))
      (let ((type (intern (cdr (assq 'type node))))
            (props (cl-loop for (key . val) in (cdr (assq 'properties node))
                            nconc (list (intern (format ":%s" key)) val)))
            (children (mapcar #'org-json-bridge--restore-tree
                              (cdr (assq 'contents node)))))
        (append (list type props) children))
    node))

(defun json-to-org (json-string output-file)
  "Write the Org tree encoded in JSON-STRING to OUTPUT-FILE.
JSON-STRING must use the object layout emitted by `org-to-json'.
The decoded alist is first converted back into Org element lists;
handing the raw alist to `org-element-interpret-data' does not
work because that function dispatches on an element's type symbol.

Property values that `org-json-bridge--clean-tree' flattened to
strings come back as those strings, so the round trip is lossy
for such properties."
  ;; Pin the decoder settings so the shape handled by the restore helper
  ;; does not depend on the caller's dynamic bindings.
  (let* ((json-object-type 'alist)
         (json-array-type 'list)
         (json-key-type 'symbol)
         (data (json-read-from-string json-string)))
    (with-temp-file output-file
      (insert (org-element-interpret-data
               (org-json-bridge--restore-tree data))))))
;; Batch entry point.  Consumes the leftover command-line arguments:
;;   [--] org-to-json FILE
;;   [--] json-to-org JSON-STRING OUTPUT-FILE
;; Any other (or missing) command does nothing.
(when (string= (car command-line-args-left) "--")
  (setq command-line-args-left (cdr command-line-args-left)))
(let ((command (pop command-line-args-left)))
  (cond
   ((equal command "org-to-json")
    (org-to-json (pop command-line-args-left)))
   ((equal command "json-to-org")
    ;; `let*' keeps the pops in argument order: JSON first, then the path.
    (let* ((json-str (pop command-line-args-left))
           (out-file (pop command-line-args-left)))
      (json-to-org json-str out-file)))))
#+end_src
** Python Wrapper (org_bridge.py)
#+begin_src python :tangle projects/org-json-bridge/org_bridge.py
import subprocess
import json
import os
import argparse
from typing import Dict, Any, Optional
class OrgBridge:
    """Drive the Emacs Lisp bridge script from Python via ``emacs --batch``.

    Args:
        lisp_script_path: Path to the bridge script; defaults to
            ``org-json-bridge.el`` next to this module.
        emacs_executable: Name or path of the Emacs binary to run.
            Defaults to ``"emacs"``, i.e. whatever is found on ``PATH``.
    """

    def __init__(
        self,
        lisp_script_path: str = os.path.join(os.path.dirname(__file__), "org-json-bridge.el"),
        emacs_executable: str = "emacs",
    ) -> None:
        self.lisp_path = os.path.abspath(lisp_script_path)
        self.emacs_executable = emacs_executable

    def _run_emacs_batch(self, command: str, *args: str) -> str:
        """Run the bridge script in batch mode and return its stripped stdout.

        Raises:
            subprocess.CalledProcessError: if Emacs exits with a non-zero
                status (``check=True``); the captured stderr is available
                on the exception.
        """
        cmd = [
            self.emacs_executable, "--batch",
            "-l", self.lisp_path,
            "--", command, *args,
        ]
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
        return result.stdout.strip()

    def parse_to_dict(self, file_path: str) -> Dict[str, Any]:
        """Read an Org file and return its AST as a Python dictionary."""
        abs_path = os.path.abspath(file_path)
        json_output = self._run_emacs_batch("org-to-json", abs_path)
        return json.loads(json_output)

    def write_from_dict(self, ast_dict: Dict[str, Any], output_path: str) -> None:
        """Serialize ``ast_dict`` to JSON and have Emacs write it to ``output_path``.

        The JSON document is passed as a single command-line argument, so
        its size is bounded by the operating system's argument-length limit.
        """
        json_input = json.dumps(ast_dict)
        abs_output_path = os.path.abspath(output_path)
        self._run_emacs_batch("json-to-org", json_input, abs_output_path)
if __name__ == "__main__":
    # Command-line front end: `parse` prints an Org file's AST as JSON,
    # `render` writes an AST stored in a JSON file back out as Org.
    parser = argparse.ArgumentParser(description="Org-mode to JSON bridge for programmatic manipulation.")
    parser.add_argument("action", choices=["parse", "render"], help="Action to perform: 'parse' an Org file to JSON, or 'render' JSON to an Org file.")
    parser.add_argument("--file-path", help="Path to the Org-mode file (required for 'parse' action).")
    parser.add_argument("--json-input-file", help="Path to a JSON file containing the AST (required for 'render' action).")
    parser.add_argument("--output-file", help="Path to output the Org-mode file (required for 'render' action).")
    args = parser.parse_args()
    bridge = OrgBridge()
    if args.action == "parse":
        if not args.file_path:
            parser.error("--file-path is required for the 'parse' action.")
        org_ast = bridge.parse_to_dict(args.file_path)
        print(json.dumps(org_ast, indent=2))
    elif args.action == "render":
        if not args.json_input_file or not args.output_file:
            parser.error("--json-input-file and --output-file are required for the 'render' action.")
        # JSON interchange files are UTF-8; do not depend on the platform's
        # default text encoding when reading them.
        with open(args.json_input_file, "r", encoding="utf-8") as f:
            ast_dict = json.load(f)
        bridge.write_from_dict(ast_dict, args.output_file)
#+end_src

View File

@@ -1,14 +1,14 @@
:PROPERTIES:
:ID: 98576df2-c496-4e4a-9acb-0bca514a0305
:CREATED: [2026-03-31 Tue 18:28]
:EDITED: [2026-04-07 Tue 13:42]
:EDITED: [2026-04-09 Thu]
:END:
#+TITLE: SKILL: Global Safety Harness (Universal Literate Note)
#+STARTUP: content
#+FILETAGS: :security:sandbox:ast:psf:
* Overview
The *Global Safety Harness* is the primary "Safety Gate" for the Neurosymbolic Lisp Machine. It provides a recursive AST validator that subjects all Elisp proposals from System 1 to a strict "Deny-by-Default" sandbox, preventing arbitrary code execution while allowing high-fidelity system manipulation.
The *Global Safety Harness* is the primary "Safety Gate" for the Neurosymbolic Lisp Machine. It provides a recursive AST validator that subjects all Elisp/Lisp proposals from System 1 to a strict "Deny-by-Default" sandbox, preventing arbitrary code execution while allowing high-fidelity system manipulation.
* Phase A: Demand (PRD)
:PROPERTIES:
@@ -16,7 +16,7 @@ The *Global Safety Harness* is the primary "Safety Gate" for the Neurosymbolic L
:END:
** 1. Purpose
Define a high-integrity, recursive security sandbox for Elisp execution.
Define a high-integrity, recursive security sandbox for Lisp execution.
** 2. User Needs
- *Recursive Validation:* Every nested function call and variable access MUST be checked.
@@ -25,71 +25,133 @@ Define a high-integrity, recursive security sandbox for Elisp execution.
- *Symbolic Preemption:* This skill acts as a mandatory global System 2 check.
** 3. Success Criteria
*** TODO Implement recursive AST walker in Lisp
*** TODO Establish strict function whitelist (surgical Org operations)
*** TODO Detect and block nested 'eval' attempts
*** TODO Verify that malformed or malicious sexps are rejected
*** DONE Implement recursive AST walker in Lisp
*** DONE Establish strict function whitelist (surgical Org operations)
*** DONE Detect and block nested 'eval' attempts
*** DONE Verify that malformed or malicious sexps are rejected
* Implementation
* Phase B: Blueprint (PROTOCOL)
:PROPERTIES:
:STATUS: SIGNED
:END:
** Package
#+begin_src lisp :tangle ../src/safety-harness.lisp
(in-package :org-agent)
#+end_src
* Phase B: Blueprint (PROTOCOL)
:PROPERTIES:
:STATUS: IN-PROGRESS
:END:
** Whitelist Definition
#+begin_src lisp :tangle ../src/safety-harness.lisp
;; Operator whitelist consulted by SAFETY-HARNESS-AST-WALK.  The walker looks
;; entries up with MEMBER and :TEST #'STRING-EQUAL, so a symbol matches by
;; name (case-insensitively) rather than by package identity.
;; NOTE(review): several kernel entries (e.g. ROLLBACK-OBJECT-STORE,
;; INGEST-AST) look state-changing -- presumably intentional; confirm.
(defparameter *safety-whitelist*
'(;; Math & Logic
+ - * / = < > <= >= 1+ 1- min max
and or not null eq eql equal string= string-equal
;; List Manipulation
list cons car cdr cadr cddr cdar caar append mapcar remove-if remove-if-not
length reverse sort nth nthcdr push pop
;; Plists and Hash Tables
getf gethash
;; Control Flow
let let* if cond when unless case typecase
;; Strings
format concatenate string-downcase string-upcase search
;; Kernel specifics
org-agent::kernel-log
org-agent::snapshot-object-store
org-agent::rollback-object-store
org-agent::lookup-object
org-agent::list-objects-by-type
org-agent::ingest-ast
org-agent::find-headline-missing-id
org-agent::context-query-store
org-agent::context-get-active-projects
org-agent::context-get-recent-completed-tasks
org-agent::context-list-all-skills
org-agent::context-get-system-logs
org-agent::context-assemble-global-awareness
org-agent::org-object-id
org-agent::org-object-type
org-agent::org-object-attributes
org-agent::org-object-content
org-agent::org-object-parent-id
org-agent::org-object-children
org-agent::org-object-version
org-agent::org-object-last-sync
org-agent::org-object-hash
;; Declarations
declare ignore
;; Constants and quoting operators
t nil quote function))
#+end_src
** 1. Architectural Intent
** Recursive AST Walker
#+begin_src lisp :tangle ../src/safety-harness.lisp
(defun safety-harness-ast-walk (form)
  "Recursively walk the Lisp FORM and return T if it is safe, NIL otherwise.

Rules:
- Strings, numbers, characters and symbols (treated as variable
  references) are safe.  Any other atom is rejected.
- A list is a call.  Its head must be a symbol whose name is in
  *SAFETY-WHITELIST*; every evaluated sub-form is checked recursively.
- (QUOTE x) is data and is accepted.
- (FUNCTION name) is accepted only when NAME is itself whitelisted, so
  #'some-function cannot smuggle in a non-whitelisted function.
- MAPCAR, REMOVE-IF, REMOVE-IF-NOT, SORT and SEARCH call the functions
  they are handed.  Their function argument and any :KEY, :TEST or
  :TEST-NOT value must be a literal 'name or #'name with a whitelisted
  NAME; a variable in that position is rejected.
- LET, LET*, COND, CASE and TYPECASE are walked according to their own
  syntax (binding lists and clauses are not calls).  TYPECASE type
  specifiers mentioning SATISFIES are rejected because SATISFIES calls
  an arbitrary function.
- Dotted and circular lists are rejected.

Blocked operators are reported through KERNEL-LOG.

NOTE(review): whitelist matching is by symbol name, not package; and
FORMAT control strings can invoke functions through the ~/ directive.
Neither is addressed here -- confirm whether the policy should tighten."
  (labels ((whitelisted-p (sym)
             (and (symbolp sym)
                  (member sym *safety-whitelist* :test #'string-equal)
                  t))
           (deny (name)
             (kernel-log "SAFETY HARNESS: Blocked call to non-whitelisted function ~a" name)
             nil)
           (proper-list-p (x)
             ;; LIST-LENGTH returns NIL for circular lists and signals an
             ;; error for dotted ones; both are treated as not proper.
             (and (listp x) (ignore-errors (list-length x)) t))
           (static-designator-p (x)
             ;; True for a literal 'name or #'name with a whitelisted NAME.
             (and (consp x)
                  (proper-list-p x)
                  (= (length x) 2)
                  (member (first x) '(quote function))
                  (or (whitelisted-p (second x))
                      (deny (second x)))))
           (designators-ok-p (head args)
             (let ((position (cdr (assoc head '((mapcar . 0)
                                                (remove-if . 0)
                                                (remove-if-not . 0)
                                                (sort . 1))))))
               (and (or (null position)
                        (static-designator-p (nth position args))
                        (deny (nth position args)))
                    (loop for (arg next) on args
                          always (or (not (member arg '(:key :test :test-not)))
                                     (static-designator-p next)
                                     (deny next))))))
           (walk-all (forms)
             (every #'walk forms))
           (binding-ok-p (binding)
             ;; VAR or (VAR) or (VAR INIT-FORM)
             (or (symbolp binding)
                 (and (consp binding)
                      (proper-list-p binding)
                      (symbolp (first binding))
                      (<= (length binding) 2)
                      (walk-all (rest binding)))))
           (clause-ok-p (clause unevaluated)
             ;; The first UNEVALUATED elements of CLAUSE are not code.
             (and (consp clause)
                  (proper-list-p clause)
                  (walk-all (nthcdr unevaluated clause))))
           (mentions-satisfies-p (spec)
             (cond ((consp spec)
                    (or (mentions-satisfies-p (car spec))
                        (mentions-satisfies-p (cdr spec))))
                   ((symbolp spec) (string-equal spec "SATISFIES"))
                   (t nil)))
           (walk-call (head args)
             (cond
               ((not (symbolp head)) nil)
               ((not (whitelisted-p head)) (deny head))
               ((eq head 'quote) t)
               ((eq head 'function)
                (or (and (consp args)
                         (null (rest args))
                         (whitelisted-p (first args)))
                    (deny (first args))))
               ((member head '(let let*))
                (and (consp args)
                     (proper-list-p (first args))
                     (every #'binding-ok-p (first args))
                     (walk-all (rest args))))
               ((eq head 'cond)
                (every (lambda (clause) (clause-ok-p clause 0)) args))
               ((member head '(case typecase))
                (and (consp args)
                     (walk (first args))
                     (every (lambda (clause)
                              (and (clause-ok-p clause 1)
                                   (not (and (eq head 'typecase)
                                             (mentions-satisfies-p (first clause))))))
                            (rest args))))
               ((member head '(mapcar remove-if remove-if-not sort search))
                (and (designators-ok-p head args)
                     (walk-all args)))
               (t (walk-all args))))
           (walk (node)
             (cond
               ((or (stringp node) (numberp node) (characterp node)) t)
               ;; Symbols (keywords and NIL included) are allowed as variables.
               ((symbolp node) t)
               ((proper-list-p node) (walk-call (first node) (rest node)))
               (t nil))))
    (walk form)))
#+end_src
The Global Safety Harness will function as a global aspect, intercepting all Elisp forms before they are evaluated by the core Lisp interpreter. It achieves this by:
** Validation Entry Point
#+begin_src lisp :tangle ../src/safety-harness.lisp
(defun safety-harness-validate (code-string)
  "Parse CODE-STRING and validate every form in it against the safety harness.

Returns T only when CODE-STRING contains at least one form and
SAFETY-HARNESS-AST-WALK accepts all of them; returns NIL otherwise.
Every top-level form is checked, so unsafe code cannot hide behind a
harmless first form.

Reading is done with *READ-EVAL* bound to NIL (no #. evaluation) and with
the #n= / #n# label syntax disabled, because labels can build circular
structure that a recursive walker cannot traverse.  Any error signalled
while reading or walking is logged through KERNEL-LOG and yields NIL."
  (handler-case
      (let ((*read-eval* nil)
            ;; Work on a private copy so the caller's readtable is untouched.
            (*readtable* (copy-readtable))
            (eof-marker (list :eof))
            (form-count 0))
        (flet ((reject-label (stream sub-char arg)
                 (declare (ignore stream arg))
                 (error "reader label syntax #~a is not permitted" sub-char)))
          (set-dispatch-macro-character #\# #\= #'reject-label)
          (set-dispatch-macro-character #\# #\# #'reject-label))
        (with-input-from-string (in code-string)
          (loop for form = (read in nil eof-marker)
                until (eq form eof-marker)
                do (incf form-count)
                   (unless (safety-harness-ast-walk form)
                     (return nil))
                finally (return (plusp form-count)))))
    (error (c)
      (kernel-log "SAFETY HARNESS ERROR: Syntax or read error during validation: ~a" c)
      nil)))
#+end_src
- **AST Walking:** Recursively traversing the Abstract Syntax Tree (AST) of the Elisp expression.
- **Whitelist Enforcement:** Comparing each function call and variable access against a pre-approved whitelist. Any item not on the whitelist is immediately rejected.
- **Eval Blocking:** Explicitly searching for and rejecting any instances of `eval`, `load`, `eval-expression`, and related functions that enable dynamic code generation or loading.
- **Error Handling:** Providing informative error messages when a security violation occurs, including the specific function or variable that triggered the rejection and its location within the AST.
- **Performance Consideration:** Optimizing the AST walking and whitelist lookup to minimize overhead on Elisp evaluation. Memoization of whitelist checks should be implemented to avoid redundant lookups.
** Skill Definition
#+begin_src lisp :tangle ../src/safety-harness.lisp
;; Registers the harness under the name :skill-safety-harness with priority 90.
;; The trigger lambda always returns NIL and no :neuro or :symbolic handler
;; is supplied.
;; NOTE(review): nothing in this form calls SAFETY-HARNESS-VALIDATE --
;; presumably the pipeline invokes it elsewhere; confirm where it is wired in.
(defskill :skill-safety-harness
:priority 90
:trigger (lambda (ctx) nil)
:neuro nil
:symbolic nil)
#+end_src
** 2. Semantic Interfaces
* Phase E: Chaos (Verification)
#+begin_src lisp :tangle ../tests/safety-harness-tests.lisp
;; Test package for the safety harness; exports SAFETY-SUITE so a test
;; runner can look the suite up by name.
(defpackage :org-agent-safety-tests
(:use :cl :fiveam :org-agent)
(:export #:safety-suite))
(in-package :org-agent-safety-tests)
*** Function: +safety-harness-validate+
;; Define the suite and make it current for the tests that follow.
(def-suite safety-suite :description "Tests for the Global Safety Harness.")
(in-suite safety-suite)
#+BEGIN_SRC lisp
;; Blueprint stub: the definition contains only the description string below
;; and no validation logic.
(defun +safety-harness-validate+ (form whitelist)
"Validates an Elisp form against a security whitelist.
FORM: The Elisp form to validate (list or symbol).
WHITELIST: An alist associating symbols (function/variable names) to metadata. Metadata includes :safe? boolean flag and :trust-level (integer).")
#+END_SRC
(test test-basic-math-safe
  "Plain arithmetic on whitelisted operators is accepted."
  (is-true (org-agent:safety-harness-validate "(+ 1 2)")))
*** Function: +safety-harness-ast-walk+
(test test-blocked-eval
  "A call to EVAL is rejected."
  (is-false (org-agent:safety-harness-validate "(eval '(+ 1 2))")))
#+BEGIN_SRC lisp
;; Blueprint stub: the definition contains only the description string below
;; and no traversal logic.
(defun +safety-harness-ast-walk+ (form whitelist)
"Recursively walks the Abstract Syntax Tree (AST) of an Elisp form,
validating each node against the whitelist.")
#+END_SRC
(test test-blocked-shell
  "Running an external program through UIOP is rejected."
  (is-false (org-agent:safety-harness-validate "(uiop:run-program \"ls\")")))
*** Function: +safety-harness-whitelist-lookup+
#+BEGIN_SRC lisp
;; Blueprint stub: the definition contains only the description string below
;; and no lookup logic.
(defun +safety-harness-whitelist-lookup+ (symbol whitelist)
"Looks up a symbol in the security whitelist.
Returns the whitelist entry if found, or nil if not found.")
#+END_SRC
*** Function: +safety-harness-eval-blocked?+
#+BEGIN_SRC lisp
;; Blueprint stub: the definition contains only the description string below
;; and no detection logic.
(defun +safety-harness-eval-blocked?+ (form)
"Checks if the Elisp form contains any prohibited eval-like constructs.
Returns t if eval is blocked, nil otherwise.")
#+END_SRC
*** Data Structure: +safety-harness-error+
A plist data structure representing a security violation:
- `:type`: `'whitelist-violation` or `'eval-blocked`
- `:symbol`: The offending symbol (function or variable name)
- `:location`: A list representing the path within the AST where the violation occurred.
(test test-nested-unsafe
  "A LET form whose body calls DELETE-FILE is rejected."
  (is-false (org-agent:safety-harness-validate "(let ((x 1)) (delete-file \"test.txt\"))")))
(test test-safe-kernel-api
  "A whitelisted kernel accessor is accepted."
  (is-true (org-agent:safety-harness-validate "(org-agent::lookup-object \"node-1\")")))
#+end_src

View File

@@ -90,3 +90,469 @@ Maintain a state-aware provider cascade that routes around "pain" (failures) and
(when p (token-accountant-record-pain p))
action))))
#+end_src
* Documentation (Token Optimization)
** research.org
#+TITLE: Token Management & Model Optimization Research
#+author: Amero Garcia
#+created: [2026-03-16 Mon 14:28]
#+DATE: 2026-03-04
#+FILETAGS: :research:token:optimization:models
* Token Management Strategy Research
** Initial Findings
*** OpenRouter Free Tier
- URL: https://openrouter.ai/collections/free-models
- Providers moving from free to paid-only models
- Belief: "Free models play a crucial role in democratizing access"
*** Google AI Studio (Gemini)
- Free tier available
- Limits: 60 requests/minute, 300K tokens/day
- No credit card required
- Every API key gets these limits
** Research Questions
1. Which providers offer free or low-cost tiers?
2. What are the rate limits and quotas?
3. Which models are best for which use cases?
4. How to optimize context windows?
5. What is the cost per token breakdown?
** To Research Further
| Provider | Free Tier | Paid Tier | Best For |
|----------|-----------|-----------|----------|
| Google Gemini | 300K tokens/day | Pay per use? | General, coding |
| OpenRouter | Varies by model | Per-request | Routing, variety |
| OpenAI | ? | ? | GPT-4 quality |
| Anthropic | ? | ? | Claude capabilities |
| Mistral | ? | ? | Open weights |
| Local | Hardware cost | Free | Privacy, control |
** Token Optimization Strategies to Explore
1. *Tiered Model Usage*
- Simple tasks: Fast/cheap models
- Complex tasks: Stronger models
- Fallback: Lower tier if higher fails
2. *Context Compression*
- Summarize long contexts
- Use RAG instead of full context
- Prune old conversation
3. *Caching*
- Cache common responses
- Reuse embeddings
- Batch requests
4. *Hybrid Approach*
- Local models for simple queries
- Cloud APIs for complex tasks
- Manual review for critical outputs
** X Account Access
*Pending:* X account access via Google login
*Blocker:* Requires OTP from user per security rule (SOUL.md)
*Action needed:* User provides OTP, I complete OAuth, access bookmarks
** budget-50.org
#+TITLE: Token Optimization - $50 Monthly Budget
#+author: Amero Garcia
#+created: [2026-03-16 Mon 14:28]
#+DATE: 2026-03-04
#+FILETAGS: :budget:constraints:optimization
* Budget: $50/Month
** Budget Breakdown
| Tier | Provider | Allocation | Tokens Est. | Use Case |
|------|----------|-----------|-------------|----------|
| FREE | Google Gemini | $0 | ~9M/month | 90% of work |
| CHEAP | OpenRouter | $20 | ~6M tokens | Fallback, complex tasks |
| PREMIUM | Claude/GPT-4o | $25 | ~500K tokens | Critical decisions |
| BUFFER | Various | $5 | Emergency | Overruns, testing |
** Daily Free Allowance
- *Google Gemini:* 300K tokens/day = 9M/month = *$0*
- This covers 90-95% of expected workload
** Paid Tier Allocation ($45)
- *$20 → OpenRouter* (Qwen, Mistral, Llama)
- ~6M tokens at $0.003/1K
- Use when: Gemini rate limited, need different model
- *$25 → Premium models* (Claude, GPT-4o)
- ~500K tokens at $0.05/1K average
- Use when: Architecture decisions, critical code review, final validation
- *$5 → Buffer*
- Handle overruns
- Emergency access
- Testing new models
** Hard Limits
| Provider | Monthly Cap | Alert At |
|----------|-------------|----------|
| OpenRouter | $20 | $16 (80%) |
| Premium | $25 | $20 (80%) |
| Total | $50 | $45 (90%) |
** Daily Tracking
Target: *Monitor consumption every session*
```
IF daily_cost > $1.50:
→ Switch to Gemini only
→ Defer premium tasks
IF weekly_cost > $12:
→ Review usage patterns
→ Find optimization opportunities
```
** Emergency Protocol
If approaching $50 limit before month end:
1. Halt all paid API calls
2. Switch to Gemini-only mode
3. Queue premium tasks for next month
4. Consider local inference setup
** Cost-Per-Task Guidelines
| Task Type | Max Cost | Preferred Model |
|-----------|----------|-----------------|
| Quick lookup | $0.00 | Gemini |
| Code review | $0.01 | Gemini/OpenRouter |
| Feature design | $0.05 | OpenRouter |
| Architecture review | $0.10 | Claude/GPT-4o |
| Emergency debug | $0.20 | Best available |
** Optimization Imperative
With $50/month, waste is not affordable:
- ❌ No speculative queries
- ❌ No "just curious" premium calls
- ❌ No repeated similar prompts
- ✅ Always use Gemini first
- ✅ Batch similar requests
- ✅ Cache embeddings locally
- ✅ Summarize long contexts
** Monthly Review
1. Compare actual vs. projected usage
2. Adjust model routing rules
3. Identify expensive query patterns
4. Plan next month's allocation
** Break-Even Analysis
At $50/month = $600/year:
- *Option A:* Continue APIs (flexible, managed)
- *Option B:* Local inference (~$800 hardware, $0 ongoing)
- Break-even: 16 months
- Risk: Hardware failure, maintenance
*Recommendation:* Stick with APIs until $100+/month, then evaluate hardware.
** Questions for Human Partner
1. Is $50 firm or flexible in emergencies?
2. What happens if we hit limit mid-critical-task?
3. Preference for which premium model? (Claude vs GPT-4 vs both)
4. Should I track and report costs per project?
5. Any tasks that are "unlimited budget" critical?
** README.org
#+TITLE: Token Optimization
#+AUTHOR: Amr
#+CREATED: [2026-03-17 Tue]
#+BEGIN_COMMENT
Cost-effective LLM usage through smart routing, context compression, and multi-provider strategies.
#+END_COMMENT
* Token Optimization
Strategy and implementation for minimizing LLM costs while maintaining quality.
* Project Tasks
See the actionable tasks for this project in [[file:../../gtd.org::*Token Optimization][GTD.org > Projects > Token Optimization]]
* Key Documents
- [[file:plan.org][Optimization Plan]]
- [[file:token-optimization.yaml][Configuration]]
* Current Focus
- Multi-provider setup (Gemini primary, OpenRouter fallback)
- Usage tracking and budget alerts
- Smart routing by task type
- Context compression techniques
** quick-start.org
#+TITLE: Token Optimization - Quick Start
#+author: Amero Garcia
#+created: [2026-03-16 Mon 14:28]
#+DATE: 2026-03-04
* Quick Reference for Daily Use
** Rule of Thumb
| What you need | Use this | Cost |
|---------------|----------|------|
| Quick answer, formatting, lookup | Gemini Flash | FREE |
| Code review, analysis | Gemini Pro | FREE |
| Complex problem solving | Claude Haiku / Qwen | $ |
| Critical architecture decision | GPT-4o | $$ |
** Free Tier Limits (Daily)
| Provider | Tokens | Requests | Reset |
|----------|--------|----------|-------|
| Google AI Studio | 300,000 | 60/min | Daily |
| OpenRouter Free | Varies | Limited | - |
** Current Recommendation
*Use Google Gemini exclusively* until hitting 250K tokens/day
→ Then add OpenRouter fallback
→ Only use GPT-4 for final reviews
** This will reduce token costs by ~90%
** Next Steps
1. Configure Gemini as primary (already partially done)
2. Add quota tracking
3. Set alerts at 80% of free limits
4. Implement tiered routing
** Savings Potential: $100-500/month → $10-50/month
** plan.org
#+TITLE: Token Optimization Strategy
#+author: Amero Garcia
#+created: [2026-03-16 Mon 14:28]
#+DATE: 2026-03-04
#+FILETAGS: :strategy:token:optimization:cost
* Executive Summary
** Goal: Minimize inference costs while maximizing capability
Current approach: Single default model → Multi-tier, multi-provider strategy
* Three-Tier Model Strategy
** Tier 1: Fast/Cheap (80% of queries)
- *Purpose:* Simple tasks, formatting, lookups
- *Models:* Google Gemini Flash, Local models
- *Cost:* $0-0.000001 per 1K tokens
- *Speed:* Fastest
** Tier 2: Balanced (18% of queries)
- *Purpose:* Complex reasoning, code generation, analysis
- *Models:* Gemini Pro, Claude Haiku, Llama 3 70B
- *Cost:* $0.0001-0.003 per 1K tokens
- *Speed:* Medium
** Tier 3: High-Performance (2% of queries)
- *Purpose:* Critical decisions, complex architecture, final review
- *Models:* GPT-4, Claude Opus, Gemini Ultra
- *Cost:* $0.01-0.03 per 1K tokens
- *Speed:* Slower
* Provider Analysis
** Google AI Studio (Primary Recommended)
| Model | Free Tier | Rate Limit | Best For |
|-------|-----------|------------|----------|
| Gemini 2.0 Flash | 300K tokens/day | 60 req/min | Quick tasks, coding |
| Gemini 1.5 Flash | 300K tokens/day | 60 req/min | Fast responses |
| Gemini 1.5 Pro | 300K tokens/day | 60 req/min | Complex tasks |
*Cost: FREE (within limits)*
** OpenRouter (Aggregated, Secondary)
| Model | Price/1K tokens | Context | Reliability |
|-------|-----------------|---------|-------------|
| Qwen 3 235B | $0.0001-0.0003 | 128K | High |
| Mistral Large | $0.002-0.006 | 128K | High |
| Llama 4 405B | $0.0002-0.0005 | 128K | Medium |
| Free tier models | $0 | Varies | Variable |
** OpenAI (Tier 3 only)
- GPT-4: $0.03/1K tokens (expensive)
- GPT-4o: $0.005/1K tokens (better value)
- Use sparingly for critical tasks only
** Local Inference (Long-term goal)
- Hardware: $1000-5000 initial investment
- Ongoing: $0 (electricity only)
- Models: Llama 3, Mistral, DeepSeek
- Best for: High-volume, privacy-sensitive work
* Context Optimization Strategies
** 1. Context Windows by Task Type
| Task Type | Optimal Context | Compression | Savings |
|-----------|-----------------|-------------|---------|
| Code review | 4K-8K | Truncate old files | 50% |
| Documentation | 8K-16K | Summarize sections | 30% |
| Research | 16K-32K | Chunk + RAG | 70% |
| Architecture | 32K-128K | Maintain full | 0% |
** 2. Conversation Pruning
- Remove "thinking" blocks from history
- Summarize conversation every 10 turns
- Archive old sessions to external storage
** 3. RAG vs. Full Context
- *Rule:* < 5K tokens of context → Full
- *Rule:* > 10K tokens of context → Use embeddings/RAG
- *Savings:* 60-80% on large document tasks
* Request Optimization
** Batching Strategy
- Group similar requests (3-5 per batch)
- Same model, same parameters
- Shared overhead costs
** Caching Strategy
- Cache embeddings for repeated contexts
- Store common completions (templates)
- Reuse code snippet suggestions
** Streaming vs. Non-Streaming
- *Streaming:* Better UX, but higher token overhead
- *Non-streaming:* More efficient for programmatic use
- *Recommendation:* Non-streaming for background tasks
* Smart Routing Rules
** Automatic Selection Logic
```
IF task_type == "simple_lookup" OR "formatting":
→ Gemini Flash (free)
ELIF task_type == "code_generation" AND complexity < 3:
→ Gemini Pro (free tier)
ELIF task_type == "complex_reasoning" OR "architecture":
→ Claude Sonnet or GPT-4o
ELIF task_type == "final_review" OR "critical_decision":
→ GPT-4 or Claude Opus
```
** Fallback Chain
1. Try Gemini (free)
2. If rate limited → OpenRouter (cheap)
3. If quality insufficient → GPT-4o
4. If critical failure → GPT-4
* Concrete Implementation
** Config Structure (openclaw.json)
```json
{
"models": {
"defaults": {
"primary": "google-gemini-cli/gemini-2.0-flash",
"fallbacks": [
"openrouter/qwen/qwen3-235b-a22b",
"google-gemini-cli/gemini-1.5-pro",
"openai/gpt-4o"
]
},
"providers": {
"google-gemini-cli": {
"freeTier": true,
"dailyLimit": 300000,
"rateLimit": 60
},
"openrouter": {
"freeTierModels": ["openrouter/auto"],
"budgetLimit": 500
},
"openai": {
"budgetLimit": 200,
"useFor": ["critical", "architecture"]
}
}
}
}
```
** Monitoring & Alerts
- Track daily token usage per provider
- Alert at 80% of free tier limits
- Monthly budget review and adjustment
* Cost Projections
** Current Unknown Usage → Optimized
| Scenario | Monthly Tokens | Current Cost | Optimized Cost | Savings |
|----------|---------------|--------------|----------------|---------|
| Light (< 1M) | 1M | $50-100 | $0-10 | 90% |
| Medium (1-5M) | 3M | $200-500 | $20-100 | 80% |
| Heavy (5-20M) | 10M | $1000-3000 | $200-500 | 80% |
* Immediate Actions
** Week 1: Setup
- Configure Gemini as primary provider
- Set up OpenRouter fallback
- Implement basic usage tracking
- Document current baseline
** Week 2: Implement
- Add smart routing logic
- Implement context compression
- Set up budget alerts
- A/B test model choices
** Week 3: Optimize
- Analyze usage patterns
- Fine-tune routing rules
- Tune context windows
- Document findings
** Week 4: Scale
- Full multi-provider setup
- Implement full caching
- Maximize free tier usage
- Plan for paid tiers if needed
* Long-term: Local Inference Path
** Minimum Viable Setup
- Hardware: RTX 4090 or Apple Silicon M3 Max
- Software: Ollama + OpenClaw integration
- Cost: ~$2000-4000 one-time
- Break-even: 3-6 months vs. API costs
** Full Self-Hosted
- Hardware: Dual RTX 4090 or 2x Mac Studio
- Models: Llama 3 70B, Mixtral 8x22B
- Cost: ~$8000-12000
- For: Privacy, unlimited inference, control

View File

@@ -169,7 +169,8 @@
(setf current-signal (decide-gate current-signal))
(setf current-signal (dispatch-gate current-signal)))
(error (c)
(kernel-log "PIPELINE CRASH: ~a" c)
(kernel-log "PIPELINE CRASH: ~a - Initiating Micro-Rollback." c)
(rollback-object-store 0)
(let ((sensor (ignore-errors (getf (getf current-signal :payload) :sensor))))
(if (or (> depth 2) (member sensor '(:loop-error :tool-error)))
(setf current-signal nil)

View File

@@ -62,6 +62,7 @@
#:load-skill-with-timeout
#:topological-sort-skills
#:validate-lisp-syntax
#:safety-harness-validate
#:find-triggered-skill
#:defskill
#:*skills-registry*

82
src/safety-harness.lisp Normal file
View File

@@ -0,0 +1,82 @@
(in-package :org-agent)
;; Operator whitelist consulted by SAFETY-HARNESS-AST-WALK.  The walker looks
;; entries up with MEMBER and :TEST #'STRING-EQUAL, so a symbol matches by
;; name (case-insensitively) rather than by package identity.
;; NOTE(review): several kernel entries (e.g. ROLLBACK-OBJECT-STORE,
;; INGEST-AST) look state-changing -- presumably intentional; confirm.
(defparameter *safety-whitelist*
'(;; Math & Logic
+ - * / = < > <= >= 1+ 1- min max
and or not null eq eql equal string= string-equal
;; List Manipulation
list cons car cdr cadr cddr cdar caar append mapcar remove-if remove-if-not
length reverse sort nth nthcdr push pop
;; Plists and Hash Tables
getf gethash
;; Control Flow
let let* if cond when unless case typecase
;; Strings
format concatenate string-downcase string-upcase search
;; Kernel specifics
org-agent::kernel-log
org-agent::snapshot-object-store
org-agent::rollback-object-store
org-agent::lookup-object
org-agent::list-objects-by-type
org-agent::ingest-ast
org-agent::find-headline-missing-id
org-agent::context-query-store
org-agent::context-get-active-projects
org-agent::context-get-recent-completed-tasks
org-agent::context-list-all-skills
org-agent::context-get-system-logs
org-agent::context-assemble-global-awareness
org-agent::org-object-id
org-agent::org-object-type
org-agent::org-object-attributes
org-agent::org-object-content
org-agent::org-object-parent-id
org-agent::org-object-children
org-agent::org-object-version
org-agent::org-object-last-sync
org-agent::org-object-hash
;; Declarations
declare ignore
;; Constants and quoting operators
t nil quote function))
(defun safety-harness-ast-walk (form)
  "Recursively walk the Lisp FORM and return T if it is safe, NIL otherwise.

Rules:
- Strings, numbers, characters and symbols (treated as variable
  references) are safe.  Any other atom is rejected.
- A list is a call.  Its head must be a symbol whose name is in
  *SAFETY-WHITELIST*; every evaluated sub-form is checked recursively.
- (QUOTE x) is data and is accepted.
- (FUNCTION name) is accepted only when NAME is itself whitelisted, so
  #'some-function cannot smuggle in a non-whitelisted function.
- MAPCAR, REMOVE-IF, REMOVE-IF-NOT, SORT and SEARCH call the functions
  they are handed.  Their function argument and any :KEY, :TEST or
  :TEST-NOT value must be a literal 'name or #'name with a whitelisted
  NAME; a variable in that position is rejected.
- LET, LET*, COND, CASE and TYPECASE are walked according to their own
  syntax (binding lists and clauses are not calls).  TYPECASE type
  specifiers mentioning SATISFIES are rejected because SATISFIES calls
  an arbitrary function.
- Dotted and circular lists are rejected.

Blocked operators are reported through KERNEL-LOG.

NOTE(review): whitelist matching is by symbol name, not package; and
FORMAT control strings can invoke functions through the ~/ directive.
Neither is addressed here -- confirm whether the policy should tighten."
  (labels ((whitelisted-p (sym)
             (and (symbolp sym)
                  (member sym *safety-whitelist* :test #'string-equal)
                  t))
           (deny (name)
             (kernel-log "SAFETY HARNESS: Blocked call to non-whitelisted function ~a" name)
             nil)
           (proper-list-p (x)
             ;; LIST-LENGTH returns NIL for circular lists and signals an
             ;; error for dotted ones; both are treated as not proper.
             (and (listp x) (ignore-errors (list-length x)) t))
           (static-designator-p (x)
             ;; True for a literal 'name or #'name with a whitelisted NAME.
             (and (consp x)
                  (proper-list-p x)
                  (= (length x) 2)
                  (member (first x) '(quote function))
                  (or (whitelisted-p (second x))
                      (deny (second x)))))
           (designators-ok-p (head args)
             (let ((position (cdr (assoc head '((mapcar . 0)
                                                (remove-if . 0)
                                                (remove-if-not . 0)
                                                (sort . 1))))))
               (and (or (null position)
                        (static-designator-p (nth position args))
                        (deny (nth position args)))
                    (loop for (arg next) on args
                          always (or (not (member arg '(:key :test :test-not)))
                                     (static-designator-p next)
                                     (deny next))))))
           (walk-all (forms)
             (every #'walk forms))
           (binding-ok-p (binding)
             ;; VAR or (VAR) or (VAR INIT-FORM)
             (or (symbolp binding)
                 (and (consp binding)
                      (proper-list-p binding)
                      (symbolp (first binding))
                      (<= (length binding) 2)
                      (walk-all (rest binding)))))
           (clause-ok-p (clause unevaluated)
             ;; The first UNEVALUATED elements of CLAUSE are not code.
             (and (consp clause)
                  (proper-list-p clause)
                  (walk-all (nthcdr unevaluated clause))))
           (mentions-satisfies-p (spec)
             (cond ((consp spec)
                    (or (mentions-satisfies-p (car spec))
                        (mentions-satisfies-p (cdr spec))))
                   ((symbolp spec) (string-equal spec "SATISFIES"))
                   (t nil)))
           (walk-call (head args)
             (cond
               ((not (symbolp head)) nil)
               ((not (whitelisted-p head)) (deny head))
               ((eq head 'quote) t)
               ((eq head 'function)
                (or (and (consp args)
                         (null (rest args))
                         (whitelisted-p (first args)))
                    (deny (first args))))
               ((member head '(let let*))
                (and (consp args)
                     (proper-list-p (first args))
                     (every #'binding-ok-p (first args))
                     (walk-all (rest args))))
               ((eq head 'cond)
                (every (lambda (clause) (clause-ok-p clause 0)) args))
               ((member head '(case typecase))
                (and (consp args)
                     (walk (first args))
                     (every (lambda (clause)
                              (and (clause-ok-p clause 1)
                                   (not (and (eq head 'typecase)
                                             (mentions-satisfies-p (first clause))))))
                            (rest args))))
               ((member head '(mapcar remove-if remove-if-not sort search))
                (and (designators-ok-p head args)
                     (walk-all args)))
               (t (walk-all args))))
           (walk (node)
             (cond
               ((or (stringp node) (numberp node) (characterp node)) t)
               ;; Symbols (keywords and NIL included) are allowed as variables.
               ((symbolp node) t)
               ((proper-list-p node) (walk-call (first node) (rest node)))
               (t nil))))
    (walk form)))
(defun safety-harness-validate (code-string)
  "Parse CODE-STRING and validate every form in it against the safety harness.

Returns T only when CODE-STRING contains at least one form and
SAFETY-HARNESS-AST-WALK accepts all of them; returns NIL otherwise.
Every top-level form is checked, so unsafe code cannot hide behind a
harmless first form.

Reading is done with *READ-EVAL* bound to NIL (no #. evaluation) and with
the #n= / #n# label syntax disabled, because labels can build circular
structure that a recursive walker cannot traverse.  Any error signalled
while reading or walking is logged through KERNEL-LOG and yields NIL."
  (handler-case
      (let ((*read-eval* nil)
            ;; Work on a private copy so the caller's readtable is untouched.
            (*readtable* (copy-readtable))
            (eof-marker (list :eof))
            (form-count 0))
        (flet ((reject-label (stream sub-char arg)
                 (declare (ignore stream arg))
                 (error "reader label syntax #~a is not permitted" sub-char)))
          (set-dispatch-macro-character #\# #\= #'reject-label)
          (set-dispatch-macro-character #\# #\# #'reject-label))
        (with-input-from-string (in code-string)
          (loop for form = (read in nil eof-marker)
                until (eq form eof-marker)
                do (incf form-count)
                   (unless (safety-harness-ast-walk form)
                     (return nil))
                finally (return (plusp form-count)))))
    (error (c)
      (kernel-log "SAFETY HARNESS ERROR: Syntax or read error during validation: ~a" c)
      nil)))
;; Registers the harness under the name :skill-safety-harness with priority 90.
;; The trigger lambda always returns NIL and no :neuro or :symbolic handler
;; is supplied.
;; NOTE(review): nothing in this form calls SAFETY-HARNESS-VALIDATE --
;; presumably the pipeline invokes it elsewhere; confirm where it is wired in.
(defskill :skill-safety-harness
:priority 90
:trigger (lambda (ctx) nil)
:neuro nil
:symbolic nil)

View File

@@ -97,3 +97,32 @@
(let ((awareness (context-assemble-global-awareness)))
(is (search "Project Alpha" awareness))
(is (search "proj-1" awareness))))
;; Regression test for the pipeline's crash handler: an error raised while a
;; signal is being processed must leave the previously ingested object intact.
(test test-micro-rollback
"Verify that a pipeline crash triggers an automatic Object Store rollback."
;; Start from empty stores and no snapshots so the result depends only on
;; what this test ingests.
(clrhash org-agent::*object-store*)
(clrhash org-agent::*history-store*)
(setf org-agent::*object-store-snapshots* nil)
;; State A: a single headline with ID "node-1" and TITLE "State A".
(ingest-ast (list :type :HEADLINE :properties (list :ID "node-1" :TITLE "State A") :contents nil))
(setup-mock-skills)
;; Skill whose :symbolic handler always signals an error.
;; NOTE(review): :crashing-skill is registered with priority 200 and an
;; always-true trigger and is never unregistered here -- presumably it stays
;; in the registry for later tests; confirm suite isolation.
(org-agent::defskill :crashing-skill
:priority 200
:trigger (lambda (ctx) t)
:neuro (lambda (ctx) (list :type :REQUEST :payload (list :action :eval :code "(error \"BOOM\")")))
:symbolic (lambda (action ctx) (error "CRASH IN SYSTEM 2")))
;; Run pipeline. The intended sequence for this turn is:
;; 1. Perceive (Take snapshot of State A)
;; 2. Neuro (Think)
;; 3. Decide (the :symbolic handler signals an error)
;; 4. Rollback to State A.
(process-signal (list :type :EVENT :payload (list :sensor :test)))
;; Verify that we are still in State A
(let ((obj (lookup-object "node-1")))
(is (not (null obj)))
(is (equal (getf (org-object-attributes obj) :TITLE) "State A"))))

View File

@@ -0,0 +1,22 @@
;;; Tests for the Global Safety Harness entry point SAFETY-HARNESS-VALIDATE.
(defpackage #:org-agent-safety-tests
  (:use #:cl #:fiveam #:org-agent)
  (:export #:safety-suite))

(in-package #:org-agent-safety-tests)

(def-suite safety-suite :description "Tests for the Global Safety Harness.")

(in-suite safety-suite)

(test test-basic-math-safe
  "Plain arithmetic on whitelisted operators is accepted."
  (is-true (org-agent:safety-harness-validate "(+ 1 2)")))

(test test-blocked-eval
  "A call to EVAL is rejected."
  (is-false (org-agent:safety-harness-validate "(eval '(+ 1 2))")))

(test test-blocked-shell
  "Running an external program through UIOP is rejected."
  (is-false (org-agent:safety-harness-validate "(uiop:run-program \"ls\")")))

(test test-nested-unsafe
  "A LET form whose body calls DELETE-FILE is rejected."
  (is-false (org-agent:safety-harness-validate "(let ((x 1)) (delete-file \"test.txt\"))")))

(test test-safe-kernel-api
  "A whitelisted kernel accessor is accepted."
  (is-true (org-agent:safety-harness-validate "(org-agent::lookup-object \"node-1\")")))