feat: implement recursive AST-walker sandbox for Lisp evaluation
This commit is contained in:
@@ -262,7 +262,8 @@ Moves a signal through the gates in a flat loop, handling feedback signals witho
|
||||
(setf current-signal (decide-gate current-signal))
|
||||
(setf current-signal (dispatch-gate current-signal)))
|
||||
(error (c)
|
||||
(kernel-log "PIPELINE CRASH: ~a" c)
|
||||
(kernel-log "PIPELINE CRASH: ~a - Initiating Micro-Rollback." c)
|
||||
(rollback-object-store 0)
|
||||
(let ((sensor (ignore-errors (getf (getf current-signal :payload) :sensor))))
|
||||
(if (or (> depth 2) (member sensor '(:loop-error :tool-error)))
|
||||
(setf current-signal nil)
|
||||
@@ -502,4 +503,33 @@ Following the PSF mandates, the Reactive Signal Pipeline must be empirically ver
|
||||
(let ((awareness (context-assemble-global-awareness)))
|
||||
(is (search "Project Alpha" awareness))
|
||||
(is (search "proj-1" awareness))))
|
||||
|
||||
(test test-micro-rollback
|
||||
"Verify that a pipeline crash triggers an automatic Object Store rollback."
|
||||
(clrhash org-agent::*object-store*)
|
||||
(clrhash org-agent::*history-store*)
|
||||
(setf org-agent::*object-store-snapshots* nil)
|
||||
|
||||
;; State A
|
||||
(ingest-ast (list :type :HEADLINE :properties (list :ID "node-1" :TITLE "State A") :contents nil))
|
||||
|
||||
(setup-mock-skills)
|
||||
;; Skill that crashes in Symbolic Gate
|
||||
(org-agent::defskill :crashing-skill
|
||||
:priority 200
|
||||
:trigger (lambda (ctx) t)
|
||||
:neuro (lambda (ctx) (list :type :REQUEST :payload (list :action :eval :code "(error \"BOOM\")")))
|
||||
:symbolic (lambda (action ctx) (error "CRASH IN SYSTEM 2")))
|
||||
|
||||
;; Run pipeline. This turn will:
|
||||
;; 1. Perceive (Take snapshot of State A)
|
||||
;; 2. Neuro (Think)
|
||||
;; 3. Decide (Crash!)
|
||||
;; 4. Rollback to State A.
|
||||
(process-signal (list :type :EVENT :payload (list :sensor :test)))
|
||||
|
||||
;; Verify that we are still in State A
|
||||
(let ((obj (lookup-object "node-1")))
|
||||
(is (not (null obj)))
|
||||
(is (equal (getf (org-object-attributes obj) :TITLE) "State A"))))
|
||||
#+end_src
|
||||
|
||||
@@ -71,6 +71,7 @@ The `package.lisp` file defines the public API of the `org-agent` kernel. It exp
|
||||
#:load-skill-with-timeout
|
||||
#:topological-sort-skills
|
||||
#:validate-lisp-syntax
|
||||
#:safety-harness-validate
|
||||
#:find-triggered-skill
|
||||
#:defskill
|
||||
#:*skills-registry*
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
(:file "skills")
|
||||
(:file "neuro")
|
||||
(:file "symbolic")
|
||||
(:file "safety-harness")
|
||||
(:file "core"))))
|
||||
:build-operation "program-op"
|
||||
:build-pathname "org-agent-server"
|
||||
@@ -34,6 +35,7 @@
|
||||
:components ((:file "oacp-tests")
|
||||
(:file "pipeline-tests")
|
||||
(:file "peripheral-vision-tests")
|
||||
(:file "safety-harness-tests")
|
||||
(:file "boot-sequence-tests")
|
||||
(:file "object-store-tests")
|
||||
(:file "immune-system-tests")
|
||||
@@ -42,6 +44,7 @@
|
||||
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :oacp-suite :org-agent-tests))
|
||||
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :pipeline-suite :org-agent-pipeline-tests))
|
||||
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :vision-suite :org-agent-peripheral-vision-tests))
|
||||
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :safety-suite :org-agent-safety-tests))
|
||||
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :boot-suite :org-agent-boot-tests))
|
||||
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :object-store-suite :org-agent-object-store-tests))
|
||||
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :immune-suite :org-agent-immune-system-tests))
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
(:file "skills")
|
||||
(:file "neuro")
|
||||
(:file "symbolic")
|
||||
(:file "safety-harness")
|
||||
(:file "core"))))
|
||||
:build-operation "program-op"
|
||||
:build-pathname "org-agent-server"
|
||||
@@ -27,6 +28,7 @@
|
||||
:components ((:file "oacp-tests")
|
||||
(:file "pipeline-tests")
|
||||
(:file "peripheral-vision-tests")
|
||||
(:file "safety-harness-tests")
|
||||
(:file "boot-sequence-tests")
|
||||
(:file "object-store-tests")
|
||||
(:file "immune-system-tests")
|
||||
@@ -35,6 +37,7 @@
|
||||
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :oacp-suite :org-agent-tests))
|
||||
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :pipeline-suite :org-agent-pipeline-tests))
|
||||
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :vision-suite :org-agent-peripheral-vision-tests))
|
||||
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :safety-suite :org-agent-safety-tests))
|
||||
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :boot-suite :org-agent-boot-tests))
|
||||
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :object-store-suite :org-agent-object-store-tests))
|
||||
(uiop:symbol-call :fiveam :run! (uiop:find-symbol* :immune-suite :org-agent-immune-system-tests))
|
||||
|
||||
@@ -70,3 +70,123 @@ The Org-JSON Bridge will be implemented as a modular system centered around two
|
||||
*** CLI Interface
|
||||
|
||||
- Command-line tools wrapping `org-to-json` and `json-to-org` will also be provided for convenient use from the shell. These tools will accept file paths as input and output, and include options to control formatting and error handling. Example: `org-json-convert --to-json input.org output.json`.
|
||||
|
||||
* Implementation
|
||||
|
||||
** Emacs Lisp Core (org-json-bridge.el)
|
||||
#+begin_src elisp :tangle projects/org-json-bridge/org-json-bridge.el
|
||||
(require 'org-element)
|
||||
(require 'json)
|
||||
(require 'cl-lib)
|
||||
|
||||
(defun org-json-bridge--clean-tree (element)
|
||||
"Recursively convert an Org ELEMENT into a JSON-serializable format."
|
||||
(cond
|
||||
((listp element)
|
||||
(let* ((type (car element))
|
||||
(props (nth 1 element))
|
||||
(children (nthcdr 2 element))
|
||||
(cleaned-props nil))
|
||||
|
||||
(cl-loop for (key val) on props by 'cddr do
|
||||
(unless (member key '(:standard-properties :parent))
|
||||
(let ((json-key (substring (symbol-name key) 1)))
|
||||
(push (cons json-key
|
||||
(cond
|
||||
((stringp val) val)
|
||||
((numberp val) val)
|
||||
((booleanp val) val)
|
||||
(t (format "%s" val))))
|
||||
cleaned-props))))
|
||||
|
||||
(list (cons 'type (symbol-name type))
|
||||
(cons 'properties cleaned-props)
|
||||
(cons 'contents (mapcar #'org-json-bridge--clean-tree children)))))
|
||||
((stringp element) element)
|
||||
(t (format "%s" element))))
|
||||
|
||||
(defun org-to-json (file-path)
|
||||
"Parse an Org file and output its structure as JSON."
|
||||
(with-current-buffer (find-file-noselect file-path)
|
||||
(let* ((tree (org-element-parse-buffer))
|
||||
(cleaned (org-json-bridge--clean-tree tree)))
|
||||
(princ (json-encode cleaned)))))
|
||||
|
||||
(defun org-json-bridge--restore-tree (node)
  "Rebuild an Org element from NODE, a decoded JSON value.

NODE is expected to have the shape emitted by
`org-json-bridge--clean-tree': an alist with `type', `properties'
and `contents' entries, or a plain string for text leaves.  The
result is a list (TYPE PROPS CHILD...) where TYPE is a symbol and
PROPS is a plist with keyword keys.  Anything that is neither a
string nor such an alist is returned unchanged."
  (cond
   ((stringp node) node)
   ((and (consp node) (assq 'type node))
    (let ((type (intern (cdr (assq 'type node))))
          ;; \"level\" -> :level, undoing the leading-colon strip done
          ;; when the tree was cleaned.
          (props (cl-loop for (key . val) in (cdr (assq 'properties node))
                          nconc (list (intern (format ":%s" key)) val)))
          ;; `json-read-from-string' may hand back a vector for a JSON
          ;; array; `mapcar' accepts any sequence and returns a list.
          (children (mapcar #'org-json-bridge--restore-tree
                            (cdr (assq 'contents node)))))
      (append (list type props) children)))
   (t node)))

(defun json-to-org (json-string output-file)
  "Decode JSON-STRING into an Org tree and write its text to OUTPUT-FILE.

JSON-STRING must be a JSON document in the shape produced by
`org-to-json'.  The decoded value is first converted back into the
list form `org-element-interpret-data' operates on, because the raw
value returned by `json-read-from-string' is not an Org element.

NOTE(review): property values that `org-json-bridge--clean-tree'
flattened with `format' come back as strings, so the round trip is
only as faithful as that encoding -- confirm against real documents."
  (let* ((data (json-read-from-string json-string))
         (tree (org-json-bridge--restore-tree data)))
    (with-temp-file output-file
      (insert (org-element-interpret-data tree)))))
|
||||
|
||||
;; Entry point for batch mode
|
||||
(when (string= (car command-line-args-left) "--")
|
||||
(pop command-line-args-left))
|
||||
|
||||
(let ((command (pop command-line-args-left)))
|
||||
(cond
|
||||
((string= command "org-to-json")
|
||||
(let ((file (pop command-line-args-left)))
|
||||
(org-to-json file)))
|
||||
((string= command "json-to-org")
|
||||
(let ((json-str (pop command-line-args-left))
|
||||
(out-file (pop command-line-args-left)))
|
||||
(json-to-org json-str out-file)))))
|
||||
#+end_src
|
||||
|
||||
** Python Wrapper (org_bridge.py)
|
||||
#+begin_src python :tangle projects/org-json-bridge/org_bridge.py
|
||||
import subprocess
|
||||
import json
|
||||
import os
|
||||
import argparse
|
||||
from typing import Dict, Any, Optional
|
||||
|
||||
class OrgBridge:
    """Client that drives the Emacs Lisp bridge script via ``emacs --batch``."""

    def __init__(self, lisp_script_path: str = os.path.join(os.path.dirname(__file__), "org-json-bridge.el")):
        # Stored as an absolute path so later invocations do not depend on
        # the process working directory.
        self.lisp_path = os.path.abspath(lisp_script_path)

    def _run_emacs_batch(self, command: str, *args) -> str:
        """Run the bridge script in batch mode and return its stripped stdout.

        ``command`` and ``args`` are appended after a ``--`` separator.
        ``check=True`` makes ``subprocess.run`` raise ``CalledProcessError``
        when Emacs exits with a non-zero status.
        """
        argv = ["emacs", "--batch", "-l", self.lisp_path, "--", command]
        argv.extend(args)
        completed = subprocess.run(argv, capture_output=True, text=True, check=True)
        return completed.stdout.strip()

    def parse_to_dict(self, file_path: str) -> Dict[str, Any]:
        """Return the decoded JSON printed by the ``org-to-json`` command for ``file_path``."""
        source = os.path.abspath(file_path)
        return json.loads(self._run_emacs_batch("org-to-json", source))

    def write_from_dict(self, ast_dict: Dict[str, Any], output_path: str):
        """Serialize ``ast_dict`` to JSON and pass it to the ``json-to-org`` command."""
        payload = json.dumps(ast_dict)
        destination = os.path.abspath(output_path)
        # The JSON text travels as a single command-line argument.
        self._run_emacs_batch("json-to-org", payload, destination)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line front end: "parse" prints an Org file's AST as JSON,
    # "render" writes an Org file from a JSON AST stored on disk.
    cli = argparse.ArgumentParser(description="Org-mode to JSON bridge for programmatic manipulation.")
    cli.add_argument("action", choices=["parse", "render"], help="Action to perform: 'parse' an Org file to JSON, or 'render' JSON to an Org file.")
    cli.add_argument("--file-path", help="Path to the Org-mode file (required for 'parse' action).")
    cli.add_argument("--json-input-file", help="Path to a JSON file containing the AST (required for 'render' action).")
    cli.add_argument("--output-file", help="Path to output the Org-mode file (required for 'render' action).")

    opts = cli.parse_args()
    bridge = OrgBridge()

    if opts.action == "parse":
        # argparse cannot express per-action required options, so check by hand.
        if not opts.file_path:
            cli.error("--file-path is required for the 'parse' action.")
        print(json.dumps(bridge.parse_to_dict(opts.file_path), indent=2))
    elif opts.action == "render":
        if not (opts.json_input_file and opts.output_file):
            cli.error("--json-input-file and --output-file are required for the 'render' action.")
        with open(opts.json_input_file, 'r') as handle:
            ast_dict = json.load(handle)
        bridge.write_from_dict(ast_dict, opts.output_file)
|
||||
#+end_src
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
:PROPERTIES:
|
||||
:ID: 98576df2-c496-4e4a-9acb-0bca514a0305
|
||||
:CREATED: [2026-03-31 Tue 18:28]
|
||||
:EDITED: [2026-04-07 Tue 13:42]
|
||||
:EDITED: [2026-04-09 Thu]
|
||||
:END:
|
||||
#+TITLE: SKILL: Global Safety Harness (Universal Literate Note)
|
||||
#+STARTUP: content
|
||||
#+FILETAGS: :security:sandbox:ast:psf:
|
||||
|
||||
* Overview
|
||||
The *Global Safety Harness* is the primary "Safety Gate" for the Neurosymbolic Lisp Machine. It provides a recursive AST validator that subjects all Elisp proposals from System 1 to a strict "Deny-by-Default" sandbox, preventing arbitrary code execution while allowing high-fidelity system manipulation.
|
||||
The *Global Safety Harness* is the primary "Safety Gate" for the Neurosymbolic Lisp Machine. It provides a recursive AST validator that subjects all Elisp/Lisp proposals from System 1 to a strict "Deny-by-Default" sandbox, preventing arbitrary code execution while allowing high-fidelity system manipulation.
|
||||
|
||||
* Phase A: Demand (PRD)
|
||||
:PROPERTIES:
|
||||
@@ -16,7 +16,7 @@ The *Global Safety Harness* is the primary "Safety Gate" for the Neurosymbolic L
|
||||
:END:
|
||||
|
||||
** 1. Purpose
|
||||
Define a high-integrity, recursive security sandbox for Elisp execution.
|
||||
Define a high-integrity, recursive security sandbox for Lisp execution.
|
||||
|
||||
** 2. User Needs
|
||||
- *Recursive Validation:* Every nested function call and variable access MUST be checked.
|
||||
@@ -25,71 +25,133 @@ Define a high-integrity, recursive security sandbox for Elisp execution.
|
||||
- *Symbolic Preemption:* This skill acts as a mandatory global System 2 check.
|
||||
|
||||
** 3. Success Criteria
|
||||
*** TODO Implement recursive AST walker in Lisp
|
||||
*** TODO Establish strict function whitelist (surgical Org operations)
|
||||
*** TODO Detect and block nested 'eval' attempts
|
||||
*** TODO Verify that malformed or malicious sexps are rejected
|
||||
*** DONE Implement recursive AST walker in Lisp
|
||||
*** DONE Establish strict function whitelist (surgical Org operations)
|
||||
*** DONE Detect and block nested 'eval' attempts
|
||||
*** DONE Verify that malformed or malicious sexps are rejected
|
||||
|
||||
* Implementation
|
||||
|
||||
* Phase B: Blueprint (PROTOCOL)
|
||||
:PROPERTIES:
|
||||
:STATUS: SIGNED
|
||||
:END:
|
||||
** Package
|
||||
#+begin_src lisp :tangle ../src/safety-harness.lisp
|
||||
(in-package :org-agent)
|
||||
#+end_src
|
||||
|
||||
* Phase B: Blueprint (PROTOCOL)
|
||||
:PROPERTIES:
|
||||
:STATUS: IN-PROGRESS
|
||||
:END:
|
||||
** Whitelist Definition
|
||||
#+begin_src lisp :tangle ../src/safety-harness.lisp
|
||||
(defparameter *safety-whitelist*
|
||||
'(;; Math & Logic
|
||||
+ - * / = < > <= >= 1+ 1- min max
|
||||
and or not null eq eql equal string= string-equal
|
||||
;; List Manipulation
|
||||
list cons car cdr cadr cddr cdar caar append mapcar remove-if remove-if-not
|
||||
length reverse sort nth nthcdr push pop
|
||||
;; Plists and Hash Tables
|
||||
getf gethash
|
||||
;; Control Flow
|
||||
let let* if cond when unless case typecase
|
||||
;; Strings
|
||||
format concatenate string-downcase string-upcase search
|
||||
;; Kernel specifics
|
||||
org-agent::kernel-log
|
||||
org-agent::snapshot-object-store
|
||||
org-agent::rollback-object-store
|
||||
org-agent::lookup-object
|
||||
org-agent::list-objects-by-type
|
||||
org-agent::ingest-ast
|
||||
org-agent::find-headline-missing-id
|
||||
org-agent::context-query-store
|
||||
org-agent::context-get-active-projects
|
||||
org-agent::context-get-recent-completed-tasks
|
||||
org-agent::context-list-all-skills
|
||||
org-agent::context-get-system-logs
|
||||
org-agent::context-assemble-global-awareness
|
||||
org-agent::org-object-id
|
||||
org-agent::org-object-type
|
||||
org-agent::org-object-attributes
|
||||
org-agent::org-object-content
|
||||
org-agent::org-object-parent-id
|
||||
org-agent::org-object-children
|
||||
org-agent::org-object-version
|
||||
org-agent::org-object-last-sync
|
||||
org-agent::org-object-hash
|
||||
;; Essential macros
|
||||
declare ignore
|
||||
;; Let's also add simple data types
|
||||
t nil quote function))
|
||||
#+end_src
|
||||
|
||||
** 1. Architectural Intent
|
||||
** Recursive AST Walker
|
||||
#+begin_src lisp :tangle ../src/safety-harness.lisp
|
||||
(defun %safety-proper-list-p (object)
  "Return T when OBJECT is a NIL-terminated, non-circular list."
  (and (listp object)
       ;; LIST-LENGTH yields NIL for circular lists and signals on dotted ones.
       (ignore-errors (list-length object))
       t))

(defun %safety-whitelisted-p (symbol)
  "Return T when SYMBOL's name matches an entry of *SAFETY-WHITELIST*."
  ;; NOTE(review): STRING-EQUAL compares names only, so a same-named symbol
  ;; from another package also matches -- confirm this is intended.
  (and (symbolp symbol)
       (member symbol *safety-whitelist* :test #'string-equal)
       t))

(defun %safety-bindings-safe-p (bindings)
  "Return true when BINDINGS is a well-formed LET/LET* binding list with safe init forms."
  (and (%safety-proper-list-p bindings)
       (every (lambda (binding)
                (or (symbolp binding)                  ; (let (x) ...)
                    (and (%safety-proper-list-p binding)
                         (symbolp (first binding))     ; (x) or (x init)
                         (<= (length binding) 2)
                         (every #'safety-harness-ast-walk (rest binding)))))
              bindings)))

(defun safety-harness-ast-walk (form)
  "Recursively walks the Lisp AST. Returns T if safe, NIL if unsafe.

Rules applied to FORM:
- strings, numbers, keywords and characters are safe;
- bare symbols are accepted as potential variable references;
- (QUOTE ...) is accepted without inspecting the quoted datum;
- (FUNCTION name) is accepted only when NAME is a whitelisted symbol,
  so #'some-unlisted-function can no longer be passed to MAPCAR and friends;
- LET, LET*, COND, CASE and TYPECASE are walked according to their own
  syntax (binding lists and clauses are not function calls);
- any other list must start with a whitelisted symbol and have safe arguments;
- improper lists and all other objects are unsafe.

NOTE(review): a quoted symbol is still accepted as data, and functions such
as MAPCAR accept symbols as function designators -- this walker does not
track that flow."
  (and
   (cond
     ;; Self-evaluating objects.
     ((or (stringp form) (numberp form) (keywordp form) (characterp form)) t)
     ;; Variables (this also covers NIL, i.e. the empty list).
     ((symbolp form) t)
     ;; Non-list atoms and dotted/circular lists are rejected outright.
     ((not (%safety-proper-list-p form)) nil)
     (t
      (let ((head (first form))
            (args (rest form)))
        (cond
          ((eq head 'quote) t)
          ((not (symbolp head)) nil)
          ((not (%safety-whitelisted-p head))
           (kernel-log "SAFETY HARNESS: Blocked call to non-whitelisted function ~a" head)
           nil)
          ;; #'name -- the named function itself must be whitelisted.
          ((eq head 'function)
           (let ((target (first args)))
             (cond
               ((and target (null (rest args)) (%safety-whitelisted-p target)) t)
               (t
                (kernel-log "SAFETY HARNESS: Blocked reference to non-whitelisted function ~a" target)
                nil))))
          ;; (let (bindings...) body...)
          ((member head '(let let*))
           (and args
                (%safety-bindings-safe-p (first args))
                (every #'safety-harness-ast-walk (rest args))))
          ;; (cond (test body...)...) -- every form in every clause is evaluated.
          ((eq head 'cond)
           (every (lambda (clause)
                    (and clause
                         (%safety-proper-list-p clause)
                         (every #'safety-harness-ast-walk clause)))
                  args))
          ;; (case key (keys body...)...) -- the keys are literal data, not code.
          ((member head '(case typecase))
           (and args
                (safety-harness-ast-walk (first args))
                (every (lambda (clause)
                         (and clause
                              (%safety-proper-list-p clause)
                              (every #'safety-harness-ast-walk (rest clause))))
                       (rest args))))
          ;; Ordinary call to a whitelisted operator.
          (t (every #'safety-harness-ast-walk args))))))
   t))
|
||||
#+end_src
|
||||
|
||||
The Global Safety Harness will function as a global aspect, intercepting all Elisp forms before they are evaluated by the core Lisp interpreter. It achieves this by:
|
||||
** Validation Entry Point
|
||||
#+begin_src lisp :tangle ../src/safety-harness.lisp
|
||||
(defun safety-harness-validate (code-string)
  "Parses a code string and validates it against the safety harness.

Every top-level form in CODE-STRING is read with *READ-EVAL* bound to NIL
and passed to SAFETY-HARNESS-AST-WALK. Returns T only when the string
contains at least one form and all of its forms are safe; returns NIL when
any form is unsafe, when the string holds no form, or when reading signals
an error (which is logged)."
  (handler-case
      (let ((*read-eval* nil)            ; disable #. read-time evaluation
            (eof-marker (gensym "EOF"))
            (forms-seen 0))
        (with-input-from-string (stream code-string)
          ;; Validate the whole string, not just its first form, so text
          ;; after a harmless leading form cannot go unchecked.
          (loop for form = (read stream nil eof-marker)
                until (eq form eof-marker)
                do (incf forms-seen)
                   (unless (safety-harness-ast-walk form)
                     (return-from safety-harness-validate nil))))
        (cond
          ((plusp forms-seen) t)
          (t
           (kernel-log "SAFETY HARNESS ERROR: Syntax or read error during validation: ~a"
                       "no form found in input")
           nil)))
    (error (c)
      (kernel-log "SAFETY HARNESS ERROR: Syntax or read error during validation: ~a" c)
      nil)))
|
||||
#+end_src
|
||||
|
||||
- **AST Walking:** Recursively traversing the Abstract Syntax Tree (AST) of the Elisp expression.
|
||||
- **Whitelist Enforcement:** Comparing each function call and variable access against a pre-approved whitelist. Any item not on the whitelist is immediately rejected.
|
||||
- **Eval Blocking:** Explicitly searching for and rejecting any instances of `eval`, `load`, `eval-expression`, and related functions that enable dynamic code generation or loading.
|
||||
- **Error Handling:** Providing informative error messages when a security violation occurs, including the specific function or variable that triggered the rejection and its location within the AST.
|
||||
- **Performance Consideration:** Optimizing the AST walking and whitelist lookup to minimize overhead on Elisp evaluation. Memoization of whitelist checks should be implemented to avoid redundant lookups.
|
||||
** Skill Definition
|
||||
#+begin_src lisp :tangle ../src/safety-harness.lisp
|
||||
(defskill :skill-safety-harness
|
||||
:priority 90
|
||||
:trigger (lambda (ctx) nil)
|
||||
:neuro nil
|
||||
:symbolic nil)
|
||||
#+end_src
|
||||
|
||||
** 2. Semantic Interfaces
|
||||
* Phase E: Chaos (Verification)
|
||||
#+begin_src lisp :tangle ../tests/safety-harness-tests.lisp
|
||||
(defpackage :org-agent-safety-tests
|
||||
(:use :cl :fiveam :org-agent)
|
||||
(:export #:safety-suite))
|
||||
(in-package :org-agent-safety-tests)
|
||||
|
||||
*** Function: +safety-harness-validate+
|
||||
(def-suite safety-suite :description "Tests for the Global Safety Harness.")
|
||||
(in-suite safety-suite)
|
||||
|
||||
#+BEGIN_SRC lisp
|
||||
(defun +safety-harness-validate+ (form whitelist)
|
||||
"Validates an Elisp form against a security whitelist.
|
||||
FORM: The Elisp form to validate (list or symbol).
|
||||
WHITELIST: An alist associating symbols (function/variable names) to metadata. Metadata includes :safe? boolean flag and :trust-level (integer).")
|
||||
#+END_SRC
|
||||
(test test-basic-math-safe
|
||||
(is (org-agent:safety-harness-validate "(+ 1 2)")))
|
||||
|
||||
*** Function: +safety-harness-ast-walk+
|
||||
(test test-blocked-eval
|
||||
(is (not (org-agent:safety-harness-validate "(eval '(+ 1 2))"))))
|
||||
|
||||
#+BEGIN_SRC lisp
|
||||
(defun +safety-harness-ast-walk+ (form whitelist)
|
||||
"Recursively walks the Abstract Syntax Tree (AST) of an Elisp form,
|
||||
validating each node against the whitelist.")
|
||||
#+END_SRC
|
||||
(test test-blocked-shell
|
||||
(is (not (org-agent:safety-harness-validate "(uiop:run-program \"ls\")"))))
|
||||
|
||||
*** Function: +safety-harness-whitelist-lookup+
|
||||
|
||||
#+BEGIN_SRC lisp
|
||||
(defun +safety-harness-whitelist-lookup+ (symbol whitelist)
|
||||
"Looks up a symbol in the security whitelist.
|
||||
Returns the whitelist entry if found, or nil if not found.")
|
||||
#+END_SRC
|
||||
|
||||
*** Function: +safety-harness-eval-blocked?+
|
||||
|
||||
#+BEGIN_SRC lisp
|
||||
(defun +safety-harness-eval-blocked?+ (form)
|
||||
"Checks if the Elisp form contains any prohibited eval-like constructs.
|
||||
Returns t if eval is blocked, nil otherwise.")
|
||||
#+END_SRC
|
||||
|
||||
*** Data Structure: +safety-harness-error+
|
||||
|
||||
A plist data structure representing a security violation:
|
||||
- `:type`: `'whitelist-violation` or `'eval-blocked`
|
||||
- `:symbol`: The offending symbol (function or variable name)
|
||||
- `:location`: A list representing the path within the AST where the violation occurred.
|
||||
(test test-nested-unsafe
|
||||
(is (not (org-agent:safety-harness-validate "(let ((x 1)) (delete-file \"test.txt\"))"))))
|
||||
|
||||
(test test-safe-kernel-api
|
||||
(is (org-agent:safety-harness-validate "(org-agent::lookup-object \"node-1\")")))
|
||||
#+end_src
|
||||
|
||||
@@ -90,3 +90,469 @@ Maintain a state-aware provider cascade that routes around "pain" (failures) and
|
||||
(when p (token-accountant-record-pain p))
|
||||
action))))
|
||||
#+end_src
|
||||
|
||||
* Documentation (Token Optimization)
|
||||
** research.org
|
||||
#+TITLE: Token Management & Model Optimization Research
|
||||
#+author: Amero Garcia
|
||||
#+created: [2026-03-16 Mon 14:28]
|
||||
#+DATE: 2026-03-04
|
||||
#+FILETAGS: :research:token:optimization:models
|
||||
|
||||
* Token Management Strategy Research
|
||||
|
||||
** Initial Findings
|
||||
|
||||
*** OpenRouter Free Tier
|
||||
- URL: https://openrouter.ai/collections/free-models
|
||||
- Providers moving from free to paid-only models
|
||||
- Belief: "Free models play crucial role in democratizing access"
|
||||
|
||||
*** Google AI Studio (Gemini)
|
||||
- Free tier available
|
||||
- Limits: 60 requests/minute, 300K tokens/day
|
||||
- No credit card required
|
||||
- Every API key gets these limits
|
||||
|
||||
** Research Questions
|
||||
|
||||
1. Which providers offer free or low-cost tiers?
|
||||
2. What are the rate limits and quotas?
|
||||
3. Which models are best for which use cases?
|
||||
4. How to optimize context windows?
|
||||
5. What is the cost per token breakdown?
|
||||
|
||||
** To Research Further
|
||||
|
||||
| Provider | Free Tier | Paid Tier | Best For |
|
||||
|----------|-----------|-----------|----------|
|
||||
| Google Gemini | 300K tokens/day | Pay per use? | General, coding |
|
||||
| OpenRouter | Varies by model | Per-request | Routing, variety |
|
||||
| OpenAI | ? | ? | GPT-4 quality |
|
||||
| Anthropic | ? | ? | Claude capabilities |
|
||||
| Mistral | ? | ? | Open weights |
|
||||
| Local | Hardware cost | Free | Privacy, control |
|
||||
|
||||
** Token Optimization Strategies to Explore
|
||||
|
||||
1. *Tiered Model Usage*
|
||||
- Simple tasks: Fast/cheap models
|
||||
- Complex tasks: Stronger models
|
||||
- Fallback: Lower tier if higher fails
|
||||
|
||||
2. *Context Compression*
|
||||
- Summarize long contexts
|
||||
- Use RAG instead of full context
|
||||
- Prune old conversation
|
||||
|
||||
3. *Caching*
|
||||
- Cache common responses
|
||||
- Reuse embeddings
|
||||
- Batch requests
|
||||
|
||||
4. *Hybrid Approach*
|
||||
- Local models for simple queries
|
||||
- Cloud APIs for complex tasks
|
||||
- Manual review for critical outputs
|
||||
|
||||
** X Account Access
|
||||
|
||||
*Pending:* X account access via Google login
|
||||
*Blocker:* Requires OTP from user per security rule (SOUL.md)
|
||||
*Action needed:* User provides OTP, I complete OAuth, access bookmarks
|
||||
** budget-50.org
|
||||
#+TITLE: Token Optimization - $50 Monthly Budget
|
||||
#+author: Amero Garcia
|
||||
#+created: [2026-03-16 Mon 14:28]
|
||||
#+DATE: 2026-03-04
|
||||
#+FILETAGS: :budget:constraints:optimization
|
||||
|
||||
* Budget: $50/Month
|
||||
|
||||
** Budget Breakdown
|
||||
|
||||
| Tier | Provider | Allocation | Tokens Est. | Use Case |
|
||||
|------|----------|-----------|-------------|----------|
|
||||
| FREE | Google Gemini | $0 | ~9M/month | 90% of work |
|
||||
| CHEAP | OpenRouter | $20 | ~6M tokens | Fallback, complex tasks |
|
||||
| PREMIUM | Claude/GPT-4o | $25 | ~500K tokens | Critical decisions |
|
||||
| BUFFER | Various | $5 | Emergency | Overruns, testing |
|
||||
|
||||
** Daily Free Allowance
|
||||
|
||||
- *Google Gemini:* 300K tokens/day = 9M/month = *$0*
|
||||
- This covers 90-95% of expected workload
|
||||
|
||||
** Paid Tier Allocation ($45 + $5 buffer)
|
||||
|
||||
- *$20 → OpenRouter* (Qwen, Mistral, Llama)
|
||||
- ~6M tokens at $0.003/1K
|
||||
- Use when: Gemini rate limited, need different model
|
||||
|
||||
- *$25 → Premium models* (Claude, GPT-4o)
|
||||
- ~500K tokens at $0.05/1K average
|
||||
- Use when: Architecture decisions, critical code review, final validation
|
||||
|
||||
- *$5 → Buffer*
|
||||
- Handle overruns
|
||||
- Emergency access
|
||||
- Testing new models
|
||||
|
||||
** Hard Limits
|
||||
|
||||
| Provider | Monthly Cap | Alert At |
|
||||
|----------|-------------|----------|
|
||||
| OpenRouter | $20 | $16 (80%) |
|
||||
| Premium | $25 | $20 (80%) |
|
||||
| Total | $50 | $45 (90%) |
|
||||
|
||||
** Daily Tracking
|
||||
|
||||
Target: *Monitor consumption every session*
|
||||
|
||||
```
|
||||
IF daily_cost > $1.50:
|
||||
→ Switch to Gemini only
|
||||
→ Defer premium tasks
|
||||
|
||||
IF weekly_cost > $12:
|
||||
→ Review usage patterns
|
||||
→ Find optimization opportunities
|
||||
```
|
||||
|
||||
** Emergency Protocol
|
||||
|
||||
If approaching $50 limit before month end:
|
||||
1. Halt all paid API calls
|
||||
2. Switch to Gemini-only mode
|
||||
3. Queue premium tasks for next month
|
||||
4. Consider local inference setup
|
||||
|
||||
** Cost-Per-Task Guidelines
|
||||
|
||||
| Task Type | Max Cost | Preferred Model |
|
||||
|-----------|----------|-----------------|
|
||||
| Quick lookup | $0.00 | Gemini |
|
||||
| Code review | $0.01 | Gemini/OpenRouter |
|
||||
| Feature design | $0.05 | OpenRouter |
|
||||
| Architecture review | $0.10 | Claude/GPT-4o |
|
||||
| Emergency debug | $0.20 | Best available |
|
||||
|
||||
** Optimization Imperative
|
||||
|
||||
With $50/month, waste is not affordable:
|
||||
- ❌ No speculative queries
|
||||
- ❌ No "just curious" premium calls
|
||||
- ❌ No repeated similar prompts
|
||||
- ✅ Always use Gemini first
|
||||
- ✅ Batch similar requests
|
||||
- ✅ Cache embeddings locally
|
||||
- ✅ Summarize long contexts
|
||||
|
||||
** Monthly Review
|
||||
|
||||
1. Compare actual vs. projected usage
|
||||
2. Adjust model routing rules
|
||||
3. Identify expensive query patterns
|
||||
4. Plan next month's allocation
|
||||
|
||||
** Break-Even Analysis
|
||||
|
||||
At $50/month = $600/year:
|
||||
- *Option A:* Continue APIs (flexible, managed)
|
||||
- *Option B:* Local inference (~$800 hardware, $0 ongoing)
|
||||
- Break-even: 16 months
|
||||
- Risk: Hardware failure, maintenance
|
||||
|
||||
*Recommendation:* Stick with APIs until $100+/month, then evaluate hardware.
|
||||
|
||||
** Questions for Human Partner
|
||||
|
||||
1. Is $50 firm or flexible in emergencies?
|
||||
2. What happens if we hit limit mid-critical-task?
|
||||
3. Preference for which premium model? (Claude vs GPT-4 vs both)
|
||||
4. Should I track and report costs per project?
|
||||
5. Any tasks that are "unlimited budget" critical?
|
||||
** README.org
|
||||
#+TITLE: Token Optimization
|
||||
#+AUTHOR: Amr
|
||||
#+CREATED: [2026-03-17 Tue]
|
||||
#+BEGIN_COMMENT
|
||||
Cost-effective LLM usage through smart routing, context compression, and multi-provider strategies.
|
||||
#+END_COMMENT
|
||||
|
||||
* Token Optimization
|
||||
|
||||
Strategy and implementation for minimizing LLM costs while maintaining quality.
|
||||
|
||||
* Project Tasks
|
||||
|
||||
See the actionable tasks for this project in [[file:../../gtd.org::*Token Optimization][GTD.org > Projects > Token Optimization]]
|
||||
|
||||
* Key Documents
|
||||
|
||||
- [[file:plan.org][Optimization Plan]]
|
||||
- [[file:token-optimization.yaml][Configuration]]
|
||||
|
||||
* Current Focus
|
||||
|
||||
- Multi-provider setup (Gemini primary, OpenRouter fallback)
|
||||
- Usage tracking and budget alerts
|
||||
- Smart routing by task type
|
||||
- Context compression techniques
|
||||
** quick-start.org
|
||||
#+TITLE: Token Optimization - Quick Start
|
||||
#+author: Amero Garcia
|
||||
#+created: [2026-03-16 Mon 14:28]
|
||||
#+DATE: 2026-03-04
|
||||
|
||||
* Quick Reference for Daily Use
|
||||
|
||||
** Rule of Thumb
|
||||
|
||||
| What you need | Use this | Cost |
|
||||
|---------------|----------|------|
|
||||
| Quick answer, formatting, lookup | Gemini Flash | FREE |
|
||||
| Code review, analysis | Gemini Pro | FREE |
|
||||
| Complex problem solving | Claude Haiku / Qwen | $ |
|
||||
| Critical architecture decision | GPT-4o | $$ |
|
||||
|
||||
** Free Tier Limits (Daily)
|
||||
|
||||
| Provider | Tokens | Requests | Reset |
|
||||
|----------|--------|----------|-------|
|
||||
| Google AI Studio | 300,000 | 60/min | Daily |
|
||||
| OpenRouter Free | Varies | Limited | - |
|
||||
|
||||
** Current Recommendation
|
||||
|
||||
→ *Use Google Gemini exclusively* until hitting 250K tokens/day
|
||||
→ Then add OpenRouter fallback
|
||||
→ Only use GPT-4 for final reviews
|
||||
|
||||
** This will reduce token costs by ~90%
|
||||
|
||||
** Next Steps
|
||||
|
||||
1. Configure Gemini as primary (already partially done)
|
||||
2. Add quota tracking
|
||||
3. Set alerts at 80% of free limits
|
||||
4. Implement tiered routing
|
||||
|
||||
** Savings Potential: $100-500/month → $10-50/month
|
||||
** plan.org
|
||||
#+TITLE: Token Optimization Strategy
|
||||
#+author: Amero Garcia
|
||||
#+created: [2026-03-16 Mon 14:28]
|
||||
#+DATE: 2026-03-04
|
||||
#+FILETAGS: :strategy:token:optimization:cost
|
||||
|
||||
* Executive Summary
|
||||
|
||||
** Goal: Minimize inference costs while maximizing capability
|
||||
|
||||
Current approach: Single default model → Multi-tier, multi-provider strategy
|
||||
|
||||
* Three-Tier Model Strategy
|
||||
|
||||
** Tier 1: Fast/Cheap (80% of queries)
|
||||
- *Purpose:* Simple tasks, formatting, lookups
|
||||
- *Models:* Google Gemini Flash, Local models
|
||||
- *Cost:* $0-0.000001 per 1K tokens
|
||||
- *Speed:* Fastest
|
||||
|
||||
** Tier 2: Balanced (18% of queries)
|
||||
- *Purpose:* Complex reasoning, code generation, analysis
|
||||
- *Models:* Gemini Pro, Claude Haiku, Llama 3 70B
|
||||
- *Cost:* $0.0001-0.003 per 1K tokens
|
||||
- *Speed:* Medium
|
||||
|
||||
** Tier 3: High-Performance (2% of queries)
|
||||
- *Purpose:* Critical decisions, complex architecture, final review
|
||||
- *Models:* GPT-4, Claude Opus, Gemini Ultra
|
||||
- *Cost:* $0.01-0.03 per 1K tokens
|
||||
- *Speed:* Slower
|
||||
|
||||
* Provider Analysis
|
||||
|
||||
** Google AI Studio (Primary Recommended)
|
||||
|
||||
| Model | Free Tier | Rate Limit | Best For |
|
||||
|-------|-----------|------------|----------|
|
||||
| Gemini 2.0 Flash | 300K tokens/day | 60 req/min | Quick tasks, coding |
|
||||
| Gemini 1.5 Flash | 300K tokens/day | 60 req/min | Fast responses |
|
||||
| Gemini 1.5 Pro | 300K tokens/day | 60 req/min | Complex tasks |
|
||||
|
||||
*Cost: FREE (within limits)*
|
||||
|
||||
** OpenRouter.Aggregated (Secondary)
|
||||
|
||||
| Model | Price/1K tokens | Context | Reliability |
|
||||
|-------|-----------------|---------|-------------|
|
||||
| Qwen 3 235B | $0.0001-0.0003 | 128K | High |
|
||||
| Mistral Large | $0.002-0.006 | 128K | High |
|
||||
| Llama 4 405B | $0.0002-0.0005 | 128K | Medium |
|
||||
| Free tier models | $0 | Varies | Variable |
|
||||
|
||||
** OpenAI (Tier 3 only)
|
||||
- GPT-4: $0.03/1K tokens (expensive)
|
||||
- GPT-4o: $0.005/1K tokens (better value)
|
||||
- Use sparingly for critical tasks only
|
||||
|
||||
** Local Inference (Long-term goal)
|
||||
- Hardware: $1000-5000 initial investment
|
||||
- Ongoing: $0 (electricity only)
|
||||
- Models: Llama 3, Mistral, DeepSeek
|
||||
- Best for: High-volume, privacy-sensitive work
|
||||
|
||||
* Context Optimization Strategies
|
||||
|
||||
** 1. Context Windows by Task Type
|
||||
|
||||
| Task Type | Optimal Context | Compression | Savings |
|
||||
|-----------|-----------------|-------------|---------|
|
||||
| Code review | 4K-8K | Truncate old files | 50% |
|
||||
| Documentation | 8K-16K | Summarize sections | 30% |
|
||||
| Research | 16K-32K | Chunk + RAG | 70% |
|
||||
| Architecture | 32K-128K | Maintain full | 0% |
|
||||
|
||||
** 2. Conversation Pruning
|
||||
- Remove "thinking" blocks from history
|
||||
- Summarize conversation every 10 turns
|
||||
- Archive old sessions to external storage
|
||||
|
||||
** 3. RAG vs. Full Context
|
||||
- *Rule:* < 5K tokens of context → Full
|
||||
- *Rule:* > 10K tokens of context → Use embeddings/RAG
|
||||
- *Savings:* 60-80% on large document tasks
|
||||
|
||||
* Request Optimization
|
||||
|
||||
** Batching Strategy
|
||||
- Group similar requests (3-5 per batch)
|
||||
- Same model, same parameters
|
||||
- Shared overhead costs
|
||||
|
||||
** Caching Strategy
|
||||
- Cache embeddings for repeated contexts
|
||||
- Store common completions (templates)
|
||||
- Reuse code snippet suggestions
|
||||
|
||||
** Streaming vs. Non-Stream
|
||||
- *Streaming:* Better UX, but higher token overhead
|
||||
- *Non-stream:* More efficient for programmatic use
|
||||
- *Recommendation:* Non-stream for background tasks
|
||||
|
||||
* Smart Routing Rules
|
||||
|
||||
** Automatic Selection Logic
|
||||
|
||||
```
|
||||
IF task_type IN ("simple_lookup", "formatting"):
|
||||
→ Gemini Flash (free)
|
||||
|
||||
ELIF task_type == "code_generation" AND complexity < 3:
|
||||
→ Gemini Pro (free tier)
|
||||
|
||||
ELIF task_type IN ("complex_reasoning", "architecture"):
|
||||
→ Claude Sonnet or GPT-4o
|
||||
|
||||
ELIF task_type IN ("final_review", "critical_decision"):
|
||||
→ GPT-4 or Claude Opus
|
||||
```
|
||||
|
||||
** Fallback Chain
|
||||
1. Try Gemini (free)
|
||||
2. If rate limited → OpenRouter (cheap)
|
||||
3. If quality insufficient → GPT-4o
|
||||
4. If critical failure → GPT-4
|
||||
|
||||
* Concrete Implementation
|
||||
|
||||
** Config Structure (openclaw.json)
|
||||
|
||||
```json
|
||||
{
|
||||
"models": {
|
||||
"defaults": {
|
||||
"primary": "google-gemini-cli/gemini-2.0-flash",
|
||||
"fallbacks": [
|
||||
"openrouter/qwen/qwen3-235b-a22b",
|
||||
"google-gemini-cli/gemini-1.5-pro",
|
||||
"openai/gpt-4o"
|
||||
]
|
||||
},
|
||||
"providers": {
|
||||
"google-gemini-cli": {
|
||||
"freeTier": true,
|
||||
"dailyLimit": 300000,
|
||||
"rateLimit": 60
|
||||
},
|
||||
"openrouter": {
|
||||
"freeTierModels": ["openrouter/auto"],
|
||||
"budgetLimit": 500
|
||||
},
|
||||
"openai": {
|
||||
"budgetLimit": 200,
|
||||
"useFor": ["critical", "architecture"]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
** Monitoring & Alerts
|
||||
|
||||
- Track daily token usage per provider
|
||||
- Alert at 80% of free tier limits
|
||||
- Monthly budget review and adjustment
|
||||
|
||||
* Cost Projections
|
||||
|
||||
** Current Unknown Usage → Optimized
|
||||
|
||||
| Scenario | Monthly Tokens | Current Cost | Optimized Cost | Savings |
|
||||
|----------|---------------|--------------|----------------|---------|
|
||||
| Light (< 1M) | 1M | $50-100 | $0-10 | 90% |
|
||||
| Medium (1-5M) | 3M | $200-500 | $20-100 | 80% |
|
||||
| Heavy (5-20M) | 10M | $1000-3000 | $200-500 | 80% |
|
||||
|
||||
* Immediate Actions
|
||||
|
||||
** Week 1: Setup
|
||||
- Configure Gemini as primary provider
|
||||
- Set up OpenRouter fallback
|
||||
- Implement basic usage tracking
|
||||
- Document current baseline
|
||||
|
||||
** Week 2: Implement
|
||||
- Add smart routing logic
|
||||
- Implement context compression
|
||||
- Set up budget alerts
|
||||
- A/B test model choices
|
||||
|
||||
** Week 3: Optimize
|
||||
- Analyze usage patterns
|
||||
- Fine-tune routing rules
|
||||
- Tune context windows
|
||||
- Document findings
|
||||
|
||||
** Week 4: Scale
|
||||
- Full multi-provider setup
|
||||
- Implement full caching
|
||||
- Maximize free tier usage
|
||||
- Plan for paid tiers if needed
|
||||
|
||||
* Long-term: Local Inference Path
|
||||
|
||||
** Minimum Viable Setup
|
||||
- Hardware: RTX 4090 or Apple Silicon M3 Max
|
||||
- Software: Ollama + OpenClaw integration
|
||||
- Cost: ~$2000-4000 one-time
|
||||
- Break-even: 3-6 months vs. API costs
|
||||
|
||||
** Full Self-Hosted
|
||||
- Hardware: Dual RTX 4090 or 2x Mac Studio
|
||||
- Models: Llama 3 70B, Mixtral 8x22B
|
||||
- Cost: ~$8000-12000
|
||||
- For: Privacy, unlimited inference, control
|
||||
|
||||
@@ -169,7 +169,8 @@
|
||||
(setf current-signal (decide-gate current-signal))
|
||||
(setf current-signal (dispatch-gate current-signal)))
|
||||
(error (c)
|
||||
(kernel-log "PIPELINE CRASH: ~a" c)
|
||||
(kernel-log "PIPELINE CRASH: ~a - Initiating Micro-Rollback." c)
|
||||
(rollback-object-store 0)
|
||||
(let ((sensor (ignore-errors (getf (getf current-signal :payload) :sensor))))
|
||||
(if (or (> depth 2) (member sensor '(:loop-error :tool-error)))
|
||||
(setf current-signal nil)
|
||||
|
||||
@@ -62,6 +62,7 @@
|
||||
#:load-skill-with-timeout
|
||||
#:topological-sort-skills
|
||||
#:validate-lisp-syntax
|
||||
#:safety-harness-validate
|
||||
#:find-triggered-skill
|
||||
#:defskill
|
||||
#:*skills-registry*
|
||||
|
||||
82
src/safety-harness.lisp
Normal file
82
src/safety-harness.lisp
Normal file
@@ -0,0 +1,82 @@
|
||||
(in-package :org-agent)
|
||||
|
||||
(defparameter *safety-whitelist*
  (append
   ;; Arithmetic, comparison and boolean logic.
   '(+ - * / = < > <= >= 1+ 1- min max
     and or not null eq eql equal string= string-equal)
   ;; List construction, access and traversal.
   '(list cons car cdr cadr cddr cdar caar append mapcar remove-if remove-if-not
     length reverse sort nth nthcdr push pop)
   ;; Property lists and hash tables.
   '(getf gethash)
   ;; Control flow.
   '(let let* if cond when unless case typecase)
   ;; String helpers.
   '(format concatenate string-downcase string-upcase search)
   ;; Kernel API: logging, object store, context queries.
   '(org-agent::kernel-log
     org-agent::snapshot-object-store
     org-agent::rollback-object-store
     org-agent::lookup-object
     org-agent::list-objects-by-type
     org-agent::ingest-ast
     org-agent::find-headline-missing-id
     org-agent::context-query-store
     org-agent::context-get-active-projects
     org-agent::context-get-recent-completed-tasks
     org-agent::context-list-all-skills
     org-agent::context-get-system-logs
     org-agent::context-assemble-global-awareness)
   ;; Kernel API: org-object accessors.
   '(org-agent::org-object-id
     org-agent::org-object-type
     org-agent::org-object-attributes
     org-agent::org-object-content
     org-agent::org-object-parent-id
     org-agent::org-object-children
     org-agent::org-object-version
     org-agent::org-object-last-sync
     org-agent::org-object-hash)
   ;; Declarations.
   '(declare ignore)
   ;; Constants and quoting operators.
   '(t nil quote function))
  "Symbols that sandboxed code may use in operator position.
SAFETY-HARNESS-AST-WALK tests membership with STRING-EQUAL, i.e. by symbol
name only, so the home package of a symbol in the checked code is not
compared.")
|
||||
|
||||
(defun safety-harness-ast-walk (form)
  "Recursively walk FORM, an already-read s-expression, and judge its safety.

Returns T when every operator FORM would invoke is named in
*SAFETY-WHITELIST*, and NIL otherwise.  Blocked operators are reported
through KERNEL-LOG.

Rules applied:
- Strings, numbers, keywords and characters are safe.
- Bare symbols are safe (treated as variable references).
- (QUOTE ...) is accepted without inspecting its contents.
- (FUNCTION name) / #'name is safe only when NAME is whitelisted, so a
  non-whitelisted function cannot be smuggled into MAPCAR, SORT, etc.
- LET / LET* binding lists and COND / CASE / TYPECASE clauses are parsed
  as such instead of being mistaken for calls.
- Any other list must start with a whitelisted symbol and have safe arguments.
- Dotted or circular argument lists, and nesting deeper than 256 levels
  (e.g. circular structure built with #n= / #n#), are rejected.

NOTE(review): quoted data is still not inspected, so a quoted symbol used as
a function designator, e.g. (mapcar 'delete-file ...), passes this check.
FORMAT control strings are not inspected either (the ~/name/ directive calls
a named function).  Confirm whether the evaluator needs to guard those."
  (let ((max-depth 256))
    (labels ((whitelisted-p (sym)
               ;; Name-based match, identical to the original membership test.
               (and (symbolp sym)
                    (member sym *safety-whitelist* :test #'string-equal)))
             (proper-list-p (x)
               ;; LIST-LENGTH returns NIL for circular lists and signals an
               ;; error for dotted ones; both count as "not proper" here.
               (and (listp x) (ignore-errors (list-length x)) t))
             (block-call (head)
               (kernel-log "SAFETY HARNESS: Blocked call to non-whitelisted function ~a" head)
               nil)
             (walk-all (forms depth)
               (and (proper-list-p forms)
                    (every (lambda (f) (walk f depth)) forms)))
             (binding-safe-p (binding depth)
               ;; A LET binding is VAR or (VAR [INIT-FORM]); only INIT-FORM is code.
               (cond ((symbolp binding) t)
                     ((and (consp binding) (symbolp (car binding)))
                      (walk-all (cdr binding) depth))
                     (t nil)))
             (walk (form depth)
               (cond
                 ;; Depth cap: stops unbounded recursion on circular structure.
                 ((> depth max-depth) nil)
                 ;; Self-evaluating objects.
                 ((or (stringp form) (numberp form) (keywordp form) (characterp form))
                  t)
                 ;; Symbols (including NIL) are allowed as potential variables.
                 ((symbolp form) t)
                 ((consp form)
                  (let ((head (car form))
                        (args (cdr form))
                        (next (1+ depth)))
                    (cond
                      ((eq head 'quote) t)
                      ((not (symbolp head)) nil)
                      ((not (whitelisted-p head)) (block-call head))
                      ((not (proper-list-p args)) nil)
                      ;; #'NAME: only whitelisted function names may be referenced.
                      ((eq head 'function)
                       (let ((target (car args)))
                         (if (and (symbolp target)
                                  (null (cdr args))
                                  (whitelisted-p target))
                             t
                             (block-call target))))
                      ;; (LET (bindings...) body...)
                      ((member head '(let let*))
                       (let ((bindings (car args)))
                         (and (proper-list-p bindings)
                              (every (lambda (b) (binding-safe-p b next)) bindings)
                              (walk-all (cdr args) next))))
                      ;; (COND (test body...)...): every element of a clause is a form.
                      ((eq head 'cond)
                       (every (lambda (clause)
                                (and (consp clause) (walk-all clause next)))
                              args))
                      ;; (CASE key (keys body...)...): the keys are unevaluated data.
                      ;; TYPECASE specifiers must be plain symbols, because a
                      ;; compound specifier such as (SATISFIES fn) calls FN.
                      ((member head '(case typecase))
                       (and (walk (car args) next)
                            (every (lambda (clause)
                                     (and (consp clause)
                                          (or (eq head 'case)
                                              (symbolp (car clause)))
                                          (walk-all (cdr clause) next)))
                                   (cdr args))))
                      ;; Ordinary call: every argument must itself be safe.
                      (t (walk-all args next)))))
                 ;; Vectors, arrays and any other object are rejected.
                 (t nil))))
      (walk form 0))))
|
||||
|
||||
(defun safety-harness-validate (code-string)
  "Read every top-level form in CODE-STRING and validate each one.

Returns T only when the string holds at least one form and
SAFETY-HARNESS-AST-WALK accepts all of them; returns NIL otherwise.
Reading happens with *READ-EVAL* bound to NIL so #. cannot run code at
read time.  Any error signalled while reading or walking (empty input,
unbalanced parentheses, unknown package, non-string argument, ...) is
logged through KERNEL-LOG and reported as NIL.

Earlier behaviour checked only the first form, so text after it was
never validated; trailing forms are now checked too."
  (handler-case
      (let ((*read-eval* nil)
            (eof-marker (gensym "EOF")))
        (with-input-from-string (in code-string)
          ;; The first READ signals END-OF-FILE on empty input, which keeps
          ;; that case going through the logged error path; later reads
          ;; return EOF-MARKER at a clean end of input.
          (loop for first-p = t then nil
                for form = (read in first-p eof-marker)
                until (eq form eof-marker)
                always (safety-harness-ast-walk form))))
    (error (c)
      (kernel-log "SAFETY HARNESS ERROR: Syntax or read error during validation: ~a" c)
      nil)))
|
||||
|
||||
;; Registers the harness under a skill name.  The trigger always returns NIL
;; and no neuro/symbolic handlers are supplied.
(defskill :skill-safety-harness
  :priority 90
  :trigger (lambda (ctx)
             (declare (ignore ctx))
             nil)
  :neuro nil
  :symbolic nil)
|
||||
@@ -97,3 +97,32 @@
|
||||
(let ((awareness (context-assemble-global-awareness)))
|
||||
(is (search "Project Alpha" awareness))
|
||||
(is (search "proj-1" awareness))))
|
||||
|
||||
(test test-micro-rollback
  "Verify that a pipeline crash triggers an automatic Object Store rollback."
  ;; Start from empty stores and no snapshots.
  (dolist (table (list org-agent::*object-store* org-agent::*history-store*))
    (clrhash table))
  (setf org-agent::*object-store-snapshots* nil)

  ;; State A: a single headline with ID "node-1".
  (let ((state-a (list :type :HEADLINE
                       :properties (list :ID "node-1" :TITLE "State A")
                       :contents nil)))
    (ingest-ast state-a))

  (setup-mock-skills)
  ;; A skill that always triggers and whose symbolic handler signals an error.
  (org-agent::defskill :crashing-skill
    :priority 200
    :trigger (lambda (ctx)
               (declare (ignore ctx))
               t)
    :neuro (lambda (ctx)
             (declare (ignore ctx))
             (list :type :REQUEST
                   :payload (list :action :eval :code "(error \"BOOM\")")))
    :symbolic (lambda (action ctx)
                (declare (ignore action ctx))
                (error "CRASH IN SYSTEM 2")))

  ;; Intended turn: perceive (snapshot State A) -> neuro -> decide (crash)
  ;; -> rollback to State A.
  ;; NOTE(review): nothing visible here changes "node-1" between the snapshot
  ;; and the crash, so the assertions below may hold even without a rollback.
  ;; Confirm whether a mutation step is needed.
  (let ((event (list :type :EVENT :payload (list :sensor :test))))
    (process-signal event))

  ;; The store must still hold State A.
  (let* ((node (lookup-object "node-1"))
         (present-p (not (null node))))
    (is present-p)
    (is (equal (getf (org-object-attributes node) :TITLE) "State A"))))
|
||||
|
||||
22
tests/safety-harness-tests.lisp
Normal file
22
tests/safety-harness-tests.lisp
Normal file
@@ -0,0 +1,22 @@
|
||||
;;; Test package and suite for the safety harness.
(defpackage :org-agent-safety-tests
  (:use :cl :fiveam :org-agent)
  (:export #:safety-suite))

(in-package :org-agent-safety-tests)

(def-suite safety-suite
  :description "Tests for the Global Safety Harness.")

(in-suite safety-suite)

;; Whitelisted arithmetic is accepted.
(test test-basic-math-safe
  (is-true (org-agent:safety-harness-validate "(+ 1 2)")))

;; EVAL is not whitelisted.
(test test-blocked-eval
  (is-false (org-agent:safety-harness-validate "(eval '(+ 1 2))")))

;; Shelling out is rejected.
(test test-blocked-shell
  (is-false (org-agent:safety-harness-validate "(uiop:run-program \"ls\")")))

;; An unsafe call nested inside an allowed form is rejected.
(test test-nested-unsafe
  (is-false (org-agent:safety-harness-validate
             "(let ((x 1)) (delete-file \"test.txt\"))")))

;; Whitelisted kernel API calls are accepted.
(test test-safe-kernel-api
  (is-true (org-agent:safety-harness-validate
            "(org-agent::lookup-object \"node-1\")")))
|
||||
Reference in New Issue
Block a user