#!/usr/bin/env python3
"""Convert brain Org-mode files to markdown + YAML frontmatter and sync into gbrain."""

import os
import re
import subprocess
import sys

BRAIN = "/root/brain"
GBRAIN_SRC = "/mnt/hermes/brain"
PANDOC = "/usr/bin/pandoc"
BUN = os.path.expanduser("~/.bun/bin/gbrain")

# Org keywords are case-insensitive, so `#+TITLE:` and `#+title:` both match.
# `[ \t]*` (not `\s+`) keeps an empty keyword from swallowing the next line.
_TITLE_RE = re.compile(r'^#\+title:[ \t]*(.+)$', re.MULTILINE | re.IGNORECASE)
_FILETAGS_RE = re.compile(r'^#\+filetags:[ \t]*(.+)$', re.MULTILINE | re.IGNORECASE)
# Drawer properties are anchored to the start of a line (optionally indented)
# so that a literal ":ID:" inside body prose is not picked up.
_ID_RE = re.compile(r'^[ \t]*:ID:[ \t]+(\S+)', re.MULTILINE)
_CREATED_RE = re.compile(r'^[ \t]*:CREATED:[ \t]+\[([^\]]+)\]', re.MULTILINE)
# A header directive is `#+keyword:`; block delimiters such as `#+begin_src`
# have no colon directly after the keyword and must be left for pandoc.
_KEYWORD_LINE_RE = re.compile(r'#\+\w+:')
# Only rewrite targets of markdown links, i.e. text directly following `](`.
_ORG_LINK_RE = re.compile(r'\]\(([a-zA-Z0-9_-]+)\.org\)')


def extract_org_properties(src_path: str) -> dict:
    """Extract :PROPERTIES: drawer values and #+title/#+filetags from an org file.

    Args:
        src_path: Path of the Org file to read (decoded as UTF-8).

    Returns:
        A dict containing whichever of these keys were found:
        ``title`` (str), ``tags`` (list of str), ``org_id`` (str) and
        ``created`` (str, the text between the square brackets).
    """
    props = {}
    with open(src_path, encoding="utf-8") as f:
        content = f.read()

    # Extract title
    m = _TITLE_RE.search(content)
    if m:
        props['title'] = m.group(1).strip()

    # Extract tags. Org writes file tags as ":tag1:tag2:", so split on colons
    # as well as whitespace instead of treating "tag1:tag2" as a single tag.
    m = _FILETAGS_RE.search(content)
    if m:
        tags = [t for t in re.split(r'[:\s]+', m.group(1)) if t]
        if tags:
            props['tags'] = tags

    # Extract ID from PROPERTIES drawer
    m = _ID_RE.search(content)
    if m:
        props['org_id'] = m.group(1)

    # Extract CREATED
    m = _CREATED_RE.search(content)
    if m:
        props['created'] = m.group(1)

    return props


def strip_org_header(src_path: str) -> str:
    """Return the file's text with the leading Org header removed.

    The header is the leading run of :PROPERTIES: drawers, ``#+keyword:``
    directive lines and blank lines. It is stripped before feeding the text to
    pandoc, so it doesn't produce raw {=org} blocks.

    Args:
        src_path: Path of the Org file to read (decoded as UTF-8).

    Returns:
        Everything from the first real content line onwards; an empty string
        when the file consists only of header lines.
    """
    with open(src_path, encoding="utf-8") as f:
        lines = f.readlines()

    in_properties = False
    start = 0
    for i, line in enumerate(lines):
        stripped = line.strip()
        marker = stripped.upper()  # drawer markers are case-insensitive in Org
        if marker == ':PROPERTIES:':
            in_properties = True
        if in_properties:
            if marker == ':END:':
                in_properties = False
                start = i + 1
            continue
        if _KEYWORD_LINE_RE.match(line):
            # `#+keyword:` directive: still part of the header.
            start = i + 1
            continue
        if stripped:
            # First real content (this includes `#+begin_...` blocks).
            start = i
            break
        # Blank line between header elements.
        start = i + 1

    return ''.join(lines[start:])


def pandoc_convert(clean_body: str):
    """Convert org body to markdown via pandoc (stdin mode).

    Args:
        clean_body: Org text passed to pandoc on standard input.

    Returns:
        The stripped markdown output, or ``None`` when pandoc exits non-zero
        (the first 200 characters of its stderr are printed in that case).
    """
    result = subprocess.run(
        [PANDOC, "-f", "org", "-t", "markdown-smart"],
        input=clean_body,
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        print(f" ERROR pandoc: {result.stderr[:200]}")
        return None
    return result.stdout.strip()


def build_frontmatter(props: dict) -> str:
    """Build YAML frontmatter string from extracted properties.

    Args:
        props: Dict as produced by ``extract_org_properties``; only the
            ``title``, ``tags`` and ``created`` keys are used.

    Returns:
        The frontmatter block, delimited by ``---`` lines, without a trailing
        newline.
    """
    lines = ['---']
    if 'title' in props:
        # Escape backslashes and double quotes so the double-quoted YAML
        # scalar stays valid for titles that contain them.
        title = props['title'].replace('\\', '\\\\').replace('"', '\\"')
        lines.append(f'title: "{title}"')
    if 'tags' in props:
        tags_str = ', '.join(props['tags'])
        lines.append(f'tags: [{tags_str}]')
    if 'created' in props:
        lines.append(f'created: {props["created"]}')
    lines.append('---')
    return '\n'.join(lines)


def postprocess_links(md_text: str) -> str:
    """Convert pandoc's markdown links to gbrain-friendly format.

    Pandoc converts ``[[file:foo.org][desc]]`` to ``[desc](foo.org)``; this
    strips the ``.org`` extension from such relative link targets. Only text
    directly following ``](`` is rewritten, so a parenthesised file name in
    ordinary prose is left alone.

    Args:
        md_text: Markdown text produced by pandoc.

    Returns:
        The markdown text with ``.org`` removed from simple link targets.
    """
    return _ORG_LINK_RE.sub(r'](\1)', md_text)


ROUTING = {
    # Concepts — triad architecture, security, economics theory
    "triad-overview": "concepts",
    "agora": "concepts",
    "stoa": "concepts",
    "triad-index": "concepts",
    "domain-gate-packages": "concepts",
    "verification-appliance": "concepts",
    "verification-monopoly": "concepts",
    "infrastructure-lock-in": "concepts",
    "evaluation-harness": "concepts",
    "collective-regression-suite": "concepts",
    "lisp-machine-security": "concepts",
    "common-logic-iso-24707": "concepts",
    "self-driving-lisp-machine": "concepts",
    "lisp-economics": "concepts",
    "sufficiency-flip": "concepts",
    "time-estimates": "concepts",
    "cost-structure": "concepts",
    "gate-rule-encoding": "concepts",
    "biology-parallels": "concepts",
    "comparison-with-symbolics": "concepts",
    "upgrade-lifecycle": "concepts",
    "ai-industry-impact": "concepts",
    "moats": "concepts",
    "patent-strategy": "concepts",
    "licensing": "concepts",
    "verified-skill-marketplace": "concepts",
    "compute-marketplace": "concepts",
    "agora-usernames": "concepts",
    "pds-as-a-service": "concepts",
    "investment-thesis": "concepts",
    "compliance-framework-mapping": "concepts",
    # Ideas — strategy, competitive analysis
    "competitive-analysis-2026-05": "ideas",
    "passepartout-economics": "ideas",
}


def main():
    """Convert every routed Org file, commit the result and import it into gbrain.

    For each slug in ``ROUTING`` the Org source is converted to markdown with
    YAML frontmatter and written below ``GBRAIN_SRC``. The tree is then
    committed with git, imported with ``gbrain import`` and, if the import
    succeeds, embedded with ``gbrain embed --all``.
    """
    # Ensure MECE directories exist
    for d in ["concepts", "ideas"]:
        os.makedirs(f"{GBRAIN_SRC}/{d}", exist_ok=True)

    imported = []
    for slug, category in ROUTING.items():
        # NOTE(review): every slug, including those routed to concepts/, is
        # read from BRAIN/ideas/ — confirm this matches the source layout.
        src_path = f"{BRAIN}/ideas/{slug}.org"
        if not os.path.exists(src_path):
            print(f" SKIP {slug}: not found")
            continue

        dst_dir = f"{GBRAIN_SRC}/{category}"
        dst_path = f"{dst_dir}/{slug}.md"

        # Extract frontmatter from org properties
        props = extract_org_properties(src_path)

        # Strip org header and convert body to markdown
        clean = strip_org_header(src_path)
        md = pandoc_convert(clean)
        if md is None:
            continue
        md = postprocess_links(md)

        # Assemble: YAML frontmatter + markdown body
        frontmatter = build_frontmatter(props)
        full = frontmatter + '\n\n' + md + '\n'

        with open(dst_path, 'w', encoding="utf-8") as f:
            f.write(full)

        imported.append(f"{category}/{slug}.md")
        print(f" OK {category}/{slug}")

    print(f"\nConverted {len(imported)} files.")

    # Commit to git. Failures are reported but do not stop the import.
    add = subprocess.run(
        ["git", "-C", GBRAIN_SRC, "add", "-A"],
        capture_output=True,
        text=True,
    )
    if add.returncode != 0:
        print(f" WARNING git add failed: {add.stderr.strip()[:200]}")
    commit = subprocess.run(
        ["git", "-C", GBRAIN_SRC, "commit", "--allow-empty", "-m",
         "gbrain: sync converted org-mode brain files"],
        capture_output=True,
        text=True,
    )
    if commit.returncode != 0:
        detail = (commit.stderr or commit.stdout).strip()[:200]
        print(f" WARNING git commit failed: {detail}")

    # Import into gbrain
    print("\nImporting into gbrain...")
    # Prepend ~/.bun/bin to PATH; tolerate PATH being unset and avoid leaving
    # an empty PATH entry behind.
    path_entries = [f"{os.path.expanduser('~')}/.bun/bin"]
    if os.environ.get("PATH"):
        path_entries.append(os.environ["PATH"])
    env = {**os.environ, "PATH": os.pathsep.join(path_entries)}
    result = subprocess.run(
        [BUN, "import", GBRAIN_SRC],
        capture_output=True,
        text=True,
        env=env,
    )
    # Show the non-noise lines among the last 25 lines of stdout
    out_lines = result.stdout.strip().split('\n')
    for line in out_lines[-25:]:
        if line.strip() and 'batch caps' not in line and 'max_batch_tokens' not in line:
            print(f" {line}")

    if result.returncode != 0:
        print(f" gbrain import exit code: {result.returncode}")
        for line in result.stderr.strip().split('\n')[-10:]:
            if line.strip():
                print(f" {line}")
        return

    # Embed
    print("\nGenerating embeddings...")
    result2 = subprocess.run(
        [BUN, "embed", "--all"],
        capture_output=True,
        text=True,
        env=env,
    )
    for line in result2.stdout.strip().split('\n')[-10:]:
        if line.strip():
            print(f" {line}")
    if result2.returncode != 0:
        print(f" gbrain embed exit code: {result2.returncode}")


if __name__ == "__main__":
    main()