#!/usr/bin/env python3
"""Convert brain Org-mode files to markdown + YAML frontmatter and sync into gbrain.

Pipeline (see ``main``):

1. Scan ``BRAIN/ideas`` for ``.org`` files.
2. For each file, pull metadata out of the header, convert the body to
   markdown with pandoc, and write ``frontmatter + body`` under ``GBRAIN_SRC``.
3. Commit ``GBRAIN_SRC`` to git and run the gbrain CLI (import, embed,
   link/timeline extraction, stats).
"""

import glob
import os
import re
import subprocess
import sys

BRAIN = "/root/brain"
GBRAIN_SRC = "/mnt/hermes/brain"
PANDOC = "/usr/bin/pandoc"
# NOTE: despite the name, this is the path of the gbrain executable that
# lives in bun's bin directory.
BUN = os.path.expanduser("~/.bun/bin/gbrain")

# Top-level org slug -> gbrain category directory.  Slugs that are not listed
# fall back to "concepts" (see gbrain_slug).
ROUTING = {
    # Concepts — triad architecture, security, economics theory
    "triad-overview": "concepts",
    "agora": "concepts",
    "stoa": "concepts",
    "triad-index": "concepts",
    "domain-gate-packages": "concepts",
    "verification-appliance": "concepts",
    "verification-monopoly": "concepts",
    "infrastructure-lock-in": "concepts",
    "evaluation-harness": "concepts",
    "collective-regression-suite": "concepts",
    "lisp-machine-security": "concepts",
    "common-logic-iso-24707": "concepts",
    "self-driving-lisp-machine": "concepts",
    "lisp-economics": "concepts",
    "sufficiency-flip": "concepts",
    "time-estimates": "concepts",
    "cost-structure": "concepts",
    "gate-rule-encoding": "concepts",
    "biology-parallels": "concepts",
    "comparison-with-symbolics": "concepts",
    "upgrade-lifecycle": "concepts",
    "ai-industry-impact": "concepts",
    "moats": "concepts",
    "patent-strategy": "concepts",
    "licensing": "concepts",
    "verified-skill-marketplace": "concepts",
    "compute-marketplace": "concepts",
    "agora-usernames": "concepts",
    "pds-as-a-service": "concepts",
    "investment-thesis": "concepts",
    "compliance-framework-mapping": "concepts",
    # Ideas — strategy, competitive analysis
    # (only the entries explicitly mapped to "ideas" are routed there)
    "orders-of-magnitude-time": "concepts",
    "revenue-hub": "concepts",
    "agora-contracts": "concepts",
    "triad-systemic-effects": "concepts",
    "competitive-analysis-2026-05": "ideas",
    "passepartout-economics": "ideas",
}

# [[file:TARGET.org][DESC]] — group 1 is TARGET (without ".org"), group 2 DESC.
_ORG_LINK_RE = re.compile(r'\[\[file:([^\]]+?)\.org\]\[([^\]]*?)\]\]')

# Tags matching this can be written as bare YAML scalars inside a flow list.
_PLAIN_TAG_RE = re.compile(r'\w[\w.-]*')


def find_org_files():
    """Scan ideas/ recursively for all .org files.

    Returns:
        A list of ``(slug, rel_path, abs_path)`` tuples where ``slug`` is the
        file name without ``.org`` and ``rel_path`` is relative to
        ``BRAIN/ideas``.  Directories and files are visited in sorted order so
        repeated runs produce the same list.
    """
    files = []
    base = f"{BRAIN}/ideas"
    for root, dirs, filenames in os.walk(base):
        dirs.sort()  # in-place sort makes os.walk descend deterministically
        for fn in sorted(filenames):
            if not fn.endswith('.org'):
                continue
            abs_path = os.path.join(root, fn)
            rel = os.path.relpath(abs_path, base)
            files.append((fn[:-4], rel, abs_path))
    return files


def gbrain_slug(rel_path):
    """Return the gbrain slug (e.g. 'concepts/time-estimates') for an org rel_path.

    Top-level files are routed through ``ROUTING`` (default ``concepts``).
    Files inside subdirectories always land under ``concepts/`` and keep their
    full directory chain, so ``a/b/c.org`` becomes ``concepts/a/b/c``.
    """
    parts = rel_path.split('/')
    slug = parts[-1]
    if slug.endswith('.org'):
        slug = slug[:-4]
    if len(parts) == 1:
        category = ROUTING.get(slug, "concepts")
        return f"{category}/{slug}"
    subdir = '/'.join(parts[:-1])
    return f"concepts/{subdir}/{slug}"


def gbrain_target(rel_path):
    """Derive gbrain target path (absolute ``.md`` path) from org relative path."""
    return f"{GBRAIN_SRC}/{gbrain_slug(rel_path)}.md"


def build_slug_map(files=None):
    """Build mapping: org slug (filename without .org) → gbrain slug.

    Args:
        files: optional pre-computed result of ``find_org_files()``; scanned
            afresh when omitted.

    If two files share a file name, the one listed later wins.
    """
    if files is None:
        files = find_org_files()
    return {slug: gbrain_slug(rel_path) for slug, rel_path, _abs_path in files}


def extract_org_links_and_body(src_path):
    """Read an org file and split it into metadata and body.

    Args:
        src_path: path of the ``.org`` file (read as UTF-8).

    Returns:
        ``(props, body)``.  ``props`` may contain ``title``, ``tags`` (list),
        ``org_id`` and ``created`` (``YYYY-MM-DD``); keys are only present when
        found.  ``body`` is the file text with the leading PROPERTIES drawer,
        ``#+keyword`` lines and blank lines removed.
    """
    with open(src_path, encoding="utf-8") as f:
        content = f.read()

    props = {}
    flags = re.MULTILINE | re.IGNORECASE  # accept #+TITLE: as well as #+title:

    # [ \t]+ rather than \s+ so an empty "#+title:" cannot capture the next line.
    m = re.search(r'^#\+title:[ \t]+(.+)$', content, flags)
    if m:
        props['title'] = m.group(1).strip()

    m = re.search(r'^#\+filetags:[ \t]+(.+)$', content, flags)
    if m:
        # Handles both ":a:b:" and whitespace-separated forms.
        tags = [t for t in re.split(r'[:\s]+', m.group(1)) if t]
        if tags:
            props['tags'] = tags

    m = re.search(r':ID:\s+([^\s]+)', content)
    if m:
        props['org_id'] = m.group(1)

    m = re.search(r':CREATED:\s+\[([^\]]+)\]', content)
    if m:
        # e.g. "2026-05-23 Sat" -> keep just the date portion
        date_m = re.match(r'(\d{4}-\d{2}-\d{2})', m.group(1))
        if date_m:
            props['created'] = date_m.group(1)

    # Find the first body line: skip the PROPERTIES drawer, #+keyword lines
    # and blank lines that precede any real content.
    lines = content.split('\n')
    in_properties = False
    start = 0
    for i, line in enumerate(lines):
        stripped = line.strip()
        if stripped == ':PROPERTIES:':
            in_properties = True
        if in_properties:
            if stripped == ':END:':
                in_properties = False
                start = i + 1
            continue
        # A "#+begin..." line opens a block that belongs to the body; dropping
        # it would leave an orphaned "#+end..." behind.
        if line.startswith('#+') and line[2:7].lower() != 'begin':
            start = i + 1
            continue
        if stripped:
            start = i
            break
        start = i + 1

    body = '\n'.join(lines[start:])
    return props, body


def resolve_org_link(match, slug_map):
    """Rewrite one ``[[file:target.org][desc]]`` regex match.

    Args:
        match: a match whose group 1 is the link target (without ``.org``)
            and whose optional group 2 is the description.
        slug_map: org slug → gbrain slug, as from ``build_slug_map``.

    Returns:
        ``[[file:<gbrain slug>.org][desc]]`` when the target is a known org
        slug, otherwise the matched text unchanged.
    """
    target = match.group(1)
    # lastindex is None when no group took part in the match.
    desc = match.group(2) if (match.lastindex or 0) >= 2 else target
    if target in slug_map:
        return f"[[file:{slug_map[target]}.org][{desc}]]"
    return match.group(0)


def convert_body(body_text, slug_map):
    """Convert an org body to markdown via pandoc, rewriting cross-references.

    Known ``[[file:X.org][desc]]`` targets get their gbrain path injected
    before conversion so the resulting markdown links point at gbrain slugs.

    Args:
        body_text: org-mode text (header already stripped).
        slug_map: org slug → gbrain slug.

    Returns:
        ``(md_body, link_refs)`` where ``link_refs`` is a list of
        ``{"slug", "type", "name"}`` dicts, one per known cross-reference in
        document order (duplicates included).  On pandoc failure returns
        ``(None, [])`` after printing the error.
    """
    link_refs = []

    def replace_link(m):
        target = m.group(1)
        if target not in slug_map:
            return m.group(0)
        gbrain_path = slug_map[target]
        link_refs.append({
            "slug": gbrain_path,
            "type": "references",
            "name": gbrain_path,
        })
        return f"[[file:{gbrain_path}.org][{m.group(2)}]]"

    processed_body = _ORG_LINK_RE.sub(replace_link, body_text)

    result = subprocess.run(
        [PANDOC, "-f", "org", "-t", "markdown-smart"],
        input=processed_body,
        capture_output=True,
        text=True,
        encoding="utf-8",  # do not depend on the process locale
    )
    if result.returncode != 0:
        print(f" ERROR pandoc: {result.stderr[:200]}")
        return None, []

    md = result.stdout.strip()
    # Pandoc converts [[file:concepts/foo.org][desc]] to [desc](concepts/foo.org);
    # strip the .org extension from such link destinations.
    md = re.sub(r'\(([a-zA-Z0-9_/-]+)\.org\)', r'(\1)', md)
    return md, link_refs


def _yaml_quote(text):
    """Return *text* as a YAML double-quoted scalar."""
    escaped = str(text).replace('\\', '\\\\').replace('"', '\\"')
    return f'"{escaped}"'


def _yaml_tag(tag):
    """Return *tag* bare when that is safe in a flow list, else quoted."""
    return tag if _PLAIN_TAG_RE.fullmatch(tag) else _yaml_quote(tag)


def build_frontmatter(props, link_refs=None):
    """Build YAML frontmatter string from properties and link references.

    Args:
        props: dict as produced by ``extract_org_links_and_body``; ``title``,
            ``tags`` and ``created`` are emitted when present.
        link_refs: optional list of dicts with ``slug`` and ``type`` keys;
            entries are de-duplicated by ``slug`` (first occurrence wins).

    Returns:
        The frontmatter block including both ``---`` fences, without a
        trailing newline.
    """
    lines = ['---']
    if 'title' in props:
        lines.append(f'title: {_yaml_quote(props["title"])}')
    if 'tags' in props:
        tags_str = ', '.join(_yaml_tag(t) for t in props['tags'])
        lines.append(f'tags: [{tags_str}]')
    if 'created' in props:
        lines.append(f'created: {props["created"]}')

    if link_refs:
        seen = set()
        unique_links = []
        for lr in link_refs:
            if lr['slug'] not in seen:
                seen.add(lr['slug'])
                unique_links.append(lr)
        if unique_links:
            lines.append('links:')
            for lr in unique_links:
                # "type" must line up under "slug" to belong to the same item.
                lines.append(f'  - slug: {_yaml_quote(lr["slug"])}')
                lines.append(f'    type: {_yaml_quote(lr["type"])}')

    lines.append('---')
    return '\n'.join(lines)


def add_timeline_entry(md_body, props):
    """If the page has a CREATED date and a title, prepend a timeline bullet."""
    if 'created' in props and 'title' in props:
        date = props['created']
        title = props['title']
        return f"- **{date}** | Created — {title}\n\n" + md_body
    return md_body


def _print_tail(text, count, skip_markers=()):
    """Print the last *count* non-blank lines of *text*, skipping marked ones."""
    for line in text.strip().split('\n')[-count:]:
        if line.strip() and not any(marker in line for marker in skip_markers):
            print(f" {line}")


def _run_gbrain(args, env):
    """Run the gbrain CLI with *args*, capturing its output as text."""
    return subprocess.run([BUN, *args], capture_output=True, text=True, env=env)


def main():
    """Convert every org file, commit the result, then run the gbrain steps."""
    # Scan once and reuse the listing for both the slug map and the loop.
    org_files = find_org_files()
    slug_map = build_slug_map(org_files)
    imported = []

    for _slug, rel_path, src_path in org_files:
        dst_path = gbrain_target(rel_path)
        os.makedirs(os.path.dirname(dst_path), exist_ok=True)

        props, org_body = extract_org_links_and_body(src_path)
        md, link_refs = convert_body(org_body, slug_map)
        if md is None:
            continue  # pandoc failed; error already printed

        # Frontmatter is built first; the timeline bullet goes into the body.
        frontmatter = build_frontmatter(props, link_refs)
        md = add_timeline_entry(md, props)

        with open(dst_path, 'w', encoding="utf-8") as f:
            f.write(frontmatter + '\n\n' + md + '\n')

        rel_dst = os.path.relpath(dst_path, GBRAIN_SRC)
        imported.append(rel_dst)
        print(f" OK {rel_dst}")

    print(f"\nConverted {len(imported)} files.")

    # Commit to git.  Failures do not abort the sync, but are reported.
    git_steps = (
        ["git", "-C", GBRAIN_SRC, "add", "-A"],
        ["git", "-C", GBRAIN_SRC, "commit", "--allow-empty", "-m",
         "gbrain: sync converted org-mode brain files"],
    )
    for cmd in git_steps:
        git_result = subprocess.run(cmd, capture_output=True, text=True)
        if git_result.returncode != 0:
            print(f" WARNING git {cmd[3]} failed: {git_result.stderr.strip()[:200]}")

    # Import into gbrain
    print("\nImporting into gbrain...")
    bun_bin = f"{os.path.expanduser('~')}/.bun/bin"
    env = {**os.environ, "PATH": f"{bun_bin}:{os.environ.get('PATH', os.defpath)}"}
    result = _run_gbrain(["import", GBRAIN_SRC], env)
    _print_tail(result.stdout, 25, skip_markers=('batch caps', 'max_batch_tokens'))
    if result.returncode != 0:
        print(f" gbrain import exit code: {result.returncode}")
        _print_tail(result.stderr, 10)
        return

    # Remaining steps: (heading, gbrain arguments, output lines to show).
    steps = (
        ("\nGenerating embeddings...", ["embed", "--all"], 10),
        ("\nExtracting links from frontmatter...",
         ["extract", "links", "--source", "db", "--include-frontmatter",
          "--dir", GBRAIN_SRC], 10),
        ("\nExtracting timeline...",
         ["extract", "timeline", "--source", "db", "--dir", GBRAIN_SRC], 10),
        ("\nBrain stats:", ["stats"], 15),
    )
    for heading, args, tail in steps:
        print(heading)
        _print_tail(_run_gbrain(args, env).stdout, tail)


if __name__ == "__main__":
    main()