gbrain: sync converted org-mode brain files
This commit is contained in:
212
scripts/org-to-gbrain.py
Normal file
212
scripts/org-to-gbrain.py
Normal file
@@ -0,0 +1,212 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Convert brain Org-mode files to markdown + YAML frontmatter and sync into gbrain."""
|
||||
import subprocess, re, os, sys
|
||||
|
||||
# Root of the Org-mode brain; sources are read from "<BRAIN>/ideas/<slug>.org".
BRAIN = "/root/brain"

# Destination tree for the generated markdown; it is also the directory that
# gets committed with git and handed to "gbrain import".
GBRAIN_SRC = "/mnt/hermes/brain"

# Absolute path of the pandoc executable used for the org -> markdown step.
PANDOC = "/usr/bin/pandoc"

# Path of the gbrain CLI (located under the current user's ~/.bun/bin).
BUN = os.path.expanduser("~/.bun/bin/gbrain")
|
||||
|
||||
def extract_org_properties(src_path):
    """Extract title, file tags, ID and creation stamp from an Org file.

    Org keywords and property names are matched case-insensitively, so both
    ``#+title:`` and ``#+TITLE:`` (or ``:ID:`` and ``:id:``) are recognised.

    Args:
        src_path: Path of the Org-mode file to read (decoded as UTF-8).

    Returns:
        A dict that may contain the keys ``title`` (str), ``tags`` (list of
        str), ``org_id`` (str) and ``created`` (str, the text between the
        square brackets of the ``:CREATED:`` property). A key is omitted when
        the corresponding keyword/property is missing or has no value.
    """
    with open(src_path, encoding="utf-8") as f:
        content = f.read()

    props = {}
    flags = re.MULTILINE | re.IGNORECASE

    # Only horizontal whitespace may follow the colon: with "\s+" an empty
    # "#+title:" line would swallow the newline and capture the next line.
    m = re.search(r'^#\+title:[ \t]*(\S.*)$', content, flags)
    if m:
        props['title'] = m.group(1).strip()

    # File tags are written either as ":a:b:c:" or as whitespace-separated
    # words, so split on both colons and whitespace and drop empty pieces.
    m = re.search(r'^#\+filetags:[ \t]*(\S.*)$', content, flags)
    if m:
        tags = [t for t in re.split(r'[:\s]+', m.group(1)) if t]
        if tags:
            props['tags'] = tags

    # Property lines are anchored to the start of a line (optionally
    # indented) so a ":ID:" mentioned inside body text is not picked up.
    m = re.search(r'^[ \t]*:ID:[ \t]+(\S+)', content, flags)
    if m:
        props['org_id'] = m.group(1)

    m = re.search(r'^[ \t]*:CREATED:[ \t]+\[([^\]]+)\]', content, flags)
    if m:
        props['created'] = m.group(1)

    return props
|
||||
|
||||
def strip_org_header(src_path):
    """Return the Org file's text with its leading header block removed.

    The header is everything before the first line of real content: the
    ``:PROPERTIES:`` ... ``:END:`` drawer, ``#+keyword`` lines and blank
    lines. It is removed before the text is handed to pandoc so the drawer
    and directives do not show up as raw ``{=org}`` blocks.

    A ``#+begin_...`` (or ``#+begin:``) line is treated as content, not as a
    header directive; dropping it would leave the block's body and its
    ``#+end_...`` line dangling.

    Args:
        src_path: Path of the Org-mode file to read (decoded as UTF-8).

    Returns:
        The remaining text as one string, starting at the first content
        line; an empty string when the file holds nothing but header lines.
        If a properties drawer is never closed, the text from that drawer
        onwards is returned unchanged.
    """
    with open(src_path, encoding="utf-8") as f:
        lines = f.readlines()

    in_properties = False
    start = 0
    for i, line in enumerate(lines):
        marker = line.strip().upper()  # drawer markers are case-insensitive

        if in_properties:
            if marker == ':END:':
                in_properties = False
                start = i + 1
            continue

        if marker == ':PROPERTIES:':
            # Do not advance `start` yet: if the drawer is never closed the
            # whole drawer is kept rather than silently discarded.
            in_properties = True
            continue

        if not marker:
            start = i + 1
            continue

        is_block_start = re.match(r'#\+begin[_:]', line, re.IGNORECASE)
        if line.startswith('#+') and not is_block_start:
            start = i + 1
            continue

        # First line of real content: the body starts here.
        start = i
        break

    return ''.join(lines[start:])
|
||||
|
||||
def pandoc_convert(clean_body):
    """Run pandoc on an Org body passed via stdin and return the markdown.

    pandoc is invoked with the ``org`` reader and the ``markdown-smart``
    writer.

    Args:
        clean_body: Org-mode text to convert.

    Returns:
        The converted markdown with surrounding whitespace stripped, or
        ``None`` when pandoc exits with a non-zero status (the first 200
        characters of its stderr are printed in that case).
    """
    command = [PANDOC, "-f", "org", "-t", "markdown-smart"]
    proc = subprocess.run(
        command,
        input=clean_body,
        capture_output=True,
        text=True,
    )
    if proc.returncode == 0:
        return proc.stdout.strip()

    print(f" ERROR pandoc: {proc.stderr[:200]}")
    return None
|
||||
|
||||
def build_frontmatter(props):
    """Build the YAML frontmatter block from extracted Org properties.

    Args:
        props: Mapping that may contain ``title`` (str), ``tags`` (iterable
            of str) and ``created`` (str). Other keys are ignored.

    Returns:
        A string that starts and ends with a ``---`` line and has one line
        per present key, in the order title, tags, created. There is no
        trailing newline.
    """
    # Imported locally so the block does not depend on the file's import list.
    import json

    def quoted(value):
        # A JSON string literal is also a valid YAML double-quoted scalar,
        # so this escapes embedded quotes, backslashes and control chars.
        return json.dumps(str(value), ensure_ascii=False)

    def flow_item(tag):
        # Plain word-like tags stay unquoted (unchanged output for the
        # common case); anything with YAML-significant characters such as
        # "@", ":", "," or "#" is quoted so the flow sequence stays valid.
        tag = str(tag)
        return tag if re.fullmatch(r'[\w-]+', tag) else quoted(tag)

    lines = ['---']
    if 'title' in props:
        lines.append(f'title: {quoted(props["title"])}')
    if 'tags' in props:
        tags_str = ', '.join(flow_item(t) for t in props['tags'])
        lines.append(f'tags: [{tags_str}]')
    if 'created' in props:
        lines.append(f'created: {props["created"]}')
    lines.append('---')
    return '\n'.join(lines)
|
||||
|
||||
def postprocess_links(md_text):
    """Rewrite pandoc's markdown links into the gbrain-friendly form.

    Pandoc converts ``[[file:foo.org][desc]]`` to ``[desc](foo.org)``; this
    strips the ``.org`` extension from such relative link targets, giving
    ``[desc](foo)``.

    Only the target of a markdown link (text directly after ``](``) is
    rewritten, so parenthesised prose such as ``(orgmode.org)`` is left
    untouched. Targets containing characters other than ASCII letters,
    digits, ``_`` and ``-`` (URLs, paths with directories) are not changed.

    Args:
        md_text: Markdown text produced by pandoc.

    Returns:
        The text with ``.org`` removed from matching link targets.
    """
    return re.sub(
        r'(?<=\]\()([a-zA-Z0-9_-]+)\.org(?=\))',
        r'\1',
        md_text,
    )
|
||||
|
||||
# Maps each source slug (file name without ".org") to the gbrain category
# directory its converted markdown is written to. Insertion order is the
# order in which the files are processed.
ROUTING = {
    # Concepts — triad architecture, security, economics theory
    **dict.fromkeys(
        (
            "triad-overview",
            "agora",
            "stoa",
            "triad-index",
            "domain-gate-packages",
            "verification-appliance",
            "verification-monopoly",
            "infrastructure-lock-in",
            "evaluation-harness",
            "collective-regression-suite",
            "lisp-machine-security",
            "common-logic-iso-24707",
            "self-driving-lisp-machine",
            "lisp-economics",
            "sufficiency-flip",
            "time-estimates",
            "cost-structure",
            "gate-rule-encoding",
            "biology-parallels",
            "comparison-with-symbolics",
            "upgrade-lifecycle",
            "ai-industry-impact",
            "moats",
            "patent-strategy",
            "licensing",
            "verified-skill-marketplace",
            "compute-marketplace",
            "agora-usernames",
            "pds-as-a-service",
            "investment-thesis",
            "compliance-framework-mapping",
        ),
        "concepts",
    ),
    # Ideas — strategy, competitive analysis
    **dict.fromkeys(
        (
            "competitive-analysis-2026-05",
            "passepartout-economics",
        ),
        "ideas",
    ),
}
|
||||
|
||||
def _print_tail(text, limit, skip=()):
    """Print the last `limit` non-blank lines of `text`, minus noise lines.

    A line is dropped when it contains any of the substrings in `skip`.
    """
    for line in text.strip().split('\n')[-limit:]:
        if line.strip() and not any(noise in line for noise in skip):
            print(f" {line}")


def main():
    """Convert every routed Org file to markdown and sync it into gbrain.

    Steps, in order:
      1. Create the destination directory of every category in ROUTING.
      2. For each slug, read ``<BRAIN>/ideas/<slug>.org``, convert it and
         write frontmatter + markdown to ``<GBRAIN_SRC>/<category>/<slug>.md``.
         Missing sources and pandoc failures are reported and skipped.
      3. ``git add -A`` and ``git commit --allow-empty`` in GBRAIN_SRC.
         Failures are reported but do not stop the run.
      4. Run ``gbrain import <GBRAIN_SRC>``; stop if it fails.
      5. Run ``gbrain embed --all``.

    Returns:
        None. Progress and errors are printed to stdout.
    """
    # Derive the directories from ROUTING so a category added to the table
    # cannot end up without a destination directory.
    for category in sorted(set(ROUTING.values())):
        os.makedirs(f"{GBRAIN_SRC}/{category}", exist_ok=True)

    imported = []

    for slug, category in ROUTING.items():
        src_path = f"{BRAIN}/ideas/{slug}.org"
        if not os.path.exists(src_path):
            print(f" SKIP {slug}: not found")
            continue

        dst_path = f"{GBRAIN_SRC}/{category}/{slug}.md"

        # Frontmatter comes from the org properties; the body is the file
        # minus its header, converted by pandoc.
        props = extract_org_properties(src_path)
        md = pandoc_convert(strip_org_header(src_path))
        if md is None:
            continue
        md = postprocess_links(md)

        # Assemble: YAML frontmatter + blank line + markdown body.
        full = build_frontmatter(props) + '\n\n' + md + '\n'
        with open(dst_path, 'w', encoding='utf-8') as f:
            f.write(full)

        imported.append(f"{category}/{slug}.md")
        print(f" OK {category}/{slug}")

    print(f"\nConverted {len(imported)} files.")

    # Commit to git. Both commands always run, as before, but a failure is
    # now reported instead of being silently discarded.
    git_steps = (
        ["add", "-A"],
        ["commit", "--allow-empty",
         "-m", "gbrain: sync converted org-mode brain files"],
    )
    for git_args in git_steps:
        git = subprocess.run(
            ["git", "-C", GBRAIN_SRC, *git_args],
            capture_output=True, text=True
        )
        if git.returncode != 0:
            print(f" ERROR git {git_args[0]}: {git.stderr.strip()[:200]}")

    # Import into gbrain
    print("\nImporting into gbrain...")
    # Put ~/.bun/bin first on PATH for the child processes. PATH may be
    # unset or empty; in that case no empty entry (which would mean the
    # current directory) is appended.
    path_entries = [f"{os.path.expanduser('~')}/.bun/bin"]
    if os.environ.get("PATH"):
        path_entries.append(os.environ["PATH"])
    env = {**os.environ, "PATH": ":".join(path_entries)}

    result = subprocess.run(
        [BUN, "import", GBRAIN_SRC],
        capture_output=True, text=True, env=env
    )
    # Show the last 25 lines of stdout, skipping known noise lines.
    _print_tail(result.stdout, 25, skip=('batch caps', 'max_batch_tokens'))

    if result.returncode != 0:
        print(f" gbrain import exit code: {result.returncode}")
        _print_tail(result.stderr, 10)
        return

    # Embed
    print("\nGenerating embeddings...")
    result2 = subprocess.run(
        [BUN, "embed", "--all"],
        capture_output=True, text=True, env=env
    )
    _print_tail(result2.stdout, 10)

    if result2.returncode != 0:
        print(f" gbrain embed exit code: {result2.returncode}")
        _print_tail(result2.stderr, 10)
|
||||
|
||||
# Run the conversion and sync only when this file is executed as a script;
# importing it as a module has no side effects.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user