gbrain: sync converted org-mode brain files
This commit is contained in:
@@ -7,130 +7,6 @@ GBRAIN_SRC = "/mnt/hermes/brain"
|
||||
PANDOC = "/usr/bin/pandoc"
|
||||
BUN = os.path.expanduser("~/.bun/bin/gbrain")
|
||||
|
||||
def find_org_files():
    """Collect every ``.org`` file found below ``{BRAIN}/ideas``.

    The directory tree is walked recursively with ``os.walk``.

    Returns:
        A list of ``(slug, rel_path, abs_path)`` tuples where ``slug`` is the
        file name without its ``.org`` suffix, ``rel_path`` is the path
        relative to the ideas directory (e.g. ``"compliance/hipaa.org"`` or
        ``"triad-overview.org"``) and ``abs_path`` is the joined walk path.
    """
    ideas_root = f"{BRAIN}/ideas"
    found = []
    for folder, _subdirs, names in os.walk(ideas_root):
        org_names = [entry for entry in names if entry.endswith('.org')]
        for org_name in org_names:
            full_path = os.path.join(folder, org_name)
            relative = os.path.relpath(full_path, ideas_root)
            # Drop the four-character ".org" suffix to get the slug.
            found.append((org_name[:-4], relative, full_path))
    return found
|
||||
|
||||
def gbrain_target(rel_path):
    """Derive the gbrain markdown target path from an org relative path.

    Args:
        rel_path: Path of the org file relative to the ideas directory,
            using ``/`` separators (e.g. ``"compliance/hipaa.org"``).

    Returns:
        A path string under ``GBRAIN_SRC``:

        * ``triad-overview.org`` → ``<GBRAIN_SRC>/<category>/triad-overview.md``
          where ``<category>`` is looked up in ``ROUTING`` and defaults to
          ``"concepts"``.
        * ``compliance/hipaa.org`` → ``<GBRAIN_SRC>/concepts/compliance/hipaa.md``
        * ``a/b/c.org`` → ``<GBRAIN_SRC>/concepts/a/b/c.md`` (the whole
          directory chain is kept so deeper files no longer collide).
    """
    parts = rel_path.split('/')

    # The file name is always the LAST component; only strip a real ".org"
    # suffix instead of blindly chopping four characters.
    filename = parts[-1]
    slug = filename[:-4] if filename.endswith('.org') else filename

    if len(parts) == 1:
        # Flat file in the ideas root: the category comes from ROUTING.
        category = ROUTING.get(slug, "concepts")
        return f"{GBRAIN_SRC}/{category}/{slug}.md"

    # File inside one or more subdirectories: mirror them under concepts/.
    subdir = '/'.join(parts[:-1])
    return f"{GBRAIN_SRC}/concepts/{subdir}/{slug}.md"
|
||||
|
||||
def extract_org_properties(src_path):
    """Extract ``#+title``/``#+filetags`` and drawer properties from an org file.

    Args:
        src_path: Path of the org file to read (decoded as UTF-8).

    Returns:
        A dict containing only the keys that were found:

        * ``'title'``   - text of the ``#+title:`` keyword
        * ``'tags'``    - list of tags from ``#+filetags:``
        * ``'org_id'``  - value of the ``:ID:`` property
        * ``'created'`` - text inside the brackets of ``:CREATED: [...]``
    """
    props = {}
    with open(src_path, encoding="utf-8") as f:
        content = f.read()

    # Org keywords are case-insensitive (#+TITLE == #+title).  Only
    # horizontal whitespace may separate keyword and value; allowing "\s"
    # would let an empty keyword swallow the following line.
    flags = re.MULTILINE | re.IGNORECASE

    # Extract title
    m = re.search(r'^#\+title:[ \t]+(.+)$', content, flags)
    if m and m.group(1).strip():
        props['title'] = m.group(1).strip()

    # Extract tags; Org writes them as ":tag1:tag2:", so split on colons
    # as well as on whitespace.
    m = re.search(r'^#\+filetags:[ \t]+(.+)$', content, flags)
    if m:
        tags = [t for t in re.split(r'[:\s]+', m.group(1)) if t]
        if tags:
            props['tags'] = tags

    # Extract ID from PROPERTIES drawer
    m = re.search(r'^[ \t]*:ID:[ \t]+(\S+)', content, flags)
    if m:
        props['org_id'] = m.group(1)

    # Extract CREATED (kept verbatim, e.g. "2026-05-23 Sat")
    m = re.search(r'^[ \t]*:CREATED:[ \t]+\[([^\]]+)\]', content, flags)
    if m:
        props['created'] = m.group(1)

    return props
|
||||
|
||||
def strip_org_header(src_path):
    """Return the org file's text with its leading header block removed.

    The header consists of any ``:PROPERTIES:`` ... ``:END:`` drawer, lines
    starting with ``#+`` and blank lines that precede the first line of real
    content.  Removing it before calling pandoc avoids raw ``{=org}`` blocks
    in the output.
    """
    with open(src_path) as handle:
        rows = handle.readlines()

    inside_drawer = False
    body_at = 0
    for idx, row in enumerate(rows):
        text = row.strip()
        if text == ':PROPERTIES:':
            inside_drawer = True
        if inside_drawer:
            # Everything up to and including :END: belongs to the drawer.
            if text == ':END:':
                inside_drawer = False
                body_at = idx + 1
            continue
        if text and not row.startswith('#+'):
            # First real content line: the body starts here.
            body_at = idx
            break
        # "#+" directive or blank line: still header, keep skipping.
        body_at = idx + 1

    return ''.join(rows[body_at:])
|
||||
|
||||
def pandoc_convert(clean_body):
    """Run pandoc on an org body supplied via stdin and return markdown.

    Returns the stripped pandoc stdout, or ``None`` (after printing the first
    200 characters of stderr) when pandoc exits with a non-zero status.
    """
    command = [PANDOC, "-f", "org", "-t", "markdown-smart"]
    proc = subprocess.run(command, input=clean_body, capture_output=True, text=True)
    if proc.returncode == 0:
        return proc.stdout.strip()
    print(f" ERROR pandoc: {proc.stderr[:200]}")
    return None
|
||||
|
||||
def build_frontmatter(props):
    """Build a YAML frontmatter string from extracted properties.

    Args:
        props: Dict that may contain ``'title'`` (str), ``'tags'`` (list of
            str) and ``'created'`` (str).  Missing keys are simply omitted.

    Returns:
        The frontmatter block, delimited by ``---`` lines, without a
        trailing newline.
    """
    lines = ['---']
    if 'title' in props:
        # Escape backslashes and double quotes so a title such as
        # 'Say "hi"' still yields a valid double-quoted YAML scalar.
        title = str(props['title']).replace('\\', '\\\\').replace('"', '\\"')
        lines.append(f'title: "{title}"')
    if 'tags' in props:
        tags_str = ', '.join(props['tags'])
        lines.append(f'tags: [{tags_str}]')
    if 'created' in props:
        lines.append(f'created: {props["created"]}')
    lines.append('---')
    return '\n'.join(lines)
|
||||
|
||||
def postprocess_links(md_text):
    """Drop the ``.org`` suffix from simple relative markdown link targets.

    Pandoc renders ``[[file:foo.org][desc]]`` as ``[desc](foo.org)``; this
    rewrites ``(foo.org)`` to ``(foo)``.  Only targets made of letters,
    digits, ``_`` and ``-`` are touched.
    """
    org_target = re.compile(r'\(([a-zA-Z0-9_-]+)\.org\)')
    return org_target.sub(lambda hit: '(' + hit.group(1) + ')', md_text)
|
||||
|
||||
ROUTING = {
|
||||
# Concepts — triad architecture, security, economics theory
|
||||
"triad-overview": "concepts",
|
||||
@@ -170,34 +46,230 @@ ROUTING = {
|
||||
"passepartout-economics": "ideas",
|
||||
}
|
||||
|
||||
def find_org_files():
    """Walk ``{BRAIN}/ideas`` recursively and list all ``.org`` files.

    Returns:
        A list of ``(slug, rel_path, abs_path)`` tuples: the file name
        without ``.org``, the path relative to the ideas directory, and the
        path as joined from the walk root.
    """
    base_dir = f"{BRAIN}/ideas"
    results = []
    for dirpath, _dirnames, filenames in os.walk(base_dir):
        for filename in filter(lambda entry: entry.endswith('.org'), filenames):
            location = os.path.join(dirpath, filename)
            results.append(
                (filename[:-4], os.path.relpath(location, base_dir), location)
            )
    return results
|
||||
|
||||
def gbrain_target(rel_path):
    """Derive the gbrain markdown target path from an org relative path.

    Args:
        rel_path: Path of the org file relative to the ideas directory,
            using ``/`` separators.

    Returns:
        ``<GBRAIN_SRC>/<category>/<slug>.md`` for a file in the ideas root
        (category from ``ROUTING``, default ``"concepts"``), otherwise
        ``<GBRAIN_SRC>/concepts/<subdirs>/<slug>.md`` where ``<subdirs>`` is
        the complete directory chain of ``rel_path``.
    """
    parts = rel_path.split('/')

    # Always take the file name from the last component; using parts[1]
    # picked a directory name for paths nested more than one level deep and
    # mapped every such file to the same ".md" target.
    filename = parts[-1]
    slug = filename[:-4] if filename.endswith('.org') else filename

    if len(parts) == 1:
        category = ROUTING.get(slug, "concepts")
        return f"{GBRAIN_SRC}/{category}/{slug}.md"

    subdir = '/'.join(parts[:-1])
    return f"{GBRAIN_SRC}/concepts/{subdir}/{slug}.md"
|
||||
|
||||
def gbrain_slug(rel_path):
    """Return the gbrain slug (e.g. ``'concepts/time-estimates'``) for an org rel_path.

    Args:
        rel_path: Path of the org file relative to the ideas directory,
            using ``/`` separators.

    Returns:
        ``<category>/<slug>`` for a file in the ideas root (category from
        ``ROUTING``, default ``"concepts"``), otherwise
        ``concepts/<subdirs>/<slug>`` where ``<subdirs>`` is the complete
        directory chain of ``rel_path``.
    """
    parts = rel_path.split('/')

    # The file name is the last component; parts[1] is a directory whenever
    # the file sits more than one level deep, which used to yield an empty
    # or truncated slug shared by every file in that directory.
    filename = parts[-1]
    slug = filename[:-4] if filename.endswith('.org') else filename

    if len(parts) == 1:
        category = ROUTING.get(slug, "concepts")
        return f"{category}/{slug}"

    subdir = '/'.join(parts[:-1])
    return f"concepts/{subdir}/{slug}"
|
||||
|
||||
def build_slug_map():
    """Map each org slug (file name without ``.org``) to its gbrain slug.

    When two org files share a file name, the one listed later by
    ``find_org_files`` wins.
    """
    return {
        org_slug: gbrain_slug(relative)
        for org_slug, relative, _absolute in find_org_files()
    }
|
||||
|
||||
def extract_org_links_and_body(src_path):
    """Read an org file and split it into properties and header-free body.

    Args:
        src_path: Path of the org file to read (decoded as UTF-8).

    Returns:
        ``(props, clean_body)``.  ``props`` contains only the keys found:
        ``'title'``, ``'tags'`` (list), ``'org_id'`` and ``'created'`` (the
        ``YYYY-MM-DD`` part of ``:CREATED: [...]``).  ``clean_body`` is the
        file text starting at the first line that is not part of the
        ``:PROPERTIES:`` drawer, a ``#+`` directive or blank.
    """
    with open(src_path, encoding="utf-8") as f:
        content = f.read()

    props = {}

    # Org keywords are case-insensitive (#+TITLE == #+title).  Only
    # horizontal whitespace may separate keyword and value; allowing "\s"
    # would let an empty keyword swallow the following line.
    flags = re.MULTILINE | re.IGNORECASE

    # Extract title
    m = re.search(r'^#\+title:[ \t]+(.+)$', content, flags)
    if m and m.group(1).strip():
        props['title'] = m.group(1).strip()

    # Extract tags; Org writes them as ":tag1:tag2:", so split on colons
    # as well as on whitespace.
    m = re.search(r'^#\+filetags:[ \t]+(.+)$', content, flags)
    if m:
        tags = [t for t in re.split(r'[:\s]+', m.group(1)) if t]
        if tags:
            props['tags'] = tags

    # Extract ID from PROPERTIES drawer
    m = re.search(r'^[ \t]*:ID:[ \t]+(\S+)', content, flags)
    if m:
        props['org_id'] = m.group(1)

    # Extract CREATED, keeping just the date portion of e.g. "2026-05-23 Sat"
    m = re.search(r'^[ \t]*:CREATED:[ \t]+\[([^\]]+)\]', content, flags)
    if m:
        date_m = re.match(r'(\d{4}-\d{2}-\d{2})', m.group(1))
        if date_m:
            props['created'] = date_m.group(1)

    # Strip header for body
    lines = content.split('\n')
    in_properties = False
    start = 0
    for i, line in enumerate(lines):
        stripped = line.strip()
        if stripped == ':PROPERTIES:':
            in_properties = True
        if in_properties:
            # Drawer lines are header; the body can start after :END:.
            if stripped == ':END:':
                in_properties = False
                start = i + 1
            continue
        if line.startswith('#+'):
            start = i + 1
            continue
        if stripped:
            # First real content line.
            start = i
            break
        start = i + 1

    body = '\n'.join(lines[start:])
    return props, body
|
||||
|
||||
def resolve_org_link(match, slug_map):
    """Rewrite one ``[[file:target.org][desc]]`` match to use the gbrain slug.

    ``match`` must expose the link target as group 1 and, optionally, the
    description as group 2.  If the target is a key of ``slug_map`` the link
    becomes ``[[file:<gbrain slug>.org][desc]]``; otherwise the matched text
    is returned unchanged.
    """
    target = match.group(1)
    # Without a second group the target doubles as the description.
    label = target if match.lastindex < 2 else match.group(2)

    if target not in slug_map:
        return match.group(0)
    return f"[[file:{slug_map[target]}.org][{label}]]"
|
||||
|
||||
def convert_body(body_text, slug_map):
    """Rewrite org cross-references to gbrain slugs, then convert to markdown.

    Every ``[[file:X.org][desc]]`` link whose ``X`` is a key of ``slug_map``
    is rewritten to point at the mapped gbrain slug before the text is piped
    through pandoc.

    Returns:
        ``(md_body, link_refs)`` where ``link_refs`` holds one
        ``{"slug", "type", "name"}`` dict per rewritten link, in document
        order.  Returns ``(None, [])`` when pandoc exits non-zero.
    """
    link_pattern = re.compile(r'\[\[file:([^\]]+?)\.org\]\[([^\]]*?)\]\]')
    link_refs = []

    def rewrite(found):
        # Record the reference and inject the directory prefix in one pass.
        org_slug, label = found.group(1), found.group(2)
        if org_slug not in slug_map:
            return found.group(0)
        mapped = slug_map[org_slug]
        link_refs.append({
            "slug": mapped,
            "type": "references",
            "name": mapped,
        })
        return f"[[file:{mapped}.org][{label}]]"

    rewritten = link_pattern.sub(rewrite, body_text)

    # Convert to markdown
    command = [PANDOC, "-f", "org", "-t", "markdown-smart"]
    proc = subprocess.run(command, input=rewritten, capture_output=True, text=True)
    if proc.returncode != 0:
        print(f" ERROR pandoc: {proc.stderr[:200]}")
        return None, []

    # Pandoc renders [[file:concepts/foo.org][desc]] as [desc](concepts/foo.org);
    # drop the .org extension from such targets.
    markdown = re.sub(r'\(([a-zA-Z0-9_/-]+)\.org\)', r'(\1)', proc.stdout.strip())
    return markdown, link_refs
|
||||
|
||||
def build_frontmatter(props, link_refs=None):
    """Build a YAML frontmatter string from properties and link references.

    Args:
        props: Dict that may contain ``'title'`` (str), ``'tags'`` (list of
            str) and ``'created'`` (str).  Missing keys are omitted.
        link_refs: Optional list of dicts with ``'slug'`` and ``'type'``
            keys.  Entries are de-duplicated by slug, keeping the first
            occurrence; nothing is emitted when the list is empty or None.

    Returns:
        The frontmatter block, delimited by ``---`` lines, without a
        trailing newline.
    """
    def quoted(value):
        # Escape backslashes and double quotes so the value stays a valid
        # double-quoted YAML scalar.
        text = str(value).replace('\\', '\\\\').replace('"', '\\"')
        return f'"{text}"'

    lines = ['---']
    if 'title' in props:
        lines.append(f'title: {quoted(props["title"])}')
    if 'tags' in props:
        tags_str = ', '.join(props['tags'])
        lines.append(f'tags: [{tags_str}]')
    if 'created' in props:
        lines.append(f'created: {props["created"]}')

    if link_refs:
        # Deduplicate by slug, preserving first-seen order.
        seen = set()
        unique_links = []
        for lr in link_refs:
            if lr['slug'] not in seen:
                seen.add(lr['slug'])
                unique_links.append(lr)
        if unique_links:
            lines.append('links:')
            for lr in unique_links:
                # "type" must be indented to line up with "slug" so both
                # keys belong to the same list item.
                lines.append(f'  - slug: {quoted(lr["slug"])}')
                lines.append(f'    type: {quoted(lr["type"])}')

    lines.append('---')
    return '\n'.join(lines)
|
||||
|
||||
def add_timeline_entry(md_body, props):
    """Prepend a "Created" timeline bullet when date and title are both known.

    The body is returned unchanged unless ``props`` has both a ``'created'``
    and a ``'title'`` key.
    """
    if not ('created' in props and 'title' in props):
        return md_body
    when, heading = props['created'], props['title']
    bullet = f"- **{when}** | Created — {heading}\n\n"
    return bullet + md_body
|
||||
|
||||
def main():
|
||||
# Pre-build slug map for all org files
|
||||
slug_map = build_slug_map()
|
||||
imported = []
|
||||
|
||||
for slug, rel_path, src_path in find_org_files():
|
||||
dst_path = gbrain_target(rel_path)
|
||||
|
||||
# Create parent directories
|
||||
os.makedirs(os.path.dirname(dst_path), exist_ok=True)
|
||||
|
||||
# Extract frontmatter from org properties
|
||||
props = extract_org_properties(src_path)
|
||||
# Extract properties and body from org file
|
||||
props, org_body = extract_org_links_and_body(src_path)
|
||||
|
||||
# Strip org header and convert body to markdown
|
||||
clean = strip_org_header(src_path)
|
||||
md = pandoc_convert(clean)
|
||||
# Convert body to markdown, collecting links along the way
|
||||
md, link_refs = convert_body(org_body, slug_map)
|
||||
if md is None:
|
||||
continue
|
||||
|
||||
md = postprocess_links(md)
|
||||
# Build frontmatter with links
|
||||
frontmatter = build_frontmatter(props, link_refs)
|
||||
|
||||
# Add timeline entry if date exists
|
||||
md = add_timeline_entry(md, props)
|
||||
|
||||
# Assemble: YAML frontmatter + markdown body
|
||||
frontmatter = build_frontmatter(props)
|
||||
full = frontmatter + '\n\n' + md + '\n'
|
||||
|
||||
with open(dst_path, 'w') as f:
|
||||
f.write(full)
|
||||
|
||||
# Show relative path for clarity
|
||||
rel_dst = os.path.relpath(dst_path, GBRAIN_SRC)
|
||||
imported.append(rel_dst)
|
||||
print(f" OK {rel_dst}")
|
||||
@@ -219,7 +291,6 @@ def main():
|
||||
[BUN, "import", GBRAIN_SRC],
|
||||
capture_output=True, text=True, env=env
|
||||
)
|
||||
# Show last 20 lines of stdout (skip noise)
|
||||
out_lines = result.stdout.strip().split('\n')
|
||||
for line in out_lines[-25:]:
|
||||
if line.strip() and 'batch caps' not in line and 'max_batch_tokens' not in line:
|
||||
@@ -239,5 +310,36 @@ def main():
|
||||
if line.strip():
|
||||
print(f" {line}")
|
||||
|
||||
# Extract links from frontmatter (now that pages are imported with links:)
|
||||
print("\nExtracting links from frontmatter...")
|
||||
result3 = subprocess.run(
|
||||
[BUN, "extract", "links", "--source", "db", "--include-frontmatter",
|
||||
"--dir", GBRAIN_SRC],
|
||||
capture_output=True, text=True, env=env
|
||||
)
|
||||
for line in result3.stdout.strip().split('\n')[-10:]:
|
||||
if line.strip():
|
||||
print(f" {line}")
|
||||
|
||||
# Extract timeline from body
|
||||
print("\nExtracting timeline...")
|
||||
result4 = subprocess.run(
|
||||
[BUN, "extract", "timeline", "--source", "db", "--dir", GBRAIN_SRC],
|
||||
capture_output=True, text=True, env=env
|
||||
)
|
||||
for line in result4.stdout.strip().split('\n')[-10:]:
|
||||
if line.strip():
|
||||
print(f" {line}")
|
||||
|
||||
# Stats
|
||||
print("\nBrain stats:")
|
||||
result5 = subprocess.run(
|
||||
[BUN, "stats"],
|
||||
capture_output=True, text=True, env=env
|
||||
)
|
||||
for line in result5.stdout.strip().split('\n')[-15:]:
|
||||
if line.strip():
|
||||
print(f" {line}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
Reference in New Issue
Block a user