gbrain: sync converted org-mode brain files
This commit is contained in:
@@ -7,130 +7,6 @@ GBRAIN_SRC = "/mnt/hermes/brain"
|
|||||||
PANDOC = "/usr/bin/pandoc"
|
PANDOC = "/usr/bin/pandoc"
|
||||||
BUN = os.path.expanduser("~/.bun/bin/gbrain")
|
BUN = os.path.expanduser("~/.bun/bin/gbrain")
|
||||||
|
|
||||||
def find_org_files():
    """Collect every ``.org`` file found below the ``ideas`` directory of BRAIN.

    Returns:
        A list of ``(slug, rel_path, abs_path)`` tuples, where ``slug`` is the
        file name without its ``.org`` suffix, ``rel_path`` is relative to the
        ``ideas`` directory (e.g. "compliance/hipaa.org" or
        "triad-overview.org") and ``abs_path`` is the path joined from the
        walked directory.
    """
    ideas_root = f"{BRAIN}/ideas"
    found = []
    for folder, _subdirs, names in os.walk(ideas_root):
        for name in names:
            if name.endswith('.org'):
                full_path = os.path.join(folder, name)
                relative = os.path.relpath(full_path, ideas_root)
                # name[:-4] drops the trailing ".org"
                found.append((name[:-4], relative, full_path))
    return found
|
|
||||||
|
|
||||||
def gbrain_target(rel_path):
    """Derive the gbrain markdown target path from an org path relative to ideas/.

    Examples (all under GBRAIN_SRC):
        compliance/hipaa.org     → concepts/compliance/hipaa.md
        compliance/eu/gdpr.org   → concepts/compliance/eu/gdpr.md
        triad-overview.org       → concepts/triad-overview.md (via ROUTING dict)
        competitive-analysis.org → ideas/competitive-analysis.md (via ROUTING dict)

    Args:
        rel_path: '/'-separated path of the org file relative to ideas/.

    Returns:
        The destination ``.md`` path as a string rooted at GBRAIN_SRC.
    """
    # The last component is always the file; everything before it is the
    # directory chain, however deep the file sits.
    *subdirs, filename = rel_path.split('/')

    # Only drop the extension when it is actually present.
    slug = filename[:-4] if filename.endswith('.org') else filename

    if not subdirs:
        # Flat file in ideas/ root — the ROUTING dict picks the category.
        category = ROUTING.get(slug, "concepts")
        return f"{GBRAIN_SRC}/{category}/{slug}.md"

    # In a subdirectory: keep the full directory chain under concepts/.
    subdir = '/'.join(subdirs)
    return f"{GBRAIN_SRC}/concepts/{subdir}/{slug}.md"
|
|
||||||
|
|
||||||
def extract_org_properties(src_path):
    """Extract #+title / #+filetags and the :ID: / :CREATED: properties of an org file.

    Args:
        src_path: Path of the org file to read (decoded as UTF-8).

    Returns:
        A dict holding only the keys that were found:
        'title' (str), 'tags' (list of str), 'org_id' (str) and
        'created' (str, the text between the brackets of :CREATED:).
    """
    with open(src_path, encoding="utf-8") as f:
        content = f.read()

    props = {}
    # Org keywords are case-insensitive, so "#+TITLE:" must match too.
    keyword_flags = re.MULTILINE | re.IGNORECASE

    # Extract title
    m = re.search(r'^#\+title:\s+(.+)$', content, keyword_flags)
    if m:
        props['title'] = m.group(1).strip()

    # Extract tags; ":a:b:" and ":a: :b:" both yield ['a', 'b'].
    m = re.search(r'^#\+filetags:\s+(.+)$', content, keyword_flags)
    if m:
        props['tags'] = [t for t in re.split(r'[:\s]+', m.group(1)) if t]

    # Extract ID from PROPERTIES drawer
    m = re.search(r':ID:\s+([^\s]+)', content)
    if m:
        props['org_id'] = m.group(1)

    # Extract CREATED (bracketed inactive timestamp)
    m = re.search(r':CREATED:\s+\[([^\]]+)\]', content)
    if m:
        props['created'] = m.group(1)

    return props
|
|
||||||
|
|
||||||
def strip_org_header(src_path):
    """Return the org file's text with its leading header block removed.

    The header is the leading run of blank lines, a :PROPERTIES: … :END:
    drawer and ``#+keyword`` lines. It is dropped before feeding the text to
    pandoc, so it doesn't produce raw {=org} blocks.

    Args:
        src_path: Path of the org file to read (decoded as UTF-8).

    Returns:
        Everything from the first real content line onwards, as one string.
    """
    with open(src_path, encoding="utf-8") as f:
        lines = f.readlines()

    start = 0
    in_properties = False
    for i, line in enumerate(lines):
        stripped = line.strip()

        if in_properties:
            # Swallow drawer lines; the body can start only after :END:.
            if stripped == ':END:':
                in_properties = False
                start = i + 1
            continue

        if stripped == ':PROPERTIES:':
            in_properties = True
            continue

        # Skip #+ keyword lines, but keep "#+begin…" block openers: they are
        # body content and must stay paired with their "#+end…" line.
        if line.startswith('#+') and line[:7].lower() != '#+begin':
            start = i + 1
            continue

        # First real content
        if stripped:
            start = i
            break

        # Blank line inside the header region
        start = i + 1

    return ''.join(lines[start:])
|
|
||||||
|
|
||||||
def pandoc_convert(clean_body):
    """Convert an org body to markdown via pandoc (stdin mode).

    Args:
        clean_body: Org text to send to pandoc on stdin.

    Returns:
        The stripped markdown written by pandoc, or None when pandoc exits
        with a non-zero status (the first 200 characters of stderr are
        printed in that case).
    """
    result = subprocess.run(
        [PANDOC, "-f", "org", "-t", "markdown-smart"],
        # Pandoc reads and writes UTF-8, so pin the pipe encoding instead of
        # inheriting whatever the current locale happens to be.
        input=clean_body, capture_output=True, text=True, encoding="utf-8",
    )
    if result.returncode != 0:
        print(f" ERROR pandoc: {result.stderr[:200]}")
        return None
    return result.stdout.strip()
|
|
||||||
|
|
||||||
def build_frontmatter(props):
    """Build a YAML frontmatter string from extracted properties.

    Args:
        props: Dict that may contain 'title' (str), 'tags' (list of str)
            and 'created' (str). Missing keys are simply left out.

    Returns:
        The frontmatter block, delimited by '---' lines, without a trailing
        newline.
    """
    lines = ['---']
    if 'title' in props:
        # Escape backslashes first, then quotes, so the title remains a valid
        # YAML double-quoted scalar.
        title = props['title'].replace('\\', '\\\\').replace('"', '\\"')
        lines.append(f'title: "{title}"')
    if 'tags' in props:
        tags_str = ', '.join(props['tags'])
        lines.append(f'tags: [{tags_str}]')
    if 'created' in props:
        lines.append(f'created: {props["created"]}')
    lines.append('---')
    return '\n'.join(lines)
|
|
||||||
|
|
||||||
def postprocess_links(md_text):
    """Rewrite pandoc's markdown link targets into the gbrain-friendly form.

    Pandoc turns ``[[file:foo.org][desc]]`` into ``[desc](foo.org)``; this
    drops the ``.org`` extension from such parenthesised targets when the
    target consists only of letters, digits, ``_`` and ``-``.
    """
    org_target = re.compile(r'\(([a-zA-Z0-9_-]+)\.org\)')
    return org_target.sub(r'(\1)', md_text)
|
|
||||||
|
|
||||||
ROUTING = {
|
ROUTING = {
|
||||||
# Concepts — triad architecture, security, economics theory
|
# Concepts — triad architecture, security, economics theory
|
||||||
"triad-overview": "concepts",
|
"triad-overview": "concepts",
|
||||||
@@ -170,34 +46,230 @@ ROUTING = {
|
|||||||
"passepartout-economics": "ideas",
|
"passepartout-economics": "ideas",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def find_org_files():
    """Walk the ``ideas`` directory of BRAIN and list every ``.org`` file.

    Returns:
        A list of ``(slug, rel_path, abs_path)`` tuples: the file name
        without ``.org``, the path relative to the ``ideas`` directory, and
        the path joined from the walked directory.
    """
    base_dir = f"{BRAIN}/ideas"
    return [
        (
            filename[:-4],  # file name minus ".org"
            os.path.relpath(os.path.join(dirpath, filename), base_dir),
            os.path.join(dirpath, filename),
        )
        for dirpath, _dirnames, filenames in os.walk(base_dir)
        for filename in filenames
        if filename.endswith('.org')
    ]
|
||||||
|
|
||||||
|
def gbrain_target(rel_path):
    """Derive the gbrain markdown target path from an org path relative to ideas/.

    A file directly in ideas/ is routed through the ROUTING dict (default
    category "concepts"); a file in a subdirectory keeps its whole directory
    chain under ``concepts/``, e.g. ``compliance/eu/gdpr.org`` becomes
    ``<GBRAIN_SRC>/concepts/compliance/eu/gdpr.md``.

    Args:
        rel_path: '/'-separated path of the org file relative to ideas/.

    Returns:
        The destination ``.md`` path as a string rooted at GBRAIN_SRC.
    """
    # The last component is the file name regardless of nesting depth.
    *subdirs, filename = rel_path.split('/')
    slug = filename[:-4] if filename.endswith('.org') else filename

    if not subdirs:
        category = ROUTING.get(slug, "concepts")
        return f"{GBRAIN_SRC}/{category}/{slug}.md"

    subdir = '/'.join(subdirs)
    return f"{GBRAIN_SRC}/concepts/{subdir}/{slug}.md"
|
||||||
|
|
||||||
|
def gbrain_slug(rel_path):
    """Return the gbrain slug (e.g. 'concepts/time-estimates') for an org rel_path.

    A file directly in ideas/ gets ``<category>/<name>`` with the category
    taken from the ROUTING dict (default "concepts"); a file in a
    subdirectory gets ``concepts/<dir chain>/<name>``, e.g.
    ``compliance/eu/gdpr.org`` → ``concepts/compliance/eu/gdpr``.

    Args:
        rel_path: '/'-separated path of the org file relative to ideas/.

    Returns:
        The slug string, without any file extension.
    """
    # The last component is the file name regardless of nesting depth.
    *subdirs, filename = rel_path.split('/')
    slug = filename[:-4] if filename.endswith('.org') else filename

    if not subdirs:
        category = ROUTING.get(slug, "concepts")
        return f"{category}/{slug}"

    subdir = '/'.join(subdirs)
    return f"concepts/{subdir}/{slug}"
|
||||||
|
|
||||||
|
def build_slug_map():
    """Map each org slug (file name without .org) to its gbrain slug.

    When two org files share a file name, the one listed later by
    find_org_files() wins.
    """
    return {
        org_slug: gbrain_slug(relative)
        for org_slug, relative, _absolute in find_org_files()
    }
|
||||||
|
|
||||||
|
def extract_org_links_and_body(src_path):
    """Read an org file and split it into header properties and body text.

    Args:
        src_path: Path of the org file to read (decoded as UTF-8).

    Returns:
        ``(props, clean_body)``. ``props`` holds only the keys that were
        found: 'title' (str), 'tags' (list of str), 'org_id' (str) and
        'created' (the YYYY-MM-DD part of :CREATED:). ``clean_body`` is the
        file text with the leading header (blank lines, the
        :PROPERTIES: … :END: drawer and ``#+keyword`` lines) stripped.
    """
    with open(src_path, encoding="utf-8") as f:
        content = f.read()

    props = {}
    # Org keywords are case-insensitive, so "#+TITLE:" must match too.
    keyword_flags = re.MULTILINE | re.IGNORECASE

    # Extract title
    m = re.search(r'^#\+title:\s+(.+)$', content, keyword_flags)
    if m:
        props['title'] = m.group(1).strip()

    # Extract tags; ":a:b:" and ":a: :b:" both yield ['a', 'b'].
    m = re.search(r'^#\+filetags:\s+(.+)$', content, keyword_flags)
    if m:
        props['tags'] = [t for t in re.split(r'[:\s]+', m.group(1)) if t]

    # Extract ID from PROPERTIES drawer
    m = re.search(r':ID:\s+([^\s]+)', content)
    if m:
        props['org_id'] = m.group(1)

    # Extract CREATED, e.g. "[2026-05-23 Sat]" — keep just the date portion.
    m = re.search(r':CREATED:\s+\[([^\]]+)\]', content)
    if m:
        date_m = re.match(r'(\d{4}-\d{2}-\d{2})', m.group(1))
        if date_m:
            props['created'] = date_m.group(1)

    # Strip header for body
    lines = content.split('\n')
    start = 0
    in_properties = False
    for i, line in enumerate(lines):
        stripped = line.strip()

        if in_properties:
            # Swallow drawer lines; the body can start only after :END:.
            if stripped == ':END:':
                in_properties = False
                start = i + 1
            continue

        if stripped == ':PROPERTIES:':
            in_properties = True
            continue

        # Skip #+ keyword lines, but keep "#+begin…" block openers: they are
        # body content and must stay paired with their "#+end…" line.
        if line.startswith('#+') and line[:7].lower() != '#+begin':
            start = i + 1
            continue

        if stripped:
            start = i
            break

        # Blank line inside the header region
        start = i + 1

    body = '\n'.join(lines[start:])
    return props, body
|
||||||
|
|
||||||
|
def resolve_org_link(match, slug_map):
    """Rewrite one matched org file link so it points at its gbrain slug.

    ``[[file:target.org][desc]]`` becomes ``[[file:<gbrain slug>.org][desc]]``
    when ``target`` is a key of ``slug_map``; otherwise the matched text is
    returned unchanged.

    Args:
        match: Regex match whose group 1 is the link target (without
            ``.org``) and whose optional group 2 is the description.
        slug_map: Mapping of org slug → gbrain slug.

    Returns:
        The replacement link text.
    """
    target = match.group(1)
    if target not in slug_map:
        return match.group(0)

    # lastindex is None when no group took part in the match, so guard the
    # comparison; fall back to the target when there is no description.
    desc = match.group(2) if (match.lastindex or 0) >= 2 else None
    if desc is None:
        desc = target

    return f"[[file:{slug_map[target]}.org][{desc}]]"
|
||||||
|
|
||||||
|
def convert_body(body_text, slug_map):
    """Inject gbrain paths into org cross-references, then convert to markdown.

    Args:
        body_text: Org body text (header already stripped).
        slug_map: Mapping of org slug → gbrain slug.

    Returns:
        ``(md_body, link_refs)``. ``link_refs`` has one
        ``{"slug", "type", "name"}`` dict per ``[[file:X.org][desc]]`` link
        whose X is in ``slug_map`` (duplicates are kept). When pandoc exits
        with a non-zero status the result is ``(None, [])`` and the first
        200 characters of stderr are printed.
    """
    org_link_re = re.compile(r'\[\[file:([^\]]+?)\.org\]\[([^\]]*?)\]\]')

    # Collect every cross-reference that resolves to a known page.
    link_refs = [
        {
            "slug": slug_map[m.group(1)],
            "type": "references",
            "name": slug_map[m.group(1)],
        }
        for m in org_link_re.finditer(body_text)
        if m.group(1) in slug_map
    ]

    # Inject directory prefixes into org links so pandoc produces proper paths
    def replace_link(m):
        target = m.group(1)
        if target in slug_map:
            return f"[[file:{slug_map[target]}.org][{m.group(2)}]]"
        return m.group(0)

    processed_body = org_link_re.sub(replace_link, body_text)

    # Convert to markdown
    result = subprocess.run(
        [PANDOC, "-f", "org", "-t", "markdown-smart"],
        # Pandoc reads and writes UTF-8, so pin the pipe encoding instead of
        # inheriting whatever the current locale happens to be.
        input=processed_body, capture_output=True, text=True, encoding="utf-8",
    )
    if result.returncode != 0:
        print(f" ERROR pandoc: {result.stderr[:200]}")
        return None, []

    md = result.stdout.strip()

    # Pandoc converts [[file:concepts/foo.org][desc]] to [desc](concepts/foo.org)
    # Strip .org extensions
    md = re.sub(r'\(([a-zA-Z0-9_/-]+)\.org\)', r'(\1)', md)

    return md, link_refs
|
||||||
|
|
||||||
|
def build_frontmatter(props, link_refs=None):
    """Build a YAML frontmatter string from properties and link references.

    Args:
        props: Dict that may contain 'title' (str), 'tags' (list of str)
            and 'created' (str). Missing keys are simply left out.
        link_refs: Optional list of dicts with at least 'slug' and 'type'
            keys. Entries are de-duplicated by slug, keeping the first.

    Returns:
        The frontmatter block, delimited by '---' lines, without a trailing
        newline.
    """
    lines = ['---']
    if 'title' in props:
        # Escape backslashes first, then quotes, so the title remains a valid
        # YAML double-quoted scalar.
        title = props['title'].replace('\\', '\\\\').replace('"', '\\"')
        lines.append(f'title: "{title}"')
    if 'tags' in props:
        tags_str = ', '.join(props['tags'])
        lines.append(f'tags: [{tags_str}]')
    if 'created' in props:
        lines.append(f'created: {props["created"]}')
    if link_refs:
        # Deduplicate by slug; dict insertion order keeps the first occurrence.
        unique_links = {}
        for lr in link_refs:
            unique_links.setdefault(lr['slug'], lr)
        lines.append('links:')
        for lr in unique_links.values():
            # "type" is indented to line up with "slug" so both keys belong
            # to the same sequence item.
            lines.append(f'  - slug: "{lr["slug"]}"')
            lines.append(f'    type: "{lr["type"]}"')
    lines.append('---')
    return '\n'.join(lines)
|
||||||
|
|
||||||
|
def add_timeline_entry(md_body, props):
    """Prefix the markdown body with a "Created" timeline bullet.

    The bullet is added only when ``props`` has both 'created' and 'title';
    otherwise ``md_body`` is returned unchanged.
    """
    has_date_and_title = 'created' in props and 'title' in props
    if not has_date_and_title:
        return md_body
    bullet = f"- **{props['created']}** | Created — {props['title']}\n\n"
    return bullet + md_body
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
# Pre-build slug map for all org files
|
||||||
|
slug_map = build_slug_map()
|
||||||
imported = []
|
imported = []
|
||||||
|
|
||||||
for slug, rel_path, src_path in find_org_files():
|
for slug, rel_path, src_path in find_org_files():
|
||||||
dst_path = gbrain_target(rel_path)
|
dst_path = gbrain_target(rel_path)
|
||||||
|
|
||||||
# Create parent directories
|
|
||||||
os.makedirs(os.path.dirname(dst_path), exist_ok=True)
|
os.makedirs(os.path.dirname(dst_path), exist_ok=True)
|
||||||
|
|
||||||
# Extract frontmatter from org properties
|
# Extract properties and body from org file
|
||||||
props = extract_org_properties(src_path)
|
props, org_body = extract_org_links_and_body(src_path)
|
||||||
|
|
||||||
# Strip org header and convert body to markdown
|
# Convert body to markdown, collecting links along the way
|
||||||
clean = strip_org_header(src_path)
|
md, link_refs = convert_body(org_body, slug_map)
|
||||||
md = pandoc_convert(clean)
|
|
||||||
if md is None:
|
if md is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
md = postprocess_links(md)
|
# Build frontmatter with links
|
||||||
|
frontmatter = build_frontmatter(props, link_refs)
|
||||||
|
|
||||||
|
# Add timeline entry if date exists
|
||||||
|
md = add_timeline_entry(md, props)
|
||||||
|
|
||||||
# Assemble: YAML frontmatter + markdown body
|
|
||||||
frontmatter = build_frontmatter(props)
|
|
||||||
full = frontmatter + '\n\n' + md + '\n'
|
full = frontmatter + '\n\n' + md + '\n'
|
||||||
|
|
||||||
with open(dst_path, 'w') as f:
|
with open(dst_path, 'w') as f:
|
||||||
f.write(full)
|
f.write(full)
|
||||||
|
|
||||||
# Show relative path for clarity
|
|
||||||
rel_dst = os.path.relpath(dst_path, GBRAIN_SRC)
|
rel_dst = os.path.relpath(dst_path, GBRAIN_SRC)
|
||||||
imported.append(rel_dst)
|
imported.append(rel_dst)
|
||||||
print(f" OK {rel_dst}")
|
print(f" OK {rel_dst}")
|
||||||
@@ -219,7 +291,6 @@ def main():
|
|||||||
[BUN, "import", GBRAIN_SRC],
|
[BUN, "import", GBRAIN_SRC],
|
||||||
capture_output=True, text=True, env=env
|
capture_output=True, text=True, env=env
|
||||||
)
|
)
|
||||||
# Show last 20 lines of stdout (skip noise)
|
|
||||||
out_lines = result.stdout.strip().split('\n')
|
out_lines = result.stdout.strip().split('\n')
|
||||||
for line in out_lines[-25:]:
|
for line in out_lines[-25:]:
|
||||||
if line.strip() and 'batch caps' not in line and 'max_batch_tokens' not in line:
|
if line.strip() and 'batch caps' not in line and 'max_batch_tokens' not in line:
|
||||||
@@ -239,5 +310,36 @@ def main():
|
|||||||
if line.strip():
|
if line.strip():
|
||||||
print(f" {line}")
|
print(f" {line}")
|
||||||
|
|
||||||
|
# Extract links from frontmatter (now that pages are imported with links:)
|
||||||
|
print("\nExtracting links from frontmatter...")
|
||||||
|
result3 = subprocess.run(
|
||||||
|
[BUN, "extract", "links", "--source", "db", "--include-frontmatter",
|
||||||
|
"--dir", GBRAIN_SRC],
|
||||||
|
capture_output=True, text=True, env=env
|
||||||
|
)
|
||||||
|
for line in result3.stdout.strip().split('\n')[-10:]:
|
||||||
|
if line.strip():
|
||||||
|
print(f" {line}")
|
||||||
|
|
||||||
|
# Extract timeline from body
|
||||||
|
print("\nExtracting timeline...")
|
||||||
|
result4 = subprocess.run(
|
||||||
|
[BUN, "extract", "timeline", "--source", "db", "--dir", GBRAIN_SRC],
|
||||||
|
capture_output=True, text=True, env=env
|
||||||
|
)
|
||||||
|
for line in result4.stdout.strip().split('\n')[-10:]:
|
||||||
|
if line.strip():
|
||||||
|
print(f" {line}")
|
||||||
|
|
||||||
|
# Stats
|
||||||
|
print("\nBrain stats:")
|
||||||
|
result5 = subprocess.run(
|
||||||
|
[BUN, "stats"],
|
||||||
|
capture_output=True, text=True, env=env
|
||||||
|
)
|
||||||
|
for line in result5.stdout.strip().split('\n')[-15:]:
|
||||||
|
if line.strip():
|
||||||
|
print(f" {line}")
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|||||||
Reference in New Issue
Block a user