akryldigital committed on
Commit
80e3b05
·
verified ·
1 Parent(s): fc8726c

added reproducibility for the diagrams

Browse files
Files changed (1) hide show
  1. tools/render_mermaid.py +235 -0
tools/render_mermaid.py ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Render every Mermaid block in docs/ to a PNG, embed via <img>.
2
+
3
+ Why we need this:
4
+ HF Space's markdown viewer does NOT render Mermaid (unlike GitHub),
5
+ so the architecture diagrams in docs/ show as raw code blocks for
6
+ the client. This script renders each Mermaid block to a static PNG
7
+ and replaces the block in the markdown with an <img> tag — the
8
+ diagrams now render correctly everywhere (HF Space, PDF, Word,
9
+ GitHub, etc.).
10
+
11
+ The original Mermaid source is preserved verbatim inside an HTML
12
+ comment immediately above the <img>, so future edits can be made
13
+ by editing the source and re-running this script.
14
+
15
+ Idempotency:
16
+ Re-running is safe. The script detects already-rendered blocks
17
+ (those wrapped in our marker comments) and re-renders them in
18
+ place rather than producing duplicates.
19
+
20
+ Usage:
21
+ python tools/render_mermaid.py [--only PATH] [--theme default]
22
+
23
+ --only PATH Process a single .md file (use during testing).
24
+ Default: walk all docs/**/*.md.
25
+ --theme NAME Mermaid theme: default | neutral | dark | forest.
26
+ Default: default (light, clean).
27
+ --width PX Render width. Default: 2000 (high-DPI quality).
28
+
29
+ Requires:
30
+ tools/.mermaid/node_modules/.bin/mmdc (run `npm install` in
31
+ tools/.mermaid first; package.json is checked into repo).
32
+ """
33
+ from __future__ import annotations
34
+
35
+ import argparse
36
+ import os
37
+ import re
38
+ import subprocess
39
+ import sys
40
+ from pathlib import Path
41
+ from typing import List, Tuple
42
+
43
# Repository root: two directory levels above this file.
ROOT = Path(__file__).resolve().parent.parent
# Markdown tree that is scanned for Mermaid blocks.
DOCS_DIR = ROOT / "docs"
# Destination directory for the rendered PNG files.
ASSETS_DIR = DOCS_DIR / "assets" / "diagrams"
# Path at which the mmdc executable is expected after `npm install`.
MMDC_BIN = ROOT / "tools" / ".mermaid" / "node_modules" / ".bin" / "mmdc"

# Markers used to wrap a rendered block so we can find and re-render
# idempotently on subsequent runs. BEGIN_MARK opens an HTML comment and
# END_MARK closes it; the comment stores the original Mermaid source so
# anyone can edit + re-render.
BEGIN_MARK = "<!-- MERMAID-RENDERED:BEGIN"
END_MARK = "MERMAID-RENDERED:END -->"

# Match an un-rendered ```mermaid ... ``` block.
# MULTILINE anchors the opening fence to the start of a line and the
# closing fence to the end of a line; DOTALL lets the lazily-matched
# `body` group span several lines.
MERMAID_BLOCK = re.compile(
    r"^```mermaid\s*\n(?P<body>.*?)\n```$",
    re.DOTALL | re.MULTILINE,
)

# Match a previously-rendered block (so we can re-render in place):
# BEGIN_MARK, the stored source (`body` group), END_MARK, and then a
# markdown image of the form ![alt](target) on the following line.
RENDERED_BLOCK = re.compile(
    re.escape(BEGIN_MARK) + r"\s*\n(?P<body>.*?)\n"
    + re.escape(END_MARK) + r"\s*\n!\[[^\]]*\]\([^)]+\)",
    re.DOTALL,
)
66
+
67
+
68
def md_path_to_slug(md_path: Path) -> str:
    """Return a stable, filesystem-safe slug for a markdown file.

    The slug is the path relative to ``DOCS_DIR`` with the file suffix
    removed and the path components joined by a double underscore, e.g.
    docs/architecture/02-context-and-scope.md
        -> architecture__02-context-and-scope.

    Args:
        md_path: Path to a markdown file located under ``DOCS_DIR``.

    Returns:
        The slug used as the prefix of the rendered PNG file names.

    Raises:
        ValueError: If ``md_path`` is not located under ``DOCS_DIR``
            (raised by ``Path.relative_to``).
    """
    rel = md_path.relative_to(DOCS_DIR).with_suffix("")
    # Join the path components directly rather than replacing "/" in the
    # string form: on Windows the separator is "\", which would otherwise
    # leak into the generated file name.
    return "__".join(rel.parts)
74
+
75
+
76
def render_block(mermaid_src: str, png_out: Path, theme: str, width: int) -> Tuple[bool, str]:
    """Render one Mermaid source string to PNG via mmdc.

    Args:
        mermaid_src: Mermaid diagram source; it is stripped and written
            with a single trailing newline before rendering.
        png_out: Destination PNG path. The parent directory is created
            if it does not exist.
        theme: Value passed to mmdc's ``-t`` option.
        width: Value passed to mmdc's ``--width`` option.

    Returns:
        ``(ok, message)``. ``ok`` is True only when mmdc exited with
        status 0 and ``png_out`` exists afterwards; ``message`` is a
        human-readable description of the outcome. Failures are reported
        through the return value rather than by raising.
    """
    if not MMDC_BIN.exists():
        return False, f"mmdc not installed at {MMDC_BIN}. Run 'cd tools/.mermaid && npm install'."

    # mmdc reads from a file; write source to a helper .mmd file that
    # sits next to the target PNG.
    tmp_mmd = png_out.with_suffix(".mmd")
    tmp_mmd.parent.mkdir(parents=True, exist_ok=True)
    tmp_mmd.write_text(mermaid_src.strip() + "\n", encoding="utf-8")

    cmd = [
        str(MMDC_BIN),
        "-i", str(tmp_mmd),
        "-o", str(png_out),
        "-t", theme,
        "-b", "white",
        "--width", str(width),
        # Note: we deliberately let mmdc auto-size height to preserve
        # the diagram's natural aspect ratio (no clipping).
    ]
    try:
        result = subprocess.run(
            cmd, capture_output=True, text=True, timeout=120,
        )
    except subprocess.TimeoutExpired:
        tmp_mmd.unlink(missing_ok=True)
        return False, "mmdc timed out after 120s"
    except OSError as exc:
        # E.g. the binary exists but cannot be executed. Report it through
        # the (ok, message) contract instead of aborting the whole run.
        tmp_mmd.unlink(missing_ok=True)
        return False, f"mmdc could not be started: {exc}"

    # On a failed render the .mmd helper file is left in place, exactly
    # as before; it is only removed once the render has been verified.
    if result.returncode != 0:
        return False, f"mmdc failed (exit {result.returncode}):\nSTDERR: {result.stderr[:500]}"
    if not png_out.exists():
        return False, f"mmdc reported success but no PNG produced at {png_out}"

    # Clean up the .mmd helper file once render is verified.
    tmp_mmd.unlink(missing_ok=True)

    # Prefer a repo-relative path in the message, but never let a PNG
    # outside ROOT turn a successful render into an exception.
    try:
        shown = png_out.relative_to(ROOT)
    except ValueError:
        shown = png_out
    return True, f"rendered → {shown}"
121
+
122
+
123
def build_replacement(mermaid_src: str, img_relpath: str) -> str:
    """Return the markdown that takes the place of one Mermaid block.

    The result consists of four newline-separated parts, with no
    trailing newline:

        <!-- MERMAID-RENDERED:BEGIN
        <stripped Mermaid source>
        MERMAID-RENDERED:END -->
        ![Diagram](relative/path/to.png)

    Args:
        mermaid_src: Mermaid source to store inside the HTML comment;
            surrounding whitespace is stripped.
        img_relpath: Image target written into the markdown image link.

    NOTE(review): the source is embedded as-is inside an HTML comment.
    Mermaid arrows such as ``-->`` contain the comment terminator, which
    under CommonMark rules ends the comment at that line -- confirm how
    the target viewers display diagrams that use such arrows.
    """
    pieces = [
        BEGIN_MARK,
        mermaid_src.strip(),
        END_MARK,
        f"![Diagram]({img_relpath})",
    ]
    return "\n".join(pieces)
138
+
139
+
140
def process_md_file(md_path: Path, theme: str, width: int) -> Tuple[int, int, List[str]]:
    """Render every Mermaid block in one .md file and update it in place.

    Two passes are made over the file text:

    1. Blocks that were rendered by an earlier run (matched by
       ``RENDERED_BLOCK``) are re-rendered from the source stored in
       their HTML comment.
    2. Plain ```mermaid fenced blocks (matched by ``MERMAID_BLOCK``) are
       rendered for the first time.

    Blocks are numbered from 1 in the order they are processed, so fresh
    blocks continue after the already-rendered ones; the number is part
    of the PNG file name. A block whose render fails is left untouched
    in the markdown. The file is rewritten only when its text changed.

    Args:
        md_path: Markdown file to process; must be located under
            ``DOCS_DIR`` (required by ``md_path_to_slug``).
        theme: Mermaid theme forwarded to ``render_block``.
        width: Render width forwarded to ``render_block``.

    Returns:
        ``(rendered_count, total_blocks, log_lines)``. ``rendered_count``
        is the number of blocks for which ``render_block`` reported
        success, whether or not the markdown text itself changed.
    """
    original = md_path.read_text(encoding="utf-8")
    slug = md_path_to_slug(md_path)
    logs: List[str] = []
    total = 0      # blocks seen so far; also the index of the latest block
    rendered = 0   # blocks rendered successfully

    def _render_match(match: re.Match, label: str) -> str:
        """Render one matched block and return its replacement text."""
        nonlocal total, rendered
        total += 1
        idx = total
        body = match.group("body").strip()
        png_out = ASSETS_DIR / f"{slug}__{idx:02d}.png"
        ok, msg = render_block(body, png_out, theme, width)
        logs.append(f" [{idx:02d}] {label}: {msg}")
        if not ok:
            return match.group(0)  # leave untouched on failure
        rendered += 1
        # Markdown image targets are URLs, so always use forward slashes,
        # even where os.path.relpath produces backslashes (Windows).
        rel = Path(os.path.relpath(png_out, start=md_path.parent)).as_posix()
        return build_replacement(body, rel)

    # 1) Already-rendered blocks: re-render them in-place (idempotent).
    text = RENDERED_BLOCK.sub(lambda m: _render_match(m, "re-render"), original)

    # 2) Un-rendered ```mermaid blocks: render fresh, continuing the
    #    numbering after the already-rendered blocks.
    text = MERMAID_BLOCK.sub(lambda m: _render_match(m, "fresh render"), text)

    if text != original:
        md_path.write_text(text, encoding="utf-8")
    return rendered, total, logs
191
+
192
+
193
def main() -> int:
    """Command-line entry point.

    Parses the options, checks that mmdc is installed, then processes
    either the single file given with ``--only`` or every ``*.md`` file
    under ``DOCS_DIR``, printing a per-file log and a final summary.

    Returns:
        Process exit status: 1 when mmdc is missing or the ``--only``
        file is missing or not under ``DOCS_DIR``; 0 otherwise.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        # Keep the module docstring's line breaks and indentation in
        # --help instead of re-wrapping it into one paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("--only", type=str, default=None,
                        help="Process only this .md file (path relative to repo root).")
    parser.add_argument("--theme", type=str, default="default",
                        choices=["default", "neutral", "dark", "forest"])
    parser.add_argument("--width", type=int, default=2000,
                        help="PNG render width in pixels (default: 2000).")
    args = parser.parse_args()

    if not MMDC_BIN.exists():
        print(f"❌ mmdc not found at {MMDC_BIN}")
        print(" Run: cd tools/.mermaid && npm install")
        return 1

    ASSETS_DIR.mkdir(parents=True, exist_ok=True)

    if args.only:
        only_path = ROOT / args.only
        if not only_path.exists():
            print(f"❌ file not found: {only_path}")
            return 1
        # Slugs are derived from the path relative to DOCS_DIR, so a file
        # outside it cannot be processed; report that cleanly instead of
        # failing later with a ValueError traceback.
        try:
            only_path.relative_to(DOCS_DIR)
        except ValueError:
            print(f"❌ file is not under {DOCS_DIR}: {only_path}")
            return 1
        candidates = [only_path]
    else:
        candidates = sorted(DOCS_DIR.rglob("*.md"))

    total_rendered = 0
    total_blocks = 0
    for md_path in candidates:
        rendered, blocks, logs = process_md_file(md_path, args.theme, args.width)
        if blocks > 0:
            print(f"📄 {md_path.relative_to(ROOT)} ({blocks} block{'s' if blocks != 1 else ''})")
            for line in logs:
                print(line)
        total_rendered += rendered
        total_blocks += blocks

    print(f"\n✅ Done: {total_rendered}/{total_blocks} diagrams rendered "
          f"into {ASSETS_DIR.relative_to(ROOT)}/")
    return 0
232
+
233
+
234
# Run the CLI only when executed as a script and use main()'s return
# value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())