New mdsplit version.

This commit is contained in:
Sekun 2024-10-24 20:58:06 +02:00
parent 17c72ba06e
commit a9f92aa16a

197
prog/mdsplit.py Normal file → Executable file
View file

@ -7,7 +7,9 @@
# /// # ///
import argparse import argparse
import re from collections import defaultdict
from dataclasses import dataclass
from enum import Enum
from pathlib import Path from pathlib import Path
from slugify import slugify from slugify import slugify
@ -18,6 +20,15 @@ parser = argparse.ArgumentParser(
parser.add_argument("mdfile", help="The org file", type=Path) parser.add_argument("mdfile", help="The org file", type=Path)
parser.add_argument("mdbook", help="mdbook root diretory", type=Path) parser.add_argument("mdbook", help="mdbook root diretory", type=Path)
parser.add_argument(
"-d",
"--max-depth",
help="Max depth for headings",
type=int,
default=1,
dest="depth",
)
args = parser.parse_args() args = parser.parse_args()
if not args.mdfile.is_file(): if not args.mdfile.is_file():
@ -28,70 +39,152 @@ if not args.mdbook.is_dir():
"`mdbook` must be a root mdbook directory initialiezd with `mdbook init`" "`mdbook` must be a root mdbook directory initialiezd with `mdbook init`"
) )
with open(args.mdfile) as f: if args.depth < 1:
data = f.read() raise ValueError("`depth` must be >= 1")
data = data.split("```")
for i, d in enumerate(data[:]):
if i % 2 == 0:
continue
data[i] = "```" + re.sub(r"^(#+) ", r"\1", data[i], flags=re.MULTILINE) + "```"
data = "".join(data).splitlines(keepends=True) @dataclass
class ExtraTitle:
level: int
title: str
output = Path(parser.mdbook) / "src"
splitn = [idx for (idx, d) in enumerate(data) if d.startswith("# ")] class DType(Enum):
splitn = list(zip(splitn[:], splitn[1:] + [None])) CODE = 1
BODY = 2
TITLE = 3
summaries = []
for idx, (start, end) in enumerate(splitn[:], start=1):
d = data[start:end]
title = d[0][2:].rstrip()
num = f"{idx:02d}"
basename = f"{num}.{slugify(title)}.md"
summary = {"title": title, "basename": basename, "subs": []}
subcontent = d[1:] @dataclass
class Content:
content: str
dtype: DType
extra: None | ExtraTitle = None
name = d[0]
splitn = [idx for (idx, d) in enumerate(subcontent) if d.startswith("## ")] @dataclass
class Config:
inside_code: bool
with open(output / basename, "w") as f:
print(f"# {title}", file=f)
if splitn: OUTPUT_DIR: Path = Path("book/src")
d = "".join(subcontent[: splitn[0]])
print(d, file=f)
else:
print("".join(subcontent), file=f)
splitn = list(zip(splitn[:], splitn[1:] + [None])) MAX_LEVEL: int = args.depth
for jdx, (start, end) in enumerate(splitn[:], start=1): with open(args.mdfile, "r") as f:
d = subcontent[start:end] print(args.mdfile)
title = d[0][2:].rstrip() mdlines: list[str] = f.readlines()
basename = f"{idx:02d}.{jdx:02d}.{slugify(title)}.md"
summary["subs"].append(
{ parsed_lines: list[Content] = []
"title": title,
"basename": basename, config: Config = Config(inside_code=False)
}
def if_parse_begin_code(
    line: str,
    config: Config,
    parsed_lines: list[Content],
) -> bool:
    """Record *line* as the opening fence of a code block, if it is one.

    A line counts as a fence when it starts with three backticks. In that
    case ``config.inside_code`` is switched on, the line is appended to
    ``parsed_lines`` as a ``DType.CODE`` entry and ``True`` is returned.
    Any other line leaves both arguments untouched and yields ``False``.
    """
    is_fence = line.startswith("```")
    if not is_fence:
        return False
    config.inside_code = True
    fence = Content(content=line, dtype=DType.CODE)
    parsed_lines.append(fence)
    return True
def if_parse_title(line: str, config: Config, parsed_lines: list[Content]) -> bool:
    """Record *line* as a heading if it is an ATX-style Markdown title.

    A heading is a run of one to six ``#`` characters that is either the
    whole line or is followed by whitespace. Lines that merely start with
    ``#`` (``#tag``, ``#!/bin/sh``, ``#include <x>``) are rejected so the
    next parser in the chain can handle them.

    Args:
        line: One raw line of the Markdown file, line ending included.
        config: Parser state; accepted so all parsers share one signature,
            not read here.
        parsed_lines: List that receives the new ``Content`` entry.

    Returns:
        ``True`` when a ``DType.TITLE`` entry was appended, else ``False``.
    """
    # Count the leading '#' run directly instead of measuring the first
    # space-delimited token, which also counted newlines and other text.
    level = len(line) - len(line.lstrip("#"))
    if not 1 <= level <= 6:
        return False

    rest = line[level:]
    if rest and not rest[0].isspace():
        # Something other than whitespace follows the '#' run: not a heading.
        return False

    # Drop only the single separator character; the text after it is kept
    # verbatim, including the line ending.
    title = rest[1:]
    parsed_lines.append(
        Content(
            content=line,
            dtype=DType.TITLE,
            extra=ExtraTitle(level=level, title=title),
        )
    )
    return True
def parse_code(line: str, config, parse_lines) -> bool:
    """Append *line* to *parse_lines* as a ``DType.CODE`` entry.

    Always succeeds, so it can close a chain of parsers as the catch-all.

    Args:
        line: One raw line of the Markdown file.
        config: Parser state; accepted for signature parity, not read here.
        parse_lines: List that receives the new ``Content`` entry.

    Returns:
        Always ``True``.
    """
    # Append to the list that was passed in, not to a module-level
    # `parsed_lines`, so the function honours its own argument.
    parse_lines.append(Content(content=line, dtype=DType.CODE))
    return True
def parse_content(line: str, config, parse_lines) -> bool:
    """Append *line* to *parse_lines* as a ``DType.BODY`` entry.

    Always succeeds, so it can close a chain of parsers as the catch-all.

    Args:
        line: One raw line of the Markdown file.
        config: Parser state; accepted for signature parity, not read here.
        parse_lines: List that receives the new ``Content`` entry.

    Returns:
        Always ``True``.
    """
    # Append to the list that was passed in, not to a module-level
    # `parsed_lines`, so the function honours its own argument.
    parse_lines.append(Content(content=line, dtype=DType.BODY))
    return True
def if_parse_end_code(line: str, config, parse_lines) -> bool:
    """Record *line* as the closing fence of a code block, if it is one.

    A line counts as a fence when it starts with three backticks. The fence
    itself is stored as ``DType.CODE`` and ``config.inside_code`` is then
    switched off.

    Args:
        line: One raw line of the Markdown file.
        config: Parser state whose ``inside_code`` flag is cleared on a match.
        parse_lines: List that receives the new ``Content`` entry.

    Returns:
        ``True`` when the line was a fence and has been recorded, else
        ``False`` (nothing is modified in that case).
    """
    if not line.startswith("```"):
        return False
    # Append to the list that was passed in, not to a module-level
    # `parsed_lines`, so the function honours its own argument.
    parse_lines.append(Content(content=line, dtype=DType.CODE))
    config.inside_code = False
    return True
# Dispatch one line that lies outside a fenced code block.
# The parsers are tried in the listed order with the same three arguments;
# any() stops at the first one that returns a truthy value, so later parsers
# are not called for that line. parse_content returns True unconditionally,
# which makes it the catch-all at the end of the chain.
# Returns the boolean produced by any().
# NOTE(review): this listing has lost its indentation, and the final line
# carries a second `)` that appears to come from the other column of the
# diff view — confirm against the real file.
def do_parse_nocode(line, config, parse_lines):
return any(
f(line, config, parse_lines)
for f in (
if_parse_begin_code,
if_parse_title,
parse_content,
)
) )
with open(output / basename, "w") as f:
print("".join(d), file=f)
summaries.append(summary) for line in mdlines:
if not config.inside_code:
do_parse_nocode(line, config, parsed_lines)
else:
any(f(line, config, parsed_lines) for f in (if_parse_end_code, parse_code))
with open(output / "SUMMARY.md", "w") as f:
print("# SUMMARY", file=f) filepath_suffix = "_prelude.md"
for item in summaries: filepath = OUTPUT_DIR / ("0" + filepath_suffix)
title = item["title"]
basename = item["basename"] num_titles = defaultdict(int)
print(f"- [{title}](./{basename})", file=f) summaries = []
for sub in item["subs"]:
title = sub["title"] for parsed_line in parsed_lines:
basename = sub["basename"] if parsed_line.dtype == DType.TITLE and parsed_line.extra.level <= MAX_LEVEL:
print(f" - [{title}](./{basename})", file=f) level = parsed_line.extra.level
num_titles[level] += 1
num_title = num_titles[level]
title = parsed_line.extra.title.strip()
slug_title = slugify(title)
filepath_suffix = f"_{slug_title}.md"
# reset key of num_titles if key > level
keys = {k for k in num_titles.keys() if k > level}
for key in keys:
del num_titles[key]
filepath_prefix = ".".join(
f"{i:02d}" for _, i in sorted(num_titles.items(), key=lambda x: x[0])
)
basename = filepath_prefix + filepath_suffix
filepath = OUTPUT_DIR / basename
summaries.append((level, title, basename))
with open(filepath, "a") as f:
f.write(parsed_line.content)
else:
with open(filepath, "a") as f:
f.write(parsed_line.content)
# Write SUMMARY.md in OUTPUT_DIR: one Markdown list entry per
# (level, title, basename) triple collected in `summaries`.
# Mode "w" replaces any existing SUMMARY.md.
with open(OUTPUT_DIR / "SUMMARY.md", "w") as f:
for level, title, basename in summaries:
# The first argument repeats the string literal `level` times; print() also
# inserts its default single-space separator between the two arguments.
# NOTE(review): the width of the indent literal may have been collapsed in
# this listing — verify the nesting indentation against the real file.
print(level * " ", f"- [{title}](./{basename})", file=f)