New mdsplit version.
This commit is contained in:
parent
17c72ba06e
commit
a9f92aa16a
1 changed files with 146 additions and 53 deletions
197
prog/mdsplit.py
Normal file → Executable file
197
prog/mdsplit.py
Normal file → Executable file
|
@ -7,7 +7,9 @@
|
||||||
# ///
|
# ///
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import re
|
from collections import defaultdict
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from enum import Enum
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from slugify import slugify
|
from slugify import slugify
|
||||||
|
@ -18,6 +20,15 @@ parser = argparse.ArgumentParser(
|
||||||
|
|
||||||
parser.add_argument("mdfile", help="The org file", type=Path)
|
parser.add_argument("mdfile", help="The org file", type=Path)
|
||||||
parser.add_argument("mdbook", help="mdbook root diretory", type=Path)
|
parser.add_argument("mdbook", help="mdbook root diretory", type=Path)
|
||||||
|
parser.add_argument(
|
||||||
|
"-d",
|
||||||
|
"--max-depth",
|
||||||
|
help="Max depth for headings",
|
||||||
|
type=int,
|
||||||
|
default=1,
|
||||||
|
dest="depth",
|
||||||
|
)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
if not args.mdfile.is_file():
|
if not args.mdfile.is_file():
|
||||||
|
@ -28,70 +39,152 @@ if not args.mdbook.is_dir():
|
||||||
"`mdbook` must be a root mdbook directory initialiezd with `mdbook init`"
|
"`mdbook` must be a root mdbook directory initialiezd with `mdbook init`"
|
||||||
)
|
)
|
||||||
|
|
||||||
with open(args.mdfile) as f:
|
if args.depth < 1:
|
||||||
data = f.read()
|
raise ValueError("`depth` must be >= 1")
|
||||||
|
|
||||||
data = data.split("```")
|
|
||||||
for i, d in enumerate(data[:]):
|
|
||||||
if i % 2 == 0:
|
|
||||||
continue
|
|
||||||
data[i] = "```" + re.sub(r"^(#+) ", r"\1", data[i], flags=re.MULTILINE) + "```"
|
|
||||||
|
|
||||||
data = "".join(data).splitlines(keepends=True)
|
@dataclass
class ExtraTitle:
    """Heading metadata carried by a parsed line whose dtype is TITLE."""

    # Heading depth; intended as the number of leading `#` (1 for `#`, 2 for `##`).
    level: int
    # Heading text that follows the `#` marker.
    title: str
|
||||||
|
|
||||||
output = Path(parser.mdbook) / "src"
|
|
||||||
|
|
||||||
splitn = [idx for (idx, d) in enumerate(data) if d.startswith("# ")]
|
class DType(Enum):
|
||||||
splitn = list(zip(splitn[:], splitn[1:] + [None]))
|
CODE = 1
|
||||||
|
BODY = 2
|
||||||
|
TITLE = 3
|
||||||
|
|
||||||
summaries = []
|
|
||||||
for idx, (start, end) in enumerate(splitn[:], start=1):
|
|
||||||
d = data[start:end]
|
|
||||||
title = d[0][2:].rstrip()
|
|
||||||
num = f"{idx:02d}"
|
|
||||||
basename = f"{num}.{slugify(title)}.md"
|
|
||||||
summary = {"title": title, "basename": basename, "subs": []}
|
|
||||||
|
|
||||||
subcontent = d[1:]
|
@dataclass
class Content:
    """A single line of the input markdown, tagged with its classification."""

    # The line exactly as the parse helpers received it.
    content: str
    # Whether the line is code, body text or a title.
    dtype: DType
    # Heading metadata; the parse helpers only set it for TITLE lines.
    extra: ExtraTitle | None = None
|
||||||
|
|
||||||
name = d[0]
|
|
||||||
|
|
||||||
splitn = [idx for (idx, d) in enumerate(subcontent) if d.startswith("## ")]
|
@dataclass
class Config:
    """Mutable parser state shared by the line handlers."""

    # True while the parser is between an opening and a closing ``` fence.
    inside_code: bool
|
||||||
|
|
||||||
with open(output / basename, "w") as f:
|
|
||||||
print(f"# {title}", file=f)
|
|
||||||
|
|
||||||
if splitn:
|
# Write into the `src` folder of the mdbook root passed on the command line
# (checked with `is_dir()` earlier) rather than a fixed "book/src", which
# silently ignored the `mdbook` argument.
OUTPUT_DIR: Path = args.mdbook / "src"
|
||||||
d = "".join(subcontent[: splitn[0]])
|
|
||||||
print(d, file=f)
|
|
||||||
else:
|
|
||||||
print("".join(subcontent), file=f)
|
|
||||||
|
|
||||||
splitn = list(zip(splitn[:], splitn[1:] + [None]))
|
MAX_LEVEL: int = args.depth
|
||||||
|
|
||||||
for jdx, (start, end) in enumerate(splitn[:], start=1):
|
with open(args.mdfile, "r") as f:
|
||||||
d = subcontent[start:end]
|
print(args.mdfile)
|
||||||
title = d[0][2:].rstrip()
|
mdlines: list[str] = f.readlines()
|
||||||
basename = f"{idx:02d}.{jdx:02d}.{slugify(title)}.md"
|
|
||||||
summary["subs"].append(
|
|
||||||
{
|
parsed_lines: list[Content] = []
|
||||||
"title": title,
|
|
||||||
"basename": basename,
|
config: Config = Config(inside_code=False)
|
||||||
}
|
|
||||||
|
|
||||||
|
def if_parse_begin_code(
    line: str,
    config: Config,
    parsed_lines: list[Content],
):
    """Handle a line that opens a fenced code block.

    When *line* starts with three backticks, flag the parser as being inside
    a code block, store the fence line as CODE and return True.  Any other
    line is left alone and False is returned so the next handler can try.
    """
    is_fence = line.startswith("```")
    if not is_fence:
        return False

    config.inside_code = True
    fence = Content(content=line, dtype=DType.CODE)
    parsed_lines.append(fence)
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def if_parse_title(line: str, config: Config, parsed_lines: list[Content]) -> bool:
    """Record *line* as a TITLE entry if it is an ATX-style markdown heading.

    A heading is 1 to 6 leading ``#`` characters followed by whitespace or by
    the end of the line.  Lines such as ``#tag`` or ``#!/bin/sh`` are not
    headings and are rejected, and the level is the number of ``#``
    characters only (a bare ``"#\\n"`` is level 1, not 2).

    Args:
        line: Raw line from the markdown file, line ending included.
        config: Parser state; neither read nor modified here, accepted so
            that every line handler shares the same call signature.
        parsed_lines: List that receives the new ``Content`` entry.

    Returns:
        True when the line was stored as a title, False when it is not a
        heading and another handler should process it.
    """
    level = len(line) - len(line.lstrip("#"))
    if not 1 <= level <= 6:
        return False

    remainder = line[level:]
    # The marker must be separated from the text, otherwise "#foo" would
    # be mistaken for a heading.
    if remainder and not remainder[0].isspace():
        return False

    parsed_lines.append(
        Content(
            content=line,
            dtype=DType.TITLE,
            extra=ExtraTitle(level=level, title=remainder.strip()),
        )
    )
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def parse_code(line: str, config: Config, parse_lines: list[Content]) -> bool:
    """Store *line* verbatim as a CODE entry.

    Args:
        line: Raw line found inside a fenced code block.
        config: Parser state; unused, kept for the shared handler signature.
        parse_lines: List that receives the new ``Content`` entry.

    Returns:
        Always True: this handler accepts every line it is given.
    """
    # Append to the list that was passed in; the previous body ignored the
    # parameter and wrote to the module-level `parsed_lines` instead.
    parse_lines.append(Content(content=line, dtype=DType.CODE))
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def parse_content(line: str, config: Config, parse_lines: list[Content]) -> bool:
    """Store *line* verbatim as a BODY entry.

    Args:
        line: Raw line that no other handler claimed.
        config: Parser state; unused, kept for the shared handler signature.
        parse_lines: List that receives the new ``Content`` entry.

    Returns:
        Always True: this handler accepts every line it is given.
    """
    # Append to the list that was passed in; the previous body ignored the
    # parameter and wrote to the module-level `parsed_lines` instead.
    parse_lines.append(Content(content=line, dtype=DType.BODY))
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def if_parse_end_code(line: str, config: Config, parse_lines: list[Content]) -> bool:
    """Handle a line that closes a fenced code block.

    Args:
        line: Raw line read while the parser is inside a code block.
        config: Parser state; ``inside_code`` is cleared when the fence closes.
        parse_lines: List that receives the closing fence as a CODE entry.

    Returns:
        True when *line* starts with three backticks and was consumed,
        False otherwise.
    """
    if not line.startswith("```"):
        return False

    # Append to the list that was passed in; the previous body ignored the
    # parameter and wrote to the module-level `parsed_lines` instead.
    parse_lines.append(Content(content=line, dtype=DType.CODE))
    config.inside_code = False
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def do_parse_nocode(line, config, parse_lines):
    """Classify a line seen outside of a fenced code block.

    The handlers are tried in order (code fence opener, title, plain body)
    and the first one that reports success stops the chain.  Returns True
    when some handler consumed the line, False if none did.
    """
    handlers = (
        if_parse_begin_code,
        if_parse_title,
        parse_content,
    )
    for handler in handlers:
        if handler(line, config, parse_lines):
            return True
    return False
|
||||||
|
|
||||||
with open(output / basename, "w") as f:
|
|
||||||
print("".join(d), file=f)
|
|
||||||
|
|
||||||
summaries.append(summary)
|
# Classify every input line; which handlers apply depends on whether the
# previous lines left the parser inside a fenced code block.
for line in mdlines:
    if config.inside_code:
        # Inside a fence: either this line closes it, or it is plain code.
        if not if_parse_end_code(line, config, parsed_lines):
            parse_code(line, config, parsed_lines)
        continue
    do_parse_nocode(line, config, parsed_lines)
|
||||||
|
|
||||||
with open(output / "SUMMARY.md", "w") as f:
|
|
||||||
print("# SUMMARY", file=f)
|
filepath_suffix = "_prelude.md"
|
||||||
for item in summaries:
|
filepath = OUTPUT_DIR / ("0" + filepath_suffix)
|
||||||
title = item["title"]
|
|
||||||
basename = item["basename"]
|
num_titles = defaultdict(int)
|
||||||
print(f"- [{title}](./{basename})", file=f)
|
summaries = []
|
||||||
for sub in item["subs"]:
|
|
||||||
title = sub["title"]
|
for parsed_line in parsed_lines:
|
||||||
basename = sub["basename"]
|
if parsed_line.dtype == DType.TITLE and parsed_line.extra.level <= MAX_LEVEL:
|
||||||
print(f" - [{title}](./{basename})", file=f)
|
level = parsed_line.extra.level
|
||||||
|
|
||||||
|
num_titles[level] += 1
|
||||||
|
|
||||||
|
num_title = num_titles[level]
|
||||||
|
title = parsed_line.extra.title.strip()
|
||||||
|
slug_title = slugify(title)
|
||||||
|
filepath_suffix = f"_{slug_title}.md"
|
||||||
|
|
||||||
|
# reset key of num_titles if key > level
|
||||||
|
keys = {k for k in num_titles.keys() if k > level}
|
||||||
|
for key in keys:
|
||||||
|
del num_titles[key]
|
||||||
|
|
||||||
|
filepath_prefix = ".".join(
|
||||||
|
f"{i:02d}" for _, i in sorted(num_titles.items(), key=lambda x: x[0])
|
||||||
|
)
|
||||||
|
|
||||||
|
basename = filepath_prefix + filepath_suffix
|
||||||
|
filepath = OUTPUT_DIR / basename
|
||||||
|
|
||||||
|
summaries.append((level, title, basename))
|
||||||
|
|
||||||
|
with open(filepath, "a") as f:
|
||||||
|
f.write(parsed_line.content)
|
||||||
|
|
||||||
|
else:
|
||||||
|
with open(filepath, "a") as f:
|
||||||
|
f.write(parsed_line.content)
|
||||||
|
|
||||||
|
# Emit the mdbook table of contents as a nested bullet list.
with open(OUTPUT_DIR / "SUMMARY.md", "w") as f:
    for level, title, basename in summaries:
        # Two spaces per nesting step, none for level 1.  The previous
        # `print(level * " ", ...)` produced 2, 3, 4... leading spaces, a
        # one-space step that markdown does not treat as list nesting.
        indent = "  " * (level - 1)
        print(f"{indent}- [{title}](./{basename})", file=f)
|
||||||
|
|
Loading…
Reference in a new issue