重构 format_thesis_docx.py 增强排版功能;新增 markdown_to_docx.py 转换工具;移除旧版排版文档 Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
98 lines
2.8 KiB
Python
98 lines
2.8 KiB
Python
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
import re
|
|
from docx import Document
|
|
|
|
|
|
# Markdown source file read by main(); hard-coded absolute path.
INPUT_MD = Path("/Users/apple/code/bs/mying/example/萌贝母婴商城毕业论文初稿-2026版.md")
|
|
# Destination .docx file written by main(); hard-coded absolute path.
OUTPUT_DOCX = Path("/Users/apple/code/bs/mying/example/萌贝母婴商城毕业论文初稿-2026版.docx")
|
|
|
|
|
|
def is_table_separator(line: str) -> bool:
    """Tell whether *line* looks like the delimiter row of a pipe table.

    The line must start with ``|`` (after trimming surrounding whitespace)
    and, once the outer pipes and all spaces are removed, must be non-empty
    and consist solely of ``-``, ``:`` and ``|`` characters.
    """
    candidate = line.strip()
    if candidate[:1] != "|":
        return False

    body = candidate.strip("|").replace(" ", "")
    if not body:
        return False

    # Every remaining character has to be one of the delimiter-row symbols.
    return set(body) <= {"-", ":", "|"}
|
|
|
|
|
|
def split_table_row(line: str) -> list[str]:
    """Split one pipe-table row into its trimmed cell texts.

    Only a single leading and a single trailing ``|`` are treated as the
    row's outer borders, so empty first/last cells written without spaces
    (``||b|c|``) are kept and later columns do not shift left.  A pipe
    escaped as ``\\|`` is part of the cell text, not a cell boundary, and is
    returned as a plain ``|``.

    Args:
        line: Raw Markdown line holding the row.

    Returns:
        The cell contents in order, each stripped of surrounding whitespace.
        Always contains at least one element.
    """
    raw = line.strip()
    if raw.startswith("|"):
        raw = raw[1:]
    # A trailing "\|" is an escaped pipe belonging to the last cell.
    if raw.endswith("|") and not raw.endswith("\\|"):
        raw = raw[:-1]

    # Split on pipes that are not preceded by a backslash.
    cells = re.split(r"(?<!\\)\|", raw)
    return [cell.replace("\\|", "|").strip() for cell in cells]
|
|
|
|
|
|
def _opening_fence(stripped: str) -> str | None:
    """Return the fence marker if *stripped* opens a fenced code block, else None."""
    match = re.match(r"^(`{3,}|~{3,})(.*)$", stripped)
    if match is None:
        return None
    fence, info = match.groups()
    # A backtick fence may not carry backticks in its info string; such a line
    # is inline code (```x``` ...) rather than the start of a block.
    if fence[0] == "`" and "`" in info:
        return None
    return fence


def _is_closing_fence(stripped: str, fence: str) -> bool:
    """Tell whether *stripped* closes a block opened with *fence*."""
    # Same fence character only, and at least as long as the opening marker.
    return stripped.startswith(fence) and not stripped.strip(fence[0])


def _add_table(doc: Document, headers: list[str], rows: list[list[str]]) -> None:
    """Append a table to *doc*; the column count is fixed by the header row."""
    cols = max(1, len(headers))
    table = doc.add_table(rows=1, cols=cols)
    for c in range(cols):
        table.cell(0, c).text = headers[c] if c < len(headers) else ""

    for row in rows:
        cells = table.add_row().cells
        # Short rows are padded with empty cells; surplus cells are dropped.
        for c in range(cols):
            cells[c].text = row[c] if c < len(row) else ""


def convert_markdown_to_docx(md_text: str, doc: Document) -> None:
    """Append the contents of a Markdown string to *doc*, block by block.

    Each line is classified by the first rule that matches:

    * blank line -> empty paragraph
    * fenced code block (three or more backticks or tildes) -> one paragraph
      per code line, copied verbatim (leading indentation kept) so that code
      such as ``# comment`` or ``- x`` is not re-interpreted as a heading or
      list item; the fence lines themselves are not emitted.  An unterminated
      fence runs to the end of the text.
    * pipe table (a ``|`` row followed by a separator row) -> table
    * ATX heading (``#`` .. ``######``) -> heading, level capped at 4
    * ordered list item (``1. text``) -> paragraph with style "List Number"
    * unordered list item (``-``, ``*`` or ``+`` followed by whitespace)
      -> paragraph with style "List Bullet"
    * anything else -> plain paragraph holding the stripped line

    Inline markup (bold, links, ...) is not interpreted; text is copied as is.

    Args:
        md_text: Markdown source text.
        doc: Target document.  It is only used through ``add_paragraph``,
            ``add_heading`` and ``add_table``.
    """
    lines = md_text.splitlines()
    i = 0

    while i < len(lines):
        stripped = lines[i].strip()

        if not stripped:
            doc.add_paragraph("")
            i += 1
            continue

        # Fenced code block: must be handled before every other rule so the
        # code inside is never parsed as Markdown.
        fence = _opening_fence(stripped)
        if fence is not None:
            i += 1
            while i < len(lines):
                code_line = lines[i]
                i += 1
                if _is_closing_fence(code_line.strip(), fence):
                    break
                doc.add_paragraph(code_line.rstrip())
            continue

        # Table block
        if (
            stripped.startswith("|")
            and i + 1 < len(lines)
            and is_table_separator(lines[i + 1])
        ):
            headers = split_table_row(lines[i])
            i += 2  # skip the header row and the separator row
            rows: list[list[str]] = []
            while i < len(lines) and lines[i].strip().startswith("|"):
                rows.append(split_table_row(lines[i]))
                i += 1
            _add_table(doc, headers, rows)
            continue

        # Heading
        heading_match = re.match(r"^(#{1,6})\s+(.*)$", stripped)
        if heading_match:
            level = min(4, len(heading_match.group(1)))
            doc.add_heading(heading_match.group(2).strip(), level=level)
            i += 1
            continue

        # Ordered list
        if re.match(r"^\d+\.\s+", stripped):
            text = re.sub(r"^\d+\.\s+", "", stripped)
            doc.add_paragraph(text, style="List Number")
            i += 1
            continue

        # Unordered list: "-", "*" and "+" are all valid Markdown bullets.
        bullet_match = re.match(r"^[-*+]\s+(.*)$", stripped)
        if bullet_match:
            doc.add_paragraph(bullet_match.group(1).strip(), style="List Bullet")
            i += 1
            continue

        # Plain paragraph
        doc.add_paragraph(stripped)
        i += 1
|
|
|
|
|
|
def main() -> None:
    """Convert the configured Markdown file to .docx and print the output path.

    Reads ``INPUT_MD`` as UTF-8, feeds it through
    ``convert_markdown_to_docx`` into a fresh document, saves that document
    to ``OUTPUT_DOCX`` and prints ``OUTPUT_DOCX``.
    """
    source = INPUT_MD.read_text(encoding="utf-8")

    document = Document()
    convert_markdown_to_docx(source, document)
    document.save(OUTPUT_DOCX)

    print(OUTPUT_DOCX)
|
|
|
|
|
|
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|