Files
cuimengxue/example/markdown_to_docx.py
王子琦 38741f80dd feat: 完善论文排版脚本
重构 format_thesis_docx.py 增强排版功能;新增 markdown_to_docx.py 转换工具;移除旧版排版文档

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-02-28 13:20:04 +08:00

98 lines
2.8 KiB
Python

from __future__ import annotations
from pathlib import Path
import re
from docx import Document
# Markdown source file that main() reads as UTF-8 text.
# NOTE(review): both paths are absolute and machine-specific; confirm they
# exist on the machine running this script.
INPUT_MD = Path("/Users/apple/code/bs/mying/example/萌贝母婴商城毕业论文初稿-2026版.md")
# Destination that main() passes to Document.save() and then prints.
OUTPUT_DOCX = Path("/Users/apple/code/bs/mying/example/萌贝母婴商城毕业论文初稿-2026版.docx")
def is_table_separator(line: str) -> bool:
    """Return True when *line* is the delimiter row of a Markdown pipe table.

    A delimiter row starts with ``|`` and every non-empty cell consists of one
    or more hyphens with an optional leading and/or trailing colon
    (``---``, ``:--``, ``--:``, ``:-:``).  Any whitespace around a cell,
    including tabs, is ignored.

    Rows that merely contain characters from ``-:|`` but no well-formed cell
    (for example ``|:|:|`` or ``|-:-|``) are rejected, so they are not
    mistaken for a table delimiter.

    Args:
        line: One raw line of Markdown text.

    Returns:
        True if the line is a table delimiter row, otherwise False.
    """
    stripped = line.strip()
    if not stripped.startswith("|"):
        return False
    cells = [cell.strip() for cell in stripped.strip("|").split("|")]
    # Empty cells (e.g. from a doubled pipe) are tolerated, but at least one
    # real delimiter cell must be present.
    marked = [cell for cell in cells if cell]
    return bool(marked) and all(re.fullmatch(r":?-+:?", cell) for cell in marked)
def split_table_row(line: str) -> list[str]:
    """Split one Markdown pipe-table row into its trimmed cell texts.

    Exactly one leading and one trailing ``|`` are treated as the row's outer
    borders and removed.  Stripping *all* outer pipes would drop empty edge
    cells (``|| a |``) and shift the remaining cells into the wrong columns.
    A pipe escaped as ``\\|`` is kept as a literal ``|`` inside its cell
    instead of being treated as a cell boundary.

    Args:
        line: One raw table row, with or without outer pipes.

    Returns:
        The cell texts in column order, each stripped of surrounding
        whitespace.  Always contains at least one element.
    """
    raw = line.strip()
    if raw.startswith("|"):
        raw = raw[1:]
    # A trailing "\|" is an escaped pipe belonging to the last cell, not a border.
    if raw.endswith("|") and not raw.endswith("\\|"):
        raw = raw[:-1]
    pieces = re.split(r"(?<!\\)\|", raw)
    return [piece.strip().replace("\\|", "|") for piece in pieces]
def convert_markdown_to_docx(md_text: str, doc: Document) -> None:
    """Append the content of *md_text* to *doc*, processing it line by line.

    Each line is stripped and handled by the first rule that matches:

    * blank line -> an empty paragraph;
    * a line starting with ``|`` whose next line is a table separator -> a
      table built from the header row and all following ``|`` rows;
    * ``#`` .. ``######`` heading -> ``doc.add_heading`` (level capped at 4);
    * ``N. text`` -> a "List Number" paragraph;
    * ``- text`` -> a "List Bullet" paragraph;
    * anything else -> a plain paragraph.

    Args:
        md_text: The Markdown source text.
        doc: The document object that receives the generated content.
    """
    source = md_text.splitlines()
    total = len(source)
    pos = 0

    while pos < total:
        current = source[pos].strip()

        # Blank lines are preserved as empty paragraphs.
        if current == "":
            doc.add_paragraph("")
            pos += 1
            continue

        # Table: header row immediately followed by a separator row.
        if current.startswith("|") and pos + 1 < total and is_table_separator(source[pos + 1]):
            header_cells = split_table_row(source[pos])
            pos += 2  # move past the header row and the separator row

            body_rows: list[list[str]] = []
            while pos < total and source[pos].strip().startswith("|"):
                body_rows.append(split_table_row(source[pos]))
                pos += 1

            # The header decides the column count; rows are padded with ""
            # or truncated to that width.
            width = max(1, len(header_cells))
            table = doc.add_table(rows=1, cols=width)

            padded_header = (header_cells + [""] * width)[:width]
            for col, value in enumerate(padded_header):
                table.cell(0, col).text = value

            for body_row in body_rows:
                new_cells = table.add_row().cells
                padded_row = (body_row + [""] * width)[:width]
                for col, value in enumerate(padded_row):
                    new_cells[col].text = value

            # pos already points at the first line after the table.
            continue

        heading = re.match(r"^(#{1,6})\s+(.*)$", current)
        if heading is not None:
            hashes, title = heading.groups()
            doc.add_heading(title.strip(), level=min(4, len(hashes)))
        else:
            numbered = re.match(r"^\d+\.\s+", current)
            if numbered is not None:
                # Drop the "N. " marker and keep the rest of the line.
                doc.add_paragraph(current[numbered.end():], style="List Number")
            elif current.startswith("- "):
                doc.add_paragraph(current[2:].strip(), style="List Bullet")
            else:
                doc.add_paragraph(current)

        pos += 1
def main(input_path: Path | str | None = None, output_path: Path | str | None = None) -> None:
    """Convert a Markdown file to a .docx file and print the output path.

    Called without arguments, this reads ``INPUT_MD`` and writes
    ``OUTPUT_DOCX``.  Both locations can be overridden so the conversion is
    not tied to the hard-coded, machine-specific paths.

    Args:
        input_path: Markdown file to read as UTF-8.  Defaults to ``INPUT_MD``.
        output_path: Where to save the generated document.  Defaults to
            ``OUTPUT_DOCX``.
    """
    source = Path(INPUT_MD if input_path is None else input_path)
    target = Path(OUTPUT_DOCX if output_path is None else output_path)

    md_text = source.read_text(encoding="utf-8")
    doc = Document()
    convert_markdown_to_docx(md_text, doc)
    doc.save(target)
    print(target)
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()