feat: 完善论文排版脚本

重构 format_thesis_docx.py 增强排版功能；新增 markdown_to_docx.py 转换工具；移除旧版排版文档。

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-02-28 13:20:04 +08:00
parent 4dd3fac720
commit 38741f80dd
3 changed files with 283 additions and 42 deletions
--- a/example/markdown_to_docx.py
+++ b/example/markdown_to_docx.py
@@ -0,0 +1,97 @@
+from __future__ import annotations
+
+from pathlib import Path
+import re
+from docx import Document
+
+
+# Locate the thesis files relative to this script rather than through a
+# hard-coded /Users/... path, so the converter works from any checkout.
+# NOTE(review): assumes the Markdown draft sits next to this script in
+# example/ (the previous absolute path pointed into an example/ directory) — confirm.
+_BASE_DIR = Path(__file__).resolve().parent
+INPUT_MD = _BASE_DIR / "萌贝母婴商城毕业论文初稿-2026版.md"
+# The generated document is written beside the source, same stem, .docx suffix.
+OUTPUT_DOCX = INPUT_MD.with_suffix(".docx")
+
+
+def is_table_separator(line: str) -> bool:
+    """Return True when ``line`` is a Markdown table delimiter row.
+
+    The line must start with ``|`` and every cell must consist of one or more
+    hyphens with an optional leading and/or trailing colon (``---``, ``:--``,
+    ``--:``, ``:-:``).  Rows whose cells are empty or hold only colons, such
+    as ``| | |`` or ``|:|:|``, are rejected so that an ordinary row of blank
+    cells is never mistaken for a delimiter.
+
+    Args:
+        line: One raw line of Markdown; surrounding whitespace is ignored.
+
+    Returns:
+        True if the line is a delimiter row, False otherwise.
+    """
+    stripped = line.strip()
+    if not stripped.startswith("|"):
+        return False
+    cells = [cell.strip() for cell in stripped.strip("|").split("|")]
+    # A bare "|" yields a single empty cell, which fails the pattern below.
+    return all(re.fullmatch(r":?-+:?", cell) is not None for cell in cells)
+
+
+def split_table_row(line: str) -> list[str]:
+    r"""Split one Markdown table row into its whitespace-trimmed cell texts.
+
+    Exactly one leading and one trailing ``|`` are treated as the row border,
+    so empty first or last cells written without padding (``|| b |``) are
+    kept.  A pipe escaped as ``\|`` stays inside its cell and is returned as
+    a literal ``|``.
+
+    Args:
+        line: One raw table row; surrounding whitespace is ignored.
+
+    Returns:
+        The cell texts in order.  An empty or border-only line yields ``[""]``.
+    """
+    raw = line.strip()
+    if raw.startswith("|"):
+        raw = raw[1:]
+    # Drop the closing border, but keep a trailing escaped pipe: it belongs
+    # to the last cell's content.
+    if raw.endswith("|") and not raw.endswith("\\|"):
+        raw = raw[:-1]
+    # Split only on pipes that are not preceded by a backslash.
+    return [cell.strip().replace("\\|", "|") for cell in re.split(r"(?<!\\)\|", raw)]
+
+
+def convert_markdown_to_docx(md_text: str, doc: Document) -> None:
+    """Append the content of a Markdown string to ``doc``, line by line.
+
+    Each source line is classified after stripping surrounding whitespace:
+
+    * blank line -> an empty paragraph;
+    * a line starting with ``|`` whose next line is a table separator -> a
+      table built from that header row and every following line that starts
+      with ``|``;
+    * one to six ``#`` followed by whitespace -> a heading, with the level
+      capped at 4;
+    * ``<digits>. text`` -> a "List Number" paragraph;
+    * ``- text`` -> a "List Bullet" paragraph;
+    * anything else -> a plain paragraph holding the stripped line.
+
+    Table rows are padded with empty strings or truncated to the header's
+    column count.  Inline Markdown markup is passed through untouched.
+
+    Args:
+        md_text: Markdown source text.
+        doc: Document to append to; it is mutated in place through its
+            ``add_paragraph``, ``add_heading`` and ``add_table`` methods.
+    """
+    source = md_text.splitlines()
+    total = len(source)
+    pos = 0
+
+    while pos < total:
+        current = source[pos].strip()
+
+        if current == "":
+            doc.add_paragraph("")
+            pos += 1
+        elif (
+            current.startswith("|")
+            and pos + 1 < total
+            and is_table_separator(source[pos + 1])
+        ):
+            header_cells = split_table_row(source[pos])
+            pos += 2  # step over the header row and the separator row
+
+            # Collect every consecutive line that still looks like a row.
+            body_rows: list[list[str]] = []
+            while pos < total and source[pos].strip().startswith("|"):
+                body_rows.append(split_table_row(source[pos]))
+                pos += 1
+
+            width = max(1, len(header_cells))
+            table = doc.add_table(rows=1, cols=width)
+            padded_header = (header_cells + [""] * width)[:width]
+            for col, value in enumerate(padded_header):
+                table.cell(0, col).text = value
+
+            for body_row in body_rows:
+                # Pad short rows and cut long ones to the header width.
+                padded_row = (body_row + [""] * width)[:width]
+                for cell, value in zip(table.add_row().cells, padded_row):
+                    cell.text = value
+        else:
+            pos += 1  # every remaining case consumes exactly one line
+
+            heading = re.match(r"^(#{1,6})\s+(.*)$", current)
+            if heading:
+                hashes, title = heading.groups()
+                doc.add_heading(title.strip(), level=min(4, len(hashes)))
+                continue
+
+            numbered = re.match(r"^\d+\.\s+", current)
+            if numbered:
+                # Keep only the text after the "N. " marker.
+                doc.add_paragraph(current[numbered.end():], style="List Number")
+            elif current.startswith("- "):
+                doc.add_paragraph(current[2:].strip(), style="List Bullet")
+            else:
+                doc.add_paragraph(current)
+
+
+def main() -> None:
+    """Convert INPUT_MD into a new Word document saved at OUTPUT_DOCX.
+
+    Reads the Markdown file as UTF-8, fills a fresh document through
+    ``convert_markdown_to_docx``, saves it, and prints the output path.
+    """
+    source_text = INPUT_MD.read_text(encoding="utf-8")
+    document = Document()
+    convert_markdown_to_docx(source_text, document)
+    document.save(OUTPUT_DOCX)
+    # Echo the destination so the caller can see where the file was written.
+    print(OUTPUT_DOCX)
+
+
+# Run the conversion only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()