feat: 完善论文排版脚本
重构 format_thesis_docx.py 增强排版功能;新增 markdown_to_docx.py 转换工具;移除旧版排版文档

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
97
example/markdown_to_docx.py
Normal file
97
example/markdown_to_docx.py
Normal file
@@ -0,0 +1,97 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
import re
|
||||
from docx import Document
|
||||
|
||||
|
||||
# Resolve the default input/output next to this script instead of a
# machine-specific absolute path, so the script works from any checkout.
# NOTE(review): assumes the thesis Markdown file lives in the same directory
# as this script (the commit adds it under example/) - confirm.
_BASE_DIR = Path(__file__).resolve().parent

# Default Markdown source file.
INPUT_MD = _BASE_DIR / "萌贝母婴商城毕业论文初稿-2026版.md"

# Default output: same location and name as the source, with a .docx suffix.
OUTPUT_DOCX = INPUT_MD.with_suffix(".docx")
|
||||
|
||||
|
||||
def is_table_separator(line: str) -> bool:
    """Return True if ``line`` is a Markdown pipe-table delimiter row.

    A delimiter row must start with ``|`` (after surrounding whitespace is
    removed) and every cell must consist of one or more hyphens, optionally
    preceded and/or followed by a single alignment colon, e.g.
    ``| --- | :---: | ---: |``.

    Rows that contain no hyphens (such as ``| | |`` or ``|:|:|``) are ordinary
    rows with empty/odd cells, not delimiter rows, and are rejected.

    Args:
        line: A single line of Markdown text.

    Returns:
        True when the line is a delimiter row, False otherwise.
    """
    stripped = line.strip()
    if not stripped.startswith("|"):
        return False

    # Drop the outer pipes, then validate each cell on its own so that a row
    # made only of pipes, colons or blanks is not mistaken for a delimiter.
    cells = stripped.strip("|").split("|")
    return all(re.fullmatch(r":?-+:?", cell.strip()) is not None for cell in cells)
|
||||
|
||||
|
||||
def split_table_row(line: str) -> list[str]:
    """Split one Markdown pipe-table row into its trimmed cell texts.

    Exactly one leading and one trailing pipe are treated as the row border;
    any further pipes delimit cells, so empty first/last cells are kept
    (``|| a |`` yields ``["", "a"]``).  A pipe escaped as ``\\|`` is part of
    the cell content and is returned as a literal ``|``.

    Args:
        line: A single table row, with or without the outer border pipes.

    Returns:
        The cell texts in column order, each stripped of surrounding
        whitespace.  An empty row yields ``[""]``.
    """
    raw = line.strip()

    # Remove only the border pipes; stripping every outer pipe would silently
    # drop empty edge cells and shift the remaining columns.
    if raw.startswith("|"):
        raw = raw[1:]
    if raw.endswith("|") and not raw.endswith("\\|"):
        raw = raw[:-1]

    # Split on pipes that are not escaped, then unescape the ones kept.
    cells = re.split(r"(?<!\\)\|", raw)
    return [cell.replace("\\|", "|").strip() for cell in cells]
|
||||
|
||||
|
||||
def convert_markdown_to_docx(md_text: str, doc: Document) -> None:
    """Append a block-level rendering of Markdown text to ``doc``.

    The text is scanned line by line and each line is mapped to one of:

    * blank line -> empty paragraph
    * fenced code block (``` or ~~~) -> one plain paragraph per code line,
      indentation preserved, with the fence marker lines themselves omitted
    * pipe table (header row followed by a delimiter row) -> table
    * ``#`` heading -> heading, with the level capped at 4
    * ``1. `` item -> paragraph in the "List Number" style
    * ``- `` item -> paragraph in the "List Bullet" style
    * anything else -> plain paragraph

    Inline markup (emphasis, links, inline code) is not interpreted; the text
    is inserted as written.

    Args:
        md_text: Markdown source text.
        doc: Target document.  Only ``add_paragraph``, ``add_heading`` and
            ``add_table`` are called on it.
    """
    # Compiled once here rather than on every loop iteration.
    heading_re = re.compile(r"^(#{1,6})\s+(.*)$")
    ordered_re = re.compile(r"^\d+\.\s+")
    fence_re = re.compile(r"^(`{3,}|~{3,})(.*)$")

    lines = md_text.splitlines()
    i = 0

    while i < len(lines):
        line = lines[i]
        stripped = line.strip()

        if not stripped:
            doc.add_paragraph("")
            i += 1
            continue

        # Fenced code block: its content must not be parsed as Markdown,
        # otherwise a "# comment" inside the code turns into a heading.
        fence_match = fence_re.match(stripped)
        if fence_match:
            fence, info = fence_match.groups()
            # A backtick fence whose remainder contains a backtick is inline
            # code on a single line (e.g. ```x```), not a block opener.
            if not (fence[0] == "`" and "`" in info):
                i += 1
                while i < len(lines):
                    candidate = lines[i].strip()
                    # Closing fence: same character only, at least as long
                    # as the opening fence.
                    if len(candidate) >= len(fence) and set(candidate) == {fence[0]}:
                        i += 1
                        break
                    # Keep leading whitespace so code indentation survives.
                    doc.add_paragraph(lines[i].rstrip())
                    i += 1
                continue

        # Table block
        if stripped.startswith("|") and i + 1 < len(lines) and is_table_separator(lines[i + 1]):
            headers = split_table_row(lines[i])
            i += 2  # skip the header row and the delimiter row
            rows: list[list[str]] = []
            while i < len(lines):
                row_line = lines[i].strip()
                if not row_line.startswith("|"):
                    break
                rows.append(split_table_row(lines[i]))
                i += 1

            # The header row fixes the column count; shorter body rows are
            # padded with empty cells and surplus cells are ignored.
            cols = max(1, len(headers))
            table = doc.add_table(rows=1, cols=cols)
            for c in range(cols):
                table.cell(0, c).text = headers[c] if c < len(headers) else ""

            for row in rows:
                cells = table.add_row().cells
                for c in range(cols):
                    cells[c].text = row[c] if c < len(row) else ""
            continue

        # Heading
        heading_match = heading_re.match(stripped)
        if heading_match:
            # Levels deeper than 4 are flattened to level 4.
            level = min(4, len(heading_match.group(1)))
            text = heading_match.group(2).strip()
            doc.add_heading(text, level=level)
            i += 1
            continue

        # Ordered list
        ordered_match = ordered_re.match(stripped)
        if ordered_match:
            # Drop the "N. " marker; numbering comes from the list style.
            doc.add_paragraph(stripped[ordered_match.end():], style="List Number")
            i += 1
            continue

        # Unordered list
        if stripped.startswith("- "):
            doc.add_paragraph(stripped[2:].strip(), style="List Bullet")
            i += 1
            continue

        # Plain paragraph
        doc.add_paragraph(stripped)
        i += 1
|
||||
|
||||
|
||||
def main(input_md: Path | None = None, output_docx: Path | None = None) -> None:
    """Convert a Markdown file to a .docx file and print the output path.

    Args:
        input_md: Markdown file to read (UTF-8).  Defaults to ``INPUT_MD``.
        output_docx: Destination .docx path.  Defaults to ``OUTPUT_DOCX``.

    Raises:
        OSError: If the input file cannot be read (for example
            ``FileNotFoundError`` when it does not exist).
    """
    source = INPUT_MD if input_md is None else Path(input_md)
    target = OUTPUT_DOCX if output_docx is None else Path(output_docx)

    md_text = source.read_text(encoding="utf-8")
    doc = Document()
    convert_markdown_to_docx(md_text, doc)
    doc.save(target)
    print(target)
|
||||
|
||||
|
||||
# Script entry point: run the conversion only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user