"""Convert a Markdown thesis draft into a Word (.docx) document.

Only a small Markdown subset is understood: ATX headings (``#`` .. ``######``),
pipe tables, single-line ordered / unordered list items and plain paragraphs.
Every other line is copied into the document as an ordinary paragraph.
"""

from __future__ import annotations

import re
from pathlib import Path

from docx import Document

INPUT_MD = Path("/Users/apple/code/bs/mying/example/萌贝母婴商城毕业论文初稿-2026版.md")
OUTPUT_DOCX = Path("/Users/apple/code/bs/mying/example/萌贝母婴商城毕业论文初稿-2026版.docx")

# Headings deeper than this are flattened to this level.
_MAX_HEADING_LEVEL = 4

# Patterns are compiled once because they are applied to every input line.
_HEADING_RE = re.compile(r"^(#{1,6})\s+(.*)$")
_ORDERED_ITEM_RE = re.compile(r"^\d+\.\s+")
_BULLET_ITEM_RE = re.compile(r"^[-*+]\s+")
_SEPARATOR_CELL_RE = re.compile(r":?-+:?")
_UNESCAPED_PIPE_RE = re.compile(r"(?<!\\)\|")


def is_table_separator(line: str) -> bool:
    """Return True if *line* is the delimiter row of a pipe table.

    The line must start with ``|`` and every cell must consist of one or more
    dashes, optionally wrapped in alignment colons (``---``, ``:--``, ``:-:``).
    Lines made only of pipes, colons or blanks (for example ``| | |``) are
    rejected, so a row of empty cells is never mistaken for a delimiter row.
    """
    stripped = line.strip()
    if not stripped.startswith("|"):
        return False
    return all(
        _SEPARATOR_CELL_RE.fullmatch(cell) is not None
        for cell in split_table_row(stripped)
    )


def split_table_row(line: str) -> list[str]:
    """Split one pipe-table row into its whitespace-trimmed cell texts.

    At most one leading and one trailing pipe are treated as the row border,
    so an empty first cell (``||x|``) keeps its position instead of shifting
    the remaining cells to the left. A pipe escaped as ``\\|`` is kept as a
    literal ``|`` inside the cell rather than being used as a delimiter.
    """
    raw = line.strip()
    if raw.startswith("|"):
        raw = raw[1:]
    # A trailing "\|" is cell content, not the closing border.
    if raw.endswith("|") and not raw.endswith("\\|"):
        raw = raw[:-1]
    return [
        cell.strip().replace("\\|", "|")
        for cell in _UNESCAPED_PIPE_RE.split(raw)
    ]


def _add_table(doc: Document, headers: list[str], rows: list[list[str]]) -> None:
    """Append a table with one header row followed by *rows* to *doc*.

    The header row fixes the column count. Body rows that are shorter leave
    the remaining cells empty; cells beyond the header width are ignored.
    """
    cols = max(1, len(headers))
    table = doc.add_table(rows=1, cols=cols)
    for cell, text in zip(table.rows[0].cells, headers):
        cell.text = text
    for row in rows:
        # zip() stops at the shorter side, which gives both the padding and
        # the truncation behaviour described above.
        for cell, text in zip(table.add_row().cells, row):
            cell.text = text


def convert_markdown_to_docx(md_text: str, doc: Document) -> None:
    """Append the content of *md_text* to *doc*, one Markdown line at a time.

    Args:
        md_text: Markdown source text.
        doc: Target document; it is modified in place through its
            ``add_paragraph``, ``add_heading`` and ``add_table`` methods.

    Line handling, in order of precedence:
        * blank line -> empty paragraph;
        * ``|``-prefixed line followed by a delimiter row -> table, which
          extends over all directly following ``|``-prefixed lines;
        * ``#`` heading -> heading, with the level capped at 4;
        * ``1. text`` -> paragraph in the "List Number" style;
        * ``- text`` / ``* text`` / ``+ text`` -> "List Bullet" paragraph;
        * anything else -> plain paragraph holding the stripped line.
    """
    lines = md_text.splitlines()
    total = len(lines)
    i = 0
    while i < total:
        stripped = lines[i].strip()

        if not stripped:
            doc.add_paragraph("")
            i += 1
            continue

        # Table block: header row, delimiter row, then zero or more body rows.
        if (
            stripped.startswith("|")
            and i + 1 < total
            and is_table_separator(lines[i + 1])
        ):
            headers = split_table_row(stripped)
            i += 2  # skip the header row and the delimiter row
            rows: list[list[str]] = []
            while i < total and lines[i].strip().startswith("|"):
                rows.append(split_table_row(lines[i]))
                i += 1
            _add_table(doc, headers, rows)
            continue

        heading = _HEADING_RE.match(stripped)
        ordered = _ORDERED_ITEM_RE.match(stripped)
        bullet = _BULLET_ITEM_RE.match(stripped)
        if heading:
            level = min(_MAX_HEADING_LEVEL, len(heading.group(1)))
            doc.add_heading(heading.group(2).strip(), level=level)
        elif ordered:
            doc.add_paragraph(stripped[ordered.end():], style="List Number")
        elif bullet:
            doc.add_paragraph(stripped[bullet.end():], style="List Bullet")
        else:
            doc.add_paragraph(stripped)
        i += 1


def main(input_md: Path = INPUT_MD, output_docx: Path = OUTPUT_DOCX) -> None:
    """Convert *input_md* to *output_docx* and print the output path.

    Args:
        input_md: UTF-8 encoded Markdown file to read.
        output_docx: Path the generated document is saved to.
    """
    md_text = input_md.read_text(encoding="utf-8")
    doc = Document()
    convert_markdown_to_docx(md_text, doc)
    doc.save(output_docx)
    print(output_docx)


if __name__ == "__main__":
    main()