#!/usr/bin/env python3
"""Generate a formatted DOCX thesis from markdown files.

The output is meant to match the layout of a reference thesis document.

Formatting applied by this script:
- Page: A4 (11906 x 16838 twips), 2.5 cm margins on all sides.
- Body text: SimSun / Times New Roman, 12 pt, 1.5x line spacing
  (w:line=360 with lineRule=auto), first-line indent of two characters.
- Chapter heading: SimHei / Times New Roman, 22 pt, bold, centered.
- Section heading: SimHei / Times New Roman, 15 pt, bold, left.
- Subsection heading: SimHei / Times New Roman, 14 pt, bold, left.
- Abstract-style title: SimHei / Times New Roman, 15 pt, bold, centered.
- Caption: SimHei / Times New Roman, 10.5 pt, centered.
- Header: fixed university/thesis banner with a bottom rule.
- Footer: centered PAGE field.
"""

import os
import re
import sys

from docx import Document
from docx.shared import Pt, Cm, Inches, Twips, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.table import WD_TABLE_ALIGNMENT
from docx.enum.section import WD_ORIENT
from docx.oxml.ns import qn, nsdecls
from docx.oxml import parse_xml
from lxml import etree

THESIS_DIR = os.path.dirname(os.path.abspath(__file__))
DIAGRAMS_DIR = os.path.join(THESIS_DIR, 'diagrams')

# Widest an auto-sized picture may be, in centimetres.
_MAX_IMAGE_WIDTH_CM = 14

# Children of w:pPr that must come AFTER w:pBdr according to the
# WordprocessingML schema; used to insert the header border in order.
_PBDR_SUCCESSORS = (
    'w:shd', 'w:tabs', 'w:suppressAutoHyphens', 'w:kinsoku', 'w:wordWrap',
    'w:overflowPunct', 'w:topLinePunct', 'w:autoSpaceDE', 'w:autoSpaceDN',
    'w:bidi', 'w:adjustRightInd', 'w:snapToGrid', 'w:spacing', 'w:ind',
    'w:contextualSpacing', 'w:mirrorIndents', 'w:suppressOverlap', 'w:jc',
    'w:textDirection', 'w:textAlignment', 'w:textboxTightWrap',
    'w:outlineLvl', 'w:divId', 'w:cnfStyle', 'w:rPr', 'w:sectPr',
    'w:pPrChange',
)

# Children of w:tblPr that must come AFTER w:tblBorders.
_TBL_BORDERS_SUCCESSORS = (
    'w:shd', 'w:tblLayout', 'w:tblCellMar', 'w:tblLook', 'w:tblCaption',
    'w:tblDescription', 'w:tblPrChange',
)

# Chinese keywords prefix, followed by an ASCII or a full-width colon.
_CN_KEYWORDS_RE = re.compile('^关键词[:\uff1a]')
_EN_KEYWORDS_PREFIXES = ('Keywords:', 'Key words:')
# Markdown image; the alt text may be empty.
_IMAGE_RE = re.compile(r'!\[(.*?)\]\((.+?)\)')
# Caption lines start with the figure/table marker and an "N.M" number.
_CAPTION_RE = re.compile(r'^(图|表)\d+\.\d+')
# Bibliography entries such as "[12] ...".
_REFERENCE_RE = re.compile(r'^\[(\d+)\]\s*(.+)')
# Markdown table separator row: only pipes, colons, dashes and whitespace,
# with at least one dash.
_TABLE_SEPARATOR_RE = re.compile(r'^[\s|:-]*-[\s|:-]*$')


# ─── Helper functions ───

def set_run_font(run, cn_font='宋体', en_font='Times New Roman', size=Pt(12), bold=False, italic=False):
    """Apply size, weight, slant and both East-Asian and Latin fonts to a run.

    Args:
        run: python-docx ``Run`` to format.
        cn_font: Font name written to ``w:eastAsia``.
        en_font: Font name written to ``w:ascii`` and ``w:hAnsi``.
        size: Font size as a python-docx length.
        bold: Whether the run is bold.
        italic: Whether the run is italic.
    """
    font = run.font
    font.size = size
    font.bold = bold
    font.italic = italic
    font.name = en_font

    # python-docx has no public setter for the East-Asian font, so the
    # w:rFonts element is edited directly.
    r_fonts = run._element.get_or_add_rPr().get_or_add_rFonts()
    r_fonts.set(qn('w:eastAsia'), cn_font)
    r_fonts.set(qn('w:ascii'), en_font)
    r_fonts.set(qn('w:hAnsi'), en_font)


def set_paragraph_spacing(paragraph, line_spacing=360, before=0, after=0,
                          first_line_chars=None, first_line=None):
    """Set line spacing, space before/after and first-line indent.

    Falsy arguments (``0`` / ``None``) leave the corresponding attribute
    untouched rather than writing an explicit zero.

    Args:
        paragraph: python-docx ``Paragraph`` to modify.
        line_spacing: Value for ``w:line``; written with ``lineRule=auto``.
        before: Value for ``w:before``.
        after: Value for ``w:after``.
        first_line_chars: Value for ``w:firstLineChars``.
        first_line: Value for ``w:firstLine``.
    """
    # get_or_add_* inserts each child at its schema position, so the result
    # stays valid even when alignment (w:jc) was set on the paragraph first.
    p_pr = paragraph._element.get_or_add_pPr()

    spacing = p_pr.get_or_add_spacing()
    if line_spacing:
        spacing.set(qn('w:line'), str(line_spacing))
        spacing.set(qn('w:lineRule'), 'auto')
    if before:
        spacing.set(qn('w:before'), str(before))
    if after:
        spacing.set(qn('w:after'), str(after))

    if first_line_chars or first_line:
        ind = p_pr.get_or_add_ind()
        if first_line_chars:
            ind.set(qn('w:firstLineChars'), str(first_line_chars))
        if first_line:
            ind.set(qn('w:firstLine'), str(first_line))


def add_body_paragraph(doc, text, cn_font='宋体', en_font='Times New Roman',
                       size=Pt(12), bold=False, alignment=None,
                       first_line_indent=True, line_spacing=360):
    """Append a body paragraph and return it.

    Args:
        doc: Target python-docx ``Document``.
        text: Paragraph text.
        cn_font: East-Asian font name.
        en_font: Latin font name.
        size: Font size.
        bold: Whether the text is bold.
        alignment: Optional ``WD_ALIGN_PARAGRAPH`` member; ``None`` keeps the
            default alignment.
        first_line_indent: When true, indent the first line by two characters.
        line_spacing: Value passed to ``set_paragraph_spacing``.

    Returns:
        The new paragraph.
    """
    p = doc.add_paragraph()
    # Compare against None: WD_ALIGN_PARAGRAPH.LEFT is 0 and therefore falsy.
    if alignment is not None:
        p.alignment = alignment
    run = p.add_run(text)
    set_run_font(run, cn_font, en_font, size, bold)
    if first_line_indent:
        set_paragraph_spacing(p, line_spacing=line_spacing,
                              first_line_chars=200, first_line=480)
    else:
        set_paragraph_spacing(p, line_spacing=line_spacing)
    return p


def _add_styled_paragraph(doc, text, alignment, size, before, after):
    """Append a single-run SimHei paragraph with the given layout."""
    p = doc.add_paragraph()
    p.alignment = alignment
    run = p.add_run(text)
    set_run_font(run, '黑体', 'Times New Roman', size, bold=True)
    set_paragraph_spacing(p, line_spacing=360, before=before, after=after)
    return p


def add_heading_chapter(doc, text):
    """Append a chapter heading: 22 pt, bold, centered. Returns the paragraph."""
    return _add_styled_paragraph(
        doc, text, WD_ALIGN_PARAGRAPH.CENTER, Pt(22), before=312, after=312)


def add_heading_section(doc, text):
    """Append a section heading: 15 pt, bold, left. Returns the paragraph."""
    return _add_styled_paragraph(
        doc, text, WD_ALIGN_PARAGRAPH.LEFT, Pt(15), before=156, after=156)


def add_heading_subsection(doc, text):
    """Append a subsection heading: 14 pt, bold, left. Returns the paragraph."""
    return _add_styled_paragraph(
        doc, text, WD_ALIGN_PARAGRAPH.LEFT, Pt(14), before=78, after=78)


def add_title(doc, text):
    """Append an abstract-style title: 15 pt, bold, centered. Returns the paragraph."""
    return _add_styled_paragraph(
        doc, text, WD_ALIGN_PARAGRAPH.CENTER, Pt(15), before=240, after=60)


def add_caption(doc, text):
    """Append a figure/table caption: 10.5 pt, not bold, centered."""
    p = doc.add_paragraph()
    p.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run = p.add_run(text)
    set_run_font(run, '黑体', 'Times New Roman', Pt(10.5), bold=False)
    set_paragraph_spacing(p, line_spacing=360, before=60, after=60)
    return p


def _auto_image_width(image_path):
    """Return the picture width at 96 DPI, capped at the maximum width.

    Falls back to the maximum width when Pillow is not installed or the
    file cannot be read as an image.
    """
    try:
        # Imported lazily so Pillow stays an optional dependency.
        from PIL import Image
        with Image.open(image_path) as img:
            pixel_width = img.size[0]
    except (ImportError, OSError):
        return Cm(_MAX_IMAGE_WIDTH_CM)
    return Cm(min(pixel_width * 2.54 / 96, _MAX_IMAGE_WIDTH_CM))


def add_image(doc, image_path, width=None):
    """Append a centered picture, or a placeholder text if the file is missing.

    Args:
        doc: Target python-docx ``Document``.
        image_path: Path of the image file.
        width: Explicit picture width; when omitted the width is derived
            from the pixel width and capped.

    Returns:
        The paragraph holding the picture or placeholder.
    """
    p = doc.add_paragraph()
    p.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run = p.add_run()
    if not os.path.exists(image_path):
        run.add_text(f'[图片缺失: {image_path}]')
    elif width:
        run.add_picture(image_path, width=width)
    else:
        run.add_picture(image_path, width=_auto_image_width(image_path))
    set_paragraph_spacing(p, line_spacing=360)
    return p


def _get_or_add_tc_pr(cell):
    """Return the cell's w:tcPr element, creating it when absent."""
    tc_pr = cell._element.find(qn('w:tcPr'))
    if tc_pr is None:
        tc_pr = parse_xml(f'<w:tcPr {nsdecls("w")}/>')
        cell._element.insert(0, tc_pr)
    return tc_pr


def _center_cell_vertically(tc_pr):
    """Add a centered w:vAlign to the cell properties unless one exists."""
    if tc_pr.find(qn('w:vAlign')) is None:
        tc_pr.append(parse_xml(f'<w:vAlign {nsdecls("w")} w:val="center"/>'))


def _fill_cell(cell, text, cn_font, bold):
    """Write centered 10.5 pt text into the first paragraph of a cell."""
    p = cell.paragraphs[0]
    p.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run = p.add_run(text.strip())
    set_run_font(run, cn_font, 'Times New Roman', Pt(10.5), bold=bold)
    set_paragraph_spacing(p, line_spacing=300)


def add_table_from_md(doc, headers, rows):
    """Append a bordered, centered table built from markdown table data.

    Args:
        doc: Target python-docx ``Document``.
        headers: Header cell texts; their count fixes the column count.
        rows: Data rows, each a list of cell texts. Cells beyond the header
            count are ignored; missing cells stay empty.

    Returns:
        The new table.
    """
    table = doc.add_table(rows=1 + len(rows), cols=len(headers))
    table.alignment = WD_TABLE_ALIGNMENT.CENTER

    tbl = table._tbl
    tbl_pr = tbl.find(qn('w:tblPr'))
    if tbl_pr is None:
        tbl_pr = parse_xml(f'<w:tblPr {nsdecls("w")}/>')
        tbl.insert(0, tbl_pr)
    # NOTE(review): the border attribute values were lost from the source;
    # thin single black lines are assumed - confirm against the reference.
    borders = parse_xml(
        f'<w:tblBorders {nsdecls("w")}>'
        '<w:top w:val="single" w:sz="4" w:space="0" w:color="000000"/>'
        '<w:left w:val="single" w:sz="4" w:space="0" w:color="000000"/>'
        '<w:bottom w:val="single" w:sz="4" w:space="0" w:color="000000"/>'
        '<w:right w:val="single" w:sz="4" w:space="0" w:color="000000"/>'
        '<w:insideH w:val="single" w:sz="4" w:space="0" w:color="000000"/>'
        '<w:insideV w:val="single" w:sz="4" w:space="0" w:color="000000"/>'
        '</w:tblBorders>'
    )
    # Insert at the schema position instead of appending after w:tblLook.
    tbl_pr.insert_element_before(borders, *_TBL_BORDERS_SUCCESSORS)

    # Header row: gray background, bold text, vertically centered.
    for col, header in enumerate(headers):
        cell = table.cell(0, col)
        cell.text = ''
        tc_pr = _get_or_add_tc_pr(cell)
        shading = tc_pr.find(qn('w:shd'))
        if shading is None:
            tc_pr.append(parse_xml(
                f'<w:shd {nsdecls("w")} w:val="clear" w:color="auto" w:fill="D9D9D9"/>'
            ))
        else:
            shading.set(qn('w:fill'), 'D9D9D9')
        _center_cell_vertically(tc_pr)
        _fill_cell(cell, header, '黑体', bold=True)

    # Data rows: regular text, vertically centered.
    column_count = len(headers)
    for row_idx, row in enumerate(rows, start=1):
        for col, cell_text in enumerate(row[:column_count]):
            cell = table.cell(row_idx, col)
            cell.text = ''
            _center_cell_vertically(_get_or_add_tc_pr(cell))
            _fill_cell(cell, cell_text, '宋体', bold=False)

    return table


def setup_page(doc):
    """Configure the first section: A4 page, margins, header and footer."""
    section = doc.sections[0]
    section.page_width = Twips(11906)
    section.page_height = Twips(16838)
    section.top_margin = Cm(2.5)
    section.bottom_margin = Cm(2.5)
    section.left_margin = Cm(2.5)
    section.right_margin = Cm(2.5)
    section.header_distance = Cm(1.27)
    section.footer_distance = Cm(1.27)

    # Header text.
    header = section.header
    header.is_linked_to_previous = False
    hp = header.paragraphs[0]
    hp.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run = hp.add_run('大连科技学院2026届本科毕业设计(论文)')
    set_run_font(run, '宋体', 'Times New Roman', Pt(9), bold=False)

    # Rule under the header paragraph.
    # NOTE(review): the border attribute values were lost from the source;
    # a thin single black line is assumed - confirm against the reference.
    p_bdr = parse_xml(
        f'<w:pBdr {nsdecls("w")}>'
        '<w:bottom w:val="single" w:sz="4" w:space="1" w:color="000000"/>'
        '</w:pBdr>'
    )
    # w:pBdr must precede w:jc (written by the alignment above).
    hp._element.get_or_add_pPr().insert_element_before(p_bdr, *_PBDR_SUCCESSORS)

    # Footer: a centered PAGE field made of begin / instruction / end runs.
    footer = section.footer
    footer.is_linked_to_previous = False
    fp = footer.paragraphs[0]
    fp.alignment = WD_ALIGN_PARAGRAPH.CENTER

    field_parts = (
        f'<w:fldChar {nsdecls("w")} w:fldCharType="begin"/>',
        f'<w:instrText {nsdecls("w")} xml:space="preserve"> PAGE </w:instrText>',
        f'<w:fldChar {nsdecls("w")} w:fldCharType="end"/>',
    )
    field_runs = []
    for xml in field_parts:
        field_run = fp.add_run()
        field_run._element.append(parse_xml(xml))
        field_runs.append(field_run)
    for field_run in field_runs:
        set_run_font(field_run, '宋体', 'Times New Roman', Pt(9))


def _add_blank_paragraphs(doc, count):
    """Append ``count`` empty paragraphs used as vertical spacing."""
    for _ in range(count):
        set_paragraph_spacing(doc.add_paragraph(), line_spacing=360)


def _add_cover_line(doc, text, size, line_spacing, after=0):
    """Append one centered, bold SimHei line of the cover page."""
    p = doc.add_paragraph()
    p.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run = p.add_run(text)
    set_run_font(run, '黑体', 'Times New Roman', size, bold=True)
    set_paragraph_spacing(p, line_spacing=line_spacing, after=after)
    return p


def add_cover_page(doc):
    """Append the thesis cover page followed by a page break."""
    _add_blank_paragraphs(doc, 3)

    # University name and thesis type.
    _add_cover_line(doc, '大连科技学院', Pt(26), line_spacing=360)
    _add_cover_line(doc, '毕业设计(论文)', Pt(26), line_spacing=360, after=600)

    # Thesis title.
    _add_blank_paragraphs(doc, 2)
    _add_cover_line(doc, '论文题目:基于Spring Boot的养老院管理系统的设计与实现',
                    Pt(16), line_spacing=480)

    _add_blank_paragraphs(doc, 4)

    # Label / value pairs; each value is underlined so it reads as a form field.
    info_fields = [
        ('学 院:', '网络与通信学院'),
        ('专 业:', '网络工程'),
        ('学 号:', ' '),
        ('学生姓名:', ' '),
        ('指导教师:', ' '),
    ]
    for label, value in info_fields:
        p = doc.add_paragraph()
        p.alignment = WD_ALIGN_PARAGRAPH.CENTER
        label_run = p.add_run(label)
        set_run_font(label_run, '宋体', 'Times New Roman', Pt(14), bold=False)
        value_run = p.add_run(value)
        set_run_font(value_run, '宋体', 'Times New Roman', Pt(14), bold=False)
        value_run.font.underline = True
        set_paragraph_spacing(p, line_spacing=480)

    doc.add_page_break()


def parse_markdown_files():
    """Read the four thesis markdown files and return them joined as one string.

    The files are read as UTF-8 from ``THESIS_DIR`` and joined with a blank
    line between them.

    Raises:
        OSError: If any of the files cannot be opened.
    """
    names = ('论文.md', 'chapter3.md', 'chapter4.md', 'chapter5_6_7.md')
    parts = []
    for name in names:
        with open(os.path.join(THESIS_DIR, name), 'r', encoding='utf-8') as fh:
            parts.append(fh.read())
    return '\n\n'.join(parts)


def _is_table_row(line):
    """Return True when the line looks like a markdown table row."""
    return line.strip().startswith('|')


def _split_table_row(line):
    """Split a markdown table row into stripped cell texts."""
    return [cell.strip() for cell in line.strip().strip('|').split('|')]


def _consume_table(lines, start):
    """Parse the markdown table whose header row is ``lines[start]``.

    Returns:
        ``(headers, rows, next_index)`` where ``next_index`` is the index of
        the first line after the table. ``rows`` may be empty.
    """
    headers = _split_table_row(lines[start])
    i = start + 1
    if i < len(lines) and _TABLE_SEPARATOR_RE.match(lines[i]):
        i += 1
    rows = []
    while i < len(lines) and _is_table_row(lines[i]):
        rows.append(_split_table_row(lines[i]))
        i += 1
    return headers, rows, i


def _add_keywords_paragraph(doc, label, text, cn_font):
    """Append an unindented paragraph with a bold label followed by text."""
    p = doc.add_paragraph()
    label_run = p.add_run(label)
    set_run_font(label_run, '黑体' if cn_font == '宋体' else cn_font,
                 'Times New Roman', Pt(12), bold=True)
    text_run = p.add_run(text)
    set_run_font(text_run, cn_font, 'Times New Roman', Pt(12))
    set_paragraph_spacing(p, line_spacing=360)
    return p


def process_markdown(doc, md_text):
    """Convert the markdown text line by line into formatted document content.

    Recognised constructs, in the order they are tested: the table of
    contents section (skipped), chapter headings, abstract titles, keyword
    lines, section and subsection headings, images, captions, tables and
    bibliography entries. Everything else becomes an indented body paragraph.

    Args:
        doc: Target python-docx ``Document``.
        md_text: Complete markdown source.
    """
    lines = md_text.split('\n')
    i = 0
    skip_toc = False

    while i < len(lines):
        line = lines[i].rstrip()
        stripped = line.strip()

        if not stripped:
            i += 1
            continue

        # The markdown table of contents is not reproduced in the output.
        if stripped == '## 目录':
            skip_toc = True
            i += 1
            continue
        if skip_toc:
            if line.startswith('# ') or line.startswith('## 摘要'):
                # First heading after the TOC: handle it normally below.
                skip_toc = False
            else:
                i += 1
                continue

        # Top-level headings: the thesis title is already on the cover page,
        # so only chapter headings are emitted.
        if line.startswith(('# 基于', '# 第')):
            text = line[2:].strip()
            if '第' in text and '章' in text:
                add_heading_chapter(doc, text)
            i += 1
            continue

        # Abstract titles.
        if stripped == '## 摘要':
            add_title(doc, '摘 要')
            i += 1
            continue
        if stripped == '## Abstract':
            doc.add_page_break()
            add_title(doc, 'Abstract')
            i += 1
            continue

        # Chinese keywords line (ASCII or full-width colon after the label).
        cn_keywords = _CN_KEYWORDS_RE.match(line)
        if cn_keywords:
            _add_keywords_paragraph(doc, '关键词:', line[cn_keywords.end():], '宋体')
            i += 1
            continue

        # English keywords line, which also ends the English abstract page.
        if line.startswith(_EN_KEYWORDS_PREFIXES):
            kw_text = line.split(':', 1)[1].strip()
            _add_keywords_paragraph(doc, 'Key words: ', kw_text, 'Times New Roman')
            doc.add_page_break()
            i += 1
            continue

        # Section headings; references and acknowledgements start a new page
        # and are styled like chapters.
        if line.startswith('## '):
            text = line[3:].strip()
            if text == '参考文献':
                doc.add_page_break()
                add_heading_chapter(doc, text)
            elif text == '致谢':
                doc.add_page_break()
                add_heading_chapter(doc, '致 谢')
            else:
                add_heading_section(doc, text)
            i += 1
            continue

        if line.startswith('### '):
            add_heading_subsection(doc, line[4:].strip())
            i += 1
            continue

        # Image; the path is resolved relative to the thesis directory.
        img_match = _IMAGE_RE.match(line)
        if img_match:
            add_image(doc, os.path.join(THESIS_DIR, img_match.group(2)))
            i += 1
            continue

        if _CAPTION_RE.match(stripped):
            add_caption(doc, stripped)
            i += 1
            continue

        # Table: consume the header, separator and all data rows at once so
        # a header-only table is still emitted.
        if _is_table_row(line):
            headers, rows, i = _consume_table(lines, i)
            add_table_from_md(doc, headers, rows)
            continue

        # Bibliography entry: smaller font, no first-line indent.
        if _REFERENCE_RE.match(stripped):
            p = doc.add_paragraph()
            run = p.add_run(stripped)
            set_run_font(run, '宋体', 'Times New Roman', Pt(10.5))
            set_paragraph_spacing(p, line_spacing=360)
            i += 1
            continue

        # Everything else, including numbered items, is body text.
        add_body_paragraph(doc, stripped, first_line_indent=True)
        i += 1


def main():
    """Build the thesis document and save it next to this script."""
    doc = Document()

    setup_page(doc)
    add_cover_page(doc)

    md_text = parse_markdown_files()
    process_markdown(doc, md_text)

    output_path = os.path.join(THESIS_DIR, '基于Spring Boot的养老院管理系统的设计与实现.docx')
    doc.save(output_path)
    print(f'Thesis saved to: {output_path}')
    print(f'File size: {os.path.getsize(output_path) / 1024:.1f} KB')


if __name__ == '__main__':
    main()