#!/usr/bin/env python3
"""
Generate formatted DOCX thesis from markdown files.
Matches the style of the reference document (2106090117-佟欣鑫-论文.docx).
Formatting spec (from reference analysis):
- Page: A4 (11906x16838 twips), margins 2.5cm all sides
- Normal text: 宋体/Times New Roman, 小四(12pt/sz=24), line spacing 1.5x(360twips), first-line indent 2chars
- Heading 1 (章): 黑体/Times New Roman, 二号(22pt/sz=44), bold, centered, spacing before/after
- Heading 2 (节): 黑体/Arial, 小三(15pt/sz=32 half-pt), bold, left
- Heading 3 (小节): 黑体, 四号(14pt/sz=28), bold, left
- Title (摘要/Abstract): 黑体, 小三(15pt/sz=32), bold, centered
- Caption: 黑体, 五号(10.5pt/sz=20)
- Header: 大连科技学院2026届本科毕业设计(论文)
- Footer: page numbers
- TOC styles: toc1=黑体14pt, toc2=宋体14pt indent
"""
import os
import re
import sys
from docx import Document
from docx.shared import Pt, Cm, Inches, Twips, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.table import WD_TABLE_ALIGNMENT
from docx.enum.section import WD_ORIENT
from docx.oxml.ns import qn, nsdecls
from docx.oxml import parse_xml
from lxml import etree
# Directory that contains this script; the markdown inputs and the generated
# DOCX are resolved relative to it.
THESIS_DIR = os.path.dirname(os.path.abspath(__file__))
# 'diagrams' sub-directory of THESIS_DIR (not referenced elsewhere in the
# visible part of this file).
DIAGRAMS_DIR = os.path.join(THESIS_DIR, 'diagrams')
# ─── Helper functions ───
def set_run_font(run, cn_font='宋体', en_font='Times New Roman', size=Pt(12), bold=False, italic=False):
    """Apply size, weight, slant and a CJK/Latin font pair to a run.

    ``run.font.name`` covers the Latin typeface; the East Asian typeface is
    written straight onto the run's ``w:rFonts`` element so CJK glyphs use
    ``cn_font``.

    Args:
        run: python-docx run to format.
        cn_font: Typeface stored in ``w:eastAsia``.
        en_font: Typeface stored in ``w:ascii`` and ``w:hAnsi``.
        size: Font size as a ``docx.shared`` length, e.g. ``Pt(12)``.
        bold: Whether the run is bold.
        italic: Whether the run is italic.
    """
    font = run.font
    font.size = size
    font.bold = bold
    font.italic = italic
    font.name = en_font

    # get_or_add_* creates a missing element at its schema-correct position,
    # which avoids hand-written XML snippets for the fallback case.
    rPr = run._element.get_or_add_rPr()
    rFonts = rPr.get_or_add_rFonts()
    rFonts.set(qn('w:eastAsia'), cn_font)
    rFonts.set(qn('w:ascii'), en_font)
    rFonts.set(qn('w:hAnsi'), en_font)
def set_paragraph_spacing(paragraph, line_spacing=360, before=0, after=0, first_line_chars=None, first_line=None):
    """Set line spacing, space before/after and first-line indent on a paragraph.

    Values are written as raw WordprocessingML attributes, so they use the
    units of those attributes rather than ``docx.shared`` lengths.

    Args:
        paragraph: python-docx paragraph to modify.
        line_spacing: Value for ``w:line`` (written with ``lineRule="auto"``);
            a falsy value leaves line spacing untouched.
        before: Value for ``w:before``; skipped when falsy.
        after: Value for ``w:after``; skipped when falsy.
        first_line_chars: Value for ``w:firstLineChars``; skipped when falsy.
        first_line: Value for ``w:firstLine``; skipped when falsy.
    """
    # get_or_add_* inserts missing elements in schema order, so w:spacing and
    # w:ind land before an existing w:jc instead of being appended after it.
    pPr = paragraph._element.get_or_add_pPr()

    spacing = pPr.get_or_add_spacing()
    if line_spacing:
        spacing.set(qn('w:line'), str(line_spacing))
        spacing.set(qn('w:lineRule'), 'auto')
    if before:
        spacing.set(qn('w:before'), str(before))
    if after:
        spacing.set(qn('w:after'), str(after))

    if not (first_line_chars or first_line):
        return
    ind = pPr.get_or_add_ind()
    if first_line_chars:
        ind.set(qn('w:firstLineChars'), str(first_line_chars))
    if first_line:
        ind.set(qn('w:firstLine'), str(first_line))
def add_body_paragraph(doc, text, cn_font='宋体', en_font='Times New Roman', size=Pt(12),
                       bold=False, alignment=None, first_line_indent=True, line_spacing=360):
    """Append a body-text paragraph and return it.

    Args:
        doc: Document (or other container with ``add_paragraph``) to append to.
        text: Paragraph text, added as a single run.
        cn_font: East Asian typeface for the run.
        en_font: Latin typeface for the run.
        size: Font size as a ``docx.shared`` length.
        bold: Whether the run is bold.
        alignment: Optional ``WD_ALIGN_PARAGRAPH`` member; ``None`` keeps the
            style's alignment.
        first_line_indent: When true, indent the first line by two characters
            (``firstLineChars=200`` with a ``firstLine=480`` fallback).
        line_spacing: Value passed through to ``set_paragraph_spacing``.

    Returns:
        The newly created paragraph.
    """
    p = doc.add_paragraph()
    # Compare against None: WD_ALIGN_PARAGRAPH.LEFT has the value 0, so a
    # truthiness test would silently ignore an explicit left alignment.
    if alignment is not None:
        p.alignment = alignment
    run = p.add_run(text)
    set_run_font(run, cn_font, en_font, size, bold)
    if first_line_indent:
        set_paragraph_spacing(p, line_spacing=line_spacing, first_line_chars=200, first_line=480)
    else:
        set_paragraph_spacing(p, line_spacing=line_spacing)
    return p
def add_heading_chapter(doc, text):
    """Append a centred chapter heading (黑体, 22 pt, bold) and return it.

    The paragraph gets 312 units of space before and after and a line
    spacing value of 360.
    """
    heading = doc.add_paragraph()
    heading.alignment = WD_ALIGN_PARAGRAPH.CENTER
    set_run_font(heading.add_run(text), '黑体', 'Times New Roman', Pt(22), bold=True)
    set_paragraph_spacing(heading, line_spacing=360, before=312, after=312)
    return heading
def add_heading_section(doc, text):
    """Append a left-aligned section heading (黑体, 15 pt, bold) and return it.

    The paragraph gets 156 units of space before and after and a line
    spacing value of 360.
    """
    heading = doc.add_paragraph()
    heading.alignment = WD_ALIGN_PARAGRAPH.LEFT
    set_run_font(heading.add_run(text), '黑体', 'Times New Roman', Pt(15), bold=True)
    set_paragraph_spacing(heading, line_spacing=360, before=156, after=156)
    return heading
def add_heading_subsection(doc, text):
    """Append a left-aligned subsection heading (黑体, 14 pt, bold) and return it.

    The paragraph gets 78 units of space before and after and a line
    spacing value of 360.
    """
    heading = doc.add_paragraph()
    heading.alignment = WD_ALIGN_PARAGRAPH.LEFT
    set_run_font(heading.add_run(text), '黑体', 'Times New Roman', Pt(14), bold=True)
    set_paragraph_spacing(heading, line_spacing=360, before=78, after=78)
    return heading
def add_title(doc, text):
    """Append a centred front-matter title (黑体, 15 pt, bold) and return it.

    Used for headings such as the abstract titles; the paragraph gets 240
    units of space before, 60 after, and a line spacing value of 360.
    """
    title = doc.add_paragraph()
    title.alignment = WD_ALIGN_PARAGRAPH.CENTER
    set_run_font(title.add_run(text), '黑体', 'Times New Roman', Pt(15), bold=True)
    set_paragraph_spacing(title, line_spacing=360, before=240, after=60)
    return title
def add_caption(doc, text):
    """Append a centred figure/table caption (黑体, 10.5 pt, regular) and return it.

    The paragraph gets 60 units of space before and after and a line
    spacing value of 360.
    """
    caption = doc.add_paragraph()
    caption.alignment = WD_ALIGN_PARAGRAPH.CENTER
    set_run_font(caption.add_run(text), '黑体', 'Times New Roman', Pt(10.5), bold=False)
    set_paragraph_spacing(caption, line_spacing=360, before=60, after=60)
    return caption
def _auto_image_width(image_path, max_cm=14):
    """Return the picture width: natural size at 96 px/inch, capped at ``max_cm``.

    Falls back to ``max_cm`` when Pillow is not installed.
    """
    try:
        # Imported inside the try so a missing Pillow really reaches the
        # ImportError fallback instead of propagating to the caller.
        from PIL import Image
    except ImportError:
        return Cm(max_cm)
    # The context manager closes the file handle once the size has been read.
    with Image.open(image_path) as img:
        pixel_width = img.size[0]
    return Cm(min(pixel_width * 2.54 / 96, max_cm))


def add_image(doc, image_path, width=None):
    """Append a centred paragraph containing the image and return it.

    Args:
        doc: Document to append to.
        image_path: Path of the image file.
        width: Optional explicit picture width (``docx.shared`` length). When
            omitted, the image's pixel width is converted at 96 px/inch and
            capped at 14 cm.

    Returns:
        The new paragraph. If ``image_path`` does not exist the paragraph
        holds a placeholder text naming the missing file instead of a picture.
    """
    p = doc.add_paragraph()
    p.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run = p.add_run()
    if not os.path.exists(image_path):
        run.add_text(f'[图片缺失: {image_path}]')
    elif width:
        run.add_picture(image_path, width=width)
    else:
        run.add_picture(image_path, width=_auto_image_width(image_path))
    set_paragraph_spacing(p, line_spacing=360)
    return p
def _fill_table_cell(cell, text, cn_font, bold, fill=None):
    """Write ``text`` into ``cell`` centred both ways, optionally shading it."""
    cell.text = ''
    tcPr = cell._element.get_or_add_tcPr()

    if fill is not None:
        shading = tcPr.find(qn('w:shd'))
        if shading is None:
            shading = parse_xml(f'<w:shd {nsdecls("w")} w:val="clear" w:color="auto"/>')
            tcPr.append(shading)
        shading.set(qn('w:fill'), fill)

    # Vertical centring; an existing w:vAlign is left as it is.
    if tcPr.find(qn('w:vAlign')) is None:
        tcPr.append(parse_xml(f'<w:vAlign {nsdecls("w")} w:val="center"/>'))

    p = cell.paragraphs[0]
    p.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run = p.add_run(text.strip())
    set_run_font(run, cn_font, 'Times New Roman', Pt(10.5), bold=bold)
    set_paragraph_spacing(p, line_spacing=300)


def add_table_from_md(doc, headers, rows):
    """Append a centred, fully bordered table built from markdown table data.

    The header row is bold 黑体 on a grey (``D9D9D9``) background; data rows
    use regular 宋体. All text is 10.5 pt and centred horizontally and
    vertically.

    Args:
        doc: Document to append to.
        headers: List of header cell strings; its length fixes the column count.
        rows: List of rows, each a list of cell strings. Cells beyond the
            header width are ignored; missing cells are left empty.

    Returns:
        The created python-docx table.
    """
    table = doc.add_table(rows=1 + len(rows), cols=len(headers))
    table.alignment = WD_TABLE_ALIGNMENT.CENTER

    tbl = table._tbl
    tblPr = tbl.find(qn('w:tblPr'))
    if tblPr is None:
        tblPr = parse_xml(f'<w:tblPr {nsdecls("w")}/>')
        tbl.insert(0, tblPr)

    # Outer frame plus inner grid lines.
    # NOTE(review): single 0.5 pt black lines are an assumed default — confirm
    # the weight/colour against the reference document.
    edges = ('top', 'left', 'bottom', 'right', 'insideH', 'insideV')
    border_xml = ''.join(
        f'<w:{edge} w:val="single" w:sz="4" w:space="0" w:color="000000"/>'
        for edge in edges
    )
    tblPr.append(parse_xml(f'<w:tblBorders {nsdecls("w")}>{border_xml}</w:tblBorders>'))

    for col, header in enumerate(headers):
        _fill_table_cell(table.cell(0, col), header, '黑体', bold=True, fill='D9D9D9')

    for r_idx, row in enumerate(rows, start=1):
        # Slicing drops surplus cells so a ragged row cannot index past the
        # last column.
        for c_idx, cell_text in enumerate(row[:len(headers)]):
            _fill_table_cell(table.cell(r_idx, c_idx), cell_text, '宋体', bold=False)
    return table
def setup_page(doc):
    """Configure the first section: A4 page, margins, running header and footer.

    - Page size 11906 x 16838 twips with 2.5 cm margins on all sides and
      1.27 cm header/footer distance.
    - Header: centred institution line in 9 pt with a rule underneath.
    - Footer: centred ``PAGE`` field in 9 pt.

    Args:
        doc: python-docx document whose first section is configured in place.
    """
    section = doc.sections[0]
    section.page_width = Twips(11906)
    section.page_height = Twips(16838)
    for side in ('top_margin', 'bottom_margin', 'left_margin', 'right_margin'):
        setattr(section, side, Cm(2.5))
    section.header_distance = Cm(1.27)
    section.footer_distance = Cm(1.27)

    # --- Header -----------------------------------------------------------
    header = section.header
    header.is_linked_to_previous = False
    hp = header.paragraphs[0]
    hp.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run = hp.add_run('大连科技学院2026届本科毕业设计(论文)')
    set_run_font(run, '宋体', 'Times New Roman', Pt(9), bold=False)

    # Rule under the header text.
    # NOTE(review): single 0.75 pt black line is an assumed default — confirm
    # against the reference document.
    pBdr = parse_xml(
        f'<w:pBdr {nsdecls("w")}>'
        '<w:bottom w:val="single" w:sz="6" w:space="1" w:color="000000"/>'
        '</w:pBdr>'
    )
    # w:pBdr must precede these siblings inside w:pPr; inserting before the
    # first one present keeps the element order schema-valid even though the
    # alignment (w:jc) was already written above.
    pbdr_successors = (
        'w:shd', 'w:tabs', 'w:suppressAutoHyphens', 'w:kinsoku', 'w:wordWrap',
        'w:overflowPunct', 'w:topLinePunct', 'w:autoSpaceDE', 'w:autoSpaceDN',
        'w:bidi', 'w:adjustRightInd', 'w:snapToGrid', 'w:spacing', 'w:ind',
        'w:contextualSpacing', 'w:mirrorIndents', 'w:suppressOverlap', 'w:jc',
        'w:textDirection', 'w:textAlignment', 'w:textboxTightWrap',
        'w:outlineLvl', 'w:divId', 'w:cnfStyle', 'w:rPr', 'w:sectPr',
        'w:pPrChange',
    )
    hp._element.get_or_add_pPr().insert_element_before(pBdr, *pbdr_successors)

    # --- Footer -----------------------------------------------------------
    footer = section.footer
    footer.is_linked_to_previous = False
    fp = footer.paragraphs[0]
    fp.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # A simple field is three runs: begin marker, instruction text, end marker.
    field_parts = (
        f'<w:fldChar {nsdecls("w")} w:fldCharType="begin"/>',
        f'<w:instrText {nsdecls("w")} xml:space="preserve"> PAGE </w:instrText>',
        f'<w:fldChar {nsdecls("w")} w:fldCharType="end"/>',
    )
    for part in field_parts:
        field_run = fp.add_run()
        set_run_font(field_run, '宋体', 'Times New Roman', Pt(9))
        field_run._element.append(parse_xml(part))
def add_cover_page(doc):
    """Append the cover page and finish it with a page break.

    Layout, top to bottom: three spacer lines, the institution name and the
    document type (26 pt bold 黑体), two spacer lines, the thesis title
    (16 pt bold 黑体), four spacer lines, then one centred line per info
    field (14 pt 宋体) with the value part underlined.
    """
    def spacer(count):
        # Empty paragraphs used purely for vertical spacing.
        for _ in range(count):
            set_paragraph_spacing(doc.add_paragraph(), line_spacing=360)

    def banner(text, size, **spacing):
        # One centred, bold 黑体 line.
        para = doc.add_paragraph()
        para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        set_run_font(para.add_run(text), '黑体', 'Times New Roman', size, bold=True)
        set_paragraph_spacing(para, **spacing)

    spacer(3)
    banner('大连科技学院', Pt(26), line_spacing=360)
    banner('毕业设计(论文)', Pt(26), line_spacing=360, after=600)

    spacer(2)
    banner('论文题目:基于Spring Boot的养老院管理系统的设计与实现', Pt(16), line_spacing=480)

    spacer(4)
    info_fields = (
        ('学 院:', '网络与通信学院'),
        ('专 业:', '网络工程'),
        ('学 号:', ' '),
        ('学生姓名:', ' '),
        ('指导教师:', ' '),
    )
    for label, value in info_fields:
        row = doc.add_paragraph()
        row.alignment = WD_ALIGN_PARAGRAPH.CENTER
        label_run = row.add_run(label)
        set_run_font(label_run, '宋体', 'Times New Roman', Pt(14), bold=False)
        value_run = row.add_run(value)
        set_run_font(value_run, '宋体', 'Times New Roman', Pt(14), bold=False)
        # Only the value is underlined, giving the fill-in-the-blank look.
        value_run.font.underline = True
        set_paragraph_spacing(row, line_spacing=480)

    doc.add_page_break()
def parse_markdown_files():
    """Read the four thesis markdown files and return them as one string.

    The files are read from ``THESIS_DIR`` as UTF-8, in reading order, and
    joined with a blank line between consecutive files.
    """
    names = ('论文.md', 'chapter3.md', 'chapter4.md', 'chapter5_6_7.md')
    parts = []
    for name in names:
        path = os.path.join(THESIS_DIR, name)
        with open(path, 'r', encoding='utf-8') as handle:
            parts.append(handle.read())
    return '\n\n'.join(parts)
# A markdown table delimiter row such as "|---|:---:|": nothing but pipes,
# dashes, colons and whitespace, with at least one dash.
_MD_TABLE_DELIMITER = re.compile(r'^\|?[\s:|-]*-[\s:|-]*$')
# Chinese keyword label followed by an ASCII or a full-width (U+FF1A) colon.
_MD_CN_KEYWORDS = re.compile(r'关键词[:\uff1a]')
# Inline image; the alt text may be empty.
_MD_IMAGE = re.compile(r'!\[(.*?)\]\((.+?)\)')
# Figure/table caption such as "图4.1 ..." or "表4.1 ...".
_MD_CAPTION = re.compile(r'^(图|表)\d+\.\d+')
# Bibliography entry such as "[12] ...".
_MD_REFERENCE = re.compile(r'^\[(\d+)\]\s*(.+)')


def _split_table_row(line):
    """Split one ``| a | b |`` markdown row into stripped cell strings."""
    return [cell.strip() for cell in line.strip().strip('|').split('|')]


def _add_markdown_line(doc, line):
    """Render one non-empty, non-table markdown line into ``doc``."""
    stripped = line.strip()

    # Level-1 headings: chapter titles are rendered; the thesis title is
    # dropped because the cover page already shows it.
    if line.startswith(('# 基于', '# 第')):
        text = line.lstrip('# ').strip()
        if '第' in text and '章' in text:
            add_heading_chapter(doc, text)
        return

    if stripped == '## 摘要':
        add_title(doc, '摘 要')
        return
    if stripped == '## Abstract':
        doc.add_page_break()
        add_title(doc, 'Abstract')
        return

    cn_keywords = _MD_CN_KEYWORDS.match(line)
    if cn_keywords:
        p = doc.add_paragraph()
        set_run_font(p.add_run('关键词:'), '黑体', 'Times New Roman', Pt(12), bold=True)
        # Everything after the colon, whichever colon variant was used.
        set_run_font(p.add_run(line[cn_keywords.end():]), '宋体', 'Times New Roman', Pt(12))
        set_paragraph_spacing(p, line_spacing=360)
        return

    if line.startswith(('Keywords:', 'Key words:')):
        p = doc.add_paragraph()
        set_run_font(p.add_run('Key words: '), 'Times New Roman', 'Times New Roman', Pt(12), bold=True)
        kw_text = line.split(':', 1)[1].strip()
        set_run_font(p.add_run(kw_text), 'Times New Roman', 'Times New Roman', Pt(12))
        set_paragraph_spacing(p, line_spacing=360)
        # The English keywords close the abstract pages.
        doc.add_page_break()
        return

    if line.startswith('## '):
        text = line[3:].strip()
        if text == '参考文献':
            doc.add_page_break()
            add_heading_chapter(doc, text)
        elif text == '致谢':
            doc.add_page_break()
            add_heading_chapter(doc, '致 谢')
        else:
            add_heading_section(doc, text)
        return

    if line.startswith('### '):
        add_heading_subsection(doc, line[4:].strip())
        return

    image = _MD_IMAGE.match(line)
    if image:
        add_image(doc, os.path.join(THESIS_DIR, image.group(2)))
        return

    if _MD_CAPTION.match(stripped):
        add_caption(doc, stripped)
        return

    if _MD_REFERENCE.match(stripped):
        p = doc.add_paragraph()
        set_run_font(p.add_run(stripped), '宋体', 'Times New Roman', Pt(10.5))
        set_paragraph_spacing(p, line_spacing=360)
        return

    # Everything else, numbered items included, is ordinary body text.
    add_body_paragraph(doc, stripped, first_line_indent=True)


def process_markdown(doc, md_text):
    """Convert the thesis markdown into formatted paragraphs and tables in ``doc``.

    Recognised constructs: the ``## 目录`` table-of-contents section (skipped
    up to the next level-1 heading or ``## 摘要``), chapter / section /
    subsection headings, the Chinese and English abstract titles and keyword
    lines, images, figure/table captions, pipe tables, bibliography entries
    and body text. Blank lines are ignored.

    A table is read as one contiguous run of lines starting with ``|``, so a
    table without data rows is still emitted and cannot merge into a later
    table.

    Args:
        doc: python-docx document to append to.
        md_text: Full markdown source as a single string.
    """
    lines = md_text.split('\n')
    total = len(lines)
    i = 0
    skip_toc = False
    while i < total:
        line = lines[i].rstrip()
        stripped = line.strip()

        if not stripped:
            i += 1
            continue

        if stripped == '## 目录':
            skip_toc = True
            i += 1
            continue
        if skip_toc:
            if line.startswith(('# ', '## 摘要')):
                skip_toc = False
            else:
                i += 1
                continue

        if stripped.startswith('|'):
            headers = _split_table_row(stripped)
            i += 1
            # Drop the delimiter row only if it really is one, so a data row
            # that happens to contain dashes is not swallowed.
            if i < total and _MD_TABLE_DELIMITER.match(lines[i].strip()):
                i += 1
            rows = []
            while i < total and lines[i].strip().startswith('|'):
                rows.append(_split_table_row(lines[i]))
                i += 1
            add_table_from_md(doc, headers, rows)
            continue

        _add_markdown_line(doc, line)
        i += 1
def main():
    """Build the thesis DOCX and write it next to this script.

    Steps: page setup, cover page, markdown body, save. The output path and
    its size in KB are printed afterwards.
    """
    document = Document()
    setup_page(document)
    add_cover_page(document)
    process_markdown(document, parse_markdown_files())

    output_path = os.path.join(THESIS_DIR, '基于Spring Boot的养老院管理系统的设计与实现.docx')
    document.save(output_path)
    print(f'Thesis saved to: {output_path}')
    size_kb = os.path.getsize(output_path) / 1024
    print(f'File size: {size_kb:.1f} KB')


if __name__ == '__main__':
    main()