Files
nursing-home/thesis/generate_docx.py
2026-03-01 01:13:16 +08:00

565 lines
20 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
Generate formatted DOCX thesis from markdown files.
Matches the style of the reference document (2106090117-佟欣鑫-论文.docx).
Formatting spec (from reference analysis):
- Page: A4 (11906x16838 twips), margins 2.5cm all sides
- Normal text: 宋体/Times New Roman, 小四(12pt/sz=24), line spacing 1.5x(360twips), first-line indent 2chars
- Heading 1 (章): 黑体/Times New Roman, 二号(22pt/sz=44), bold, centered, spacing before/after
- Heading 2 (节): 黑体/Arial, 小三(15pt/sz=30 half-pt), bold, left
- Heading 3 (小节): 黑体, 四号(14pt/sz=28), bold, left
- Title (摘要/Abstract): 黑体, 小三(15pt/sz=30), bold, centered
- Caption: 黑体, 五号(10.5pt/sz=21)
- Header: 大连科技学院2026届本科毕业设计论文
- Footer: page numbers
- TOC styles: toc1=黑体14pt, toc2=宋体14pt indent
"""
import os
import re
import sys
from docx import Document
from docx.shared import Pt, Cm, Inches, Twips, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.table import WD_TABLE_ALIGNMENT
from docx.enum.section import WD_ORIENT
from docx.oxml.ns import qn, nsdecls
from docx.oxml import parse_xml
from lxml import etree
# Absolute directory holding this script; input markdown files are read from
# it and the generated DOCX is written back into it.
_SCRIPT_PATH = os.path.abspath(__file__)
THESIS_DIR = os.path.dirname(_SCRIPT_PATH)
# 'diagrams' sub-directory of THESIS_DIR (not referenced by the functions
# visible in this file; image paths are resolved against THESIS_DIR instead).
DIAGRAMS_DIR = os.path.join(THESIS_DIR, 'diagrams')
# ─── Helper functions ───
def set_run_font(run, cn_font='宋体', en_font='Times New Roman', size=Pt(12), bold=False, italic=False):
    """Apply size, weight, slant and typefaces to a run.

    The East-Asian typeface is written to ``w:rFonts/@w:eastAsia`` and the
    Latin typeface to ``@w:ascii`` / ``@w:hAnsi``, so Chinese and English text
    in the same run can use different fonts.

    Args:
        run: python-docx run to format (modified in place).
        cn_font: Typeface name for East-Asian characters.
        en_font: Typeface name for Latin characters.
        size: Font size (a python-docx length such as ``Pt(12)``).
        bold: Whether the run is bold.
        italic: Whether the run is italic.
    """
    font = run.font
    font.size = size
    font.bold = bold
    font.italic = italic
    font.name = en_font

    # Make sure <w:rPr> and <w:rFonts> exist before setting attributes on them.
    run_elm = run._element
    props = run_elm.find(qn('w:rPr'))
    if props is None:
        props = parse_xml(f'<w:rPr {nsdecls("w")}></w:rPr>')
        run_elm.insert(0, props)

    fonts = props.find(qn('w:rFonts'))
    if fonts is None:
        fonts = parse_xml(f'<w:rFonts {nsdecls("w")}/>')
        props.insert(0, fonts)

    for attr_name, typeface in (
        ('w:eastAsia', cn_font),
        ('w:ascii', en_font),
        ('w:hAnsi', en_font),
    ):
        fonts.set(qn(attr_name), typeface)
def set_paragraph_spacing(paragraph, line_spacing=360, before=0, after=0, first_line_chars=None, first_line=None):
    """Set line spacing, space before/after and first-line indent on a paragraph.

    Values are written verbatim as attributes of ``w:spacing`` / ``w:ind``.
    A falsy argument leaves the corresponding attribute untouched.

    The child elements are obtained through python-docx's ``get_or_add_*``
    accessors rather than ``append()``: callers usually set the paragraph
    alignment first, and a plain append would place ``w:spacing`` / ``w:ind``
    after ``w:jc``, which is out of order for the ``w:pPr`` schema sequence.

    Args:
        paragraph: python-docx paragraph to modify in place.
        line_spacing: Value for ``w:spacing/@w:line``; ``w:lineRule`` is set
            to ``auto`` alongside it.
        before: Value for ``w:spacing/@w:before``.
        after: Value for ``w:spacing/@w:after``.
        first_line_chars: Value for ``w:ind/@w:firstLineChars``.
        first_line: Value for ``w:ind/@w:firstLine``.
    """
    pPr = paragraph._element.get_or_add_pPr()

    # Spacing (the element is always present afterwards, as before).
    spacing = pPr.get_or_add_spacing()
    if line_spacing:
        spacing.set(qn('w:line'), str(line_spacing))
        spacing.set(qn('w:lineRule'), 'auto')
    if before:
        spacing.set(qn('w:before'), str(before))
    if after:
        spacing.set(qn('w:after'), str(after))

    # Indentation (only touched when an indent was requested).
    if first_line_chars or first_line:
        ind = pPr.get_or_add_ind()
        if first_line_chars:
            ind.set(qn('w:firstLineChars'), str(first_line_chars))
        if first_line:
            ind.set(qn('w:firstLine'), str(first_line))
def add_body_paragraph(doc, text, cn_font='宋体', en_font='Times New Roman', size=Pt(12),
                       bold=False, alignment=None, first_line_indent=True, line_spacing=360):
    """Append a body-text paragraph to the document and return it.

    Args:
        doc: python-docx document to append to.
        text: Paragraph text, added as a single run.
        cn_font: East-Asian typeface for the run.
        en_font: Latin typeface for the run.
        size: Font size for the run.
        bold: Whether the run is bold.
        alignment: Optional ``WD_ALIGN_PARAGRAPH`` member; ``None`` leaves the
            paragraph alignment unset.
        first_line_indent: When true, indent the first line
            (``firstLineChars=200``, ``firstLine=480``).
        line_spacing: Line spacing value passed to ``set_paragraph_spacing``.

    Returns:
        The newly created paragraph.
    """
    p = doc.add_paragraph()
    # Compare against None explicitly: WD_ALIGN_PARAGRAPH.LEFT has the value 0
    # and would be dropped by a plain truthiness test.
    if alignment is not None:
        p.alignment = alignment
    run = p.add_run(text)
    set_run_font(run, cn_font, en_font, size, bold)
    if first_line_indent:
        set_paragraph_spacing(p, line_spacing=line_spacing, first_line_chars=200, first_line=480)
    else:
        set_paragraph_spacing(p, line_spacing=line_spacing)
    return p
def add_heading_chapter(doc, text):
    """Append a chapter heading: 黑体 22pt, bold, centered; return the paragraph."""
    heading = doc.add_paragraph()
    heading.alignment = WD_ALIGN_PARAGRAPH.CENTER
    set_run_font(
        heading.add_run(text),
        cn_font='黑体',
        en_font='Times New Roman',
        size=Pt(22),
        bold=True,
    )
    set_paragraph_spacing(heading, line_spacing=360, before=312, after=312)
    return heading
def add_heading_section(doc, text):
    """Append a section heading: 黑体 15pt, bold, left-aligned; return the paragraph."""
    heading = doc.add_paragraph()
    heading.alignment = WD_ALIGN_PARAGRAPH.LEFT
    set_run_font(
        heading.add_run(text),
        cn_font='黑体',
        en_font='Times New Roman',
        size=Pt(15),
        bold=True,
    )
    set_paragraph_spacing(heading, line_spacing=360, before=156, after=156)
    return heading
def add_heading_subsection(doc, text):
    """Append a subsection heading: 黑体 14pt, bold, left-aligned; return the paragraph."""
    heading = doc.add_paragraph()
    heading.alignment = WD_ALIGN_PARAGRAPH.LEFT
    set_run_font(
        heading.add_run(text),
        cn_font='黑体',
        en_font='Times New Roman',
        size=Pt(14),
        bold=True,
    )
    set_paragraph_spacing(heading, line_spacing=360, before=78, after=78)
    return heading
def add_title(doc, text):
    """Append a front-matter title (e.g. abstract): 黑体 15pt, bold, centered."""
    title = doc.add_paragraph()
    title.alignment = WD_ALIGN_PARAGRAPH.CENTER
    set_run_font(
        title.add_run(text),
        cn_font='黑体',
        en_font='Times New Roman',
        size=Pt(15),
        bold=True,
    )
    set_paragraph_spacing(title, line_spacing=360, before=240, after=60)
    return title
def add_caption(doc, text):
    """Append a figure/table caption: 黑体 10.5pt, not bold, centered."""
    caption = doc.add_paragraph()
    caption.alignment = WD_ALIGN_PARAGRAPH.CENTER
    set_run_font(
        caption.add_run(text),
        cn_font='黑体',
        en_font='Times New Roman',
        size=Pt(10.5),
        bold=False,
    )
    set_paragraph_spacing(caption, line_spacing=360, before=60, after=60)
    return caption
def add_image(doc, image_path, width=None):
    """Append a centered paragraph containing the image at ``image_path``.

    Args:
        doc: python-docx document to append to.
        image_path: Path of the image file.
        width: Optional explicit picture width. When omitted, the width is
            derived from the pixel width at 96 DPI and capped at 14 cm; if
            Pillow is not installed the picture is simply made 14 cm wide.

    Returns:
        The paragraph holding the picture, or holding a
        ``[图片缺失: ...]`` placeholder when the file does not exist.
    """
    p = doc.add_paragraph()
    p.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run = p.add_run()

    if not os.path.exists(image_path):
        run.add_text(f'[图片缺失: {image_path}]')
    elif width:
        run.add_picture(image_path, width=width)
    else:
        max_width_cm = 14
        try:
            # Imported here so a missing Pillow actually reaches the fallback.
            from PIL import Image
        except ImportError:
            width_cm = max_width_cm
        else:
            # Context manager closes the file handle once the size is read.
            with Image.open(image_path) as img:
                pixel_width = img.size[0]
            width_cm = min(pixel_width * 2.54 / 96, max_width_cm)
        run.add_picture(image_path, width=Cm(width_cm))

    set_paragraph_spacing(p, line_spacing=360)
    return p
def _ensure_tcPr(cell):
    """Return the cell's ``w:tcPr`` element, inserting it as first child if absent."""
    tc = cell._element
    tcPr = tc.find(qn('w:tcPr'))
    if tcPr is None:
        tcPr = parse_xml(f'<w:tcPr {nsdecls("w")}></w:tcPr>')
        tc.insert(0, tcPr)
    return tcPr


def _write_table_cell(cell, text, cn_font, bold, fill=None):
    """Fill one cell: optional background, vertical centering, centered 10.5pt text."""
    cell.text = ''
    tcPr = _ensure_tcPr(cell)

    if fill is not None:
        shading = tcPr.find(qn('w:shd'))
        if shading is None:
            shading = parse_xml(f'<w:shd {nsdecls("w")} w:fill="{fill}" w:val="clear"/>')
            tcPr.append(shading)
        else:
            shading.set(qn('w:fill'), fill)

    if tcPr.find(qn('w:vAlign')) is None:
        tcPr.append(parse_xml(f'<w:vAlign {nsdecls("w")} w:val="center"/>'))

    p = cell.paragraphs[0]
    p.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run = p.add_run(text.strip())
    set_run_font(run, cn_font, 'Times New Roman', Pt(10.5), bold=bold)
    set_paragraph_spacing(p, line_spacing=300)


def add_table_from_md(doc, headers, rows):
    """Append a bordered, centered table built from parsed markdown table data.

    The header row is bold 黑体 on a gray (``D9D9D9``) background; data rows
    use 宋体. All cells are centered horizontally and vertically.

    Args:
        doc: python-docx document to append to.
        headers: List of header cell strings; its length fixes the column count.
        rows: List of rows, each a list of cell strings. Cells beyond the
            number of headers are ignored; missing cells are left empty.

    Returns:
        The created table.
    """
    table = doc.add_table(rows=1 + len(rows), cols=len(headers))
    table.alignment = WD_TABLE_ALIGNMENT.CENTER

    # Single-line black borders around and inside the table.
    tbl = table._tbl
    tblPr = tbl.find(qn('w:tblPr'))
    if tblPr is None:
        tblPr = parse_xml(f'<w:tblPr {nsdecls("w")}></w:tblPr>')
        tbl.insert(0, tblPr)
    borders = parse_xml(
        f'<w:tblBorders {nsdecls("w")}>'
        '<w:top w:val="single" w:sz="4" w:space="0" w:color="000000"/>'
        '<w:left w:val="single" w:sz="4" w:space="0" w:color="000000"/>'
        '<w:bottom w:val="single" w:sz="4" w:space="0" w:color="000000"/>'
        '<w:right w:val="single" w:sz="4" w:space="0" w:color="000000"/>'
        '<w:insideH w:val="single" w:sz="4" w:space="0" w:color="000000"/>'
        '<w:insideV w:val="single" w:sz="4" w:space="0" w:color="000000"/>'
        '</w:tblBorders>'
    )
    tblPr.append(borders)

    # Header row: gray background, bold text.
    for col, header in enumerate(headers):
        _write_table_cell(table.cell(0, col), header, '黑体', bold=True, fill='D9D9D9')

    # Data rows: surplus cells (beyond the header count) are dropped.
    column_count = len(headers)
    for row_idx, row in enumerate(rows, start=1):
        for col, cell_text in enumerate(row[:column_count]):
            _write_table_cell(table.cell(row_idx, col), cell_text, '宋体', bold=False)

    return table
def setup_page(doc):
    """Configure the first section: A4 page, margins, header text and page-number footer.

    * Page 11906 x 16838 twips, 2.5 cm margins on all sides, header/footer
      distance 1.27 cm.
    * Header: centered 9pt institution line with a bottom border.
    * Footer: centered 9pt ``PAGE`` field.

    Args:
        doc: python-docx document whose first section is modified in place.
    """
    section = doc.sections[0]
    section.page_width = Twips(11906)
    section.page_height = Twips(16838)
    section.top_margin = Cm(2.5)
    section.bottom_margin = Cm(2.5)
    section.left_margin = Cm(2.5)
    section.right_margin = Cm(2.5)
    section.header_distance = Cm(1.27)
    section.footer_distance = Cm(1.27)

    # Header
    header = section.header
    header.is_linked_to_previous = False
    hp = header.paragraphs[0]
    run = hp.add_run('大连科技学院2026届本科毕业设计论文')
    set_run_font(run, '宋体', 'Times New Roman', Pt(9), bold=False)

    # Bottom border under the header paragraph. It is appended BEFORE the
    # alignment is set so that w:pBdr ends up ahead of w:jc, as the w:pPr
    # child sequence requires.
    pPr = hp._element.find(qn('w:pPr'))
    if pPr is None:
        pPr = parse_xml(f'<w:pPr {nsdecls("w")}></w:pPr>')
        hp._element.insert(0, pPr)
    pBdr = parse_xml(
        f'<w:pBdr {nsdecls("w")}>'
        '<w:bottom w:val="single" w:sz="6" w:space="1" w:color="000000"/>'
        '</w:pBdr>'
    )
    pPr.append(pBdr)
    hp.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Footer with page number
    footer = section.footer
    footer.is_linked_to_previous = False
    fp = footer.paragraphs[0]
    fp.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # A PAGE field is three consecutive runs: begin marker, instruction, end marker.
    field_parts = (
        f'<w:fldChar {nsdecls("w")} w:fldCharType="begin"/>',
        f'<w:instrText {nsdecls("w")} xml:space="preserve"> PAGE </w:instrText>',
        f'<w:fldChar {nsdecls("w")} w:fldCharType="end"/>',
    )
    field_runs = []
    for part_xml in field_parts:
        field_run = fp.add_run()
        field_run._element.append(parse_xml(part_xml))
        field_runs.append(field_run)
    for field_run in field_runs:
        set_run_font(field_run, '宋体', 'Times New Roman', Pt(9))
def add_cover_page(doc):
    """Append the cover page (school, thesis type, title, info fields) and a page break.

    Args:
        doc: python-docx document to append to.
    """

    def blank_lines(count):
        # Empty paragraphs used purely as vertical spacing.
        for _ in range(count):
            set_paragraph_spacing(doc.add_paragraph(), line_spacing=360)

    def centered_bold_line(text, size, **spacing):
        # One centered, bold 黑体 paragraph.
        para = doc.add_paragraph()
        para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        set_run_font(para.add_run(text), '黑体', 'Times New Roman', size, bold=True)
        set_paragraph_spacing(para, **spacing)

    blank_lines(3)

    # University name and thesis type
    centered_bold_line('大连科技学院', Pt(26), line_spacing=360)
    centered_bold_line('毕业设计(论文)', Pt(26), line_spacing=360, after=600)

    # Title
    blank_lines(2)
    centered_bold_line('论文题目基于Spring Boot的养老院管理系统的设计与实现', Pt(16), line_spacing=480)

    blank_lines(4)

    # Info fields: plain label followed by an underlined value.
    info_fields = [
        ('学 院:', '网络与通信学院'),
        ('专 业:', '网络工程'),
        ('学 号:', ' '),
        ('学生姓名:', ' '),
        ('指导教师:', ' '),
    ]
    for label, value in info_fields:
        para = doc.add_paragraph()
        para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        label_run = para.add_run(label)
        set_run_font(label_run, '宋体', 'Times New Roman', Pt(14), bold=False)
        value_run = para.add_run(value)
        set_run_font(value_run, '宋体', 'Times New Roman', Pt(14), bold=False)
        value_run.font.underline = True
        set_paragraph_spacing(para, line_spacing=480)

    doc.add_page_break()
def parse_markdown_files():
    """Read the four thesis markdown files and return them joined as one string.

    The files are read from ``THESIS_DIR`` as UTF-8, in fixed order, and
    concatenated with a blank line between consecutive files.

    Returns:
        The combined markdown text.
    """
    names = ('论文.md', 'chapter3.md', 'chapter4.md', 'chapter5_6_7.md')
    chunks = []
    for name in names:
        path = os.path.join(THESIS_DIR, name)
        with open(path, 'r', encoding='utf-8') as handle:
            chunks.append(handle.read())
    return '\n\n'.join(chunks)
# Full-width colon (U+FF1A), spelled as an escape so it cannot be confused
# with, or normalised into, the ASCII colon.
_FULLWIDTH_COLON = '\uff1a'
_CN_KEYWORD_PREFIXES = ('关键词' + _FULLWIDTH_COLON, '关键词:')
_EN_KEYWORD_PREFIXES = ('Keywords:', 'Key words:')
_IMAGE_RE = re.compile(r'!\[(.*?)\]\((.+?)\)')
_CAPTION_RE = re.compile(r'^(图|表)\d+\.\d+')
_REFERENCE_RE = re.compile(r'^\[(\d+)\]\s*(.+)')


def _is_table_line(line):
    """Return True when the line (ignoring surrounding whitespace) starts with '|'."""
    return line.strip().startswith('|')


def _split_table_row(line):
    """Split one markdown table line into a list of stripped cell strings."""
    return [cell.strip() for cell in line.strip().strip('|').split('|')]


def _read_table(lines, start):
    """Read the table whose header is ``lines[start]``.

    Returns ``(headers, rows, next_index)`` where ``next_index`` is the first
    line after the table. A ``---`` separator row directly after the header
    is skipped; a table may have zero data rows.
    """
    headers = _split_table_row(lines[start])
    idx = start + 1
    if idx < len(lines) and _is_table_line(lines[idx]) and '---' in lines[idx]:
        idx += 1
    rows = []
    while idx < len(lines) and _is_table_line(lines[idx]):
        rows.append(_split_table_row(lines[idx]))
        idx += 1
    return headers, rows, idx


def _add_markdown_line(doc, line):
    """Render a single non-empty, non-table markdown line into the document."""
    stripped = line.strip()

    # Top-level headings: only real chapter headings (第X章) are rendered;
    # the thesis title is skipped because it is already on the cover page.
    if line.startswith('# 基于') or line.startswith('# 第'):
        text = line[2:].strip()
        if '第' in text and '章' in text:
            add_heading_chapter(doc, text)
        return

    # 摘要 / Abstract titles
    if stripped == '## 摘要':
        add_title(doc, '摘 要')
        return
    if stripped == '## Abstract':
        doc.add_page_break()
        add_title(doc, 'Abstract')
        return

    # Chinese keywords line (full-width or ASCII colon after the label)
    if line.startswith(_CN_KEYWORD_PREFIXES):
        p = doc.add_paragraph()
        run = p.add_run('关键词:')
        set_run_font(run, '黑体', 'Times New Roman', Pt(12), bold=True)
        # Both accepted prefixes have the same length, so slice the label off.
        run2 = p.add_run(line[len(_CN_KEYWORD_PREFIXES[0]):])
        set_run_font(run2, '宋体', 'Times New Roman', Pt(12))
        set_paragraph_spacing(p, line_spacing=360)
        return

    # English keywords line, followed by a page break
    if line.startswith(_EN_KEYWORD_PREFIXES):
        p = doc.add_paragraph()
        run = p.add_run('Key words: ')
        set_run_font(run, 'Times New Roman', 'Times New Roman', Pt(12), bold=True)
        run2 = p.add_run(line.split(':', 1)[1].strip())
        set_run_font(run2, 'Times New Roman', 'Times New Roman', Pt(12))
        set_paragraph_spacing(p, line_spacing=360)
        doc.add_page_break()
        return

    # Section headings; references and acknowledgements start a new page
    # and are styled like chapters.
    if line.startswith('## '):
        text = line[3:].strip()
        if text == '参考文献':
            doc.add_page_break()
            add_heading_chapter(doc, text)
        elif text == '致谢':
            doc.add_page_break()
            add_heading_chapter(doc, '致 谢')
        else:
            add_heading_section(doc, text)
        return

    # Subsection headings
    if line.startswith('### '):
        add_heading_subsection(doc, line[4:].strip())
        return

    # Image: ![alt](path), path relative to THESIS_DIR; alt text is not used.
    img_match = _IMAGE_RE.match(line)
    if img_match:
        add_image(doc, os.path.join(THESIS_DIR, img_match.group(2)))
        return

    # Figure/table caption (line like "图4.1 xxx" or "表4.1 xxx")
    if _CAPTION_RE.match(stripped):
        add_caption(doc, stripped)
        return

    # Reference items [1], [2], ... in 10.5pt without first-line indent
    if _REFERENCE_RE.match(stripped):
        p = doc.add_paragraph()
        run = p.add_run(stripped)
        set_run_font(run, '宋体', 'Times New Roman', Pt(10.5))
        set_paragraph_spacing(p, line_spacing=360)
        return

    # Everything else (including numbered items) is normal body text.
    add_body_paragraph(doc, stripped, first_line_indent=True)


def process_markdown(doc, md_text):
    """Convert the thesis markdown text into formatted paragraphs and tables in ``doc``.

    Line-oriented handling, in order:
      * blank lines are ignored;
      * everything from ``## 目录`` up to the next ``# `` heading or
        ``## 摘要`` is skipped;
      * consecutive lines starting with ``|`` form one table (header, optional
        ``---`` separator, data rows);
      * headings, abstract titles, keyword lines, images, captions and
        reference items get their dedicated formatting;
      * any other line becomes an indented body paragraph.

    Args:
        doc: python-docx document to append to.
        md_text: Complete markdown source as a single string.
    """
    lines = md_text.split('\n')
    i = 0
    skip_toc = False
    while i < len(lines):
        line = lines[i].rstrip()
        stripped = line.strip()

        if not stripped:
            i += 1
            continue

        # Skip the TOC section
        if stripped == '## 目录':
            skip_toc = True
            i += 1
            continue
        if skip_toc:
            if line.startswith('# ') or line.startswith('## 摘要'):
                skip_toc = False
            else:
                i += 1
                continue

        # Tables are consumed as a whole so no parser state can leak from a
        # header-only table into whatever follows it.
        if _is_table_line(line):
            headers, rows, i = _read_table(lines, i)
            add_table_from_md(doc, headers, rows)
            continue

        _add_markdown_line(doc, line)
        i += 1
def main():
    """Build the thesis DOCX next to this script and print its path and size."""
    document = Document()

    # Page layout first, then cover page, then the markdown body.
    setup_page(document)
    add_cover_page(document)
    process_markdown(document, parse_markdown_files())

    target = os.path.join(THESIS_DIR, '基于Spring Boot的养老院管理系统的设计与实现.docx')
    document.save(target)

    size_kb = os.path.getsize(target) / 1024
    print(f'Thesis saved to: {target}')
    print(f'File size: {size_kb:.1f} KB')


if __name__ == '__main__':
    main()