Files
nursing-home/example/format_thesis_docx.py
2026-03-01 01:13:16 +08:00

342 lines
11 KiB
Python

from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_BREAK
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from docx.shared import Cm, Pt, RGBColor
import re
# Input thesis document that main() loads (absolute path on the author's machine).
SRC = r"/Users/apple/code/bs/mying/example/萌贝母婴商城毕业论文初稿-2026版.docx"
# Output path main() saves the reformatted copy to; it differs from SRC, so the
# input file is not overwritten.
DST = r"/Users/apple/code/bs/mying/example/萌贝母婴商城毕业论文初稿-2026版-排版.docx"
def set_style_font(
    style,
    east_asia_font: str,
    size_pt: float,
    bold: bool | None = None,
    west_font: str = "Times New Roman",
):
    """Configure the fonts, size, weight and colour of a document style.

    Args:
        style: Style object exposing ``font`` and ``element``.
        east_asia_font: Typeface written to the ``w:eastAsia`` font slot.
        size_pt: Font size in points.
        bold: Bold flag to apply; ``None`` leaves the current setting alone.
        west_font: Typeface used for the ``w:ascii`` and ``w:hAnsi`` slots.
    """
    style_font = style.font
    style_font.name = west_font
    style_font.size = Pt(size_pt)
    if bold is not None:
        style_font.bold = bold
    # Text colour is always forced to black.
    style_font.color.rgb = RGBColor(0, 0, 0)

    # Write the per-script font slots directly on the style's w:rFonts element.
    r_fonts = style.element.get_or_add_rPr().get_or_add_rFonts()
    slot_values = (
        ("w:ascii", west_font),
        ("w:hAnsi", west_font),
        ("w:eastAsia", east_asia_font),
    )
    for slot, typeface in slot_values:
        r_fonts.set(qn(slot), typeface)
def set_runs_font(
    paragraph,
    east_asia_font: str,
    size_pt: float,
    bold: bool | None = None,
    west_font: str = "Times New Roman",
):
    """Apply fonts, size, weight and black colour to every run of a paragraph.

    Args:
        paragraph: Paragraph whose ``runs`` are updated in place.
        east_asia_font: Typeface written to each run's ``w:eastAsia`` slot.
        size_pt: Font size in points.
        bold: Bold flag to apply; ``None`` leaves each run's setting alone.
        west_font: Typeface used for the ``w:ascii`` and ``w:hAnsi`` slots.
    """
    slot_values = (
        ("w:ascii", west_font),
        ("w:hAnsi", west_font),
        ("w:eastAsia", east_asia_font),
    )
    for text_run in paragraph.runs:
        run_font = text_run.font
        run_font.name = west_font
        run_font.size = Pt(size_pt)
        if bold is not None:
            run_font.bold = bold
        run_font.color.rgb = RGBColor(0, 0, 0)

        # Set the per-script font slots on the run's own w:rFonts element.
        r_fonts = text_run._element.get_or_add_rPr().get_or_add_rFonts()
        for slot, typeface in slot_values:
            r_fonts.set(qn(slot), typeface)
def set_runs_common(paragraph, italic: bool | None = None, color_black: bool = True):
    """Apply shared run-level tweaks (italic flag, black colour) to a paragraph.

    Args:
        paragraph: Paragraph whose ``runs`` are updated in place.
        italic: Italic flag to apply; ``None`` leaves each run's setting alone.
        color_black: When true, every run's colour is set to RGB (0, 0, 0).
    """
    change_italic = italic is not None
    for text_run in paragraph.runs:
        run_font = text_run.font
        if change_italic:
            run_font.italic = italic
        if color_black:
            run_font.color.rgb = RGBColor(0, 0, 0)
def is_numbered_paragraph(paragraph) -> bool:
    """Return True when the paragraph's properties carry a ``numPr`` element.

    A paragraph without a paragraph-properties element is never numbered.
    """
    props = paragraph._p.pPr
    return props is not None and props.numPr is not None
def iter_table_paragraphs(table):
    """Yield every paragraph in a table, descending into nested tables.

    Cells are visited row by row; for each cell its own paragraphs are yielded
    first, followed by the paragraphs of any tables nested inside that cell.
    """
    for row in table.rows:
        for cell in row.cells:
            yield from cell.paragraphs
            for nested_table in cell.tables:
                yield from iter_table_paragraphs(nested_table)
def format_table_paragraph(p, bold: bool = False):
    """Format one table-cell paragraph: centred, single-spaced, 10.5 pt 宋体.

    Args:
        p: Paragraph inside a table cell, modified in place.
        bold: Whether the paragraph's runs are made bold.
    """
    no_space = Pt(0)
    layout = p.paragraph_format
    p.alignment = WD_ALIGN_PARAGRAPH.CENTER
    layout.line_spacing = 1.0
    layout.space_before = no_space
    layout.space_after = no_space
    layout.first_line_indent = no_space

    # Fonts first, then the shared run tweaks (non-italic, black).
    set_runs_font(p, "宋体", 10.5, bold=bold)
    set_runs_common(p, italic=False, color_black=True)
def _get_or_add_ordered_child(parent, tag: str, order: tuple[str, ...]):
    """Return ``parent``'s ``tag`` child, creating it at its schema position.

    ``order`` lists the parent's possible children in the sequence the
    WordprocessingML schema expects. A newly created child is inserted before
    the first already-present element that follows ``tag`` in ``order``; it is
    appended only when no such element exists.
    """
    child = parent.find(qn(tag))
    if child is not None:
        return child
    child = OxmlElement(tag)
    for later_tag in order[order.index(tag) + 1:]:
        later = parent.find(qn(later_tag))
        if later is not None:
            later.addprevious(child)
            return child
    parent.append(child)
    return child


def set_table_style_like_template(table):
    """Give a table the grid look used by the template.

    Applied settings:
      * table style "Table Grid" (when the document defines it);
      * table width 4997 of type ``pct`` (pct units are fiftieths of a
        percent, so this is just under 100 %) and centred table alignment;
      * default cell margins of 120 (top/bottom) and 140 (left/right), ``dxa``;
      * single borders of size 4 on all outer and inner table edges and on the
        four edges of every cell;
      * minimum row height 620 and vertically centred cell content.

    Properties are written as raw XML. New elements are inserted at the
    position the schema sequence requires rather than appended, because
    out-of-order property elements make the part schema-invalid.

    Args:
        table: python-docx table, modified in place.
    """
    tbl_pr_order = (
        "w:tblStyle", "w:tblpPr", "w:tblOverlap", "w:bidiVisual",
        "w:tblStyleRowBandSize", "w:tblStyleColBandSize", "w:tblW", "w:jc",
        "w:tblCellSpacing", "w:tblInd", "w:tblBorders", "w:shd", "w:tblLayout",
        "w:tblCellMar", "w:tblLook", "w:tblCaption", "w:tblDescription",
        "w:tblPrChange",
    )
    tr_pr_order = (
        "w:cnfStyle", "w:divId", "w:gridBefore", "w:gridAfter", "w:wBefore",
        "w:wAfter", "w:cantSplit", "w:trHeight", "w:tblHeader",
        "w:tblCellSpacing", "w:jc", "w:hidden", "w:ins", "w:del", "w:trPrChange",
    )
    tc_pr_order = (
        "w:cnfStyle", "w:tcW", "w:gridSpan", "w:hMerge", "w:vMerge",
        "w:tcBorders", "w:shd", "w:noWrap", "w:tcMar", "w:textDirection",
        "w:tcFitText", "w:vAlign", "w:hideMark", "w:headers", "w:cellIns",
        "w:cellDel", "w:cellMerge", "w:tcPrChange",
    )
    # Shared by border and margin containers; start/end are the
    # direction-neutral spellings of left/right.
    edge_order = (
        "w:top", "w:start", "w:left", "w:bottom", "w:end", "w:right",
        "w:insideH", "w:insideV", "w:tl2br", "w:tr2bl",
    )

    def _single_border(container, edge):
        # Thin single line, automatic colour, no spacing.
        elem = _get_or_add_ordered_child(container, f"w:{edge}", edge_order)
        elem.set(qn("w:val"), "single")
        elem.set(qn("w:sz"), "4")
        elem.set(qn("w:color"), "auto")
        elem.set(qn("w:space"), "0")

    tbl = table._tbl
    # Look the element up with find() so a table without w:tblPr gets one;
    # the typed accessor is expected to raise instead of returning None.
    tbl_pr = tbl.find(qn("w:tblPr"))
    if tbl_pr is None:
        tbl_pr = OxmlElement("w:tblPr")
        tbl.insert(0, tbl_pr)

    # w:tblStyle/@w:val must hold a style *id*, not the UI name "Table Grid".
    # Assigning through the python-docx property resolves the name to the id
    # used by this document and places w:tblStyle first in w:tblPr.
    try:
        table.style = "Table Grid"
    except (KeyError, ValueError):
        # Deliberate best effort: the document has no usable "Table Grid"
        # table style. The explicit borders below still give the grid look.
        pass

    tbl_w = _get_or_add_ordered_child(tbl_pr, "w:tblW", tbl_pr_order)
    tbl_w.set(qn("w:type"), "pct")
    tbl_w.set(qn("w:w"), "4997")

    tbl_jc = _get_or_add_ordered_child(tbl_pr, "w:jc", tbl_pr_order)
    tbl_jc.set(qn("w:val"), "center")

    tbl_cell_mar = _get_or_add_ordered_child(tbl_pr, "w:tblCellMar", tbl_pr_order)
    # Listed in schema order (top, left, bottom, right).
    for edge, width in (("top", "120"), ("left", "140"), ("bottom", "120"), ("right", "140")):
        elem = _get_or_add_ordered_child(tbl_cell_mar, f"w:{edge}", edge_order)
        elem.set(qn("w:w"), width)
        elem.set(qn("w:type"), "dxa")

    tbl_borders = _get_or_add_ordered_child(tbl_pr, "w:tblBorders", tbl_pr_order)
    for edge in ("top", "left", "bottom", "right", "insideH", "insideV"):
        _single_border(tbl_borders, edge)

    for row in table.rows:
        tr_pr = row._tr.get_or_add_trPr()
        tr_height = _get_or_add_ordered_child(tr_pr, "w:trHeight", tr_pr_order)
        tr_height.set(qn("w:val"), "620")
        tr_height.set(qn("w:hRule"), "atLeast")

        for cell in row.cells:
            tc_pr = cell._tc.get_or_add_tcPr()
            v_align = _get_or_add_ordered_child(tc_pr, "w:vAlign", tc_pr_order)
            v_align.set(qn("w:val"), "center")

            tc_borders = _get_or_add_ordered_child(tc_pr, "w:tcBorders", tc_pr_order)
            for edge in ("top", "left", "bottom", "right"):
                _single_border(tc_borders, edge)
def set_table_header_gray(table):
    """Shade every cell of the table's first row with light grey (D9D9D9).

    Does nothing for a table without rows. A missing ``w:shd`` element is
    inserted before any cell property that must follow it in the schema
    sequence (for example ``w:vAlign``) instead of being appended, so the
    resulting ``w:tcPr`` stays in schema order.

    Args:
        table: python-docx table, modified in place.
    """
    if not table.rows:
        return

    # Children of w:tcPr that come after w:shd in the schema sequence.
    shd_successors = (
        "w:noWrap", "w:tcMar", "w:textDirection", "w:tcFitText", "w:vAlign",
        "w:hideMark", "w:headers", "w:cellIns", "w:cellDel", "w:cellMerge",
        "w:tcPrChange",
    )

    for cell in table.rows[0].cells:
        tc_pr = cell._tc.get_or_add_tcPr()
        shd = tc_pr.find(qn("w:shd"))
        if shd is None:
            shd = OxmlElement("w:shd")
            for tag in shd_successors:
                successor = tc_pr.find(qn(tag))
                if successor is not None:
                    successor.addprevious(shd)
                    break
            else:
                # No later property present: the end is the right position.
                tc_pr.append(shd)
        shd.set(qn("w:val"), "clear")
        shd.set(qn("w:color"), "auto")
        shd.set(qn("w:fill"), "D9D9D9")
def cleanup_paragraph_spaces(paragraph):
    """Collapse repeated blanks and trim the edges of a paragraph's text.

    Within every run, two or more consecutive spaces/tabs become one space.
    Leading spaces, tabs and ideographic spaces (U+3000) are stripped from the
    first run and trailing ones from the last run.

    A run's text is only assigned when it actually changes. Assigning
    ``run.text`` replaces the run's content, so an unconditional write (even of
    the same string) would discard non-text content such as an inline picture,
    a page break or a field that sits in the first or last run.
    NOTE: a run that mixes text needing cleanup with such content is still
    rewritten.

    Args:
        paragraph: Object exposing ``runs``; each run exposes a ``text``
            attribute. Modified in place.
    """
    runs = paragraph.runs
    if not runs:
        return

    def _assign_if_changed(run, old_text, new_text):
        # Skip no-op writes so untouched runs keep their original content.
        if new_text != old_text:
            run.text = new_text

    for run in runs:
        text = run.text
        if text:
            _assign_if_changed(run, text, re.sub(r"[ \t]{2,}", " ", text))

    first_text = runs[0].text
    _assign_if_changed(runs[0], first_text, first_text.lstrip(" \t\u3000"))

    # Re-read: for a single-run paragraph the first and last run are the same.
    last_text = runs[-1].text
    _assign_if_changed(runs[-1], last_text, last_text.rstrip(" \t\u3000"))
def remove_redundant_blank_paragraphs(doc):
    """Remove blank paragraphs that directly follow another blank paragraph.

    A paragraph counts as blank only when it has no visible text (ideographic
    spaces are treated as whitespace) AND carries no non-text content. Text
    alone is not a sufficient test: a paragraph holding just a picture, an
    embedded object, a page/column break, a field or section properties also
    has empty text, and deleting it would silently drop that content.

    Only the paragraphs returned by ``doc.paragraphs`` are considered.

    Args:
        doc: python-docx document, modified in place.
    """
    # Content that makes a text-less paragraph meaningful.
    non_text_content = (
        ".//w:drawing | .//w:pict | .//w:object | .//w:fldChar"
        " | .//w:br[@w:type='page' or @w:type='column']"
        " | ./w:pPr/w:sectPr"
    )

    prev_blank = False
    # Iterate over a snapshot because paragraphs are removed while looping.
    for p in list(doc.paragraphs):
        text = p.text.replace("\u3000", " ").strip()
        is_blank = text == "" and not p._element.xpath(non_text_content)
        if is_blank and prev_blank:
            p._element.getparent().remove(p._element)
            continue
        prev_blank = is_blank
def add_page_break_between_chapters(doc):
    """Make every chapter heading except the first start after a page break.

    Chapter headings are body paragraphs whose trimmed text matches
    ``第<digits>章`` at the start. For each one after the first, a new
    paragraph holding a page break is inserted before it, unless the element
    immediately preceding the heading already contains a page-type ``w:br``.

    Args:
        doc: python-docx document, modified in place.
    """
    chapter_pattern = re.compile(r"^第\s*\d+\s*章")
    ns_map = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}
    type_attr = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}type'

    # Collect all headings up front so later insertions do not affect matching.
    headings = [
        para
        for para in list(doc.paragraphs)
        if chapter_pattern.match(para.text.replace("\u3000", " ").strip())
    ]

    for heading in headings[1:]:
        previous = heading._element.getprevious()
        already_broken = previous is not None and any(
            br.attrib.get(type_attr) == 'page'
            for br in previous.findall('.//w:br', ns_map)
        )
        if already_broken:
            continue
        spacer = heading.insert_paragraph_before("")
        spacer.add_run().add_break(WD_BREAK.PAGE)
def set_first_line_two_chars(paragraph, twips: int = 420, chars: int = 200):
    """Give a paragraph a character-based first-line indent.

    Writes both ``w:firstLine`` (absolute value) and ``w:firstLineChars``
    (hundredths of a character, so the default 200 means two characters) on the
    paragraph's ``w:ind`` element.

    The ``w:ind`` element is obtained through python-docx's
    ``get_or_add_ind()`` so that, when it has to be created, it is placed at
    its schema position inside ``w:pPr`` rather than appended after elements
    such as ``w:jc`` or ``w:rPr``. Any hanging-indent attributes are removed
    because a hanging indent and a first-line indent are mutually exclusive.

    Args:
        paragraph: python-docx paragraph, modified in place.
        twips: Value written to ``w:firstLine``.
        chars: Value written to ``w:firstLineChars``.
    """
    ppr = paragraph._p.get_or_add_pPr()
    ind = ppr.get_or_add_ind()
    for conflicting in ("w:hanging", "w:hangingChars"):
        ind.attrib.pop(qn(conflicting), None)
    ind.set(qn("w:firstLine"), str(twips))
    ind.set(qn("w:firstLineChars"), str(chars))
def apply_para_format(paragraph, line_spacing: float, first_line_pt: float | None = None, align=None):
    """Apply line spacing, zero paragraph spacing, indent and alignment.

    Args:
        paragraph: python-docx paragraph, modified in place.
        line_spacing: Value assigned to the paragraph's ``line_spacing``.
        first_line_pt: First-line indent in points. ``None`` leaves the indent
            untouched. A positive value sets the indent and then applies the
            two-character indent via ``set_first_line_two_chars`` with its
            defaults (which overwrite ``w:firstLine``). Zero or a negative
            value sets the indent only and removes any ``w:firstLineChars``,
            so an explicit "no indent" request (the centred Heading 1 case) is
            not overridden by a character-based indent.
        align: Alignment to assign; ``None`` leaves alignment untouched.
    """
    fmt = paragraph.paragraph_format
    fmt.line_spacing = line_spacing
    fmt.space_before = Pt(0)
    fmt.space_after = Pt(0)
    if first_line_pt is not None:
        fmt.first_line_indent = Pt(first_line_pt)
        if first_line_pt > 0:
            set_first_line_two_chars(paragraph)
        else:
            # A character-based indent takes precedence over w:firstLine, so a
            # leftover w:firstLineChars would defeat the indent just set.
            ppr = paragraph._p.pPr
            ind = ppr.find(qn("w:ind")) if ppr is not None else None
            if ind is not None:
                ind.attrib.pop(qn("w:firstLineChars"), None)
    if align is not None:
        paragraph.alignment = align
def format_paragraph(p):
    """Format a body paragraph according to its style.

    * ``Heading 1``-``Heading 4``: 1.5 line spacing, bold 黑体 at 22/16/14/14 pt
      with first-line indents of 0/32/28/24 pt; Heading 1 is centred and
      Heading 4 is additionally forced non-italic and black.
    * Numbered paragraphs or ``List Number*`` styles: 1.5 line spacing, 12 pt
      宋体, black.
    * Everything else: 1.5 line spacing, 24 pt first-line indent, 10.5 pt 宋体,
      black.

    Args:
        p: python-docx paragraph, modified in place.
    """
    style_name = p.style.name if p.style is not None else ""

    # style name -> (first-line indent in pt, alignment, font size in pt)
    heading_specs = {
        "Heading 1": (0, WD_ALIGN_PARAGRAPH.CENTER, 22),
        "Heading 2": (32, None, 16),
        "Heading 3": (28, None, 14),
        "Heading 4": (24, None, 14),
    }

    spec = heading_specs.get(style_name)
    if spec is not None:
        indent_pt, alignment, size_pt = spec
        apply_para_format(p, 1.5, indent_pt, alignment)
        set_runs_font(p, "黑体", size_pt, True)
        if style_name == "Heading 4":
            set_runs_common(p, italic=False, color_black=True)
        return

    if is_numbered_paragraph(p) or style_name.startswith("List Number"):
        # List items keep their own indentation; only spacing and font change.
        p.paragraph_format.line_spacing = 1.5
        set_runs_font(p, "宋体", 12)
        set_runs_common(p, color_black=True)
        return

    apply_para_format(p, 1.5, 24)
    set_runs_font(p, "宋体", 10.5)
    set_runs_common(p, color_black=True)
def set_page_layout(doc):
    """Set page size, margins and header/footer distances on every section.

    Each section gets a 21.0 cm x 29.7 cm page, 2.5 cm margins on all four
    sides, a 1.5 cm header distance and a 1.75 cm footer distance.

    Args:
        doc: python-docx document whose sections are modified in place.
    """
    layout_cm = (
        ("page_width", 21.0),
        ("page_height", 29.7),
        ("top_margin", 2.5),
        ("bottom_margin", 2.5),
        ("left_margin", 2.5),
        ("right_margin", 2.5),
        ("header_distance", 1.5),
        ("footer_distance", 1.75),
    )
    for section in doc.sections:
        for attribute, centimetres in layout_cm:
            setattr(section, attribute, Cm(centimetres))
def main():
    """Load SRC, apply the thesis formatting rules, and save the result to DST.

    Steps, in order: configure the Normal and Heading 1-4 styles, set the page
    layout, format and space-clean every body paragraph, style every top-level
    table (grid look, grey bold header row, formatted cell paragraphs), drop
    consecutive blank paragraphs, insert page breaks between chapters, save,
    and print the output path.
    """
    doc = Document(SRC)
    styles = doc.styles

    # Resolve all styles before changing anything.
    normal = styles["Normal"]
    # (style, font size in pt, first-line indent in pt, force non-italic)
    heading_setup = (
        (styles["Heading 1"], 22, 0, False),
        (styles["Heading 2"], 16, 32, False),
        (styles["Heading 3"], 14, 28, False),
        (styles["Heading 4"], 14, 24, True),
    )

    set_style_font(normal, "宋体", 10.5)
    normal.paragraph_format.line_spacing = 1.5
    normal.paragraph_format.first_line_indent = Pt(21)

    for heading_style, size_pt, indent_pt, clear_italic in heading_setup:
        set_style_font(heading_style, "黑体", size_pt, True)
        if clear_italic:
            heading_style.font.italic = False
        heading_style.paragraph_format.line_spacing = 1.5
        heading_style.paragraph_format.first_line_indent = Pt(indent_pt)

    set_page_layout(doc)

    for paragraph in doc.paragraphs:
        format_paragraph(paragraph)
        cleanup_paragraph_spaces(paragraph)

    for table in doc.tables:
        set_table_style_like_template(table)
        set_table_header_gray(table)
        for row_index, row in enumerate(table.rows):
            is_header_row = row_index == 0
            for cell in row.cells:
                for paragraph in cell.paragraphs:
                    format_table_paragraph(paragraph, bold=is_header_row)
                    cleanup_paragraph_spaces(paragraph)

    remove_redundant_blank_paragraphs(doc)
    add_page_break_between_chapters(doc)

    doc.save(DST)
    print(DST)


if __name__ == "__main__":
    main()