"""Re-format a thesis ``.docx`` file: fonts, indents, tables and chapter breaks.

The script loads ``SRC`` with python-docx, normalises the built-in styles,
page layout, body paragraphs and tables, and writes the result to ``DST``.
"""
import re

from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_BREAK
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from docx.shared import Cm, Pt, RGBColor

SRC = r"/Users/apple/code/bs/mying/example/萌贝母婴商城毕业论文初稿-2026版.docx"
DST = r"/Users/apple/code/bs/mying/example/萌贝母婴商城毕业论文初稿-2026版-排版.docx"

# Child-element order required by the WordprocessingML schema. New children
# are inserted at their schema position instead of being appended, because
# Word may ignore (or report as corrupt) properties that are out of sequence.
_TBL_PR_ORDER = (
    "w:tblStyle", "w:tblpPr", "w:tblOverlap", "w:bidiVisual",
    "w:tblStyleRowBandSize", "w:tblStyleColBandSize", "w:tblW", "w:jc",
    "w:tblCellSpacing", "w:tblInd", "w:tblBorders", "w:shd", "w:tblLayout",
    "w:tblCellMar", "w:tblLook", "w:tblCaption", "w:tblDescription",
    "w:tblPrChange",
)
_TC_PR_ORDER = (
    "w:cnfStyle", "w:tcW", "w:gridSpan", "w:hMerge", "w:vMerge",
    "w:tcBorders", "w:shd", "w:noWrap", "w:tcMar", "w:textDirection",
    "w:tcFitText", "w:vAlign", "w:hideMark", "w:headers", "w:cellIns",
    "w:cellDel", "w:cellMerge", "w:tcPrChange",
)
# w:trHeight belongs to an unordered choice group; only these must follow it.
_TR_PR_ORDER = ("w:trHeight", "w:ins", "w:del", "w:trPrChange")
_EDGE_ORDER = (
    "w:top", "w:left", "w:start", "w:bottom", "w:right", "w:end",
    "w:insideH", "w:insideV", "w:tl2br", "w:tr2bl",
)

_MULTI_SPACE_RE = re.compile(r"[ \t]{2,}")
_EDGE_WHITESPACE = " \t\u3000"

# Run children that survive a ``run.text`` round trip. Assigning ``run.text``
# drops every other child (pictures, fields, page breaks, footnote marks...).
# ``w:lastRenderedPageBreak`` is only a cached layout hint, so losing it is
# harmless.
_TEXT_ONLY_RUN_TAGS = frozenset(
    qn(tag) for tag in ("w:rPr", "w:t", "w:tab", "w:lastRenderedPageBreak")
)
# Elements that make a paragraph meaningful even when its text is empty.
_NON_TEXT_CONTENT_TAGS = frozenset(
    qn(tag)
    for tag in ("w:drawing", "w:pict", "w:object", "w:sectPr", "w:fldChar")
)


def _get_or_add_child(parent, tag: str, order=()):
    """Return the ``tag`` child of ``parent``, creating it in schema position.

    ``order`` is the schema sequence of ``parent``'s children. A new element
    is inserted before the first existing child that must follow it, or
    appended when no such child exists or ``tag`` is not listed in ``order``.
    """
    existing = parent.find(qn(tag))
    if existing is not None:
        return existing
    child = OxmlElement(tag)
    later_tags = set()
    if tag in order:
        later_tags = {qn(name) for name in order[order.index(tag) + 1:]}
    for sibling in parent:
        if sibling.tag in later_tags:
            sibling.addprevious(child)
            return child
    parent.append(child)
    return child


def _set_single_border(elem):
    """Make a border element a thin, automatic-colour single line."""
    elem.set(qn("w:val"), "single")
    elem.set(qn("w:sz"), "4")
    elem.set(qn("w:color"), "auto")
    elem.set(qn("w:space"), "0")


def _style_name(paragraph) -> str:
    """Return the paragraph's style name, or ``""`` when it has none."""
    style = paragraph.style
    if style is None:
        return ""
    return style.name or ""


def _clear_first_line_indent(paragraph):
    """Force a zero first-line indent, including the character-based one.

    ``first_line_indent = Pt(0)`` only writes ``w:firstLine``; an existing
    ``w:firstLineChars`` would be left in place, so it is zeroed explicitly.
    """
    paragraph.paragraph_format.first_line_indent = Pt(0)
    ind = paragraph._p.get_or_add_pPr().get_or_add_ind()
    ind.set(qn("w:firstLineChars"), "0")
    hanging_chars = qn("w:hangingChars")
    if hanging_chars in ind.attrib:
        del ind.attrib[hanging_chars]


def _is_text_only_run(run) -> bool:
    """Return True when rewriting ``run.text`` cannot destroy other content."""
    return all(child.tag in _TEXT_ONLY_RUN_TAGS for child in run._element)


def _rewrite_run_text(run, new_text: str):
    """Assign ``new_text`` to ``run`` only when it actually differs."""
    if new_text != run.text:
        run.text = new_text


def _has_page_break(element) -> bool:
    """Return True when ``element`` contains an explicit page break."""
    return any(
        br.get(qn("w:type")) == "page" for br in element.iter(qn("w:br"))
    )


def _has_non_text_content(element) -> bool:
    """Return True when ``element`` holds pictures, fields or breaks."""
    for node in element.iter():
        if node.tag in _NON_TEXT_CONTENT_TAGS:
            return True
        if node.tag == qn("w:br") and node.get(qn("w:type")) in ("page", "column"):
            return True
    return False


def _element_text_is_blank(element) -> bool:
    """Return True when the ``w:t`` text under ``element`` is only whitespace."""
    text = "".join(node.text or "" for node in element.iter(qn("w:t")))
    return text.replace("\u3000", " ").strip() == ""


def set_style_font(
    style,
    east_asia_font: str,
    size_pt: float,
    bold: bool | None = None,
    west_font: str = "Times New Roman",
):
    """Set the fonts, size, colour and (optionally) weight of a style.

    Args:
        style: python-docx style object to modify.
        east_asia_font: Font written to ``w:eastAsia``.
        size_pt: Font size in points.
        bold: Bold flag; ``None`` leaves the current setting untouched.
        west_font: Font written to ``w:ascii`` and ``w:hAnsi``.
    """
    font = style.font
    font.name = west_font
    font.size = Pt(size_pt)
    if bold is not None:
        font.bold = bold
    font.color.rgb = RGBColor(0, 0, 0)
    rfonts = style.element.get_or_add_rPr().get_or_add_rFonts()
    rfonts.set(qn("w:ascii"), west_font)
    rfonts.set(qn("w:hAnsi"), west_font)
    rfonts.set(qn("w:eastAsia"), east_asia_font)


def set_runs_font(
    paragraph,
    east_asia_font: str,
    size_pt: float,
    bold: bool | None = None,
    west_font: str = "Times New Roman",
):
    """Apply fonts, size, black colour and optional weight to every run.

    Parameters mirror :func:`set_style_font`, applied to each run in
    ``paragraph.runs``.
    """
    for run in paragraph.runs:
        run.font.name = west_font
        run.font.size = Pt(size_pt)
        if bold is not None:
            run.font.bold = bold
        run.font.color.rgb = RGBColor(0, 0, 0)
        rfonts = run._element.get_or_add_rPr().get_or_add_rFonts()
        rfonts.set(qn("w:ascii"), west_font)
        rfonts.set(qn("w:hAnsi"), west_font)
        rfonts.set(qn("w:eastAsia"), east_asia_font)


def set_runs_common(paragraph, italic: bool | None = None, color_black: bool = True):
    """Set italic (when given) and optionally black colour on every run."""
    for run in paragraph.runs:
        if italic is not None:
            run.font.italic = italic
        if color_black:
            run.font.color.rgb = RGBColor(0, 0, 0)


def is_numbered_paragraph(paragraph) -> bool:
    """Return True when the paragraph carries direct numbering (``w:numPr``)."""
    ppr = paragraph._p.pPr
    return ppr is not None and ppr.numPr is not None


def iter_table_paragraphs(table):
    """Yield every paragraph in ``table``, descending into nested tables."""
    for row in table.rows:
        for cell in row.cells:
            yield from cell.paragraphs
            for nested in cell.tables:
                yield from iter_table_paragraphs(nested)


def format_table_paragraph(p, bold: bool = False):
    """Format a table-cell paragraph: centred, single spacing, no indent."""
    p.alignment = WD_ALIGN_PARAGRAPH.CENTER
    fmt = p.paragraph_format
    fmt.line_spacing = 1.0
    fmt.space_before = Pt(0)
    fmt.space_after = Pt(0)
    # Also zero w:firstLineChars so a character-based indent cannot survive.
    _clear_first_line_indent(p)
    set_runs_font(p, "宋体", 10.5, bold=bold)
    set_runs_common(p, italic=False, color_black=True)


def set_table_style_like_template(table):
    """Apply the template table look: grid borders, centred, ~full width.

    Sets the table style, width (4997 fiftieths of a percent), alignment,
    cell margins, table and cell borders, a minimum row height of 620 twips
    and vertically centred cells.
    """
    tbl = table._tbl
    # ``tbl.tblPr`` raises when the element is missing, so look it up directly.
    tbl_pr = tbl.find(qn("w:tblPr"))
    if tbl_pr is None:
        tbl_pr = OxmlElement("w:tblPr")
        tbl.insert(0, tbl_pr)

    # w:tblStyle/@w:val holds a style *ID*, not the UI name; python-docx
    # resolves the name. Borders are set explicitly below, so a document
    # without a usable "Table Grid" table style is simply left unstyled.
    try:
        table.style = "Table Grid"
    except (KeyError, ValueError):
        pass

    tbl_w = _get_or_add_child(tbl_pr, "w:tblW", _TBL_PR_ORDER)
    tbl_w.set(qn("w:type"), "pct")
    tbl_w.set(qn("w:w"), "4997")

    tbl_jc = _get_or_add_child(tbl_pr, "w:jc", _TBL_PR_ORDER)
    tbl_jc.set(qn("w:val"), "center")

    tbl_cell_mar = _get_or_add_child(tbl_pr, "w:tblCellMar", _TBL_PR_ORDER)
    for edge, width in (("top", "120"), ("left", "140"), ("bottom", "120"), ("right", "140")):
        margin = _get_or_add_child(tbl_cell_mar, f"w:{edge}", _EDGE_ORDER)
        margin.set(qn("w:w"), width)
        margin.set(qn("w:type"), "dxa")

    tbl_borders = _get_or_add_child(tbl_pr, "w:tblBorders", _TBL_PR_ORDER)
    for edge in ("top", "left", "bottom", "right", "insideH", "insideV"):
        _set_single_border(_get_or_add_child(tbl_borders, f"w:{edge}", _EDGE_ORDER))

    for row in table.rows:
        tr_pr = row._tr.get_or_add_trPr()
        tr_height = _get_or_add_child(tr_pr, "w:trHeight", _TR_PR_ORDER)
        tr_height.set(qn("w:val"), "620")
        tr_height.set(qn("w:hRule"), "atLeast")

        for cell in row.cells:
            tc_pr = cell._tc.get_or_add_tcPr()
            v_align = _get_or_add_child(tc_pr, "w:vAlign", _TC_PR_ORDER)
            v_align.set(qn("w:val"), "center")

            tc_borders = _get_or_add_child(tc_pr, "w:tcBorders", _TC_PR_ORDER)
            for edge in ("top", "left", "bottom", "right"):
                _set_single_border(
                    _get_or_add_child(tc_borders, f"w:{edge}", _EDGE_ORDER)
                )


def set_table_header_gray(table):
    """Shade every cell of the first row with fill ``D9D9D9``."""
    if not table.rows:
        return
    for cell in table.rows[0].cells:
        tc_pr = cell._tc.get_or_add_tcPr()
        shd = _get_or_add_child(tc_pr, "w:shd", _TC_PR_ORDER)
        shd.set(qn("w:val"), "clear")
        shd.set(qn("w:color"), "auto")
        shd.set(qn("w:fill"), "D9D9D9")


def cleanup_paragraph_spaces(paragraph):
    """Collapse repeated spaces/tabs and trim the paragraph's outer whitespace.

    Runs of two or more spaces or tabs inside a run become one space; leading
    whitespace of the first run and trailing whitespace of the last run
    (including U+3000) are stripped. Only runs made purely of text and tabs
    are rewritten, and only when the text changes, because assigning
    ``run.text`` discards any picture, field or break stored in the run.
    """
    runs = paragraph.runs
    if not runs:
        return
    for run in runs:
        if run.text and _is_text_only_run(run):
            _rewrite_run_text(run, _MULTI_SPACE_RE.sub(" ", run.text))
    first, last = runs[0], runs[-1]
    if _is_text_only_run(first):
        _rewrite_run_text(first, first.text.lstrip(_EDGE_WHITESPACE))
    if _is_text_only_run(last):
        _rewrite_run_text(last, last.text.rstrip(_EDGE_WHITESPACE))


def remove_redundant_blank_paragraphs(doc):
    """Remove a blank body paragraph that directly follows another blank one.

    A paragraph counts as blank only when it has no visible text and no
    picture, embedded object, field, page/column break or section break.
    Blank paragraphs separated by a table are not considered adjacent.
    """
    last_blank = None
    for p in list(doc.paragraphs):
        element = p._element
        text = p.text.replace("\u3000", " ").strip()
        if text != "" or _has_non_text_content(element):
            last_blank = None
            continue
        if last_blank is not None and element.getprevious() is last_blank:
            # The kept blank stays the reference for the next candidate.
            element.getparent().remove(element)
            continue
        last_blank = element


def _starts_on_new_page(paragraph) -> bool:
    """Return True when an explicit page break already precedes ``paragraph``.

    Walks back over preceding blank paragraphs so that "page break, empty
    line, chapter title" is recognised as already broken.
    """
    w_p = qn("w:p")
    prev = paragraph._element.getprevious()
    while prev is not None:
        if _has_page_break(prev):
            return True
        if prev.tag != w_p:
            return False
        if not _element_text_is_blank(prev) or _has_non_text_content(prev):
            return False
        prev = prev.getprevious()
    return False


def add_page_break_between_chapters(doc):
    """Start every chapter after the first on a new page.

    Chapter titles are body paragraphs whose text matches ``第 N 章``.
    Paragraphs using a table-of-contents style are skipped so TOC entries are
    not split across pages. The break is applied as the paragraph's
    "page break before" property, which does not leave an empty line at the
    top of the new page and is safe to apply repeatedly.
    """
    chapter_pattern = re.compile(r"^第\s*\d+\s*章")
    chapter_paragraphs = []
    for p in doc.paragraphs:
        text = p.text.replace("\u3000", " ").strip()
        if not chapter_pattern.match(text):
            continue
        if _style_name(p).lower().startswith("toc"):
            continue
        chapter_paragraphs.append(p)

    for p in chapter_paragraphs[1:]:
        fmt = p.paragraph_format
        if fmt.page_break_before or _starts_on_new_page(p):
            continue
        fmt.page_break_before = True


def set_first_line_two_chars(paragraph, twips: int = 420, chars: int = 200):
    """Set a first-line indent of ``chars`` hundredths of a character.

    ``twips`` is written to ``w:firstLine`` alongside the character-based
    ``w:firstLineChars`` value.
    """
    # get_or_add_ind() places w:ind at its schema position inside w:pPr.
    ind = paragraph._p.get_or_add_pPr().get_or_add_ind()
    ind.set(qn("w:firstLine"), str(twips))
    ind.set(qn("w:firstLineChars"), str(chars))


def apply_para_format(paragraph, line_spacing: float, first_line_pt: float | None = None, align=None):
    """Apply line spacing, zero paragraph spacing, indent and alignment.

    Args:
        paragraph: Paragraph to modify.
        line_spacing: Line-spacing multiple.
        first_line_pt: ``None`` leaves the indent untouched; ``0`` removes it
            (point- and character-based); any other value sets it and then
            applies the two-character indent.
        align: Optional ``WD_ALIGN_PARAGRAPH`` member.
    """
    fmt = paragraph.paragraph_format
    fmt.line_spacing = line_spacing
    fmt.space_before = Pt(0)
    fmt.space_after = Pt(0)
    if first_line_pt is not None:
        if first_line_pt == 0:
            # A zero indent must not be overridden by the two-character one
            # (e.g. the centred Heading 1).
            _clear_first_line_indent(paragraph)
        else:
            fmt.first_line_indent = Pt(first_line_pt)
            set_first_line_two_chars(paragraph)
    if align is not None:
        paragraph.alignment = align


def format_paragraph(p):
    """Format one body paragraph according to its style name."""
    style_name = _style_name(p)
    if style_name == "Heading 1":
        apply_para_format(p, 1.5, 0, WD_ALIGN_PARAGRAPH.CENTER)
        set_runs_font(p, "黑体", 22, True)
    elif style_name == "Heading 2":
        apply_para_format(p, 1.5, 32)
        set_runs_font(p, "黑体", 16, True)
    elif style_name == "Heading 3":
        apply_para_format(p, 1.5, 28)
        set_runs_font(p, "黑体", 14, True)
    elif style_name == "Heading 4":
        apply_para_format(p, 1.5, 24)
        set_runs_font(p, "黑体", 14, True)
        set_runs_common(p, italic=False, color_black=True)
    elif is_numbered_paragraph(p) or style_name.startswith("List Number"):
        # Numbered items keep their own indentation.
        p.paragraph_format.line_spacing = 1.5
        set_runs_font(p, "宋体", 12)
        set_runs_common(p, color_black=True)
    else:
        apply_para_format(p, 1.5, 24)
        set_runs_font(p, "宋体", 10.5)
        set_runs_common(p, color_black=True)


def set_page_layout(doc):
    """Set every section to A4 with 2.5 cm margins and fixed header/footer."""
    for section in doc.sections:
        section.page_width = Cm(21.0)
        section.page_height = Cm(29.7)
        section.top_margin = Cm(2.5)
        section.bottom_margin = Cm(2.5)
        section.left_margin = Cm(2.5)
        section.right_margin = Cm(2.5)
        section.header_distance = Cm(1.5)
        section.footer_distance = Cm(1.75)


def main(src: str = SRC, dst: str = DST):
    """Format the document at ``src`` and save the result to ``dst``.

    Args:
        src: Path of the input ``.docx``; defaults to ``SRC``.
        dst: Path of the output ``.docx``; defaults to ``DST``.
    """
    doc = Document(src)

    # (style name, East Asian font, size pt, bold, first-line indent pt)
    style_specs = (
        ("Normal", "宋体", 10.5, None, 21),
        ("Heading 1", "黑体", 22, True, 0),
        ("Heading 2", "黑体", 16, True, 32),
        ("Heading 3", "黑体", 14, True, 28),
        ("Heading 4", "黑体", 14, True, 24),
    )
    for name, east_asia_font, size_pt, bold, indent_pt in style_specs:
        try:
            style = doc.styles[name]
        except KeyError:
            # A style the document does not define cannot be in use either.
            continue
        set_style_font(style, east_asia_font, size_pt, bold)
        if name == "Heading 4":
            style.font.italic = False
        style.paragraph_format.line_spacing = 1.5
        style.paragraph_format.first_line_indent = Pt(indent_pt)

    set_page_layout(doc)

    for p in doc.paragraphs:
        format_paragraph(p)
        cleanup_paragraph_spaces(p)

    for t in doc.tables:
        set_table_style_like_template(t)
        set_table_header_gray(t)
        for row_index, row in enumerate(t.rows):
            for cell in row.cells:
                for p in cell.paragraphs:
                    format_table_paragraph(p, bold=(row_index == 0))
                    cleanup_paragraph_spaces(p)

    remove_redundant_blank_paragraphs(doc)
    add_page_break_between_chapters(doc)

    doc.save(dst)
    print(dst)


if __name__ == "__main__":
    main()