"""Apply thesis formatting rules to a .docx file with python-docx.

The script loads the document at ``SRC``, rewrites style-level and run-level
fonts, paragraph spacing and indentation, page layout and table borders, and
saves the result to ``DST``.
"""

from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from docx.shared import Cm, Pt, RGBColor


SRC = r"/Users/apple/code/bs/mying/example/萌贝母婴商城毕业论文初稿-2026版.docx"
DST = r"/Users/apple/code/bs/mying/example/萌贝母婴商城毕业论文初稿-2026版-排版.docx"

# Child order of <w:tcBorders> in the transitional WordprocessingML schema.
_TC_BORDER_SEQUENCE = (
    "top",
    "left",
    "bottom",
    "right",
    "insideH",
    "insideV",
    "tl2br",
    "tr2bl",
)

# The edges this script actually draws (the diagonals are left untouched).
_TC_BORDER_EDGES = _TC_BORDER_SEQUENCE[:6]

# Elements that must come after <w:tcBorders> inside <w:tcPr>.
_TC_BORDERS_SUCCESSORS = (
    "shd",
    "noWrap",
    "tcMar",
    "textDirection",
    "tcFitText",
    "vAlign",
    "hideMark",
    "headers",
    "cellIns",
    "cellDel",
    "cellMerge",
    "tcPrChange",
)


def _set_rfonts(rpr, east_asia_font: str, west_font: str) -> None:
    """Write the ascii/hAnsi/eastAsia font names onto ``rpr``'s <w:rFonts>."""
    rfonts = rpr.get_or_add_rFonts()
    rfonts.set(qn("w:ascii"), west_font)
    rfonts.set(qn("w:hAnsi"), west_font)
    rfonts.set(qn("w:eastAsia"), east_asia_font)


def _insert_in_sequence(parent, child, successor_names) -> None:
    """Insert ``child`` before the first existing successor, else append it.

    ``successor_names`` are local names in the ``w:`` namespace that the
    schema requires to appear after ``child``.
    """
    successor_tags = {qn(f"w:{name}") for name in successor_names}
    for sibling in parent:
        if sibling.tag in successor_tags:
            sibling.addprevious(child)
            return
    parent.append(child)


def set_style_font(
    style,
    east_asia_font: str,
    size_pt: float,
    bold: bool | None = None,
    west_font: str = "Times New Roman",
):
    """Set the fonts, size, optional bold flag and black color of a style.

    Args:
        style: python-docx style object to modify.
        east_asia_font: Font name written to ``w:eastAsia``.
        size_pt: Font size in points.
        bold: Bold flag; ``None`` leaves the current setting untouched.
        west_font: Font name written to ``w:ascii`` and ``w:hAnsi``.
    """
    font = style.font
    font.name = west_font
    font.size = Pt(size_pt)
    if bold is not None:
        font.bold = bold
    font.color.rgb = RGBColor(0, 0, 0)
    _set_rfonts(style.element.get_or_add_rPr(), east_asia_font, west_font)


def set_runs_font(
    paragraph,
    east_asia_font: str,
    size_pt: float,
    bold: bool | None = None,
    west_font: str = "Times New Roman",
):
    """Apply fonts, size, optional bold flag and black color to every run.

    Only the runs returned by ``paragraph.runs`` are touched.

    Args:
        paragraph: python-docx paragraph whose runs are modified.
        east_asia_font: Font name written to ``w:eastAsia``.
        size_pt: Font size in points.
        bold: Bold flag; ``None`` leaves each run's setting untouched.
        west_font: Font name written to ``w:ascii`` and ``w:hAnsi``.
    """
    for run in paragraph.runs:
        run.font.name = west_font
        run.font.size = Pt(size_pt)
        if bold is not None:
            run.font.bold = bold
        run.font.color.rgb = RGBColor(0, 0, 0)
        _set_rfonts(run._element.get_or_add_rPr(), east_asia_font, west_font)


def set_runs_common(paragraph, italic: bool | None = None, color_black: bool = True):
    """Set the italic flag and/or black color on every run of ``paragraph``.

    Args:
        paragraph: python-docx paragraph whose runs are modified.
        italic: Italic flag; ``None`` leaves each run's setting untouched.
        color_black: When true, force the run color to RGB (0, 0, 0).
    """
    for run in paragraph.runs:
        if italic is not None:
            run.font.italic = italic
        if color_black:
            run.font.color.rgb = RGBColor(0, 0, 0)


def is_numbered_paragraph(paragraph) -> bool:
    """Return True when the paragraph carries its own <w:numPr> element."""
    ppr = paragraph._p.pPr
    if ppr is None:
        return False
    return ppr.numPr is not None


def set_table_all_borders_black(table):
    """Give every cell of ``table`` single, black, size-4 borders.

    The top, left, bottom, right, insideH and insideV edges are set. New
    elements are inserted at their schema position instead of being appended,
    so <w:tcPr> and <w:tcBorders> keep a valid child order.
    """
    for row in table.rows:
        for cell in row.cells:
            tc_pr = cell._tc.get_or_add_tcPr()
            tc_borders = tc_pr.find(qn("w:tcBorders"))
            if tc_borders is None:
                tc_borders = OxmlElement("w:tcBorders")
                _insert_in_sequence(tc_pr, tc_borders, _TC_BORDERS_SUCCESSORS)
            for index, edge in enumerate(_TC_BORDER_EDGES):
                elem = tc_borders.find(qn(f"w:{edge}"))
                if elem is None:
                    elem = OxmlElement(f"w:{edge}")
                    _insert_in_sequence(
                        tc_borders, elem, _TC_BORDER_SEQUENCE[index + 1:]
                    )
                elem.set(qn("w:val"), "single")
                elem.set(qn("w:sz"), "4")
                elem.set(qn("w:color"), "000000")
                elem.set(qn("w:space"), "0")


def iter_table_paragraphs(table):
    """Yield every paragraph in ``table``, descending into nested tables."""
    for row in table.rows:
        for cell in row.cells:
            yield from cell.paragraphs
            for nested in cell.tables:
                yield from iter_table_paragraphs(nested)


def _iter_tables(table):
    """Yield ``table`` followed by every table nested inside its cells."""
    yield table
    for row in table.rows:
        for cell in row.cells:
            for nested in cell.tables:
                yield from _iter_tables(nested)


def set_first_line_two_chars(paragraph, twips: int = 420, chars: int = 200):
    """Set both the absolute and the character-based first-line indent.

    Args:
        paragraph: python-docx paragraph to modify.
        twips: Value written to ``w:firstLine`` (twentieths of a point).
        chars: Value written to ``w:firstLineChars`` (hundredths of a
            character, so 200 means two characters).
    """
    ppr = paragraph._p.get_or_add_pPr()
    # get_or_add_ind() creates <w:ind> at its schema position in <w:pPr>.
    ind = ppr.get_or_add_ind()
    ind.set(qn("w:firstLine"), str(twips))
    ind.set(qn("w:firstLineChars"), str(chars))


def _clear_first_line_chars(paragraph) -> None:
    """Remove ``w:firstLineChars`` so an absolute first-line indent applies."""
    ppr = paragraph._p.pPr
    if ppr is None:
        return
    ind = ppr.find(qn("w:ind"))
    if ind is not None:
        ind.attrib.pop(qn("w:firstLineChars"), None)


def apply_para_format(
    paragraph,
    line_spacing: float,
    first_line_pt: float | None = None,
    align=None,
):
    """Set line spacing, zero paragraph spacing, indent and alignment.

    Args:
        paragraph: python-docx paragraph to modify.
        line_spacing: Line-spacing multiple.
        first_line_pt: First-line indent in points. ``None`` leaves the indent
            untouched. A positive value also records a two-character indent.
            Zero or a negative value clears any character-based indent so the
            absolute value takes effect.
        align: Paragraph alignment; ``None`` leaves the alignment untouched.
    """
    fmt = paragraph.paragraph_format
    fmt.line_spacing = line_spacing
    fmt.space_before = Pt(0)
    fmt.space_after = Pt(0)
    if first_line_pt is not None:
        fmt.first_line_indent = Pt(first_line_pt)
        if first_line_pt > 0:
            # 1 pt = 20 twips; keep w:firstLine equal to the requested indent.
            set_first_line_two_chars(paragraph, twips=round(first_line_pt * 20))
        else:
            # Per the OOXML spec, firstLineChars overrides firstLine, so a
            # leftover value would keep indenting a zero-indent paragraph.
            _clear_first_line_chars(paragraph)

    if align is not None:
        paragraph.alignment = align


def format_paragraph(p):
    """Format one paragraph according to its style name.

    "Heading 1" to "Heading 4" get heading fonts and indents. Numbered
    paragraphs and "List Number*" styles get 12 pt text and 1.5 line spacing
    only. Everything else is treated as body text.
    """
    style_name = p.style.name if p.style is not None else ""
    if style_name == "Heading 1":
        apply_para_format(p, 1.5, 0, WD_ALIGN_PARAGRAPH.CENTER)
        set_runs_font(p, "黑体", 22, True)
    elif style_name == "Heading 2":
        apply_para_format(p, 1.5, 32)
        set_runs_font(p, "黑体", 16, True)
    elif style_name == "Heading 3":
        apply_para_format(p, 1.5, 28)
        set_runs_font(p, "宋体", 14, True)
    elif style_name == "Heading 4":
        apply_para_format(p, 1.5, 24)
        set_runs_font(p, "宋体", 12, True)
        set_runs_common(p, italic=False, color_black=True)
    elif is_numbered_paragraph(p) or style_name.startswith("List Number"):
        # Numbered items keep their existing indentation.
        p.paragraph_format.line_spacing = 1.5
        set_runs_font(p, "宋体", 12)
        set_runs_common(p, color_black=True)
    else:
        # 21 pt = two characters at 10.5 pt, matching the Normal style.
        apply_para_format(p, 1.5, 21)
        set_runs_font(p, "宋体", 10.5)
        set_runs_common(p, color_black=True)


def set_page_layout(doc):
    """Set every section to A4 with 2.5 cm margins and fixed header/footer."""
    for section in doc.sections:
        section.page_width = Cm(21.0)
        section.page_height = Cm(29.7)
        section.top_margin = Cm(2.5)
        section.bottom_margin = Cm(2.5)
        section.left_margin = Cm(2.5)
        section.right_margin = Cm(2.5)
        section.header_distance = Cm(1.5)
        section.footer_distance = Cm(1.75)


def main(src: str = SRC, dst: str = DST) -> None:
    """Format the document at ``src`` and save it to ``dst``.

    Args:
        src: Path of the .docx file to read.
        dst: Path the formatted .docx file is written to.

    Raises:
        KeyError: If the document lacks one of the "Normal" or
            "Heading 1" to "Heading 4" styles.
    """
    doc = Document(src)

    normal = doc.styles["Normal"]
    h1 = doc.styles["Heading 1"]
    h2 = doc.styles["Heading 2"]
    h3 = doc.styles["Heading 3"]
    h4 = doc.styles["Heading 4"]

    # Body text: SimSun 10.5 pt, two-character first-line indent (21 pt),
    # 1.5 line spacing.
    # NOTE(review): the original comment asked for 12 pt text with a 24 pt
    # indent, but the code has always used 10.5 pt. Confirm which the thesis
    # template requires.
    set_style_font(normal, "宋体", 10.5)
    normal.paragraph_format.line_spacing = 1.5
    normal.paragraph_format.first_line_indent = Pt(21)

    # Heading 1: SimHei 22 pt, bold, centered, no indent, 1.5 line spacing.
    set_style_font(h1, "黑体", 22, True)
    h1.paragraph_format.line_spacing = 1.5
    h1.paragraph_format.first_line_indent = Pt(0)
    h1.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Heading 2: SimHei 16 pt, bold, two-character indent, 1.5 line spacing.
    set_style_font(h2, "黑体", 16, True)
    h2.paragraph_format.line_spacing = 1.5
    h2.paragraph_format.first_line_indent = Pt(32)

    # Heading 3: SimSun 14 pt, bold, two-character indent, 1.5 line spacing.
    # Uses the same font as the run-level formatting in format_paragraph.
    set_style_font(h3, "宋体", 14, True)
    h3.paragraph_format.line_spacing = 1.5
    h3.paragraph_format.first_line_indent = Pt(28)

    # Heading 4: SimSun 12 pt, bold, not italic, black, 1.5 line spacing.
    # Uses the same font and size as format_paragraph.
    set_style_font(h4, "宋体", 12, True)
    h4.font.italic = False
    h4.paragraph_format.line_spacing = 1.5
    h4.paragraph_format.first_line_indent = Pt(24)

    set_page_layout(doc)

    for p in doc.paragraphs:
        format_paragraph(p)

    for top_level in doc.tables:
        # Borders cover nested tables too, matching the paragraph pass below.
        for table in _iter_tables(top_level):
            set_table_all_borders_black(table)
        for p in iter_table_paragraphs(top_level):
            format_paragraph(p)

    doc.save(dst)
    print(dst)


if __name__ == "__main__":
    main()