""" 将 Markdown 文件转换为 DOCX 格式 使用 python-docx 库生成格式化的 Word 文档 """ from docx import Document from docx.shared import Pt, RGBColor, Inches from docx.enum.text import WD_ALIGN_PARAGRAPH from docx.oxml.ns import qn import re from pathlib import Path def parse_markdown_to_docx(md_file: str, docx_file: str): """将 Markdown 文件转换为 DOCX 文档""" # 创建文档 doc = Document() # 设置中文字体 doc.styles['Normal'].font.name = '宋体' doc.styles['Normal']._element.rPr.rFonts.set(qn('w:eastAsia'), '宋体') doc.styles['Normal'].font.size = Pt(10.5) # 读取 Markdown 文件 with open(md_file, 'r', encoding='utf-8') as f: lines = f.readlines() # 解析并转换 i = 0 in_code_block = False code_lines = [] in_table = False table_lines = [] while i < len(lines): line = lines[i].rstrip() # 代码块处理 if line.startswith('```'): if not in_code_block: in_code_block = True code_lines = [] else: # 结束代码块 in_code_block = False if code_lines: p = doc.add_paragraph() p.style = 'Normal' run = p.add_run('\n'.join(code_lines)) run.font.name = 'Consolas' run.font.size = Pt(9) run.font.color.rgb = RGBColor(0, 0, 0) # 设置背景色(浅灰色) p.paragraph_format.left_indent = Inches(0.5) p.paragraph_format.right_indent = Inches(0.5) i += 1 continue if in_code_block: code_lines.append(line) i += 1 continue # 表格处理 if line.startswith('|') and '|' in line[1:]: if not in_table: in_table = True table_lines = [] table_lines.append(line) i += 1 # 检查下一行是否还是表格 if i < len(lines) and not lines[i].strip().startswith('|'): # 表格结束,创建表格 create_table_from_markdown(doc, table_lines) in_table = False table_lines = [] continue # 空行 if not line.strip(): doc.add_paragraph() i += 1 continue # 一级标题 if line.startswith('# '): heading = doc.add_heading(line[2:], level=1) heading.alignment = WD_ALIGN_PARAGRAPH.CENTER for run in heading.runs: run.font.name = '黑体' run._element.rPr.rFonts.set(qn('w:eastAsia'), '黑体') run.font.size = Pt(18) run.font.color.rgb = RGBColor(0, 0, 0) # 二级标题 elif line.startswith('## '): heading = doc.add_heading(line[3:], level=2) for run in heading.runs: run.font.name = '黑体' run._element.rPr.rFonts.set(qn('w:eastAsia'), '黑体') run.font.size = Pt(16) run.font.color.rgb = RGBColor(0, 0, 128) # 三级标题 elif line.startswith('### '): heading = doc.add_heading(line[4:], level=3) for run in heading.runs: run.font.name = '黑体' run._element.rPr.rFonts.set(qn('w:eastAsia'), '黑体') run.font.size = Pt(14) run.font.color.rgb = RGBColor(0, 0, 128) # 四级标题 elif line.startswith('#### '): heading = doc.add_heading(line[5:], level=4) for run in heading.runs: run.font.name = '黑体' run._element.rPr.rFonts.set(qn('w:eastAsia'), '黑体') run.font.size = Pt(12) # 无序列表 elif line.startswith('- ') or line.startswith('* '): text = line[2:] # 处理加粗 text = process_bold_text(text) p = doc.add_paragraph(style='List Bullet') add_formatted_text(p, text) # 有序列表 elif re.match(r'^\d+\.\s', line): text = re.sub(r'^\d+\.\s', '', line) text = process_bold_text(text) p = doc.add_paragraph(style='List Number') add_formatted_text(p, text) # 分隔线 elif line.startswith('---'): doc.add_paragraph('_' * 50) # 普通段落 else: text = process_bold_text(line) p = doc.add_paragraph() add_formatted_text(p, text) i += 1 # 保存文档 doc.save(docx_file) print(f"文档已生成: {docx_file}") def create_table_from_markdown(doc, table_lines): """从 Markdown 表格行创建 Word 表格""" if len(table_lines) < 2: return # 解析表头 header_line = table_lines[0] headers = [cell.strip() for cell in header_line.split('|')[1:-1]] # 跳过分隔线 data_lines = table_lines[2:] if len(table_lines) > 2 else [] # 解析数据行 rows_data = [] for line in data_lines: cells = [cell.strip() for cell in line.split('|')[1:-1]] rows_data.append(cells) # 创建表格 table = doc.add_table(rows=1 + len(rows_data), cols=len(headers)) table.style = 'Light Grid Accent 1' # 填充表头 header_cells = table.rows[0].cells for i, header in enumerate(headers): header_cells[i].text = header # 设置表头样式 for paragraph in header_cells[i].paragraphs: for run in paragraph.runs: run.font.bold = True run.font.name = '黑体' run._element.rPr.rFonts.set(qn('w:eastAsia'), '黑体') # 填充数据 for row_idx, row_data in enumerate(rows_data, start=1): row_cells = table.rows[row_idx].cells for col_idx, cell_data in enumerate(row_data): # 处理特殊符号 cell_text = cell_data.replace('**', '').replace('`', '') row_cells[col_idx].text = cell_text def process_bold_text(text): """处理加粗文本标记""" return text def add_formatted_text(paragraph, text): """添加格式化文本到段落""" # 处理加粗 **text** parts = re.split(r'(\*\*.*?\*\*)', text) for part in parts: if part.startswith('**') and part.endswith('**'): # 加粗文本 run = paragraph.add_run(part[2:-2]) run.font.bold = True run.font.name = '宋体' run._element.rPr.rFonts.set(qn('w:eastAsia'), '宋体') elif part.startswith('`') and part.endswith('`'): # 代码文本 run = paragraph.add_run(part[1:-1]) run.font.name = 'Consolas' run.font.size = Pt(9) else: # 普通文本 # 处理内联代码 `code` code_parts = re.split(r'(`.*?`)', part) for code_part in code_parts: if code_part.startswith('`') and code_part.endswith('`'): run = paragraph.add_run(code_part[1:-1]) run.font.name = 'Consolas' run.font.size = Pt(9) else: # 处理表情符号和特殊字符 run = paragraph.add_run(code_part) run.font.name = '宋体' run._element.rPr.rFonts.set(qn('w:eastAsia'), '宋体') if __name__ == '__main__': # 转换文件 md_file = Path(__file__).parent / 'Docx报告生成器使用说明书V2.0.md' docx_file = Path(__file__).parent / 'Docx报告生成器使用说明书V2.0.docx' print(f"开始转换: {md_file.name}") parse_markdown_to_docx(str(md_file), str(docx_file)) print(f"转换完成!") print(f"输出文件: {docx_file}")