2025-12-11 14:32:31 +08:00
|
|
|
"""
|
|
|
|
|
将 Markdown 文件转换为 DOCX 格式
|
|
|
|
|
使用 python-docx 库生成格式化的 Word 文档
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
from docx import Document
|
|
|
|
|
from docx.shared import Pt, RGBColor, Inches
|
|
|
|
|
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
|
|
|
|
from docx.oxml.ns import qn
|
|
|
|
|
import re
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def parse_markdown_to_docx(md_file: str, docx_file: str):
    """Convert a Markdown file into a DOCX document.

    The input is processed line by line and a small subset of Markdown is
    mapped onto python-docx constructs: fenced code blocks, pipe tables,
    headings of level 1-4, bullet and numbered lists, horizontal rules and
    plain paragraphs (with inline bold / inline code handled by
    ``add_formatted_text``).

    Args:
        md_file: Path of the UTF-8 encoded Markdown file to read.
        docx_file: Path the generated document is saved to.
    """
    doc = Document()

    # Body text defaults: the font has to be set for East Asian text as well,
    # otherwise Word keeps its own default for CJK characters.
    normal_style = doc.styles['Normal']
    normal_style.font.name = '宋体'
    normal_style._element.rPr.rFonts.set(qn('w:eastAsia'), '宋体')
    normal_style.font.size = Pt(10.5)

    with open(md_file, 'r', encoding='utf-8') as f:
        # Trailing whitespace (including the newline) is never significant
        # for any of the constructs handled below.
        lines = [raw.rstrip() for raw in f]

    # (prefix, heading level, font size, font colour or None, centred?)
    heading_specs = (
        ('# ', 1, Pt(18), RGBColor(0, 0, 0), True),
        ('## ', 2, Pt(16), RGBColor(0, 0, 128), False),
        ('### ', 3, Pt(14), RGBColor(0, 0, 128), False),
        ('#### ', 4, Pt(12), None, False),
    )

    i = 0
    total = len(lines)
    in_code_block = False
    code_lines = []

    while i < total:
        line = lines[i]

        # Fenced code block: a ``` line toggles the state; the closing fence
        # emits everything collected since the opening one.
        if line.startswith('```'):
            if in_code_block:
                _add_code_paragraph(doc, code_lines)
            in_code_block = not in_code_block
            code_lines = []
            i += 1
            continue

        if in_code_block:
            code_lines.append(line)
            i += 1
            continue

        # Table: consume the whole run of consecutive table rows at once, so
        # that a table ending at the last line of the file is still emitted
        # and the same predicate decides both the start and the end of it.
        if _is_table_row(line):
            table_lines = []
            while i < total and _is_table_row(lines[i]):
                table_lines.append(lines[i])
                i += 1
            create_table_from_markdown(doc, table_lines)
            continue

        i += 1

        # Blank line -> empty paragraph (lines are already right-stripped).
        if not line:
            doc.add_paragraph()
            continue

        # Headings: the prefixes are mutually exclusive because each one
        # requires a space directly after its exact number of '#'.
        spec = next((s for s in heading_specs if line.startswith(s[0])), None)
        if spec is not None:
            prefix, level, size, color, centered = spec
            _add_styled_heading(doc, line[len(prefix):], level, size, color, centered)
            continue

        # Bullet list item.
        if line.startswith(('- ', '* ')):
            text = process_bold_text(line[2:])
            add_formatted_text(doc.add_paragraph(style='List Bullet'), text)
            continue

        # Numbered list item.
        numbered = re.match(r'^\d+\.\s', line)
        if numbered:
            text = process_bold_text(line[numbered.end():])
            add_formatted_text(doc.add_paragraph(style='List Number'), text)
            continue

        # Horizontal rule, rendered as a line of underscores.
        if line.startswith('---'):
            doc.add_paragraph('_' * 50)
            continue

        # Anything else is an ordinary paragraph.
        add_formatted_text(doc.add_paragraph(), process_bold_text(line))

    # An opening fence without a closing one: keep the collected text
    # instead of silently dropping the rest of the document.
    if in_code_block:
        _add_code_paragraph(doc, code_lines)

    doc.save(docx_file)
    print(f"文档已生成: {docx_file}")


def _is_table_row(line):
    """Return True if *line* starts with '|' and contains a second '|'."""
    return line.startswith('|') and '|' in line[1:]


def _add_code_paragraph(doc, code_lines):
    """Append *code_lines* as one indented, monospaced paragraph (no-op if empty)."""
    if not code_lines:
        return
    p = doc.add_paragraph()
    p.style = 'Normal'
    run = p.add_run('\n'.join(code_lines))
    run.font.name = 'Consolas'
    run.font.size = Pt(9)
    run.font.color.rgb = RGBColor(0, 0, 0)
    # The block is set apart by indentation only; no shading is applied.
    p.paragraph_format.left_indent = Inches(0.5)
    p.paragraph_format.right_indent = Inches(0.5)


def _add_styled_heading(doc, text, level, size, color, centered):
    """Append a heading and apply the SimHei font, size, colour and alignment."""
    heading = doc.add_heading(text, level=level)
    if centered:
        heading.alignment = WD_ALIGN_PARAGRAPH.CENTER
    for run in heading.runs:
        run.font.name = '黑体'
        run._element.rPr.rFonts.set(qn('w:eastAsia'), '黑体')
        run.font.size = size
        if color is not None:
            run.font.color.rgb = color
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def create_table_from_markdown(doc, table_lines):
    """Append a Word table built from Markdown pipe-table lines.

    The first line is the header row, the second line is assumed to be the
    ``|---|---|`` separator and is skipped, and every following line is a
    data row. Nothing is added when fewer than two lines are given.

    The header row fixes the number of columns: shorter data rows leave the
    remaining cells empty, and cells beyond the header width are ignored
    instead of raising ``IndexError``.

    Args:
        doc: python-docx document the table is appended to.
        table_lines: Markdown table lines, header first.
    """
    if len(table_lines) < 2:
        return

    def split_row(line):
        # Remove at most one leading and one trailing pipe before splitting,
        # so a row written without the closing '|' keeps its last cell.
        text = line.strip()
        if text.startswith('|'):
            text = text[1:]
        if text.endswith('|'):
            text = text[:-1]
        return [cell.strip() for cell in text.split('|')]

    headers = split_row(table_lines[0])
    # table_lines[1] is the separator row and carries no content.
    rows_data = [split_row(line) for line in table_lines[2:]]
    col_count = len(headers)

    table = doc.add_table(rows=1 + len(rows_data), cols=col_count)
    table.style = 'Light Grid Accent 1'

    # Header row: bold SimHei, set for East Asian text as well.
    header_cells = table.rows[0].cells
    for col_idx, header in enumerate(headers):
        cell = header_cells[col_idx]
        cell.text = header
        for paragraph in cell.paragraphs:
            for run in paragraph.runs:
                run.font.bold = True
                run.font.name = '黑体'
                run._element.rPr.rFonts.set(qn('w:eastAsia'), '黑体')

    # Data rows: inline bold / code markers are removed, not rendered.
    for row_idx, row_data in enumerate(rows_data, start=1):
        row_cells = table.rows[row_idx].cells
        for col_idx, cell_data in enumerate(row_data[:col_count]):
            row_cells[col_idx].text = cell_data.replace('**', '').replace('`', '')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def process_bold_text(text: str) -> str:
    """Return *text* unchanged.

    This is a pass-through hook: the ``**bold**`` markers are left in place
    here and are interpreted later by ``add_formatted_text``.
    """
    return text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def add_formatted_text(paragraph, text):
    """Append *text* to *paragraph*, rendering inline ``**bold**`` and `` `code` ``.

    The text is first split on ``**...**`` spans; the remaining plain
    fragments are then split on `` `...` `` spans. Each fragment becomes its
    own run: bold and plain runs use SimSun, code runs use 9 pt Consolas.
    Inline code inside a bold span is not interpreted.

    Args:
        paragraph: python-docx paragraph that receives the runs.
        text: One line of Markdown text without block-level markers.
    """
    # re.split with a single capturing group alternates between
    # "text between matches" (even index) and "the match itself" (odd index).
    # Classifying by index avoids mistaking a plain fragment such as
    # "`a` and `b`" or a lone "**" for one marked-up span.
    for bold_idx, part in enumerate(re.split(r'(\*\*.*?\*\*)', text)):
        if bold_idx % 2 == 1:
            content = part[2:-2]
            if not content:
                continue
            run = paragraph.add_run(content)
            run.font.bold = True
            run.font.name = '宋体'
            run._element.rPr.rFonts.set(qn('w:eastAsia'), '宋体')
            continue

        for code_idx, piece in enumerate(re.split(r'(`.*?`)', part)):
            if code_idx % 2 == 1:
                content = piece[1:-1]
                if not content:
                    continue
                run = paragraph.add_run(content)
                run.font.name = 'Consolas'
                run.font.size = Pt(9)
            elif piece:
                # Plain text, including emoji and other special characters.
                run = paragraph.add_run(piece)
                run.font.name = '宋体'
                run._element.rPr.rFonts.set(qn('w:eastAsia'), '宋体')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: convert the user manual that sits next to this
    # script and write the DOCX alongside it.
    base_dir = Path(__file__).parent
    md_file = base_dir / 'Docx报告生成器使用说明书V2.0.md'
    docx_file = base_dir / 'Docx报告生成器使用说明书V2.0.docx'

    print(f"开始转换: {md_file.name}")
    parse_markdown_to_docx(str(md_file), str(docx_file))
    print("转换完成!")
    print(f"输出文件: {docx_file}")
|