NAC_Blockchain/docs/reports/convert_to_word.py

204 lines
6.3 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
将Markdown文档转换为Word格式
"""
from docx import Document
from docx.shared import Pt, Inches, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.style import WD_STYLE_TYPE
import re
import sys
def create_styled_document():
    """Create a Word document with the default body font and heading styles.

    The 'Normal' style is set to Microsoft YaHei at 11 pt. 'Heading 1',
    'Heading 2' and 'Heading 3' are set to bold Microsoft YaHei at 20, 16
    and 14 pt, each with its own shade of blue.

    Returns:
        The new python-docx document with the styles applied.
    """
    # Local import: keeps this function self-contained without touching the
    # module's import block. ``docx`` is already a dependency of this file.
    from docx.oxml.ns import qn

    font_name = 'Microsoft YaHei'

    def apply_font(style, point_size):
        # ``font.name`` fills only the Latin font slots of <w:rFonts>. Word
        # chooses the typeface for CJK characters from the ``w:eastAsia``
        # slot, so that slot has to be written explicitly; otherwise Chinese
        # text is not rendered in the requested font.
        style.font.name = font_name
        r_fonts = style.element.rPr.rFonts
        r_fonts.set(qn('w:eastAsia'), font_name)
        # A theme reference on the same slot takes precedence over the
        # explicit name, so remove it if the template defines one.
        r_fonts.attrib.pop(qn('w:eastAsiaTheme'), None)
        style.font.size = Pt(point_size)

    doc = Document()

    # Default body font.
    apply_font(doc.styles['Normal'], 11)

    # (style name, size in points, colour) for each customised heading level.
    heading_specs = (
        ('Heading 1', 20, RGBColor(0, 0, 128)),
        ('Heading 2', 16, RGBColor(0, 64, 128)),
        ('Heading 3', 14, RGBColor(0, 96, 128)),
    )
    for style_name, point_size, color in heading_specs:
        heading_style = doc.styles[style_name]
        apply_font(heading_style, point_size)
        heading_style.font.bold = True
        heading_style.font.color.rgb = color

    return doc
def parse_markdown_line(line):
    """Classify a single Markdown line.

    Trailing whitespace (including the newline) is removed before the line
    is inspected. Checks are applied in this order and the first match wins:
    headings, bullet list, numbered list, code fence, table row, horizontal
    rule, blank line, plain paragraph.

    Args:
        line: One raw line of Markdown text.

    Returns:
        A ``(line_type, content)`` tuple where ``line_type`` is one of
        'heading1'..'heading4', 'list', 'numbered_list', 'code_block',
        'table', 'separator', 'empty' or 'paragraph'. ``content`` is the
        line with its marker removed; for 'code_block' it is the text after
        the opening backticks, for 'table' and 'paragraph' it is the whole
        line, and for 'separator' and 'empty' it is ''.
    """
    line = line.rstrip()
    stripped = line.strip()

    # Headings: '# ' .. '#### '. The trailing space keeps the prefixes
    # mutually exclusive ('## x' does not start with '# ').
    for level, marker in enumerate(('# ', '## ', '### ', '#### '), start=1):
        if line.startswith(marker):
            return (f'heading{level}', line[len(marker):])

    # Bullet list items.
    if line.startswith(('- ', '* ')):
        return ('list', line[2:])

    # Numbered list items such as '1. text'.
    numbered = re.match(r'^\d+\.\s', line)
    if numbered:
        return ('numbered_list', line[numbered.end():])

    # Code fence; whatever follows the backticks is returned as content.
    if line.startswith('```'):
        return ('code_block', line[3:])

    # Table row.
    if line.startswith('|'):
        return ('table', line)

    # Horizontal rule: three OR MORE identical '-', '*' or '_' characters.
    # Matching only the exact three-character forms let rules such as
    # '-----' fall through and be rendered as literal paragraph text.
    if re.fullmatch(r'-{3,}|\*{3,}|_{3,}', stripped):
        return ('separator', '')

    # Blank line.
    if not stripped:
        return ('empty', '')

    # Anything else is ordinary paragraph text.
    return ('paragraph', line)
def add_formatted_text(paragraph, text):
    """Append *text* to *paragraph*, rendering inline bold and inline code.

    ``**bold**`` spans become bold runs and `` `code` `` spans become runs
    in 10 pt Consolas; everything else is added as plain runs. Markers
    without a closing partner are kept as literal text.

    Args:
        paragraph: Object exposing ``add_run(text)`` (a python-docx
            paragraph in this script).
        text: Markdown source for a single line of inline content.
    """
    # The capturing group makes re.split keep the matched tokens, so the
    # result alternates: plain text at even indexes, tokens at odd indexes.
    # Both marker kinds must be in the pattern; splitting on '**' alone
    # never isolates a `code` span that sits inside a sentence.
    pieces = re.split(r'(\*\*.+?\*\*|`[^`]+`)', text)

    for index, piece in enumerate(pieces):
        if not piece:
            # re.split yields '' around adjacent/leading/trailing tokens;
            # skip them rather than creating empty runs.
            continue
        if index % 2 == 0:
            # Plain text. Deciding by position (not by startswith/endswith)
            # keeps a stray '**' or '`' as visible literal text.
            paragraph.add_run(piece)
        elif piece.startswith('**'):
            run = paragraph.add_run(piece[2:-2])
            run.bold = True
        else:
            run = paragraph.add_run(piece[1:-1])
            run.font.name = 'Consolas'
            run.font.size = Pt(10)
def _split_table_row(row_line):
    """Return the stripped cell texts of one Markdown table line."""
    row = row_line.strip()
    # Drop the outer pipes individually so a row written without a closing
    # '|' keeps its last cell.
    if row.startswith('|'):
        row = row[1:]
    if row.endswith('|'):
        row = row[:-1]
    if not row:
        return []
    return [cell.strip() for cell in row.split('|')]


def _is_table_delimiter_row(cells):
    """Return True when every cell looks like '---', ':---', '---:' or ':---:'."""
    return bool(cells) and all(re.fullmatch(r':?-+:?', cell) for cell in cells)


def _flush_table(doc, table_lines):
    """Append the buffered Markdown table lines to *doc* as one Word table."""
    rows = []
    for table_line in table_lines:
        cells = _split_table_row(table_line)
        if cells and not _is_table_delimiter_row(cells):
            rows.append(cells)
    if not rows:
        return

    # Size the table for the widest row so a body row with more cells than
    # the header cannot index past the last column.
    column_count = max(len(row) for row in rows)
    table = doc.add_table(rows=len(rows), cols=column_count)
    table.style = 'Light Grid Accent 1'

    for row_index, row_cells in enumerate(rows):
        word_cells = table.rows[row_index].cells
        for column_index, cell_text in enumerate(row_cells):
            cell = word_cells[column_index]
            cell.text = cell_text
            if row_index == 0:
                # First row is the header: make its text bold.
                for run in cell.paragraphs[0].runs:
                    run.bold = True


def _flush_code_block(doc, code_lines):
    """Append the buffered code lines to *doc* as one indented monospace paragraph."""
    if not code_lines:
        return
    paragraph = doc.add_paragraph('\n'.join(code_lines))
    paragraph.style = 'Normal'
    paragraph.paragraph_format.left_indent = Inches(0.5)
    for run in paragraph.runs:
        run.font.name = 'Consolas'
        run.font.size = Pt(9)


def convert_markdown_to_word(md_file, output_file):
    """Convert a Markdown file to a Word document.

    The file is read line by line. Fenced code blocks and runs of table
    rows are buffered and written out as a single paragraph / table when
    they end (or when the file ends); every other line is written
    immediately according to the type reported by ``parse_markdown_line``.
    A confirmation message is printed after the document is saved.

    Args:
        md_file: Path of the Markdown file to read (UTF-8, with or without
            a byte-order mark).
        output_file: Path of the .docx file to write.

    Errors raised while opening the input or saving the output are not
    caught here and propagate to the caller.
    """
    doc = create_styled_document()

    # 'utf-8-sig' decodes plain UTF-8 as well, but also drops a leading BOM
    # that would otherwise hide the marker of the first line (e.g. '# ').
    with open(md_file, 'r', encoding='utf-8-sig') as f:
        lines = f.readlines()

    # Headings map onto Word heading levels.
    # NOTE(review): '####' is written as level 3, same as '###' — presumably
    # because only three heading styles are customised; confirm.
    heading_levels = {'heading1': 1, 'heading2': 2, 'heading3': 3, 'heading4': 3}
    list_styles = {'list': 'List Bullet', 'numbered_list': 'List Number'}

    in_code_block = False
    code_lines = []
    table_lines = []  # non-empty while a table is being collected

    for line in lines:
        line_type, content = parse_markdown_line(line)

        # Inside a fence every line is code until the closing fence.
        if in_code_block:
            if line_type == 'code_block':
                _flush_code_block(doc, code_lines)
                code_lines = []
                in_code_block = False
            else:
                code_lines.append(line.rstrip())
            continue

        # Collect consecutive table rows.
        if line_type == 'table':
            table_lines.append(content)
            continue

        # Any other line ends a pending table. This runs before the fence
        # check so a table directly followed by a code block keeps its
        # position in the document.
        if table_lines:
            _flush_table(doc, table_lines)
            table_lines = []

        if line_type == 'code_block':
            in_code_block = True
        elif line_type in heading_levels:
            doc.add_heading(content, level=heading_levels[line_type])
        elif line_type in list_styles:
            # Route list items through the inline formatter so **bold** and
            # `code` render the same way as in ordinary paragraphs.
            p = doc.add_paragraph(style=list_styles[line_type])
            add_formatted_text(p, content)
        elif line_type == 'separator':
            doc.add_paragraph('_' * 80)
        elif line_type == 'empty':
            doc.add_paragraph()
        elif line_type == 'paragraph':
            p = doc.add_paragraph()
            add_formatted_text(p, content)

    # The file may end while a table or an unterminated code block is still
    # buffered; write them out instead of silently dropping the content.
    if table_lines:
        _flush_table(doc, table_lines)
    if in_code_block:
        _flush_code_block(doc, code_lines)

    # Save the document.
    doc.save(output_file)
    print(f"✅ 已生成Word文档: {output_file}")
if __name__ == '__main__':
    # Command-line entry point: both the input Markdown path and the output
    # .docx path are required; print the usage line and exit with status 1
    # when either is missing.
    if not len(sys.argv) >= 3:
        print("用法: python3 convert_to_word.py <input.md> <output.docx>")
        sys.exit(1)

    # Extra arguments beyond the first two are ignored.
    md_file, output_file = sys.argv[1:3]
    convert_markdown_to_word(md_file, output_file)