204 lines
6.3 KiB
Python
204 lines
6.3 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""
Convert a Markdown document to Word (.docx) format.
"""
|
|
|
|
from docx import Document
|
|
from docx.shared import Pt, Inches, RGBColor
|
|
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
|
from docx.enum.style import WD_STYLE_TYPE
|
|
import re
|
|
import sys
|
|
|
|
def _set_style_font(style, name, size_pt, *, bold=None, color=None):
    """Apply typeface, size, weight and colour to a python-docx style.

    Args:
        style: A style object taken from ``doc.styles``.
        name: Typeface name to use for both Latin and East Asian text.
        size_pt: Font size in points.
        bold: When not ``None``, the bold flag to set on the style.
        color: When not ``None``, an ``(r, g, b)`` tuple for the font colour.
    """
    # Imported locally so this block does not depend on a new file-level import.
    from docx.oxml.ns import qn

    font = style.font
    font.name = name
    font.size = Pt(size_pt)
    if bold is not None:
        font.bold = bold
    if color is not None:
        font.color.rgb = RGBColor(*color)

    # ``font.name`` fills only the Latin font slots; CJK characters are drawn
    # with the separate ``w:eastAsia`` slot, so it has to be set explicitly or
    # Chinese text keeps the template's default typeface.
    r_fonts = style.element.get_or_add_rPr().get_or_add_rFonts()
    r_fonts.set(qn('w:eastAsia'), name)

    # Per the OOXML rFonts definition, a theme-font attribute takes precedence
    # over the explicit name in the same slot; drop any that are present so
    # the typeface requested above is the one actually used.
    for theme_attr in ('w:asciiTheme', 'w:hAnsiTheme', 'w:eastAsiaTheme'):
        r_fonts.attrib.pop(qn(theme_attr), None)


def create_styled_document():
    """Create a Word document with body and heading styles preconfigured.

    The ``Normal`` style is set to Microsoft YaHei 11 pt, and ``Heading 1``
    to ``Heading 3`` are set to bold Microsoft YaHei at 20/16/14 pt in
    progressively lighter shades of blue.

    Returns:
        The new ``Document`` instance with the styles applied.
    """
    doc = Document()
    typeface = 'Microsoft YaHei'

    # Default body font.
    _set_style_font(doc.styles['Normal'], typeface, 11)

    # (style name, size in points, RGB colour) for each heading level.
    heading_specs = (
        ('Heading 1', 20, (0, 0, 128)),
        ('Heading 2', 16, (0, 64, 128)),
        ('Heading 3', 14, (0, 96, 128)),
    )
    for style_name, size_pt, rgb in heading_specs:
        _set_style_font(
            doc.styles[style_name], typeface, size_pt, bold=True, color=rgb
        )

    return doc
|
|
|
|
def parse_markdown_line(line):
    """Classify a single Markdown source line.

    Args:
        line: One raw line of Markdown; trailing whitespace/newline is ignored.

    Returns:
        A ``(line_type, content)`` tuple where ``line_type`` is one of
        ``'heading1'``..``'heading4'``, ``'list'``, ``'numbered_list'``,
        ``'code_block'``, ``'table'``, ``'separator'``, ``'empty'`` or
        ``'paragraph'``. ``content`` is the line with its marker removed
        (the text after the fence for ``'code_block'``, the whole line for
        ``'table'`` and ``'paragraph'``, and ``''`` for ``'separator'`` and
        ``'empty'``).
    """
    line = line.rstrip()
    stripped = line.strip()

    # Headings: 1-4 '#' characters at column 0 followed by a space. The
    # markers are mutually exclusive, so the order of the checks is irrelevant.
    for level in (1, 2, 3, 4):
        marker = '#' * level + ' '
        if line.startswith(marker):
            return (f'heading{level}', line[len(marker):])

    # Bullet list items.
    if line.startswith(('- ', '* ')):
        return ('list', line[2:])

    # Numbered list items such as "12. text".
    numbered = re.match(r'\d+\.\s', line)
    if numbered:
        return ('numbered_list', line[numbered.end():])

    # Code fence; the content is whatever follows the backticks.
    if line.startswith('```'):
        return ('code_block', line[3:])

    # Table row; returned unsplit so the caller can buffer whole rows.
    if line.startswith('|'):
        return ('table', line)

    # Horizontal rule: three OR MORE identical '-', '*' or '_' characters.
    # Rules longer than three characters are common and used to fall
    # through to 'paragraph'.
    if re.fullmatch(r'-{3,}|\*{3,}|_{3,}', stripped):
        return ('separator', '')

    if not stripped:
        return ('empty', '')

    return ('paragraph', line)
|
|
|
|
def add_formatted_text(paragraph, text):
    """Append *text* to *paragraph*, rendering inline bold and code spans.

    ``**bold**`` becomes a bold run and `` `code` `` becomes a Consolas 10 pt
    run; everything else is added as plain runs. Italic markup is not
    interpreted.

    Args:
        paragraph: Object exposing ``add_run(text)`` (a python-docx paragraph).
        text: Markdown source for one paragraph.
    """
    # Split on both markers at once so code spans are recognised anywhere in
    # the text, not only when they happen to fill a whole fragment. With a
    # single capturing group, re.split puts the matched markers at odd
    # indices and the surrounding literal text at even indices.
    parts = re.split(r'(\*\*.*?\*\*|`[^`]+`)', text)

    for index, part in enumerate(parts):
        if not part:
            # Empty fragments appear next to markers at the string edges;
            # adding them would only create empty runs.
            continue

        if index % 2 == 0:
            # Literal text. Checked by position rather than by its first/last
            # characters so a stray "**" or "`" is kept verbatim.
            paragraph.add_run(part)
        elif part.startswith('**'):
            run = paragraph.add_run(part[2:-2])
            run.bold = True
        else:
            run = paragraph.add_run(part[1:-1])
            run.font.name = 'Consolas'
            run.font.size = Pt(10)
|
|
|
|
def _split_table_row(line):
    """Split one Markdown table line into a list of stripped cell strings."""
    body = line.strip()
    # Outer pipes are removed only when present, so a row written without a
    # closing pipe keeps its last cell.
    if body.startswith('|'):
        body = body[1:]
    if body.endswith('|'):
        body = body[:-1]
    if not body:
        return []
    return [cell.strip() for cell in body.split('|')]


def _is_table_separator(cells):
    """Return True when every cell is a header delimiter such as ``---`` or ``:--:``."""
    return all(re.fullmatch(r':?-+:?', cell) for cell in cells)


def _add_table(doc, table_lines):
    """Render buffered Markdown table lines as a Word table with a bold header."""
    rows = []
    for table_line in table_lines:
        cells = _split_table_row(table_line)
        if cells and not _is_table_separator(cells):
            rows.append(cells)
    if not rows:
        return

    # Size the grid for the widest row so a row with more cells than the
    # header cannot index past the last column.
    column_count = max(len(row) for row in rows)
    table = doc.add_table(rows=len(rows), cols=column_count)
    table.style = 'Light Grid Accent 1'

    for row_index, row_data in enumerate(rows):
        # Fetch the row's cells once rather than once per column.
        row_cells = table.rows[row_index].cells
        for col_index, cell_text in enumerate(row_data):
            cell = row_cells[col_index]
            cell.text = cell_text
            if row_index == 0:  # header row
                for run in cell.paragraphs[0].runs:
                    run.bold = True


def _add_code_block(doc, code_lines):
    """Render buffered code lines as one indented Consolas 9 pt paragraph."""
    if not code_lines:
        return
    paragraph = doc.add_paragraph('\n'.join(code_lines))
    paragraph.style = 'Normal'
    paragraph.paragraph_format.left_indent = Inches(0.5)
    for run in paragraph.runs:
        run.font.name = 'Consolas'
        run.font.size = Pt(9)


def convert_markdown_to_word(md_file, output_file):
    """Convert a Markdown file into a Word document.

    Each line is classified with ``parse_markdown_line``. Fenced code blocks
    and consecutive table rows are buffered and emitted as a single paragraph
    or table; every other line is emitted as it is read. Line types without a
    handler below are ignored.

    Args:
        md_file: Path of the Markdown file to read (UTF-8, BOM tolerated).
        output_file: Path the ``.docx`` file is written to.

    Raises:
        OSError: If ``md_file`` cannot be opened.
    """
    doc = create_styled_document()

    # 'utf-8-sig' reads plain UTF-8 as well, but strips a leading BOM that
    # would otherwise stop the first line from being recognised as a heading.
    with open(md_file, 'r', encoding='utf-8-sig') as f:
        lines = f.readlines()

    # NOTE(review): level-4 headings are rendered with Word level 3, as the
    # original mapping did; confirm that this is intentional.
    heading_levels = {'heading1': 1, 'heading2': 2, 'heading3': 3, 'heading4': 3}
    list_styles = {'list': 'List Bullet', 'numbered_list': 'List Number'}

    in_code_block = False
    code_lines = []
    table_lines = []

    for line in lines:
        line_type, content = parse_markdown_line(line)

        # Inside a fence everything is kept verbatim until the closing fence.
        if in_code_block:
            if line_type == 'code_block':
                _add_code_block(doc, code_lines)
                code_lines = []
                in_code_block = False
            else:
                code_lines.append(line.rstrip())
            continue

        # Buffer consecutive table rows.
        if line_type == 'table':
            table_lines.append(content)
            continue

        # Any other line ends a pending table. This runs before a fence is
        # opened so a table directly followed by a code block keeps its place
        # in the document.
        if table_lines:
            _add_table(doc, table_lines)
            table_lines = []

        if line_type == 'code_block':
            in_code_block = True
        elif line_type in heading_levels:
            doc.add_heading(content, level=heading_levels[line_type])
        elif line_type in list_styles:
            # List items get the same inline formatting as body paragraphs.
            item = doc.add_paragraph(style=list_styles[line_type])
            add_formatted_text(item, content)
        elif line_type == 'separator':
            doc.add_paragraph('_' * 80)
        elif line_type == 'empty':
            doc.add_paragraph()
        elif line_type == 'paragraph':
            paragraph = doc.add_paragraph()
            add_formatted_text(paragraph, content)

    # Emit whatever is still buffered at end of file: an unterminated fence
    # or a table on the last lines would otherwise be lost.
    if in_code_block:
        _add_code_block(doc, code_lines)
    if table_lines:
        _add_table(doc, table_lines)

    # Save the document.
    doc.save(output_file)
    print(f"✅ 已生成Word文档: {output_file}")
|
|
|
|
if __name__ == '__main__':
    # Command-line entry point: both the input and the output path are
    # required; any further arguments are ignored.
    cli_args = sys.argv[1:]
    if len(cli_args) < 2:
        print("用法: python3 convert_to_word.py <input.md> <output.docx>")
        sys.exit(1)

    md_file, output_file = cli_args[:2]
    convert_markdown_to_word(md_file, output_file)
|