#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Convert a Markdown document to Word (.docx) format.

Only a small Markdown subset is understood: ATX headings (``#`` to
``####``), bullet and numbered lists, fenced code blocks, pipe tables,
horizontal rules, and inline ``**bold**`` / `` `code` `` spans.
"""

import re
import sys

from docx import Document
from docx.enum.style import WD_STYLE_TYPE
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml.ns import qn
from docx.shared import Inches, Pt, RGBColor

_BODY_FONT = 'Microsoft YaHei'
_CODE_FONT = 'Consolas'

# Inline tokens recognised inside a paragraph: **bold** or `code`.
_INLINE_TOKEN_RE = re.compile(r'(\*\*.*?\*\*|`[^`]+`)')
# "1. item" style numbered-list prefix.
_NUMBERED_ITEM_RE = re.compile(r'^\d+\.\s')
# One cell of a table divider row, e.g. "---", ":---", "---:", ":---:".
_TABLE_DIVIDER_CELL_RE = re.compile(r'^:?-+:?$')

# Markdown heading type -> Word heading level.
# NOTE(review): level-4 headings are rendered as level 3, as in the original
# code; presumably because only three heading styles are customised.
_HEADING_LEVELS = {
    'heading1': 1,
    'heading2': 2,
    'heading3': 3,
    'heading4': 3,
}

# Markdown list type -> built-in Word list style.
_LIST_STYLES = {
    'list': 'List Bullet',
    'numbered_list': 'List Number',
}


def _set_style_font(style, name):
    """Set *name* as the font of *style* for both Latin and East-Asian text."""
    style.font.name = name
    # ``font.name`` only fills the Latin font slots; CJK characters use the
    # separate ``w:eastAsia`` attribute, so set it explicitly as well.
    style.element.rPr.rFonts.set(qn('w:eastAsia'), name)


def create_styled_document():
    """Create a new Word document with the body and heading styles applied.

    Returns:
        A ``Document`` whose ``Normal`` and ``Heading 1``-``Heading 3``
        styles have been customised.
    """
    doc = Document()

    # Default body font.
    normal = doc.styles['Normal']
    _set_style_font(normal, _BODY_FONT)
    normal.font.size = Pt(11)

    # (style name, font size, colour) for each customised heading level.
    heading_specs = (
        ('Heading 1', Pt(20), RGBColor(0, 0, 128)),
        ('Heading 2', Pt(16), RGBColor(0, 64, 128)),
        ('Heading 3', Pt(14), RGBColor(0, 96, 128)),
    )
    for style_name, size, color in heading_specs:
        heading = doc.styles[style_name]
        _set_style_font(heading, _BODY_FONT)
        heading.font.size = size
        heading.font.bold = True
        heading.font.color.rgb = color

    return doc


def parse_markdown_line(line):
    """Classify a single Markdown line.

    Args:
        line: One raw line of Markdown; trailing whitespace is ignored.

    Returns:
        A ``(line_type, content)`` tuple. ``line_type`` is one of
        ``heading1``..``heading4``, ``list``, ``numbered_list``,
        ``code_block``, ``table``, ``separator``, ``empty`` or
        ``paragraph``; ``content`` is the line with its marker removed
        (the full line for tables and paragraphs, ``''`` for separators
        and empty lines).
    """
    line = line.rstrip()

    # Headings
    if line.startswith('# '):
        return ('heading1', line[2:])
    if line.startswith('## '):
        return ('heading2', line[3:])
    if line.startswith('### '):
        return ('heading3', line[4:])
    if line.startswith('#### '):
        return ('heading4', line[5:])

    # Lists
    if line.startswith(('- ', '* ')):
        return ('list', line[2:])
    if _NUMBERED_ITEM_RE.match(line):
        return ('numbered_list', _NUMBERED_ITEM_RE.sub('', line))

    # Code fence (content is whatever follows the fence, e.g. a language tag)
    if line.startswith('```'):
        return ('code_block', line[3:])

    # Table row
    if line.startswith('|'):
        return ('table', line)

    # Horizontal rule
    if line.strip() in ('---', '***', '___'):
        return ('separator', '')

    # Blank line
    if not line.strip():
        return ('empty', '')

    # Ordinary paragraph
    return ('paragraph', line)


def add_formatted_text(paragraph, text):
    """Append *text* to *paragraph*, rendering inline ``**bold**`` and `` `code` ``.

    Args:
        paragraph: The python-docx paragraph to append runs to.
        text: Markdown text that may contain inline bold or code spans.
    """
    # Splitting on a capturing group yields plain text at even indexes and
    # the matched tokens at odd indexes, so a stray "**" or "`" in plain
    # text can never be mistaken for markup.
    for index, part in enumerate(_INLINE_TOKEN_RE.split(text)):
        if not part:
            continue
        is_token = index % 2 == 1
        if is_token and part.startswith('**'):
            run = paragraph.add_run(part[2:-2])
            run.bold = True
        elif is_token:
            run = paragraph.add_run(part[1:-1])
            run.font.name = _CODE_FONT
            run.font.size = Pt(10)
        else:
            paragraph.add_run(part)


def _split_table_row(line):
    """Split one pipe-table line into stripped cell strings."""
    inner = line.strip()
    if inner == '|':
        return []
    # The outer pipes are optional on the right-hand side in common Markdown.
    if inner.startswith('|'):
        inner = inner[1:]
    if inner.endswith('|'):
        inner = inner[:-1]
    return [cell.strip() for cell in inner.split('|')]


def _add_code_block(doc, code_lines):
    """Append the collected *code_lines* to *doc* as one indented monospace paragraph."""
    if not code_lines:
        return
    paragraph = doc.add_paragraph('\n'.join(code_lines))
    paragraph.style = 'Normal'
    paragraph.paragraph_format.left_indent = Inches(0.5)
    for run in paragraph.runs:
        run.font.name = _CODE_FONT
        run.font.size = Pt(9)


def _add_table(doc, table_lines):
    """Append the collected pipe-table *table_lines* to *doc* as a Word table."""
    rows = []
    for line in table_lines:
        cells = _split_table_row(line)
        # Skip the "| --- | --- |" divider row; it carries no data.
        if cells and not all(_TABLE_DIVIDER_CELL_RE.match(c) for c in cells):
            rows.append(cells)
    if not rows:
        return

    # Size the table for the widest row so ragged rows cannot overflow.
    column_count = max(len(row) for row in rows)
    table = doc.add_table(rows=len(rows), cols=column_count)
    table.style = 'Light Grid Accent 1'
    for row_index, row_data in enumerate(rows):
        doc_cells = table.rows[row_index].cells
        for col_index, cell_text in enumerate(row_data):
            cell = doc_cells[col_index]
            cell.text = cell_text
            if row_index == 0:
                # First row is the header.
                for run in cell.paragraphs[0].runs:
                    run.bold = True


def convert_markdown_to_word(md_file, output_file):
    """Convert the Markdown file *md_file* into a Word document at *output_file*.

    Args:
        md_file: Path of the UTF-8 Markdown file to read.
        output_file: Path of the ``.docx`` file to write.
    """
    doc = create_styled_document()

    # utf-8-sig transparently drops a leading BOM, which would otherwise
    # hide a heading marker on the first line.
    with open(md_file, 'r', encoding='utf-8-sig') as f:
        lines = f.readlines()

    in_code_block = False
    code_lines = []
    table_lines = []

    for line in lines:
        line_type, content = parse_markdown_line(line)

        # Inside a fenced code block everything is literal until the
        # closing fence.
        if in_code_block:
            if line_type == 'code_block':
                _add_code_block(doc, code_lines)
                code_lines = []
                in_code_block = False
            else:
                code_lines.append(line.rstrip())
            continue

        # Collect consecutive table rows; emit the table as soon as any
        # other kind of line (including a code fence) follows, so document
        # order is preserved.
        if line_type == 'table':
            table_lines.append(content)
            continue
        if table_lines:
            _add_table(doc, table_lines)
            table_lines = []

        if line_type == 'code_block':
            in_code_block = True
        elif line_type in _HEADING_LEVELS:
            doc.add_heading(content, level=_HEADING_LEVELS[line_type])
        elif line_type in _LIST_STYLES:
            paragraph = doc.add_paragraph(style=_LIST_STYLES[line_type])
            add_formatted_text(paragraph, content)
        elif line_type == 'separator':
            doc.add_paragraph('_' * 80)
        elif line_type == 'empty':
            doc.add_paragraph()
        elif line_type == 'paragraph':
            paragraph = doc.add_paragraph()
            add_formatted_text(paragraph, content)

    # Flush whatever was still being collected when the file ended.
    if in_code_block:
        _add_code_block(doc, code_lines)
    if table_lines:
        _add_table(doc, table_lines)

    # Save the document.
    doc.save(output_file)
    print(f"✅ 已生成Word文档: {output_file}")


if __name__ == '__main__':
    if len(sys.argv) < 3:
        print("用法: python3 convert_to_word.py <input.md> <output.docx>")
        sys.exit(1)

    md_file = sys.argv[1]
    output_file = sys.argv[2]

    convert_markdown_to_word(md_file, output_file)