#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import re
import shutil
from pathlib import Path
import logging
# Configure logging: every record goes to both conversion.log and the console.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        # The log messages in this file contain non-ASCII text, so pin the
        # file encoding instead of relying on the platform default.
        logging.FileHandler("conversion.log", encoding="utf-8"),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger("md-to-mdx")
class MarkdownToMDXConverter:
def __init__(self, backup=True):
self.backup = backup
self.conversion_count = 0
self.error_count = 0
self.base_output_dir = None
def process_directory(self, input_dir, output_dir=None, recursive=True):
"""处理指定目录中的所有Markdown文件"""
input_path = Path(input_dir)
if not input_path.exists():
logger.error(f"输入目录不存在: {input_dir}")
return
# 保存基础输出目录,用于构建子目录输出路径
if self.base_output_dir is None and output_dir:
self.base_output_dir = Path(output_dir)
self.base_input_dir = input_path
self.base_output_dir.mkdir(parents=True, exist_ok=True)
logger.info(f"创建基础输出目录: {self.base_output_dir}")
# 处理当前目录中的所有.md文件
for file in input_path.glob("*.md"):
# 计算相对于基础输入目录的路径
if self.base_output_dir:
rel_path = file.parent.relative_to(self.base_input_dir) if file.parent != self.base_input_dir else Path('')
target_dir = self.base_output_dir / rel_path
target_dir.mkdir(parents=True, exist_ok=True)
self._process_file(file, target_dir)
else:
# 如果没有基础输出目录,则就地处理
self._process_file(file, file.parent)
# 如果需要递归处理子目录
if recursive:
for subdir in [d for d in input_path.iterdir() if d.is_dir()]:
# 跳过output目录,避免重复处理
if subdir.name == "output" or subdir.name.startswith('.'):
continue
self.process_directory(subdir, output_dir, recursive)
def _process_file(self, file_path, output_dir):
"""处理单个Markdown文件"""
try:
logger.info(f"处理文件: {file_path}")
# 备份原始文件
if self.backup:
backup_file = str(file_path) + ".bak"
if not os.path.exists(backup_file):
shutil.copy2(file_path, backup_file)
logger.info(f"已创建备份: {backup_file}")
# 读取文件内容
with open(file_path, 'r', encoding='utf-8') as f:
content = f.read()
# 执行转换
converted_content = self.convert_content(content)
# 确定输出文件路径
output_file = output_dir / (file_path.stem + ".mdx")
# 写入转换后的内容
with open(output_file, 'w', encoding='utf-8') as f:
f.write(converted_content)
logger.info(f"转换完成: {output_file}")
self.conversion_count += 1
except Exception as e:
logger.error(f"处理文件 {file_path} 时出错: {str(e)}")
self.error_count += 1
def convert_content(self, content):
"""将Gitbook Markdown内容转换为Mintlify MDX格式"""
# 1. 转换文档开头的h1元素为frontmatter
h1_pattern = re.compile(r'^#\s+(.+?)$', re.MULTILINE)
match = h1_pattern.search(content)
if match:
title = match.group(1).strip()
content = h1_pattern.sub(f'---\ntitle: {title}\n---\n', content, count=1)
# 2. 转换hint提示框
hint_pattern = re.compile(
r'{%\s*hint\s+style="(\w+)"\s*%}(.*?){%\s*endhint\s*%}',
re.DOTALL
)
def hint_replacer(match):
style = match.group(1)
text = match.group(2).strip()
component_name = style.capitalize() if style != "info" else "Info"
return f'<{component_name}>\n{text}\n{component_name}>'
content = hint_pattern.sub(hint_replacer, content)
# 3. 转换卡片链接
card_pattern = re.compile(
r'{%\s*content-ref\s+url="([^"]+)"\s*%}\s*\[([^\]]+)\]\(([^)]+)\)\s*{%\s*endcontent-ref\s*%}',
re.DOTALL
)
def card_replacer(match):
url = match.group(1)
title = match.group(2)
return f'
| {cell} | \n" mdx_table += "|
|---|---|
| \n {cell}\n | \n" else: # 普通文本单元格 mdx_table += f"{cell} | \n" mdx_table += "