mirror of
https://github.com/langgenius/dify-docs.git
synced 2026-03-27 13:28:32 +07:00
Feat: update jp docs
This commit is contained in:
757
scripts/auto-url-check.py
Normal file
757
scripts/auto-url-check.py
Normal file
@@ -0,0 +1,757 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
多线程版GitBook链接检查器
|
||||
|
||||
此脚本使用多线程并行检查在线链接,大幅提高检查速度。
|
||||
生成两个报告文件:
|
||||
1. 包含所有链接的完整报告
|
||||
2. 仅包含错误链接的报告
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import threading
|
||||
import queue
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from collections import defaultdict
|
||||
from urllib.parse import urlparse
|
||||
|
||||
try:
|
||||
import requests
|
||||
from requests.exceptions import RequestException
|
||||
except ImportError:
|
||||
print("正在安装requests库...")
|
||||
import subprocess
|
||||
subprocess.check_call([sys.executable, "-m", "pip", "install", "requests"])
|
||||
import requests
|
||||
from requests.exceptions import RequestException
|
||||
|
||||
class LinkChecker:
    """Multithreaded link checker for GitBook-style documentation trees."""

    def __init__(self, summary_path, base_dir=None, verify_online=True, max_threads=10):
        """
        Set up the checker.

        Args:
            summary_path: path to SUMMARY.md
            base_dir: documentation root; defaults to the directory of SUMMARY.md
            verify_online: whether to verify http(s) links over the network
            max_threads: maximum number of worker threads
        """
        self.summary_path = os.path.abspath(summary_path)
        self.base_dir = base_dir if base_dir else os.path.dirname(self.summary_path)
        self.verify_online = verify_online
        self.max_threads = max_threads

        self.summary_links = []            # top-level link tree parsed from SUMMARY.md
        self.md_links = defaultdict(list)  # links found in each referenced document
        self.processed_files = set()       # files already scanned
        self.summary_content = ""          # raw text of SUMMARY.md
        self.invalid_links = []            # every link that failed validation

        # Extensions treated as images (image links are skipped, assumed valid).
        self.image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.svg', '.bmp', '.tiff', '.webp')

        # Cache of online-check verdicts so each URL is fetched at most once.
        self.online_link_cache = {}
        self.online_link_cache_lock = threading.Lock()

        # Queue of http(s) URLs waiting to be verified by worker threads.
        self.online_links_queue = queue.Queue()

        # Progress counters shared by the worker threads.
        self.total_online_links = 0
        self.checked_online_links = 0
        self.progress_lock = threading.Lock()
|
||||
|
||||
def is_image_link(self, link):
|
||||
"""
|
||||
检查链接是否为图片链接
|
||||
|
||||
Args:
|
||||
link: 链接路径
|
||||
|
||||
Returns:
|
||||
is_image: 是否为图片链接
|
||||
"""
|
||||
return link.lower().endswith(self.image_extensions)
|
||||
|
||||
    def check_online_link(self, url):
        """
        Check whether an online URL is reachable.

        Args:
            url: http(s) URL to check

        Returns:
            is_valid: True if the URL responded with a status code < 400
        """
        # Return the cached verdict if this URL was already checked.
        with self.online_link_cache_lock:
            if url in self.online_link_cache:
                return self.online_link_cache[url]

        if not self.verify_online:
            # Online verification disabled: treat the link as invalid by default.
            with self.online_link_cache_lock:
                self.online_link_cache[url] = False
            return False

        try:
            # Try HEAD first: it is cheaper than a full GET.
            response = requests.head(
                url,
                timeout=5,
                allow_redirects=True,
                headers={'User-Agent': 'Mozilla/5.0 GitBook-Link-Checker/1.0'}
            )

            if response.status_code < 400:
                # Any status below 400 counts as a valid link.
                with self.online_link_cache_lock:
                    self.online_link_cache[url] = True
                return True

            # HEAD was rejected (some servers disallow it) — fall back to GET.
            response = requests.get(
                url,
                timeout=5,
                allow_redirects=True,
                headers={'User-Agent': 'Mozilla/5.0 GitBook-Link-Checker/1.0'}
            )

            result = response.status_code < 400
            with self.online_link_cache_lock:
                self.online_link_cache[url] = result
            return result

        except RequestException:
            # Network-level failure: record the link as invalid.
            with self.online_link_cache_lock:
                self.online_link_cache[url] = False
            return False
|
||||
|
||||
    def resolve_path(self, link, current_dir):
        """
        Resolve a link to its actual target.

        Args:
            link: link path as written in the document
            current_dir: directory of the file containing the link

        Returns:
            resolved_path: resolved path (or the URL for external links)
            is_external: whether the link is external (http/https/mailto/tel)
            is_valid: True/False, or None for http(s) links whose check is
                deferred to the online-checker worker threads
        """
        if not link:
            return None, False, False

        # Strip an anchor fragment; a bare "#anchor" is assumed valid.
        if '#' in link:
            link_part = link.split('#')[0]
            if not link_part:  # anchor only, no path component
                return None, False, True
            link = link_part

        # Image links are skipped and assumed valid.
        if self.is_image_link(link):
            return None, False, True

        # External links.
        if link.startswith(('http://', 'https://', 'mailto:', 'tel:')):
            # http(s) links are queued for the worker threads when verifying.
            if link.startswith(('http://', 'https://')) and self.verify_online:
                self.online_links_queue.put(link)
                with self.progress_lock:
                    self.total_online_links += 1

                # Validity unknown for now; update_link_statuses() fills it in later.
                return link, True, None
            elif link.startswith(('http://', 'https://')) and not self.verify_online:
                # Online verification disabled: flag as invalid.
                return link, True, False
            else:
                # mailto: and tel: links are assumed valid.
                return link, True, True

        # Absolute path (relative to the documentation root).
        if link.startswith('/'):
            resolved_path = os.path.normpath(os.path.join(self.base_dir, link.lstrip('/')))
        # Relative path (relative to the current file's directory).
        else:
            resolved_path = os.path.normpath(os.path.join(current_dir, link))

        # Directory link: prefer README.md, then index.md.
        if os.path.isdir(resolved_path):
            readme_path = os.path.join(resolved_path, 'README.md')
            if os.path.exists(readme_path):
                return readme_path, False, True
            index_path = os.path.join(resolved_path, 'index.md')
            if os.path.exists(index_path):
                return index_path, False, True
            # Neither exists: report the directory itself.
            return resolved_path, False, os.path.exists(resolved_path)

        # Extension-less file reference: try appending ".md".
        if not os.path.exists(resolved_path) and '.' not in os.path.basename(resolved_path):
            md_path = f"{resolved_path}.md"
            if os.path.exists(md_path):
                return md_path, False, True

        return resolved_path, False, os.path.exists(resolved_path)
|
||||
|
||||
def online_link_worker(self):
|
||||
"""工作线程:处理在线链接检查"""
|
||||
while True:
|
||||
try:
|
||||
# 从队列获取链接
|
||||
url = self.online_links_queue.get(block=False)
|
||||
|
||||
# 检查链接
|
||||
is_valid = self.check_online_link(url)
|
||||
|
||||
# 更新进度
|
||||
with self.progress_lock:
|
||||
self.checked_online_links += 1
|
||||
checked = self.checked_online_links
|
||||
total = self.total_online_links
|
||||
|
||||
# 显示进度
|
||||
print(f"在线链接检查进度: [{checked}/{total}] - {url} - {'✅' if is_valid else '❌'}")
|
||||
|
||||
# 标记任务完成
|
||||
self.online_links_queue.task_done()
|
||||
except queue.Empty:
|
||||
# 队列为空,退出线程
|
||||
break
|
||||
|
||||
def extract_sections_from_summary(self):
|
||||
"""
|
||||
从SUMMARY.md提取所有章节信息
|
||||
|
||||
Returns:
|
||||
sections: 章节列表
|
||||
"""
|
||||
print(f"从 {self.summary_path} 提取章节信息...")
|
||||
|
||||
try:
|
||||
with open(self.summary_path, 'r', encoding='utf-8') as file:
|
||||
self.summary_content = file.read()
|
||||
except Exception as e:
|
||||
print(f"读取文件时出错: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
# 提取所有章节标题
|
||||
sections = []
|
||||
section_pattern = r'^#+\s+(.*?)(?:\s+<a.*?>)?$'
|
||||
|
||||
for line in self.summary_content.split('\n'):
|
||||
match = re.match(section_pattern, line)
|
||||
if match:
|
||||
section_title = match.group(1).strip()
|
||||
sections.append(section_title)
|
||||
|
||||
return sections
|
||||
|
||||
    def extract_links_from_summary(self):
        """
        Extract every link from SUMMARY.md together with its hierarchy.

        Side effects: fills ``self.summary_links`` with the top-level tree and
        appends definitely-broken links to ``self.invalid_links``.

        Returns:
            links: flat list of link records (each carrying a 'children' list)
        """
        print(f"从 {self.summary_path} 提取链接...")

        # Track which section heading each link belongs to.
        current_section = ""
        sections = self.extract_sections_from_summary()  # also loads summary_content

        # Process SUMMARY.md line by line.
        links = []

        for line in self.summary_content.split('\n'):
            # Section heading line?
            section_match = re.match(r'^#+\s+(.*?)(?:\s+<a.*?>)?$', line)
            if section_match:
                current_section = section_match.group(1).strip()
                continue

            # Only bullet lines ("* [..](..)") carry links.
            indent_match = re.match(r'^(\s*)\*', line)
            if not indent_match:
                continue

            indent = indent_match.group(1)
            level = len(indent) // 2  # assumes 2 spaces of indentation per level

            # The Markdown link on this line.
            link_match = re.search(r'\[([^\]]+)\]\(([^)]+)\)', line)
            if not link_match:
                continue

            text, link = link_match.groups()

            # Pure anchor links are skipped.
            if link.startswith('#'):
                continue

            # Resolve to an actual file path / URL; is_valid may be None for
            # http(s) links whose check is deferred to the worker threads.
            file_path, is_external, is_valid = self.resolve_path(link, self.base_dir)

            link_info = {
                'text': text,
                'link': link,
                'file_path': file_path,
                'exists': is_valid,
                'level': level,
                'section': current_section,
                'is_external': is_external,
                'children': [],  # nested entries get attached below
                'source_file': 'SUMMARY.md'
            }

            links.append(link_info)

            # Only a definite False counts as invalid here; None means the
            # online check is still pending.
            if is_valid is False:
                self.invalid_links.append(link_info)

        # Rebuild the nesting from the indentation levels.
        root_links = []
        level_stack = [None]  # last link seen at each level

        for link in links:
            level = link['level']

            # Pop deeper levels that have ended.
            while len(level_stack) > level + 1:
                level_stack.pop()

            # Pad the stack if the document skipped a level.
            while len(level_stack) < level + 1:
                level_stack.append(None)

            if level == 0:
                # Top-level entry.
                root_links.append(link)
            else:
                # Attach to the most recent link one level up, if any.
                parent = level_stack[level - 1]
                if parent:
                    parent['children'].append(link)

            # Remember this link as the latest one at its level.
            level_stack[level] = link

        self.summary_links = root_links
        return links
|
||||
|
||||
    def extract_links_from_markdown(self, file_path):
        """
        Extract links from a Markdown file.

        Args:
            file_path: path to the Markdown file

        Returns:
            links: list of link records found in the file (empty if the file
                was already processed, is missing, or is not a .md file)
        """
        # Each file is scanned at most once.
        if not file_path or file_path in self.processed_files:
            return []

        if not os.path.exists(file_path) or not file_path.endswith('.md'):
            return []

        self.processed_files.add(file_path)

        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read()
        except Exception as e:
            print(f"读取文件 {file_path} 时出错: {e}")
            return []

        # Markdown links: [text](target)
        link_pattern = r'\[([^\]]+)\]\(([^)]+)\)'
        matches = re.findall(link_pattern, content)

        links = []
        current_dir = os.path.dirname(file_path)
        relative_source_path = os.path.relpath(file_path, self.base_dir)

        for text, link in matches:
            # Image links are not checked.
            if self.is_image_link(link):
                continue

            # Resolve the target; is_valid may be None for deferred online links.
            resolved_path, is_external, is_valid = self.resolve_path(link, current_dir)

            link_info = {
                'text': text,
                'link': link,
                'file_path': resolved_path,
                'exists': is_valid,
                'is_external': is_external,
                'source_file': relative_source_path
            }

            links.append(link_info)

            # Index the link under its source file.
            if file_path not in self.md_links:
                self.md_links[file_path] = []
            self.md_links[file_path].append(link_info)

            # Only a definite False counts as invalid here; None means the
            # online check is still pending.
            if is_valid is False:
                self.invalid_links.append(link_info)

        return links
|
||||
|
||||
    def check_links(self):
        """
        Check every link reachable from SUMMARY.md.

        Walks the SUMMARY link tree, scanning each referenced local Markdown
        file for further links, then verifies queued online links with worker
        threads and back-fills their status.
        """
        # Parse SUMMARY.md first (fills self.summary_links).
        self.extract_links_from_summary()

        # Recursively scan every referenced local .md file.
        def process_link(link):
            if not link.get('is_external') and link.get('exists') and link.get('file_path') and link.get('file_path').endswith('.md'):
                try:
                    relative_path = os.path.relpath(link['file_path'], self.base_dir)
                    print(f"检查文件: {relative_path}")
                    self.extract_links_from_markdown(link['file_path'])
                except Exception as e:
                    print(f"处理文件 {link.get('file_path')} 时出错: {e}")

            # Recurse into nested SUMMARY entries.
            for child in link.get('children', []):
                process_link(child)

        # Process every top-level entry.
        for link in self.summary_links:
            process_link(link)

        # Verify the queued online links in parallel, if requested.
        if self.verify_online and self.total_online_links > 0:
            self.check_online_links_with_threads()

        # Replace the deferred (None) statuses with the cached verdicts.
        self.update_link_statuses()
|
||||
|
||||
    def check_online_links_with_threads(self):
        """Verify all queued online links using a pool of worker threads."""
        print(f"\n开始使用多线程检查在线链接,共有 {self.total_online_links} 个链接...")

        # Never start more threads than there are links to check.
        num_threads = min(self.max_threads, self.total_online_links)

        with ThreadPoolExecutor(max_workers=num_threads) as executor:
            # Each worker drains the shared queue until it is empty.
            # NOTE(review): the futures are never inspected — if a worker dies
            # without calling task_done() for its item, the join() below would
            # block forever; the worker is expected to guard against that.
            futures = [executor.submit(self.online_link_worker) for _ in range(num_threads)]

            # Block until every queued URL has been marked done.
            self.online_links_queue.join()

        print(f"所有在线链接检查完成,共 {self.total_online_links} 个")
|
||||
|
||||
def update_link_statuses(self):
|
||||
"""根据检查结果更新链接状态"""
|
||||
# 更新所有链接的有效性状态
|
||||
def update_link(link):
|
||||
if link.get('is_external') and link.get('file_path') and link.get('file_path').startswith(('http://', 'https://')):
|
||||
with self.online_link_cache_lock:
|
||||
is_valid = self.online_link_cache.get(link['file_path'], False)
|
||||
|
||||
link['exists'] = is_valid
|
||||
|
||||
# 如果链接无效,添加到无效链接列表
|
||||
if not is_valid and link not in self.invalid_links:
|
||||
self.invalid_links.append(link)
|
||||
|
||||
# 递归处理子链接
|
||||
for child in link.get('children', []):
|
||||
update_link(child)
|
||||
|
||||
# 处理所有顶级链接
|
||||
for link in self.summary_links:
|
||||
update_link(link)
|
||||
|
||||
# 更新文档链接字典
|
||||
for file_path, links in self.md_links.items():
|
||||
for link in links:
|
||||
if link.get('is_external') and link.get('file_path') and link.get('file_path').startswith(('http://', 'https://')):
|
||||
with self.online_link_cache_lock:
|
||||
is_valid = self.online_link_cache.get(link['file_path'], False)
|
||||
|
||||
link['exists'] = is_valid
|
||||
|
||||
# 如果链接无效,添加到无效链接列表
|
||||
if not is_valid and link not in self.invalid_links:
|
||||
self.invalid_links.append(link)
|
||||
|
||||
def generate_reports(self, output_path):
|
||||
"""
|
||||
生成两个报告:完整报告和错误链接报告
|
||||
|
||||
Args:
|
||||
output_path: 完整报告输出文件路径
|
||||
"""
|
||||
# 生成完整报告
|
||||
self.generate_full_report(output_path)
|
||||
|
||||
# 生成错误链接报告
|
||||
error_report_path = output_path.replace('.md', '-error.md')
|
||||
if output_path == error_report_path:
|
||||
error_report_path = os.path.splitext(output_path)[0] + '-error.md'
|
||||
|
||||
self.generate_error_report(error_report_path)
|
||||
|
||||
    def generate_full_report(self, output_path):
        """
        Generate the full report listing every link.

        Args:
            output_path: output file path
        """
        content = "# GitBook链接检查报告(完整版)\n\n"

        # Explain the per-line report format.
        content += "本报告显示了GitBook文档中的所有链接及其引用的文档。每行的格式为:\n"
        content += "* [文档标题](文档链接) | [引用的文档1](链接1) | [引用的文档2](链接2) | ...\n\n"

        # Emit each section heading only once.
        processed_sections = set()

        # Recursively render one link (and its children) into `content`.
        def generate_link_report(link, indent=""):
            nonlocal content

            # Start a new section when this link belongs to an unseen one.
            if 'section' in link and link['section'] and link['section'] not in processed_sections:
                content += f"\n## {link['section']}\n\n"
                processed_sections.add(link['section'])

            # The SUMMARY entry itself.
            file_path = link.get('file_path')
            status = "✅" if link.get('exists', False) else "❌"

            content += f"{indent}* [{link['text']}]({link['link']}) {status}"

            # Append every non-image link referenced inside that document.
            if file_path and file_path in self.md_links and self.md_links[file_path]:
                referenced_links = self.md_links[file_path]

                for ref_link in referenced_links:
                    # Skip image links.
                    if 'link' in ref_link and self.is_image_link(ref_link['link']):
                        continue

                    ref_status = "✅" if ref_link.get('exists', False) else "❌"
                    content += f" | [{ref_link['text']}]({ref_link['link']}) {ref_status}"

            content += "\n"

            # Children render one extra indent level deep.
            for child in link.get('children', []):
                generate_link_report(child, indent + " ")

        # Render every top-level entry.
        for link in self.summary_links:
            generate_link_report(link)

        # Write the report, creating the output directory when necessary.
        try:
            output_dir = os.path.dirname(output_path)
            if output_dir and not os.path.exists(output_dir):
                os.makedirs(output_dir)

            with open(output_path, 'w', encoding='utf-8') as file:
                file.write(content)

            print(f"完整报告已生成: {output_path}")
        except Exception as e:
            print(f"写入报告时出错: {e}")
|
||||
|
||||
def generate_error_report(self, output_path):
|
||||
"""
|
||||
生成仅包含错误链接的报告
|
||||
|
||||
Args:
|
||||
output_path: 输出文件路径
|
||||
"""
|
||||
if not self.invalid_links:
|
||||
print(f"没有发现无效链接,不生成错误报告")
|
||||
return
|
||||
|
||||
content = "# GitBook链接检查报告(仅错误链接)\n\n"
|
||||
content += "本报告仅显示文档中的无效链接。每行的格式为:\n"
|
||||
content += "* [文档标题](文档链接) | [无效链接](链接路径) ❌\n\n"
|
||||
|
||||
# 按源文件组织无效链接
|
||||
links_by_source = defaultdict(list)
|
||||
|
||||
for link in self.invalid_links:
|
||||
source = link.get('source_file', 'Unknown')
|
||||
links_by_source[source].append(link)
|
||||
|
||||
# 按源文件添加无效链接
|
||||
for source, links in sorted(links_by_source.items()):
|
||||
# 添加源文件标题
|
||||
content += f"## 来自 {source}\n\n"
|
||||
|
||||
# 找到源文件在summary中的对应链接
|
||||
summary_link = None
|
||||
|
||||
# 查找源文件对应的summary链接
|
||||
for link in self.extract_links_from_summary():
|
||||
if link.get('file_path') and os.path.relpath(link['file_path'], self.base_dir) == source:
|
||||
summary_link = link
|
||||
break
|
||||
|
||||
# 如果是SUMMARY.md本身
|
||||
if source == 'SUMMARY.md':
|
||||
# 添加每个无效链接
|
||||
for link in links:
|
||||
status = "❌"
|
||||
content += f"* [{link['text']}]({link['link']}) {status}\n"
|
||||
else:
|
||||
# 如果找到了源文件对应的summary链接
|
||||
if summary_link:
|
||||
# 显示源文件链接和其中的无效链接
|
||||
source_status = "✅" if summary_link.get('exists', False) else "❌"
|
||||
content += f"* [{summary_link['text']}]({summary_link['link']}) {source_status}"
|
||||
|
||||
# 添加源文件中的无效链接
|
||||
for link in links:
|
||||
content += f" | [{link['text']}]({link['link']}) ❌"
|
||||
|
||||
content += "\n\n"
|
||||
else:
|
||||
# 没有找到源文件对应的summary链接,只显示无效链接
|
||||
for link in links:
|
||||
content += f"* 来自: {source} - [{link['text']}]({link['link']}) ❌\n"
|
||||
|
||||
content += "\n"
|
||||
|
||||
# 保存报告
|
||||
try:
|
||||
# 确保输出目录存在
|
||||
output_dir = os.path.dirname(output_path)
|
||||
if output_dir and not os.path.exists(output_dir):
|
||||
os.makedirs(output_dir)
|
||||
|
||||
with open(output_path, 'w', encoding='utf-8') as file:
|
||||
file.write(content)
|
||||
|
||||
print(f"错误报告已生成: {output_path}")
|
||||
except Exception as e:
|
||||
print(f"写入错误报告时出错: {e}")
|
||||
|
||||
|
||||
def main():
    """Entry point: gather options from argv or interactive prompts, run the check."""
    print("=" * 60)
    print("多线程版GitBook链接检查器")
    print("=" * 60)

    # SUMMARY.md path: argv[1] or prompt (default: ./SUMMARY.md).
    if len(sys.argv) > 1:
        summary_path = sys.argv[1]
    else:
        summary_path = input("请输入SUMMARY.md文件路径: ").strip()
        if not summary_path:
            summary_path = os.path.join(os.getcwd(), "SUMMARY.md")
            print(f"使用默认路径: {summary_path}")

    # Fail early if the file does not exist.
    if not os.path.isfile(summary_path):
        print(f"错误: 文件 '{summary_path}' 不存在")
        sys.exit(1)

    # Documentation root: argv[2] or prompt (default: directory of SUMMARY.md).
    base_dir = os.path.dirname(os.path.abspath(summary_path))
    if len(sys.argv) > 2:
        base_dir = sys.argv[2]
    else:
        input_base_dir = input(f"请输入文档根目录 [默认: {base_dir}]: ").strip()
        if input_base_dir:
            base_dir = input_base_dir

    # Report path: argv[3] or prompt.
    if len(sys.argv) > 3:
        output_path = sys.argv[3]
    else:
        default_output = os.path.join(base_dir, "link-check-report.md")
        output_path = input(f"请输入输出文件路径 [默认: {default_output}]: ").strip()
        if not output_path:
            output_path = default_output

    # A directory means "use the default file name inside it".
    if os.path.isdir(output_path):
        output_path = os.path.join(output_path, "link-check-report.md")

    # Whether to actually fetch online links over the network.
    verify_online = input("是否验证在线链接? (y/n) [默认: n]: ").strip().lower() == 'y'

    max_threads = 10
    if verify_online:
        # Worker-thread count; falls back to 10 on bad or out-of-range input.
        try:
            max_threads = int(input(f"请输入最大线程数 [默认: 10]: ").strip() or "10")
            if max_threads < 1:
                max_threads = 10
                print(f"线程数必须大于0,已设置为默认值10")
        except ValueError:
            max_threads = 10
            print(f"输入无效,已设置为默认值10")

        print(f"将使用 {max_threads} 个线程并行检查在线链接")
    else:
        print("未验证的在线链接将被标记为错误,并添加到错误报告中")

    start_time = time.time()

    try:
        # Build the checker and run the full pass.
        checker = LinkChecker(
            summary_path=summary_path,
            base_dir=base_dir,
            verify_online=verify_online,
            max_threads=max_threads
        )

        checker.check_links()
        checker.generate_reports(output_path)

        # Summary statistics.
        total_files = len(checker.processed_files)
        invalid_links = len(checker.invalid_links)

        end_time = time.time()
        elapsed_time = end_time - start_time

        print(f"\n统计信息:")
        print(f"- 检查的文件数: {total_files}")
        print(f"- 无效链接数: {invalid_links}")
        print(f"- 耗时: {elapsed_time:.2f} 秒")

        print("\n检查完成!")
    except Exception as e:
        print(f"执行过程中出错: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
176
scripts/extract-gitbook-url.py
Normal file
176
scripts/extract-gitbook-url.py
Normal file
@@ -0,0 +1,176 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
改进的GitBook Summary链接提取器 (支持目录输出)
|
||||
|
||||
此脚本从SUMMARY.md文件中提取所有内容,
|
||||
保留原始的目录结构和标题,
|
||||
将链接转换为在线URL(不包含.md后缀)。
|
||||
支持将输出文件放在指定目录中。
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import urllib.parse
|
||||
|
||||
def process_summary_file(summary_path, base_url):
    """
    Process SUMMARY.md: keep the document structure, rewrite links to online URLs.

    Relative links are resolved against *base_url* and the ``.md`` suffix is
    stripped — including when the link carries an ``#anchor`` fragment, a case
    the original ``endswith('.md')`` check missed.

    Args:
        summary_path: path to the SUMMARY.md file
        base_url: base URL of the published documentation

    Returns:
        processed_content: the rewritten file content
    """
    print(f"正在处理 {summary_path}...")

    try:
        with open(summary_path, 'r', encoding='utf-8') as file:
            content = file.read()
    except Exception as e:
        print(f"读取文件时出错: {e}")
        sys.exit(1)

    # Ensure base_url ends with '/' so urljoin keeps its last path segment.
    if not base_url.endswith('/'):
        base_url += '/'

    lines = content.split('\n')
    processed_lines = []

    # Markdown links: [text](target)
    link_pattern = r'\[([^\]]+)\]\(([^)]+)\)'

    for line in lines:
        matches = re.findall(link_pattern, line)
        processed_line = line

        # Rewrite each link found on this line.
        for text, link in matches:
            # Keep pure anchor links untouched.
            if link.startswith('#'):
                continue

            # Separate an optional #fragment so the ".md" strip still works
            # for links like "page.md#section".
            path_part, sep, fragment = link.partition('#')

            # Resolve relative paths against the base URL.
            if not path_part.startswith(('http://', 'https://')):
                if path_part.startswith('/'):
                    path_part = path_part[1:]
                full_url = urllib.parse.urljoin(base_url, path_part)
            else:
                full_url = path_part

            # Drop the .md suffix to get the published URL.
            if full_url.endswith('.md'):
                full_url = full_url[:-3]

            # Re-attach the fragment, if any.
            if sep:
                full_url += '#' + fragment

            # Replace the link in place.
            original_link = f"[{text}]({link})"
            new_link = f"[{text}]({full_url})"
            processed_line = processed_line.replace(original_link, new_link)

        processed_lines.append(processed_line)

    return '\n'.join(processed_lines)
|
||||
|
||||
|
||||
def save_to_markdown(content, output_path):
    """
    Write the processed content to a Markdown file.

    If *output_path* is an existing directory, the file is written inside it
    as ``gitbook-urls.md``; missing parent directories are created on demand.

    Args:
        content: processed text to write
        output_path: target file path or directory
    """
    # Resolve a directory argument to a default file name inside it.
    output_file = (
        os.path.join(output_path, "gitbook-urls.md")
        if os.path.isdir(output_path)
        else output_path
    )

    # Create the parent directory when it does not exist yet.
    output_dir = os.path.dirname(output_file)
    if output_dir and not os.path.exists(output_dir):
        try:
            os.makedirs(output_dir)
            print(f"已创建目录: {output_dir}")
        except Exception as e:
            print(f"创建目录时出错: {e}")
            sys.exit(1)

    try:
        with open(output_file, 'w', encoding='utf-8') as file:
            file.write(content)
        print(f"Markdown文件已生成: {output_file}")
    except Exception as e:
        print(f"写入文件时出错: {e}")
        sys.exit(1)
|
||||
|
||||
|
||||
def add_header(content):
    """
    Prepend the report title and a short explanation to *content*.

    Args:
        content: original content

    Returns:
        new_content: content with the header prepended
    """
    header_parts = [
        "# GitBook文档链接\n\n",
        "以下是从SUMMARY.md提取的文档结构和链接:\n\n",
    ]
    return "".join(header_parts) + content
|
||||
|
||||
|
||||
if __name__ == "__main__":
    print("=" * 60)
    print("改进的GitBook Summary链接提取器 (支持目录输出)")
    print("=" * 60)

    # SUMMARY.md path: argv[1] or prompt (default: ./SUMMARY.md).
    if len(sys.argv) > 1:
        summary_path = sys.argv[1]
    else:
        summary_path = input("请输入SUMMARY.md文件路径: ").strip()
        if not summary_path:
            summary_path = os.path.join(os.getcwd(), "SUMMARY.md")
            print(f"使用默认路径: {summary_path}")

    # Fail early if the file does not exist.
    if not os.path.isfile(summary_path):
        print(f"错误: 文件 '{summary_path}' 不存在")
        sys.exit(1)

    # Base URL of the published docs: argv[2] or prompt.
    if len(sys.argv) > 2:
        base_url = sys.argv[2]
    else:
        base_url = input("请输入文档基础URL: ").strip()
        if not base_url:
            base_url = "https://docs.example.com/"
            print(f"使用默认URL: {base_url}")

    # Output file path or directory: argv[3] or prompt.
    if len(sys.argv) > 3:
        output_path = sys.argv[3]
    else:
        default_output = os.path.join(os.path.dirname(summary_path), "gitbook-urls.md")
        output_path = input(f"请输入输出文件路径或目录 [默认: {default_output}]: ").strip()
        if not output_path:
            output_path = default_output

    # Rewrite the links.
    processed_content = process_summary_file(summary_path, base_url)

    # Prepend the report header.
    final_content = add_header(processed_content)

    # Write the result.
    save_to_markdown(final_content, output_path)

    print("\n处理完成!")
|
||||
367
scripts/extract-local-file-url.py
Normal file
367
scripts/extract-local-file-url.py
Normal file
@@ -0,0 +1,367 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
本地GitBook Markdown文件链接检查工具
|
||||
|
||||
此脚本会:
|
||||
1. 从SUMMARY.md提取所有文档链接
|
||||
2. 解析每个本地Markdown文件
|
||||
3. 提取并验证文件中的内部链接
|
||||
4. 生成链接检查报告
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import csv
|
||||
from datetime import datetime
|
||||
from urllib.parse import urlparse, urljoin
|
||||
|
||||
# 尝试导入依赖,如果不存在则自动安装
|
||||
try:
|
||||
from bs4 import BeautifulSoup
|
||||
import markdown
|
||||
except ImportError:
|
||||
print("正在安装必要依赖...")
|
||||
import subprocess
|
||||
subprocess.check_call([sys.executable, "-m", "pip", "install", "beautifulsoup4", "markdown"])
|
||||
from bs4 import BeautifulSoup
|
||||
import markdown
|
||||
|
||||
|
||||
class GitbookLocalChecker:
    """Link checker for local GitBook Markdown files."""

    def __init__(self, summary_path, base_dir=None, remove_md=True):
        """
        Set up the checker.

        Args:
            summary_path: path to SUMMARY.md
            base_dir: documentation root; defaults to the directory of SUMMARY.md
            remove_md: whether to strip the .md suffix
        """
        self.summary_path = os.path.abspath(summary_path)
        self.base_dir = base_dir if base_dir else os.path.dirname(self.summary_path)
        self.remove_md = remove_md

        self.all_links = []      # every link discovered
        self.all_md_files = []   # local Markdown files queued for scanning
        self.invalid_links = []  # links whose target is missing

        # Files already parsed, to avoid re-processing.
        self.processed_files = set()
|
||||
|
||||
def extract_summary_links(self):
|
||||
"""从SUMMARY.md提取所有Markdown文件链接"""
|
||||
print(f"正在从 {self.summary_path} 提取文档链接...")
|
||||
|
||||
with open(self.summary_path, 'r', encoding='utf-8') as file:
|
||||
content = file.read()
|
||||
|
||||
# 使用正则表达式提取链接
|
||||
link_pattern = r'\[([^\]]+)\]\(([^)]+)\)'
|
||||
matches = re.findall(link_pattern, content)
|
||||
|
||||
links = []
|
||||
for i, (text, link) in enumerate(matches, 1):
|
||||
# 排除锚点链接
|
||||
if not link.startswith('#') and link.endswith('.md'):
|
||||
# 计算本地文件路径
|
||||
local_path = os.path.normpath(os.path.join(self.base_dir, link))
|
||||
|
||||
links.append({
|
||||
'id': i,
|
||||
'text': text,
|
||||
'link': link,
|
||||
'local_path': local_path,
|
||||
'exists': os.path.exists(local_path),
|
||||
'type': 'summary_link',
|
||||
'source_file': 'SUMMARY.md'
|
||||
})
|
||||
|
||||
# 将文件添加到待处理列表
|
||||
if os.path.exists(local_path):
|
||||
self.all_md_files.append(local_path)
|
||||
|
||||
print(f"找到 {len(links)} 个文档链接,{len(self.all_md_files)} 个本地Markdown文件")
|
||||
self.all_links.extend(links)
|
||||
return links
|
||||
|
||||
    def process_md_file(self, file_path):
        """
        Extract and validate the internal links of one Markdown file.

        Args:
            file_path: path of the Markdown file

        Returns:
            links: link records found in the file (empty on error or if the
                file was already processed)
        """
        # Each file is processed at most once.
        if file_path in self.processed_files:
            return []

        self.processed_files.add(file_path)
        relative_path = os.path.relpath(file_path, self.base_dir)

        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read()

            # Markdown links: [text](target)
            link_pattern = r'\[([^\]]+)\]\(([^)]+)\)'
            matches = re.findall(link_pattern, content)

            links = []
            for text, link in matches:
                # Only internal links are validated; external and anchor links skip.
                if link.startswith(('http://', 'https://', '#')):
                    continue

                # Resolve the target path.
                if link.startswith('/'):
                    # Relative to the documentation root.
                    target_path = os.path.normpath(os.path.join(self.base_dir, link.lstrip('/')))
                else:
                    # Relative to the current file's directory.
                    target_path = os.path.normpath(os.path.join(os.path.dirname(file_path), link))

                # Extension-less target: a directory means README.md,
                # otherwise assume a .md file reference.
                if not os.path.splitext(target_path)[1]:
                    if os.path.isdir(target_path):
                        target_path = os.path.join(target_path, 'README.md')
                    else:
                        target_path += '.md'

                # Does the resolved target exist on disk?
                exists = os.path.exists(target_path)

                link_info = {
                    'text': text,
                    'link': link,
                    'local_path': target_path,
                    'target_file': os.path.basename(target_path),
                    'exists': exists,
                    'type': 'internal_link',
                    'source_file': relative_path
                }

                links.append(link_info)

                if not exists:
                    # Broken link.
                    self.invalid_links.append(link_info)
                # Valid, unseen Markdown file: queue it for scanning.
                elif target_path.endswith('.md') and target_path not in self.processed_files:
                    self.all_md_files.append(target_path)

            return links

        except Exception as e:
            print(f"处理文件 {file_path} 时出错: {e}")
            return []
|
||||
|
||||
    def process_all_files(self):
        """Process SUMMARY.md and then every reachable Markdown file."""
        print("开始处理所有Markdown文件...")

        # Seed the worklist from SUMMARY.md.
        self.extract_summary_links()

        # Copy the list: process_md_file() appends newly discovered files to
        # self.all_md_files while this loop runs.
        files_to_process = list(self.all_md_files)
        processed_count = 0

        for file_path in files_to_process:
            if file_path not in self.processed_files:
                relative_path = os.path.relpath(file_path, self.base_dir)
                print(f"处理文件: {relative_path}")

                links = self.process_md_file(file_path)
                self.all_links.extend(links)

                processed_count += 1

                # Pull in any files discovered while processing this one;
                # extending the list being iterated makes later iterations
                # visit them.
                new_files = [f for f in self.all_md_files if f not in files_to_process and f not in self.processed_files]
                files_to_process.extend(new_files)

        print(f"已处理 {processed_count} 个Markdown文件")
        print(f"共找到 {len(self.all_links)} 个链接,其中 {len(self.invalid_links)} 个无效")
|
||||
|
||||
def generate_markdown_report(self, output_path):
    """Write a Markdown report of the link-check results to *output_path*.

    The report contains a summary section, invalid links grouped by the
    source file they appear in, and per-file link counts.  Reads
    ``self.processed_files``, ``self.all_links`` and ``self.invalid_links``;
    does not modify any state.

    NOTE(review): uses ``datetime.now()`` — requires
    ``from datetime import datetime`` at module top, which is not visible
    in this excerpt; confirm it is present in the full file.
    """
    print(f"正在生成报告: {output_path}")

    content = f"""# GitBook本地链接检查报告

## 摘要
- 检查时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
- 处理文件数: {len(self.processed_files)}
- 总链接数: {len(self.all_links)}
- 无效链接数: {len(self.invalid_links)}

## 无效链接列表
"""

    # Group invalid links by the file that contains them.
    grouped_links = {}
    for link in self.invalid_links:
        source = link['source_file']
        if source not in grouped_links:
            grouped_links[source] = []
        grouped_links[source].append(link)

    for source, links in sorted(grouped_links.items()):
        content += f"\n### 文件: {source}\n"
        for link in links:
            content += f"- [{link['text']}]({link['link']}) -> {link['local_path']} (无效)\n"

    # Per-file statistics over all links (valid and invalid).
    content += "\n## 文件链接统计\n"
    file_stats = {}
    for link in self.all_links:
        source = link['source_file']
        if source not in file_stats:
            file_stats[source] = {'total': 0, 'invalid': 0}
        file_stats[source]['total'] += 1
        if not link['exists']:
            file_stats[source]['invalid'] += 1

    for source, stats in sorted(file_stats.items()):
        content += f"- {source}: 共 {stats['total']} 个链接,{stats['invalid']} 个无效\n"

    with open(output_path, 'w', encoding='utf-8') as file:
        file.write(content)

    print(f"报告已生成: {output_path}")
|
||||
|
||||
def generate_csv_report(self, output_path):
    """Write all collected links to *output_path* as a CSV file.

    One row per entry in ``self.all_links`` with the columns
    source_file, text, link, local_path, exists and type.

    NOTE(review): uses the ``csv`` module — ``import csv`` at module top is
    not visible in this excerpt; confirm it is present in the full file.
    """
    print(f"正在生成CSV报告: {output_path}")

    with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['source_file', 'text', 'link', 'local_path', 'exists', 'type']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for link in self.all_links:
            # Copy only the reported fields; extra keys on the link dict
            # would make DictWriter raise.
            writer.writerow({
                'source_file': link['source_file'],
                'text': link['text'],
                'link': link['link'],
                'local_path': link['local_path'],
                'exists': link['exists'],
                'type': link['type'],
            })

    print(f"CSV报告已生成: {output_path}")
|
||||
|
||||
|
||||
def get_input_with_default(prompt, default=None):
    """Prompt the user for input, falling back to *default* on empty input.

    Shows the default inside square brackets when one is given; a
    whitespace-only answer counts as empty.  Uses ``is not None`` (rather
    than truthiness) so a falsy-but-meaningful default such as ``""`` or
    ``"0"`` is still offered.
    """
    if default is not None:
        user_input = input(f"{prompt} [{default}]: ")
        return user_input if user_input.strip() else default
    return input(f"{prompt}: ")
|
||||
|
||||
|
||||
def get_yes_no_input(prompt, default="y"):
    """Ask a yes/no question on stdin and return True/False.

    Accepts y/yes/是 and n/no/否 (case-insensitive).  An empty answer, or
    any unrecognised answer, returns the value implied by *default*.
    """
    valid_responses = {
        'y': True, 'yes': True, '是': True,
        'n': False, 'no': False, '否': False,
    }

    # Render the prompt so the default choice is capitalised.
    if default.lower() in ('y', 'yes', '是'):
        prompt = f"{prompt} [Y/n]: "
        default_value = True
    else:
        prompt = f"{prompt} [y/N]: "
        default_value = False

    user_input = input(prompt).lower()

    if not user_input:
        return default_value

    return valid_responses.get(user_input, default_value)
|
||||
|
||||
|
||||
def main():
    """Interactive entry point for the local GitBook link checker.

    Prompts for the SUMMARY.md path, documentation root and output
    directory, runs the checker, writes the Markdown and CSV reports and
    prints a short summary.  Exits with status 1 on any unexpected error.
    """
    print("=" * 60)
    print("本地GitBook Markdown文件链接检查工具")
    print("=" * 60)

    # Re-prompt until an existing SUMMARY.md is given.
    while True:
        summary_path = get_input_with_default(
            "请输入SUMMARY.md文件路径",
            os.path.join(os.getcwd(), "SUMMARY.md")
        )
        if os.path.isfile(summary_path):
            break
        print(f"错误: 文件 '{summary_path}' 不存在")

    # Documentation root (defaults to the directory containing SUMMARY.md).
    default_base_dir = os.path.dirname(os.path.abspath(summary_path))
    base_dir = get_input_with_default(
        "请输入文档根目录(包含所有Markdown文件的目录)",
        default_base_dir
    )

    # Output directory for the generated reports.
    output_dir = get_input_with_default(
        "请输入输出目录",
        os.path.dirname(summary_path) or os.getcwd()
    )
    os.makedirs(output_dir, exist_ok=True)

    report_path = os.path.join(output_dir, "gitbook-links-report.md")
    csv_path = os.path.join(output_dir, "gitbook-links-report.csv")

    remove_md = get_yes_no_input("是否移除链接中的.md后缀", "y")

    try:
        # NOTE(review): references GitbookLocalChecker, while the class
        # defined at the top of this file appears to be named LinkChecker —
        # confirm the intended class name.
        checker = GitbookLocalChecker(
            summary_path=summary_path,
            base_dir=base_dir,
            remove_md=remove_md
        )

        checker.process_all_files()

        checker.generate_markdown_report(report_path)
        checker.generate_csv_report(csv_path)

        print("\n检查完成!")
        print(f"Markdown报告: {report_path}")
        print(f"CSV报告: {csv_path}")

        print(f"\n摘要:")
        print(f"- 处理文件数: {len(checker.processed_files)}")
        print(f"- 总链接数: {len(checker.all_links)}")
        print(f"- 无效链接数: {len(checker.invalid_links)}")

        if checker.invalid_links:
            print("\n无效链接示例:")
            for i, link in enumerate(checker.invalid_links[:5], 1):
                print(f"{i}. 文件 '{link['source_file']}' 中 [{link['text']}]({link['link']}) -> {link['local_path']} (无效)")

            if len(checker.invalid_links) > 5:
                print(f"... 以及其他 {len(checker.invalid_links) - 5} 个无效链接")

    except Exception as e:
        print(f"执行过程中出错: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
@@ -3,6 +3,7 @@
|
||||
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
@@ -18,7 +19,9 @@ logging.basicConfig(
|
||||
logger = logging.getLogger("md-to-mdx")
|
||||
|
||||
class MarkdownToMDXConverter:
|
||||
def __init__(self):
|
||||
def __init__(self, backup=True, in_place=False):
|
||||
self.backup = backup
|
||||
self.in_place = in_place
|
||||
self.conversion_count = 0
|
||||
self.error_count = 0
|
||||
self.base_output_dir = None
|
||||
@@ -31,90 +34,351 @@ class MarkdownToMDXConverter:
|
||||
logger.error(f"输入目录不存在: {input_dir}")
|
||||
return
|
||||
|
||||
if self.base_output_dir is None and output_dir:
|
||||
# 保存基础输出目录,用于构建子目录输出路径
|
||||
if not self.in_place and self.base_output_dir is None and output_dir:
|
||||
self.base_output_dir = Path(output_dir)
|
||||
self.base_input_dir = input_path
|
||||
self.base_output_dir.mkdir(parents=True, exist_ok=True)
|
||||
logger.info(f"创建基础输出目录: {self.base_output_dir}")
|
||||
|
||||
for file in input_path.glob("*.md"):
|
||||
if self.base_output_dir:
|
||||
rel_path = file.parent.relative_to(self.base_input_dir) if file.parent != self.base_input_dir else Path('')
|
||||
target_dir = self.base_output_dir / rel_path
|
||||
target_dir.mkdir(parents=True, exist_ok=True)
|
||||
self._process_file(file, target_dir)
|
||||
# 处理当前目录中的所有.md和.mdx文件
|
||||
for file in list(input_path.glob("*.md")) + list(input_path.glob("*.mdx")):
|
||||
if self.in_place:
|
||||
# 在原位置处理
|
||||
self._process_file(file, file.parent, delete_original=True)
|
||||
else:
|
||||
self._process_file(file, file.parent)
|
||||
# 计算相对于基础输入目录的路径
|
||||
if self.base_output_dir:
|
||||
rel_path = file.parent.relative_to(self.base_input_dir) if file.parent != self.base_input_dir else Path('')
|
||||
target_dir = self.base_output_dir / rel_path
|
||||
target_dir.mkdir(parents=True, exist_ok=True)
|
||||
self._process_file(file, target_dir)
|
||||
else:
|
||||
# 如果没有基础输出目录,则就地处理
|
||||
self._process_file(file, file.parent)
|
||||
|
||||
# 如果需要递归处理子目录
|
||||
if recursive:
|
||||
for subdir in [d for d in input_path.iterdir() if d.is_dir()]:
|
||||
# 跳过output目录,避免重复处理
|
||||
if subdir.name == "output" or subdir.name.startswith('.'):
|
||||
continue
|
||||
|
||||
self.process_directory(subdir, output_dir, recursive)
|
||||
|
||||
def _process_file(self, file_path, output_dir):
|
||||
def _process_file(self, file_path, output_dir, delete_original=False):
|
||||
"""处理单个Markdown文件"""
|
||||
try:
|
||||
logger.info(f"处理文件: {file_path}")
|
||||
|
||||
# 备份原始文件(如果需要)
|
||||
if self.backup:
|
||||
backup_file = str(file_path) + ".bak"
|
||||
if not os.path.exists(backup_file):
|
||||
shutil.copy2(file_path, backup_file)
|
||||
logger.info(f"已创建备份: {backup_file}")
|
||||
|
||||
# 读取文件内容
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
content = f.read()
|
||||
|
||||
content = self._fix_broken_text(content)
|
||||
content = self._convert_images(content)
|
||||
content = self._convert_hints(content)
|
||||
# 执行转换
|
||||
converted_content = self.convert_content(content)
|
||||
|
||||
# 确定输出文件路径
|
||||
output_file = output_dir / (file_path.stem + ".mdx")
|
||||
|
||||
# 写入转换后的内容
|
||||
with open(output_file, 'w', encoding='utf-8') as f:
|
||||
f.write(converted_content)
|
||||
|
||||
logger.info(f"转换完成: {output_file}")
|
||||
self.conversion_count += 1
|
||||
|
||||
# 如果需要,删除原始文件
|
||||
if delete_original:
|
||||
try:
|
||||
os.remove(file_path)
|
||||
logger.info(f"已删除源文件: {file_path}")
|
||||
except Exception as e:
|
||||
logger.error(f"删除源文件 {file_path} 失败: {str(e)}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"处理文件 {file_path} 时出错: {str(e)}")
|
||||
self.error_count += 1
|
||||
|
||||
def _fix_broken_text(self, content):
|
||||
"""修复文本中的割裂问题,特别是在代码块周围"""
|
||||
broken_code_pattern = re.compile(r'```([a-zA-Z]*)\r?\n(.*?)\r?\n```([a-zA-Z]*)', re.DOTALL)
|
||||
content = broken_code_pattern.sub(r'```\1\n\2\n```', content)
|
||||
return content
|
||||
|
||||
def _convert_images(self, content):
|
||||
"""转换HTML图片格式为Markdown或MDX格式"""
|
||||
|
||||
# 转换没有标题的 <figure><img> 结构
|
||||
img_pattern_no_caption = re.compile(r'<figure>\s*<img src="([^"]+)" alt="([^"]*)">\s*<figcaption></figcaption>\s*</figure>', re.DOTALL)
|
||||
content = img_pattern_no_caption.sub(r'', content)
|
||||
|
||||
# 转换带标题的 <figure><img> 结构
|
||||
img_pattern_with_caption = re.compile(r'<figure>\s*<img src="([^"]+)" alt="([^"]*)">\s*<figcaption><p>(.*?)</p></figcaption>\s*</figure>', re.DOTALL)
|
||||
def img_replacer(match):
|
||||
img_src = match.group(1)
|
||||
alt_text = match.group(3).strip()
|
||||
return f''
|
||||
content = img_pattern_with_caption.sub(img_replacer, content)
|
||||
|
||||
return content
|
||||
|
||||
def _convert_hints(self, content):
|
||||
"""转换 hint 提示框"""
|
||||
hint_pattern = re.compile(r'{%\s*hint\s*style="info"\s*%}\s*{%\s*endhint\s*%}', re.DOTALL)
|
||||
content = hint_pattern.sub(r'<Info>\n</Info>', content)
|
||||
return content
|
||||
|
||||
def convert_content(self, content):
    """Convert GitBook-flavoured Markdown *content* to Mintlify MDX.

    Applies, in order: first H1 -> frontmatter, hint blocks, content-ref
    cards, side-by-side image pairs, <Frame>/<figure>/<img> normalisation,
    tabs, and Markdown tables -> HTML tables.

    NOTE(review): this body was reconstructed from a whitespace-stripped
    scrape; indentation inside the multi-line string templates was lost
    and is reproduced flat, and the <Frame> replacement literal (which
    appeared empty) is reconstructed as a Markdown image — confirm both
    against the original script.
    """
    # 1. Turn the first top-level heading into MDX frontmatter.
    h1_pattern = re.compile(r'^#\s+(.+?)$', re.MULTILINE)
    match = h1_pattern.search(content)
    if match:
        title = match.group(1).strip()
        # Callable replacement so backslashes in the title are not
        # interpreted as regex escape sequences by re.sub.
        content = h1_pattern.sub(lambda _m: f'---\ntitle: {title}\n---\n', content, count=1)

    # 2. {% hint style="..." %} -> <Info>/<Warning>/... components.
    hint_pattern = re.compile(
        r'{%\s*hint\s+style="(\w+)"\s*%}(.*?){%\s*endhint\s*%}',
        re.DOTALL
    )

    def hint_replacer(match):
        style = match.group(1)
        text = match.group(2).strip()
        component_name = style.capitalize() if style != "info" else "Info"
        return f'<{component_name}>\n{text}\n</{component_name}>'

    content = hint_pattern.sub(hint_replacer, content)

    # 3. {% content-ref %} card links -> <Card>.
    card_pattern = re.compile(
        r'{%\s*content-ref\s+url="([^"]+)"\s*%}\s*\[([^\]]+)\]\(([^)]+)\)\s*{%\s*endcontent-ref\s*%}',
        re.DOTALL
    )

    def card_replacer(match):
        url = match.group(1)
        title = match.group(2)
        return f'<Card title="{title}" icon="link" href="{url}">\n {title}\n</Card>'

    content = card_pattern.sub(card_replacer, content)

    # 4. Two consecutive Markdown images -> side-by-side layout.
    img_pattern = re.compile(r'!\[(.*?)\]\((.*?)\)\s*!\[(.*?)\]\((.*?)\)', re.DOTALL)

    def img_side_replacer(match):
        alt1 = match.group(1) or "Image 1"
        src1 = match.group(2)
        alt2 = match.group(3) or "Image 2"
        src2 = match.group(4)

        return f'''<div class="image-side-by-side">
<figure>
<img src="{src1}" alt="{alt1}" />
</figure>
<figure>
<img src="{src2}" alt="{alt2}" />
</figure>
</div>'''

    content = img_pattern.sub(img_side_replacer, content)

    # 5. <Frame>-wrapped images -> Markdown image (reconstructed, see NOTE).
    frame_pattern = re.compile(r'<Frame>\s*<img\s+src="([^"]+)"\s+alt="([^"]+)"\s*/>\s*</Frame>', re.DOTALL)

    def frame_replacer(match):
        src = match.group(1)
        alt = match.group(2)
        return f'![{alt}]({src})'

    content = frame_pattern.sub(frame_replacer, content)

    # 5.1 <figure><img width ...><figcaption> -> centred <img>.
    figure_img_width_caption_pattern = re.compile(r'<figure>\s*<img\s+src="([^"]+)"\s+alt="([^"]*)"\s+width="(\d+)"\s*/?>\s*<figcaption>(?:<p>)?(.*?)(?:</p>)?</figcaption>\s*</figure>', re.DOTALL)

    def figure_img_width_caption_replacer(match):
        src = match.group(1)
        alt = match.group(2) or ""
        width = match.group(3)
        caption = match.group(4).strip()

        # The caption, when present, wins over the original alt text.
        if caption:
            alt = caption

        return f'''<img
src="{src}"
width="{width}"
className="mx-auto"
alt="{alt}"
/>'''

    content = figure_img_width_caption_pattern.sub(figure_img_width_caption_replacer, content)

    # 5.2 <figure><img width ...> without a figcaption.
    figure_img_width_pattern = re.compile(r'<figure>\s*<img\s+src="([^"]+)"\s+alt="([^"]*)"\s+width="(\d+)"\s*/?>\s*</figure>', re.DOTALL)

    def figure_img_width_replacer(match):
        src = match.group(1)
        alt = match.group(2) or ""
        width = match.group(3)

        return f'''<img
src="{src}"
width="{width}"
className="mx-auto"
alt="{alt}"
/>'''

    content = figure_img_width_pattern.sub(figure_img_width_replacer, content)

    # 5.3 <figure><img> with a figcaption but no width.
    figure_img_caption_pattern = re.compile(r'<figure>\s*<img\s+src="([^"]+)"\s+alt="([^"]*)"\s*/?>\s*<figcaption>(?:<p>)?(.*?)(?:</p>)?</figcaption>\s*</figure>', re.DOTALL)

    def figure_img_caption_replacer(match):
        src = match.group(1)
        alt = match.group(2) or ""
        caption = match.group(3).strip()

        if caption:
            alt = caption

        return f'''<img
src="{src}"
className="mx-auto"
alt="{alt}"
/>'''

    content = figure_img_caption_pattern.sub(figure_img_caption_replacer, content)

    # 5.4 Bare <figure><img> with neither figcaption nor width.
    figure_img_no_caption_pattern = re.compile(r'<figure>\s*<img\s+src="([^"]+)"\s+alt="([^"]*)"\s*/?>\s*</figure>', re.DOTALL)

    def figure_img_no_caption_replacer(match):
        src = match.group(1)
        alt = match.group(2) or ""

        return f'''<img
src="{src}"
className="mx-auto"
alt="{alt}"
/>'''

    content = figure_img_no_caption_pattern.sub(figure_img_no_caption_replacer, content)

    # 6. {% tabs %} blocks -> <Tabs>/<Tab> components.
    tabs_pattern = re.compile(
        r'{%\s*tabs\s*%}(.*?){%\s*endtabs\s*%}',
        re.DOTALL
    )

    def tabs_replacer(match):
        tabs_content = match.group(1)
        tab_pattern = re.compile(
            r'{%\s*tab\s+title="([^"]+)"\s*%}(.*?){%\s*endtab\s*%}',
            re.DOTALL
        )

        tabs_start = "<Tabs>"
        tabs_items = []

        for tab_match in tab_pattern.finditer(tabs_content):
            title = tab_match.group(1)
            tab_body = tab_match.group(2).strip()
            tabs_items.append(f' <Tab title="{title}">\n {tab_body}\n </Tab>')

        tabs_end = "</Tabs>"
        return tabs_start + "\n" + "\n".join(tabs_items) + "\n" + tabs_end

    content = tabs_pattern.sub(tabs_replacer, content)

    # 7. Standalone <img ... width="..."> tags.
    img_size_pattern = re.compile(r'<img\s+src="([^"]+)"\s+width="(\d+)"(?:\s+alt="([^"]*)")?\s*/>', re.DOTALL)

    def img_size_replacer(match):
        src = match.group(1)
        width = match.group(2)
        alt = match.group(3) if match.group(3) else ""

        return f'''<img
src="{src}"
width="{width}"
className="mx-auto"
alt="{alt}"
/>'''

    content = img_size_pattern.sub(img_size_replacer, content)

    # 7.1 Any remaining standalone <img> tags.
    # NOTE(review): this loose pattern also re-matches the multi-line <img>
    # tags emitted by steps 5.x/7 above and drops their width/alt attributes
    # (only `src` is captured before `[^>]*>` swallows the rest) — confirm
    # whether that self-rewriting is intended.
    standalone_img_pattern = re.compile(r'<img\s+src="([^"]+)"(?:\s+alt="([^"]*)")?[^>]*>', re.DOTALL)

    def standalone_img_replacer(match):
        src = match.group(1)
        alt = match.group(2) if match.group(2) else ""

        return f'''<img
src="{src}"
className="mx-auto"
alt="{alt}"
/>'''

    content = standalone_img_pattern.sub(standalone_img_replacer, content)

    # 8. Markdown tables -> HTML tables.
    table_pattern = re.compile(r'(\|.*\|\n\|[-:\s|]*\|\n(?:\|.*\|\n)+)', re.MULTILINE)

    def table_replacer(match):
        md_table = match.group(1)
        lines = md_table.strip().split('\n')

        # Header row, then skip the |---| separator row.
        header_row = lines[0]
        header_cells = [cell.strip() for cell in header_row.split('|')[1:-1]]

        body_rows = lines[2:]
        body_cells_rows = []
        for row in body_rows:
            cells = [cell.strip() for cell in row.split('|')[1:-1]]
            body_cells_rows.append(cells)

        mdx_table = "<table>\n <thead>\n <tr>\n"

        for cell in header_cells:
            mdx_table += f" <th>{cell}</th>\n"

        mdx_table += " </tr>\n </thead>\n <tbody>\n"

        # Compiled once per table instead of once per cell (hoisted —
        # loop-invariant, identical behaviour).
        link_pattern = re.compile(r'\[([^\]]+)\]\(([^)]+)\)')
        br_pattern = re.compile(r'<br\s*/?>')

        for row_cells in body_cells_rows:
            mdx_table += " <tr>\n"
            for cell in row_cells:
                # Markdown links inside cells -> <a> tags.
                cell = link_pattern.sub(r'<a href="\2">\1</a>', cell)

                # Split cells containing <br> (any of <br>, <br/>, <br />)
                # into <p> paragraphs.
                if '<p>' in cell or br_pattern.search(cell):
                    if '<p>' in cell and br_pattern.search(cell):
                        cell = br_pattern.sub(r'</p>\n <p>', cell)
                        # NOTE(review): this cleanup appears unreachable —
                        # the preceding sub already removed every <br>.
                        cell = re.sub(r'<br\s*/?>(\s*</p>)', r'\1', cell)
                    elif br_pattern.search(cell) and '<p>' not in cell:
                        paragraphs = br_pattern.split(cell)
                        cell = '<p>' + '</p>\n <p>'.join([p.strip() for p in paragraphs if p.strip()]) + '</p>'

                    mdx_table += f" <td>\n {cell}\n </td>\n"
                else:
                    # Plain text cell.
                    mdx_table += f" <td>{cell}</td>\n"
            mdx_table += " </tr>\n"

        mdx_table += " </tbody>\n</table>"

        return mdx_table

    content = table_pattern.sub(table_replacer, content)

    return content
|
||||
|
||||
|
||||
def get_statistics(self):
|
||||
"""返回处理统计信息"""
|
||||
return {
|
||||
@@ -127,6 +391,7 @@ def main():
|
||||
print("Gitbook Markdown 转 Mintlify MDX 转换工具")
|
||||
print("=" * 60)
|
||||
|
||||
# 通过交互方式获取输入路径
|
||||
input_path_str = input("请输入源文件或目录路径: ")
|
||||
input_path = Path(input_path_str)
|
||||
|
||||
@@ -134,34 +399,54 @@ def main():
|
||||
print(f"错误: 路径 '{input_path_str}' 不存在!")
|
||||
return
|
||||
|
||||
# 询问是否递归处理子目录
|
||||
recursive = False
|
||||
if input_path.is_dir():
|
||||
recursive_input = input("是否递归处理所有子目录? (y/n): ").lower()
|
||||
recursive = recursive_input in ('y', 'yes')
|
||||
|
||||
if input_path.is_file():
|
||||
output_dir = input_path.parent / "output"
|
||||
else:
|
||||
output_dir = input_path / "output"
|
||||
# 询问是否创建备份
|
||||
backup_input = input("是否创建备份文件? (y/n, 默认:y): ").lower()
|
||||
create_backup = backup_input in ('', 'y', 'yes')
|
||||
|
||||
converter = MarkdownToMDXConverter()
|
||||
# 询问是否原地转换并删除源文件
|
||||
in_place_input = input("是否在原地转换并删除源文件? (y/n, 默认:n): ").lower()
|
||||
in_place = in_place_input in ('y', 'yes')
|
||||
|
||||
if input_path.is_file() and input_path.suffix.lower() == '.md':
|
||||
# 确定输出目录
|
||||
output_dir = None
|
||||
if not in_place:
|
||||
if input_path.is_file():
|
||||
output_dir = input_path.parent / "output"
|
||||
else:
|
||||
output_dir = input_path / "output"
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
print(f"输出目录已创建: {output_dir}")
|
||||
converter._process_file(input_path, output_dir)
|
||||
|
||||
# 创建转换器并处理文件
|
||||
converter = MarkdownToMDXConverter(backup=create_backup, in_place=in_place)
|
||||
|
||||
if input_path.is_file() and input_path.suffix.lower() == '.md':
|
||||
# 处理单个文件
|
||||
if in_place:
|
||||
converter._process_file(input_path, input_path.parent, delete_original=True)
|
||||
else:
|
||||
converter._process_file(input_path, output_dir)
|
||||
elif input_path.is_dir():
|
||||
# 处理目录
|
||||
converter.process_directory(input_path, output_dir, recursive)
|
||||
else:
|
||||
logger.error(f"无效的输入路径: {input_path_str}")
|
||||
print(f"错误: '{input_path_str}' 不是有效的Markdown文件或目录!")
|
||||
return
|
||||
|
||||
# 打印统计信息
|
||||
stats = converter.get_statistics()
|
||||
print("=" * 60)
|
||||
print(f"转换完成! 成功转换: {stats['conversion_count']}个文件, 错误: {stats['error_count']}个文件")
|
||||
print(f"转换结果已保存至: {output_dir}")
|
||||
if not in_place and output_dir:
|
||||
print(f"转换结果已保存至: {output_dir}")
|
||||
print("=" * 60)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
main()
|
||||
Reference in New Issue
Block a user