mirror of
https://github.com/langgenius/dify-docs.git
synced 2026-03-26 13:18:34 +07:00
176 lines
5.1 KiB
Python
176 lines
5.1 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
改进的GitBook Summary链接提取器 (支持目录输出)

此脚本从SUMMARY.md文件中提取所有内容,
保留原始的目录结构和标题,
将链接转换为在线URL(不包含.md后缀)。
支持将输出文件放在指定目录中。
|
||
"""
|
||
|
||
import os
|
||
import re
|
||
import sys
|
||
import urllib.parse
|
||
|
||
def process_summary_file(summary_path, base_url):
    """
    Read a GitBook SUMMARY.md file and rewrite its Markdown links as online URLs.

    The document structure (headings, list nesting, link text) is kept as-is;
    only the link targets change:

    * pure anchor links (``#section``) are left untouched;
    * ``http://`` / ``https://`` links are kept absolute;
    * every other link is resolved against ``base_url`` (one leading ``/`` is
      dropped first so the link stays below the base path);
    * a trailing ``.md`` is removed from the resulting URL, also when it is
      followed by a ``#fragment``.

    Args:
        summary_path: Path of the SUMMARY.md file to read (UTF-8).
        base_url: Base URL of the published documentation. A trailing ``/``
            is appended when missing.

    Returns:
        The file content with all link targets rewritten, lines joined by
        ``\\n``.

    Raises:
        SystemExit: With exit code 1 when the file cannot be read or decoded.
    """
    print(f"正在处理 {summary_path}...")

    try:
        with open(summary_path, 'r', encoding='utf-8') as file:
            content = file.read()
    except (OSError, ValueError) as e:
        # ValueError also covers UnicodeDecodeError for non-UTF-8 input.
        print(f"读取文件时出错: {e}")
        sys.exit(1)

    # urljoin() replaces the last path segment unless the base ends with '/'.
    if not base_url.endswith('/'):
        base_url += '/'

    # Compiled once instead of once per line.
    link_pattern = re.compile(r'\[([^\]]+)\]\(([^)]+)\)')

    def _rewrite_link(match):
        """Return the replacement text for one ``[text](link)`` match."""
        text, link = match.groups()

        # Anchor-only links refer to the current page; keep them verbatim.
        if link.startswith('#'):
            return match.group(0)

        if link.startswith(('http://', 'https://')):
            full_url = link
        else:
            # Drop one leading slash so the link resolves below base_url
            # rather than at the host root.
            relative = link[1:] if link.startswith('/') else link
            full_url = urllib.parse.urljoin(base_url, relative)

        # Strip the .md suffix from the path part, keeping any #fragment.
        target, hash_sep, fragment = full_url.partition('#')
        if target.endswith('.md'):
            target = target[:-3]

        return f"[{text}]({target}{hash_sep}{fragment})"

    # Substituting on the matched text itself guarantees the replacement is
    # applied even when the link had to be normalised (e.g. leading '/').
    # Lines are processed one by one so a link can never span a line break.
    return '\n'.join(
        link_pattern.sub(_rewrite_link, line) for line in content.split('\n')
    )
|
||
|
||
|
||
def save_to_markdown(content, output_path):
    """
    Write the processed content to a Markdown file.

    ``output_path`` may name either a file or a directory. It is treated as a
    directory when it already exists as one, or when it ends with a path
    separator (so a not-yet-existing directory such as ``out/`` works too);
    in that case the file ``gitbook-urls.md`` is written inside it. Missing
    parent directories are created.

    Args:
        content: Text to write (encoded as UTF-8).
        output_path: Target file path, or directory to place the default
            file name in.

    Raises:
        SystemExit: With exit code 1 when the directory cannot be created or
            the file cannot be written.
    """
    # os.altsep is None on POSIX, so filter it out before building the tuple.
    separators = tuple(sep for sep in (os.sep, os.altsep) if sep)
    names_directory = os.path.isdir(output_path) or output_path.endswith(separators)

    if names_directory:
        # Directory given: use the default file name inside it.
        output_file = os.path.join(output_path, "gitbook-urls.md")
    else:
        # Otherwise the path is taken as the output file itself.
        output_file = output_path

    # Make sure the directory that will hold the file exists.
    output_dir = os.path.dirname(output_file)
    if output_dir and not os.path.exists(output_dir):
        try:
            # exist_ok tolerates the directory appearing between the check
            # above and this call.
            os.makedirs(output_dir, exist_ok=True)
            print(f"已创建目录: {output_dir}")
        except OSError as e:
            print(f"创建目录时出错: {e}")
            sys.exit(1)

    try:
        with open(output_file, 'w', encoding='utf-8') as file:
            file.write(content)
        print(f"Markdown文件已生成: {output_file}")
    except (OSError, ValueError) as e:
        # ValueError also covers UnicodeEncodeError raised while writing.
        print(f"写入文件时出错: {e}")
        sys.exit(1)
|
||
|
||
|
||
def add_header(content):
    """
    Prefix the content with a title and a short explanatory line.

    Args:
        content: The processed SUMMARY content.

    Returns:
        The title, the explanatory line and ``content``, concatenated in
        that order.
    """
    title_block = "# GitBook文档链接\n\n"
    intro_block = "以下是从SUMMARY.md提取的文档结构和链接:\n\n"
    return "".join((title_block, intro_block, content))
|
||
|
||
|
||
if __name__ == "__main__":
    # Command-line entry point. Up to three positional arguments are read:
    # SUMMARY.md path, base URL, output file or directory. Any argument not
    # given on the command line is asked for interactively, with a default
    # used when the answer is empty.
    banner_rule = "=" * 60
    print(banner_rule)
    print("改进的GitBook Summary链接提取器 (支持目录输出)")
    print(banner_rule)

    cli_args = sys.argv[1:]

    # 1. Location of the SUMMARY.md file.
    if cli_args:
        summary_path = cli_args[0]
    else:
        summary_path = input("请输入SUMMARY.md文件路径: ").strip()
        if summary_path == "":
            summary_path = os.path.join(os.getcwd(), "SUMMARY.md")
            print(f"使用默认路径: {summary_path}")

    # Stop early when the input file is missing.
    if not os.path.isfile(summary_path):
        print(f"错误: 文件 '{summary_path}' 不存在")
        sys.exit(1)

    # 2. Base URL of the published documentation.
    if len(cli_args) >= 2:
        base_url = cli_args[1]
    else:
        base_url = input("请输入文档基础URL: ").strip()
        if base_url == "":
            base_url = "https://docs.example.com/"
            print(f"使用默认URL: {base_url}")

    # 3. Output file or directory; defaults to a file next to SUMMARY.md.
    if len(cli_args) >= 3:
        output_path = cli_args[2]
    else:
        default_output = os.path.join(os.path.dirname(summary_path), "gitbook-urls.md")
        answer = input(f"请输入输出文件路径或目录 [默认: {default_output}]: ").strip()
        output_path = answer or default_output

    # Convert the links, prepend the header, then write the result.
    processed_content = process_summary_file(summary_path, base_url)
    final_content = add_header(processed_content)
    save_to_markdown(final_content, output_path)

    print("\n处理完成!")