diff --git a/.github/workflows/sync_docs_execute.yml b/.github/workflows/sync_docs_execute.yml index c7461d7a..d6982407 100644 --- a/.github/workflows/sync_docs_execute.yml +++ b/.github/workflows/sync_docs_execute.yml @@ -317,6 +317,16 @@ jobs: async def secure_sync(): work_dir = sys.argv[1] + # Load translation config + config_path = Path("../../tools/translate/config.json") + with open(config_path, 'r', encoding='utf-8') as f: + translation_config = json.load(f) + + # Get language settings from config + SOURCE_LANGUAGE = translation_config.get("source_language", "en") + TARGET_LANGUAGES = translation_config.get("target_languages", ["cn", "jp"]) + source_dir = translation_config["languages"][SOURCE_LANGUAGE]["directory"] + # Load sync plan with open(f"{work_dir}/sync_plan.json") as f: sync_plan = json.load(f) @@ -333,9 +343,9 @@ jobs: print(f"Security error: Invalid path {file_path}") return False - # Allow en/ files and docs.json - if not (file_path.startswith("en/") or file_path == "docs.json"): - print(f"Security error: File outside en/ directory: {file_path}") + # Allow source language files and docs.json + if not (file_path.startswith(f"{source_dir}/") or file_path == "docs.json"): + print(f"Security error: File outside {source_dir}/ directory: {file_path}") return False # Initialize synchronizer @@ -381,11 +391,11 @@ jobs: except Exception as e: print(f"Warning: Could not detect file status: {e}") # Fallback: treat all as added - added_files = [f["path"] for f in files_to_sync if f["path"].startswith("en/")] + added_files = [f["path"] for f in files_to_sync if f["path"].startswith(f"{source_dir}/")] # If we couldn't detect, treat all as added (safe fallback) if not added_files and not modified_files: - added_files = [f["path"] for f in files_to_sync if f["path"].startswith("en/")] + added_files = [f["path"] for f in files_to_sync if f["path"].startswith(f"{source_dir}/")] print(f"Detected {len(added_files)} added files, {len(modified_files)} modified 
files") @@ -422,8 +432,9 @@ jobs: except Exception as e: print(f" Warning: Could not get diff for {file_path}: {e}") - for target_lang in ["cn", "jp"]: - target_path = file_path.replace("en/", f"{target_lang}/") + for target_lang in TARGET_LANGUAGES: + target_dir = translation_config["languages"][target_lang]["directory"] + target_path = file_path.replace(f"{source_dir}/", f"{target_dir}/", 1) # Load existing translation for modified files the_doc_exist = None @@ -463,7 +474,7 @@ jobs: print("Syncing docs.json structure (incremental mode)...") try: # Get added files (those we just translated) - added_files = [f["path"] for f in files_to_sync if f["path"].startswith("en/")] + added_files = [f["path"] for f in files_to_sync if f["path"].startswith(f"{source_dir}/")] # Get deleted files from git diff # Use the metadata to get base and head SHAs @@ -490,7 +501,7 @@ jobs: for line in result.stdout.strip().split('\n'): if line and line.startswith('D\t'): file_path = line.split('\t')[1] - if file_path.startswith("en/"): + if file_path.startswith(f"{source_dir}/"): deleted_files.append(file_path) print(f" Found deleted file: {file_path}") except Exception as e: @@ -505,10 +516,11 @@ jobs: # Delete corresponding translation files if deleted_files: - print(f"\nDeleting corresponding translation files for {len(deleted_files)} deleted English files...") - for en_file in deleted_files: - for target_lang in ["cn", "jp"]: - target_file = en_file.replace("en/", f"{target_lang}/") + print(f"\nDeleting corresponding translation files for {len(deleted_files)} deleted source files...") + for source_file in deleted_files: + for target_lang in TARGET_LANGUAGES: + target_dir = translation_config["languages"][target_lang]["directory"] + target_file = source_file.replace(f"{source_dir}/", f"{target_dir}/", 1) target_path = Path(f"../../{target_file}") if target_path.exists(): diff --git a/docs.json b/docs.json index 07579100..abfa7a72 100644 --- a/docs.json +++ b/docs.json @@ -35,7 +35,8 @@ 
"pages": [ "en/documentation/pages/getting-started/introduction", "en/documentation/pages/getting-started/quick-start", - "en/documentation/pages/getting-started/key-concepts" + "en/documentation/pages/getting-started/key-concepts", + "en/testing/config-refactor-test" ] }, { diff --git a/en/testing/config-refactor-test.mdx b/en/testing/config-refactor-test.mdx new file mode 100644 index 00000000..29ae820c --- /dev/null +++ b/en/testing/config-refactor-test.mdx @@ -0,0 +1,52 @@ +--- +title: Config Refactor Test +description: Test file to verify unified language configuration refactoring +--- + +# Config Refactor Test + +This is a test document created to verify that the language configuration refactoring works correctly. + +## Purpose + +The refactoring unified all language configuration into a single source of truth at `tools/translate/config.json`. This test verifies: + +1. **Auto-translation triggers correctly** - The workflow should detect this new English file +2. **Config-based language loading** - Languages are loaded from config, not hardcoded +3. **Translation to all target languages** - Should create `cn/testing/config-refactor-test.mdx` and `jp/testing/config-refactor-test.mdx` +4. **Translation notices** - Each translated file should have the correct AI notice from config +5. 
**docs.json sync** - Navigation should be updated for all languages + +## Expected Results + +After this PR is merged: +- Chinese translation should appear in `cn/testing/config-refactor-test.mdx` +- Japanese translation should appear in `jp/testing/config-refactor-test.mdx` +- Both translations should have AI notices with correct relative links +- `docs.json` should include this file in all three language sections + +## Configuration Details + +The new unified configuration allows adding new languages by simply editing `config.json`: + +```json +{ + "source_language": "en", + "target_languages": ["cn", "jp"], + "languages": { + "en": { + "code": "en", + "name": "English", + "directory": "en" + }, + "cn": { + "code": "cn", + "name": "Chinese", + "directory": "cn", + "translation_notice": "..." + } + } +} +``` + +No code changes required to add a new language! diff --git a/tools/translate/config.json b/tools/translate/config.json index b72fa689..c5046a17 100644 --- a/tools/translate/config.json +++ b/tools/translate/config.json @@ -1,10 +1,27 @@ { - "path_mappings": { - "en": ["cn", "jp"], - "en/documentation": ["cn/documentation", "jp/documentation"], - "en/self-hosting": ["cn/self-hosting", "jp/self-hosting"], - "en/api-reference": ["cn/api-reference", "jp/api-reference"] + "source_language": "en", + "target_languages": ["cn", "jp"], + + "languages": { + "en": { + "code": "en", + "name": "English", + "directory": "en" + }, + "cn": { + "code": "cn", + "name": "Chinese", + "directory": "cn", + "translation_notice": " ⚠️ 本文档由 AI 自动翻译。如有任何不准确之处,请参考[英文原版]({source_path})。\n\n" + }, + "jp": { + "code": "jp", + "name": "Japanese", + "directory": "jp", + "translation_notice": " ⚠️ このドキュメントはAIによって自動翻訳されています。不正確な部分がある場合は、[英語版]({source_path})を参照してください。\n\n" + } }, + "label_translations": { "Getting Started": { "cn": "快速开始", @@ -74,10 +91,5 @@ "cn": "开发", "jp": "開発" } - }, - "docs_json_language_mapping": { - "en": "en", - "cn": "cn", - "jp": "jp" } -} \ No newline at end of 
file +} diff --git a/tools/translate/main.py b/tools/translate/main.py index ecb16dd2..a6fdaaba 100644 --- a/tools/translate/main.py +++ b/tools/translate/main.py @@ -3,34 +3,67 @@ import os import sys import asyncio import aiofiles +import json +from pathlib import Path -docs_structure = { - "general_help": { - "English": "en", - "Chinese": "cn", - "Japanese": "jp" - }, - "plugin_dev": { +# Load translation config +SCRIPT_DIR = Path(__file__).resolve().parent +CONFIG_PATH = SCRIPT_DIR / "config.json" + +def load_translation_config(): + """Load language configuration""" + if CONFIG_PATH.exists(): + with open(CONFIG_PATH, 'r', encoding='utf-8') as f: + return json.load(f) + return None + +TRANSLATION_CONFIG = load_translation_config() + +def build_docs_structure(): + """Build docs structure from config and hardcoded plugin-dev paths""" + structure = {} + + # General docs from config + if TRANSLATION_CONFIG and "languages" in TRANSLATION_CONFIG: + general_help = {} + for lang_code, lang_info in TRANSLATION_CONFIG["languages"].items(): + general_help[lang_info["name"]] = lang_info["directory"] + structure["general_help"] = general_help + else: + # Fallback if config not available + structure["general_help"] = { + "English": "en", + "Chinese": "cn", + "Japanese": "jp" + } + + # Plugin dev paths (keep hardcoded for now as requested) + structure["plugin_dev"] = { "English": "plugin-dev-en", "Chinese": "plugin-dev-zh", "Japanese": "plugin-dev-ja" - }, - "version_28x": { + } + + # Versioned docs (keep hardcoded) + structure["version_28x"] = { "English": "versions/2-8-x/en-us", "Chinese": "versions/2-8-x/zh-cn", "Japanese": "versions/2-8-x/jp" - }, - "version_30x": { + } + structure["version_30x"] = { "English": "versions/3-0-x/en-us", "Chinese": "versions/3-0-x/zh-cn", "Japanese": "versions/3-0-x/jp" - }, - "version_31x": { + } + structure["version_31x"] = { "English": "versions/3-1-x/en-us", "Chinese": "versions/3-1-x/zh-cn", "Japanese": "versions/3-1-x/jp" } -} + + 
return structure + +docs_structure = build_docs_structure() async def translate_text(file_path, dify_api_key, original_language, target_language1, termbase_path=None, max_retries=5, the_doc_exist=None, diff_original=None): diff --git a/tools/translate/notices.json b/tools/translate/notices.json deleted file mode 100644 index 518804e5..00000000 --- a/tools/translate/notices.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "cn": " ⚠️ 本文档由 AI 自动翻译。如有任何不准确之处,请参考[英文原版]({en_path})。\n\n", - "jp": " ⚠️ このドキュメントはAIによって自動翻訳されています。不正確な部分がある場合は、[英語版]({en_path})を参照してください。\n\n" -} \ No newline at end of file diff --git a/tools/translate/sync_and_translate.py b/tools/translate/sync_and_translate.py index e2d0546c..a454807d 100644 --- a/tools/translate/sync_and_translate.py +++ b/tools/translate/sync_and_translate.py @@ -35,27 +35,6 @@ SCRIPT_DIR = Path(__file__).resolve().parent BASE_DIR = SCRIPT_DIR.parent.parent DOCS_JSON_PATH = BASE_DIR / "docs.json" -# Language configurations -LANGUAGES = { - "en": { - "name": "English", - "base_path": "en", - "code": "en" - }, - "cn": { - "name": "Chinese", - "base_path": "cn", - "code": "cn" - }, - "jp": { - "name": "Japanese", - "base_path": "jp", - "code": "jp" - } -} - -TARGET_LANGUAGES = ["cn", "jp"] - class DocsSynchronizer: def __init__(self, dify_api_key: str, enable_security: bool = False): self.dify_api_key = dify_api_key @@ -68,7 +47,6 @@ class DocsSynchronizer: if enable_security and create_validator: self.validator = create_validator(self.base_dir) self.config = self.load_config() - self.notices = self.load_notices() def validate_file_path(self, file_path: str) -> Tuple[bool, Optional[str]]: """Validate file path for security if security is enabled""" @@ -96,25 +74,55 @@ class DocsSynchronizer: config_path = SCRIPT_DIR / "config.json" if config_path.exists(): with open(config_path, 'r', encoding='utf-8') as f: - return json.load(f) - return { - "path_mappings": { - "en": ["cn", "jp"] - }, - "label_translations": {} - } - - def 
load_notices(self) -> Dict[str, str]: - """Load AI translation notice templates""" - notices_path = SCRIPT_DIR / "notices.json" - if notices_path.exists(): - with open(notices_path, 'r', encoding='utf-8') as f: - return json.load(f) - return { - "cn": "> ⚠️ 本文档由 AI 自动翻译。如有任何不准确之处,请参考[英文原版]({en_path})。\n\n", - "jp": "> ⚠️ このドキュメントはAIによって自動翻訳されています。不正確な部分がある場合は、[英語版]({en_path})を参照してください。\n\n" - } - + config = json.load(f) + + # Validate required fields + required = ["source_language", "target_languages", "languages"] + for field in required: + if field not in config: + raise ValueError(f"Missing required field in config.json: {field}") + + # Validate all referenced languages exist + all_langs = [config["source_language"]] + config["target_languages"] + for lang in all_langs: + if lang not in config["languages"]: + raise ValueError(f"Language '{lang}' referenced but not defined in languages") + + # Validate target languages have translation_notice + for lang in config["target_languages"]: + if "translation_notice" not in config["languages"][lang]: + raise ValueError(f"Target language '{lang}' missing translation_notice") + + return config + + raise FileNotFoundError(f"Config file not found: {config_path}") + + @property + def source_language(self) -> str: + """Get source language code from config""" + return self.config["source_language"] + + @property + def target_languages(self) -> List[str]: + """Get list of target language codes from config""" + return self.config["target_languages"] + + def get_language_info(self, lang_code: str) -> Dict[str, Any]: + """Get full language information for a language code""" + return self.config["languages"].get(lang_code, {}) + + def get_language_name(self, lang_code: str) -> str: + """Get human-readable language name (e.g., 'English', 'Chinese')""" + return self.get_language_info(lang_code).get("name", "") + + def get_language_directory(self, lang_code: str) -> str: + """Get directory path for a language (e.g., 'en', 'cn')""" + 
return self.get_language_info(lang_code).get("directory", lang_code) + + def get_translation_notice(self, lang_code: str) -> str: + """Get AI translation notice template for a target language""" + return self.get_language_info(lang_code).get("translation_notice", "") + def get_changed_files(self, since_commit: str = "HEAD~1") -> Dict[str, List[str]]: """Get changed files using git diff""" try: @@ -168,32 +176,34 @@ class DocsSynchronizer: return None def is_english_doc_file(self, file_path: str) -> bool: - """Check if file is an English documentation file that should be synced""" - return (file_path.startswith("en/") and + """Check if file is a source language documentation file that should be synced""" + source_dir = self.get_language_directory(self.source_language) + return (file_path.startswith(f"{source_dir}/") and (file_path.endswith('.md') or file_path.endswith('.mdx')) and - not file_path.startswith("en/api-reference/")) + not file_path.startswith(f"{source_dir}/api-reference/")) - def convert_path_to_target_language(self, en_path: str, target_lang: str) -> str: - """Convert English path to target language path""" - if en_path.startswith("en/"): - return en_path.replace("en/", f"{target_lang}/", 1) - return en_path + def convert_path_to_target_language(self, source_path: str, target_lang: str) -> str: + """Convert source language path to target language path""" + source_dir = self.get_language_directory(self.source_language) + target_dir = self.get_language_directory(target_lang) + if source_path.startswith(f"{source_dir}/"): + return source_path.replace(f"{source_dir}/", f"{target_dir}/", 1) + return source_path - def get_relative_en_path_for_notice(self, target_path: str) -> str: - """Get relative path to English version for AI notice""" - # Convert cn/documentation/pages/getting-started/faq.md - # to ../../en/documentation/pages/getting-started/faq.md - if target_path.startswith("cn/"): - en_path = target_path.replace("cn/", "en/", 1) - elif 
target_path.startswith("jp/"): - en_path = target_path.replace("jp/", "en/", 1) - else: - return "" + def get_relative_source_path_for_notice(self, target_path: str) -> str: + """Get relative path to source language version for AI notice""" + source_dir = self.get_language_directory(self.source_language) - # Count directory levels to create relative path - target_dir_levels = len(Path(target_path).parent.parts) - relative_prefix = "../" * target_dir_levels - return relative_prefix + en_path + # Find which target language directory this path is in + for target_lang in self.target_languages: + target_dir = self.get_language_directory(target_lang) + if target_path.startswith(f"{target_dir}/"): + source_path = target_path.replace(f"{target_dir}/", f"{source_dir}/", 1) + target_dir_levels = len(Path(target_path).parent.parts) + relative_prefix = "../" * target_dir_levels + return relative_prefix + source_path + + return "" def insert_notice_under_title(self, content: str, notice: str) -> str: """Insert notice after frontmatter or first heading to keep it under the doc title.""" @@ -282,14 +292,14 @@ class DocsSynchronizer: target_dir.mkdir(parents=True, exist_ok=True) # Get language names for translation API - en_lang_name = LANGUAGES["en"]["name"] - target_lang_name = LANGUAGES[target_lang]["name"] + source_lang_name = self.get_language_name(self.source_language) + target_lang_name = self.get_language_name(target_lang) # Translate content translated_content = await translate_text( str(self.base_dir / en_file_path), self.dify_api_key, - en_lang_name, + source_lang_name, target_lang_name, the_doc_exist=the_doc_exist, diff_original=diff_original @@ -300,8 +310,8 @@ class DocsSynchronizer: return False # Prepare AI notice - en_relative_path = self.get_relative_en_path_for_notice(target_file_path) - notice = self.notices.get(target_lang, "").format(en_path=en_relative_path) + source_relative_path = self.get_relative_source_path_for_notice(target_file_path) + notice = 
self.get_translation_notice(target_lang).format(source_path=source_relative_path) # Combine notice and translated content final_content = self.insert_notice_under_title(translated_content, notice) @@ -324,25 +334,25 @@ class DocsSynchronizer: # Handle added files for file_path in changes["added"]: if self.is_english_doc_file(file_path): - for target_lang in TARGET_LANGUAGES: + for target_lang in self.target_languages: target_path = self.convert_path_to_target_language(file_path, target_lang) # We'll translate these in the async part operations_log.append(f"WILL_TRANSLATE: {file_path} -> {target_path}") - + # Handle deleted files for file_path in changes["deleted"]: if self.is_english_doc_file(file_path): - for target_lang in TARGET_LANGUAGES: + for target_lang in self.target_languages: target_path = self.convert_path_to_target_language(file_path, target_lang) target_full_path = self.base_dir / target_path if target_full_path.exists(): target_full_path.unlink() operations_log.append(f"DELETED: {target_path}") - # Handle renamed files + # Handle renamed files for old_path, new_path in changes["renamed"]: if self.is_english_doc_file(old_path) or self.is_english_doc_file(new_path): - for target_lang in TARGET_LANGUAGES: + for target_lang in self.target_languages: old_target = self.convert_path_to_target_language(old_path, target_lang) new_target = self.convert_path_to_target_language(new_path, target_lang) @@ -370,7 +380,7 @@ class DocsSynchronizer: # Handle added files (no existing translation) for file_path in changes["added"]: if self.is_english_doc_file(file_path): - for target_lang in TARGET_LANGUAGES: + for target_lang in self.target_languages: target_path = self.convert_path_to_target_language(file_path, target_lang) # New files - no existing translation or diff needed task = self.translate_file_with_notice(file_path, target_path, target_lang) @@ -382,7 +392,7 @@ class DocsSynchronizer: # Get diff for this file diff_original = self.get_file_diff(file_path, 
since_commit) - for target_lang in TARGET_LANGUAGES: + for target_lang in self.target_languages: target_path = self.convert_path_to_target_language(file_path, target_lang) target_full_path = self.base_dir / target_path @@ -410,7 +420,7 @@ class DocsSynchronizer: # Handle renamed files that need translation for old_path, new_path in changes["renamed"]: if self.is_english_doc_file(new_path): - for target_lang in TARGET_LANGUAGES: + for target_lang in self.target_languages: target_path = self.convert_path_to_target_language(new_path, target_lang) # Renamed files treated as new task = self.translate_file_with_notice(new_path, target_path, target_lang) @@ -1219,9 +1229,9 @@ class DocsSynchronizer: if not (self.base_dir / file_path).exists(): results["skipped"].append(file_path) continue - + # Translate to target languages - for target_lang in TARGET_LANGUAGES: + for target_lang in self.target_languages: target_path = self.convert_path_to_target_language(file_path, target_lang) try: success = await self.translate_file_with_notice(