refactor: unify language config into single source of truth

## Changes

### Unified Configuration
- Centralized all language configuration in `tools/translate/config.json`
- Added `source_language`, `target_languages`, and `languages` structure
- Merged translation notices from `notices.json` into language configs
- Each language now has: code, name, and directory; target languages additionally have translation_notice

### Updated sync_and_translate.py
- Removed hardcoded LANGUAGES dict and TARGET_LANGUAGES list
- Enhanced load_config() with validation
- Added helper methods for language info access
- All methods now use config-based language properties

### Updated main.py
- Added config loading at module level
- Dynamically builds docs_structure from config
- Keeps plugin-dev/versioned paths hardcoded as requested

### Updated workflow
- .github/workflows/sync_docs_execute.yml now loads config
- Replaced all hardcoded language references with config values

### Cleanup
- Removed deprecated notices.json

### Test File
- Added en/testing/config-refactor-test.mdx to test the refactoring
- Added to docs.json to trigger auto-translation workflow

## Benefits
- Single source of truth for language configuration
- Adding new languages requires only config.json changes
- No code changes needed to add/modify languages
- Better validation and error handling

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Gu
2025-10-31 10:36:51 -07:00
parent e9571f2db3
commit ad6a4a61c7
7 changed files with 237 additions and 121 deletions

View File

@@ -317,6 +317,16 @@ jobs:
async def secure_sync():
work_dir = sys.argv[1]
# Load translation config
config_path = Path("../../tools/translate/config.json")
with open(config_path, 'r', encoding='utf-8') as f:
translation_config = json.load(f)
# Get language settings from config
SOURCE_LANGUAGE = translation_config.get("source_language", "en")
TARGET_LANGUAGES = translation_config.get("target_languages", ["cn", "jp"])
source_dir = translation_config["languages"][SOURCE_LANGUAGE]["directory"]
# Load sync plan
with open(f"{work_dir}/sync_plan.json") as f:
sync_plan = json.load(f)
@@ -333,9 +343,9 @@ jobs:
print(f"Security error: Invalid path {file_path}")
return False
# Allow en/ files and docs.json
if not (file_path.startswith("en/") or file_path == "docs.json"):
print(f"Security error: File outside en/ directory: {file_path}")
# Allow source language files and docs.json
if not (file_path.startswith(f"{source_dir}/") or file_path == "docs.json"):
print(f"Security error: File outside {source_dir}/ directory: {file_path}")
return False
# Initialize synchronizer
@@ -381,11 +391,11 @@ jobs:
except Exception as e:
print(f"Warning: Could not detect file status: {e}")
# Fallback: treat all as added
added_files = [f["path"] for f in files_to_sync if f["path"].startswith("en/")]
added_files = [f["path"] for f in files_to_sync if f["path"].startswith(f"{source_dir}/")]
# If we couldn't detect, treat all as added (safe fallback)
if not added_files and not modified_files:
added_files = [f["path"] for f in files_to_sync if f["path"].startswith("en/")]
added_files = [f["path"] for f in files_to_sync if f["path"].startswith(f"{source_dir}/")]
print(f"Detected {len(added_files)} added files, {len(modified_files)} modified files")
@@ -422,8 +432,9 @@ jobs:
except Exception as e:
print(f" Warning: Could not get diff for {file_path}: {e}")
for target_lang in ["cn", "jp"]:
target_path = file_path.replace("en/", f"{target_lang}/")
for target_lang in TARGET_LANGUAGES:
target_dir = translation_config["languages"][target_lang]["directory"]
target_path = file_path.replace(f"{source_dir}/", f"{target_dir}/")
# Load existing translation for modified files
the_doc_exist = None
@@ -463,7 +474,7 @@ jobs:
print("Syncing docs.json structure (incremental mode)...")
try:
# Get added files (those we just translated)
added_files = [f["path"] for f in files_to_sync if f["path"].startswith("en/")]
added_files = [f["path"] for f in files_to_sync if f["path"].startswith(f"{source_dir}/")]
# Get deleted files from git diff
# Use the metadata to get base and head SHAs
@@ -490,7 +501,7 @@ jobs:
for line in result.stdout.strip().split('\n'):
if line and line.startswith('D\t'):
file_path = line.split('\t')[1]
if file_path.startswith("en/"):
if file_path.startswith(f"{source_dir}/"):
deleted_files.append(file_path)
print(f" Found deleted file: {file_path}")
except Exception as e:
@@ -505,10 +516,11 @@ jobs:
# Delete corresponding translation files
if deleted_files:
print(f"\nDeleting corresponding translation files for {len(deleted_files)} deleted English files...")
for en_file in deleted_files:
for target_lang in ["cn", "jp"]:
target_file = en_file.replace("en/", f"{target_lang}/")
print(f"\nDeleting corresponding translation files for {len(deleted_files)} deleted source files...")
for source_file in deleted_files:
for target_lang in TARGET_LANGUAGES:
target_dir = translation_config["languages"][target_lang]["directory"]
target_file = source_file.replace(f"{source_dir}/", f"{target_dir}/")
target_path = Path(f"../../{target_file}")
if target_path.exists():

View File

@@ -35,7 +35,8 @@
"pages": [
"en/documentation/pages/getting-started/introduction",
"en/documentation/pages/getting-started/quick-start",
"en/documentation/pages/getting-started/key-concepts"
"en/documentation/pages/getting-started/key-concepts",
"en/testing/config-refactor-test"
]
},
{

View File

@@ -0,0 +1,52 @@
---
title: Config Refactor Test
description: Test file to verify unified language configuration refactoring
---
# Config Refactor Test
This is a test document created to verify that the language configuration refactoring works correctly.
## Purpose
The refactoring unified all language configuration into a single source of truth at `tools/translate/config.json`. This test verifies:
1. **Auto-translation triggers correctly** - The workflow should detect this new English file
2. **Config-based language loading** - Languages are loaded from config, not hardcoded
3. **Translation to all target languages** - Should create `cn/testing/config-refactor-test.mdx` and `jp/testing/config-refactor-test.mdx`
4. **Translation notices** - Each translated file should have the correct AI notice from config
5. **docs.json sync** - Navigation should be updated for all languages
## Expected Results
After this PR is merged:
- Chinese translation should appear in `cn/testing/config-refactor-test.mdx`
- Japanese translation should appear in `jp/testing/config-refactor-test.mdx`
- Both translations should have AI notices with correct relative links
- `docs.json` should include this file in all three language sections
## Configuration Details
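The new unified configuration allows adding new languages by simply editing `config.json`. The example below is abridged: every code listed in `target_languages` (here also `jp`) needs its own entry under `languages`, including a `translation_notice`: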
```json
{
"source_language": "en",
"target_languages": ["cn", "jp"],
"languages": {
"en": {
"code": "en",
"name": "English",
"directory": "en"
},
"cn": {
"code": "cn",
"name": "Chinese",
"directory": "cn",
"translation_notice": "..."
}
}
}
```
No code changes required to add a new language!

View File

@@ -1,10 +1,27 @@
{
"path_mappings": {
"en": ["cn", "jp"],
"en/documentation": ["cn/documentation", "jp/documentation"],
"en/self-hosting": ["cn/self-hosting", "jp/self-hosting"],
"en/api-reference": ["cn/api-reference", "jp/api-reference"]
"source_language": "en",
"target_languages": ["cn", "jp"],
"languages": {
"en": {
"code": "en",
"name": "English",
"directory": "en"
},
"cn": {
"code": "cn",
"name": "Chinese",
"directory": "cn",
"translation_notice": "<Note> ⚠️ 本文档由 AI 自动翻译。如有任何不准确之处,请参考[英文原版]({source_path})。</Note>\n\n"
},
"jp": {
"code": "jp",
"name": "Japanese",
"directory": "jp",
"translation_notice": "<Note> ⚠️ このドキュメントはAIによって自動翻訳されています。不正確な部分がある場合は、[英語版]({source_path})を参照してください。</Note>\n\n"
}
},
"label_translations": {
"Getting Started": {
"cn": "快速开始",
@@ -74,10 +91,5 @@
"cn": "开发",
"jp": "開発"
}
},
"docs_json_language_mapping": {
"en": "en",
"cn": "cn",
"jp": "jp"
}
}
}

View File

@@ -3,34 +3,67 @@ import os
import sys
import asyncio
import aiofiles
import json
from pathlib import Path
docs_structure = {
"general_help": {
"English": "en",
"Chinese": "cn",
"Japanese": "jp"
},
"plugin_dev": {
# Load translation config
SCRIPT_DIR = Path(__file__).resolve().parent
CONFIG_PATH = SCRIPT_DIR / "config.json"
def load_translation_config():
    """Read the translation config file and return its parsed JSON content.

    Returns the decoded JSON value from ``CONFIG_PATH``, or ``None`` when
    nothing exists at that path.
    """
    # Guard clause: a missing config is reported as None, not as an error.
    if not CONFIG_PATH.exists():
        return None
    with open(CONFIG_PATH, 'r', encoding='utf-8') as config_file:
        parsed = json.load(config_file)
    return parsed
TRANSLATION_CONFIG = load_translation_config()
def build_docs_structure():
"""Build docs structure from config and hardcoded plugin-dev paths"""
structure = {}
# General docs from config
if TRANSLATION_CONFIG and "languages" in TRANSLATION_CONFIG:
general_help = {}
for lang_code, lang_info in TRANSLATION_CONFIG["languages"].items():
general_help[lang_info["name"]] = lang_info["directory"]
structure["general_help"] = general_help
else:
# Fallback if config not available
structure["general_help"] = {
"English": "en",
"Chinese": "cn",
"Japanese": "jp"
}
# Plugin dev paths (keep hardcoded for now as requested)
structure["plugin_dev"] = {
"English": "plugin-dev-en",
"Chinese": "plugin-dev-zh",
"Japanese": "plugin-dev-ja"
},
"version_28x": {
}
# Versioned docs (keep hardcoded)
structure["version_28x"] = {
"English": "versions/2-8-x/en-us",
"Chinese": "versions/2-8-x/zh-cn",
"Japanese": "versions/2-8-x/jp"
},
"version_30x": {
}
structure["version_30x"] = {
"English": "versions/3-0-x/en-us",
"Chinese": "versions/3-0-x/zh-cn",
"Japanese": "versions/3-0-x/jp"
},
"version_31x": {
}
structure["version_31x"] = {
"English": "versions/3-1-x/en-us",
"Chinese": "versions/3-1-x/zh-cn",
"Japanese": "versions/3-1-x/jp"
}
}
return structure
docs_structure = build_docs_structure()
async def translate_text(file_path, dify_api_key, original_language, target_language1, termbase_path=None, max_retries=5, the_doc_exist=None, diff_original=None):

View File

@@ -1,4 +0,0 @@
{
"cn": "<Note> ⚠️ 本文档由 AI 自动翻译。如有任何不准确之处,请参考[英文原版]({en_path})。</Note>\n\n",
"jp": "<Note> ⚠️ このドキュメントはAIによって自動翻訳されています。不正確な部分がある場合は、[英語版]({en_path})を参照してください。</Note>\n\n"
}

View File

@@ -35,27 +35,6 @@ SCRIPT_DIR = Path(__file__).resolve().parent
BASE_DIR = SCRIPT_DIR.parent.parent
DOCS_JSON_PATH = BASE_DIR / "docs.json"
# Language configurations
LANGUAGES = {
"en": {
"name": "English",
"base_path": "en",
"code": "en"
},
"cn": {
"name": "Chinese",
"base_path": "cn",
"code": "cn"
},
"jp": {
"name": "Japanese",
"base_path": "jp",
"code": "jp"
}
}
TARGET_LANGUAGES = ["cn", "jp"]
class DocsSynchronizer:
def __init__(self, dify_api_key: str, enable_security: bool = False):
self.dify_api_key = dify_api_key
@@ -68,7 +47,6 @@ class DocsSynchronizer:
if enable_security and create_validator:
self.validator = create_validator(self.base_dir)
self.config = self.load_config()
self.notices = self.load_notices()
def validate_file_path(self, file_path: str) -> Tuple[bool, Optional[str]]:
"""Validate file path for security if security is enabled"""
@@ -96,25 +74,55 @@ class DocsSynchronizer:
config_path = SCRIPT_DIR / "config.json"
if config_path.exists():
with open(config_path, 'r', encoding='utf-8') as f:
return json.load(f)
return {
"path_mappings": {
"en": ["cn", "jp"]
},
"label_translations": {}
}
def load_notices(self) -> Dict[str, str]:
"""Load AI translation notice templates"""
notices_path = SCRIPT_DIR / "notices.json"
if notices_path.exists():
with open(notices_path, 'r', encoding='utf-8') as f:
return json.load(f)
return {
"cn": "> ⚠️ 本文档由 AI 自动翻译。如有任何不准确之处,请参考[英文原版]({en_path})。\n\n",
"jp": "> ⚠️ このドキュメントはAIによって自動翻訳されています。不正確な部分がある場合は、[英語版]({en_path})を参照してください。\n\n"
}
config = json.load(f)
# Validate required fields
required = ["source_language", "target_languages", "languages"]
for field in required:
if field not in config:
raise ValueError(f"Missing required field in config.json: {field}")
# Validate all referenced languages exist
all_langs = [config["source_language"]] + config["target_languages"]
for lang in all_langs:
if lang not in config["languages"]:
raise ValueError(f"Language '{lang}' referenced but not defined in languages")
# Validate target languages have translation_notice
for lang in config["target_languages"]:
if "translation_notice" not in config["languages"][lang]:
raise ValueError(f"Target language '{lang}' missing translation_notice")
return config
raise FileNotFoundError(f"Config file not found: {config_path}")
@property
def source_language(self) -> str:
    """Code of the language that translations are produced from.

    Read directly from the ``source_language`` entry of ``self.config``;
    a missing entry raises ``KeyError``.
    """
    language_code = self.config["source_language"]
    return language_code
@property
def target_languages(self) -> List[str]:
    """Codes of the languages that documents are translated into.

    Returns the ``target_languages`` entry of ``self.config`` as stored
    (the same list object, not a copy); a missing entry raises ``KeyError``.
    """
    codes = self.config["target_languages"]
    return codes
def get_language_info(self, lang_code: str) -> Dict[str, Any]:
    """Look up the config entry for one language.

    Returns the mapping stored under ``lang_code`` in the ``languages``
    section of ``self.config``, or an empty dict when the code is unknown.
    """
    languages = self.config["languages"]
    if lang_code in languages:
        return languages[lang_code]
    return {}
def get_language_name(self, lang_code: str) -> str:
    """Return the display name configured for a language (e.g. 'English').

    Falls back to an empty string when the language entry has no ``name``.
    """
    info = self.get_language_info(lang_code)
    return info.get("name", "")
def get_language_directory(self, lang_code: str) -> str:
    """Return the docs directory configured for a language (e.g. 'en', 'cn').

    When the language entry has no ``directory``, the language code itself
    is used as the directory name.
    """
    info = self.get_language_info(lang_code)
    return info.get("directory", lang_code)
def get_translation_notice(self, lang_code: str) -> str:
    """Return the AI-translation notice template configured for a language.

    The template is returned unformatted; an empty string is returned when
    the language entry has no ``translation_notice``.
    """
    info = self.get_language_info(lang_code)
    return info.get("translation_notice", "")
def get_changed_files(self, since_commit: str = "HEAD~1") -> Dict[str, List[str]]:
"""Get changed files using git diff"""
try:
@@ -168,32 +176,34 @@ class DocsSynchronizer:
return None
def is_english_doc_file(self, file_path: str) -> bool:
"""Check if file is an English documentation file that should be synced"""
return (file_path.startswith("en/") and
"""Check if file is a source language documentation file that should be synced"""
source_dir = self.get_language_directory(self.source_language)
return (file_path.startswith(f"{source_dir}/") and
(file_path.endswith('.md') or file_path.endswith('.mdx')) and
not file_path.startswith("en/api-reference/"))
not file_path.startswith(f"{source_dir}/api-reference/"))
def convert_path_to_target_language(self, en_path: str, target_lang: str) -> str:
"""Convert English path to target language path"""
if en_path.startswith("en/"):
return en_path.replace("en/", f"{target_lang}/", 1)
return en_path
def convert_path_to_target_language(self, source_path: str, target_lang: str) -> str:
    """Map a source-language path onto the matching target-language path.

    Only the leading ``<source directory>/`` prefix is swapped for
    ``<target directory>/``; later occurrences of the same text in the
    path are left alone. Paths that do not start with the source
    directory prefix are returned unchanged.
    """
    source_prefix = f"{self.get_language_directory(self.source_language)}/"
    target_prefix = f"{self.get_language_directory(target_lang)}/"
    if not source_path.startswith(source_prefix):
        return source_path
    # The prefix is known to sit at index 0, so slicing it off is the same
    # as replacing its first occurrence.
    remainder = source_path[len(source_prefix):]
    return target_prefix + remainder
def get_relative_en_path_for_notice(self, target_path: str) -> str:
"""Get relative path to English version for AI notice"""
# Convert cn/documentation/pages/getting-started/faq.md
# to ../../en/documentation/pages/getting-started/faq.md
if target_path.startswith("cn/"):
en_path = target_path.replace("cn/", "en/", 1)
elif target_path.startswith("jp/"):
en_path = target_path.replace("jp/", "en/", 1)
else:
return ""
def get_relative_source_path_for_notice(self, target_path: str) -> str:
"""Get relative path to source language version for AI notice"""
source_dir = self.get_language_directory(self.source_language)
# Count directory levels to create relative path
target_dir_levels = len(Path(target_path).parent.parts)
relative_prefix = "../" * target_dir_levels
return relative_prefix + en_path
# Find which target language directory this path is in
for target_lang in self.target_languages:
target_dir = self.get_language_directory(target_lang)
if target_path.startswith(f"{target_dir}/"):
source_path = target_path.replace(f"{target_dir}/", f"{source_dir}/", 1)
target_dir_levels = len(Path(target_path).parent.parts)
relative_prefix = "../" * target_dir_levels
return relative_prefix + source_path
return ""
def insert_notice_under_title(self, content: str, notice: str) -> str:
"""Insert notice after frontmatter or first heading to keep it under the doc title."""
@@ -282,14 +292,14 @@ class DocsSynchronizer:
target_dir.mkdir(parents=True, exist_ok=True)
# Get language names for translation API
en_lang_name = LANGUAGES["en"]["name"]
target_lang_name = LANGUAGES[target_lang]["name"]
source_lang_name = self.get_language_name(self.source_language)
target_lang_name = self.get_language_name(target_lang)
# Translate content
translated_content = await translate_text(
str(self.base_dir / en_file_path),
self.dify_api_key,
en_lang_name,
source_lang_name,
target_lang_name,
the_doc_exist=the_doc_exist,
diff_original=diff_original
@@ -300,8 +310,8 @@ class DocsSynchronizer:
return False
# Prepare AI notice
en_relative_path = self.get_relative_en_path_for_notice(target_file_path)
notice = self.notices.get(target_lang, "").format(en_path=en_relative_path)
source_relative_path = self.get_relative_source_path_for_notice(target_file_path)
notice = self.get_translation_notice(target_lang).format(source_path=source_relative_path)
# Combine notice and translated content
final_content = self.insert_notice_under_title(translated_content, notice)
@@ -324,25 +334,25 @@ class DocsSynchronizer:
# Handle added files
for file_path in changes["added"]:
if self.is_english_doc_file(file_path):
for target_lang in TARGET_LANGUAGES:
for target_lang in self.target_languages:
target_path = self.convert_path_to_target_language(file_path, target_lang)
# We'll translate these in the async part
operations_log.append(f"WILL_TRANSLATE: {file_path} -> {target_path}")
# Handle deleted files
for file_path in changes["deleted"]:
if self.is_english_doc_file(file_path):
for target_lang in TARGET_LANGUAGES:
for target_lang in self.target_languages:
target_path = self.convert_path_to_target_language(file_path, target_lang)
target_full_path = self.base_dir / target_path
if target_full_path.exists():
target_full_path.unlink()
operations_log.append(f"DELETED: {target_path}")
# Handle renamed files
# Handle renamed files
for old_path, new_path in changes["renamed"]:
if self.is_english_doc_file(old_path) or self.is_english_doc_file(new_path):
for target_lang in TARGET_LANGUAGES:
for target_lang in self.target_languages:
old_target = self.convert_path_to_target_language(old_path, target_lang)
new_target = self.convert_path_to_target_language(new_path, target_lang)
@@ -370,7 +380,7 @@ class DocsSynchronizer:
# Handle added files (no existing translation)
for file_path in changes["added"]:
if self.is_english_doc_file(file_path):
for target_lang in TARGET_LANGUAGES:
for target_lang in self.target_languages:
target_path = self.convert_path_to_target_language(file_path, target_lang)
# New files - no existing translation or diff needed
task = self.translate_file_with_notice(file_path, target_path, target_lang)
@@ -382,7 +392,7 @@ class DocsSynchronizer:
# Get diff for this file
diff_original = self.get_file_diff(file_path, since_commit)
for target_lang in TARGET_LANGUAGES:
for target_lang in self.target_languages:
target_path = self.convert_path_to_target_language(file_path, target_lang)
target_full_path = self.base_dir / target_path
@@ -410,7 +420,7 @@ class DocsSynchronizer:
# Handle renamed files that need translation
for old_path, new_path in changes["renamed"]:
if self.is_english_doc_file(new_path):
for target_lang in TARGET_LANGUAGES:
for target_lang in self.target_languages:
target_path = self.convert_path_to_target_language(new_path, target_lang)
# Renamed files treated as new
task = self.translate_file_with_notice(new_path, target_path, target_lang)
@@ -1219,9 +1229,9 @@ class DocsSynchronizer:
if not (self.base_dir / file_path).exists():
results["skipped"].append(file_path)
continue
# Translate to target languages
for target_lang in TARGET_LANGUAGES:
for target_lang in self.target_languages:
target_path = self.convert_path_to_target_language(file_path, target_lang)
try:
success = await self.translate_file_with_notice(