mirror of
https://github.com/langgenius/dify-docs.git
synced 2026-03-26 13:18:34 +07:00
support configuring ignored files in auto sync (#629)
* Add ignore_files config to exclude specific files from translation

  Adds the ability to specify source-language files that should not be translated:
  - New `ignore_files` array in config.json
  - Validation ensures each path starts with the source directory, has a valid extension, and contains no directory traversal
  - Filtering is applied in PRAnalyzer.categorize_files() and SyncPlanGenerator.generate_sync_plan()

  🤖 Generated with [Claude Code](https://claude.com/claude-code)

  Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

* update config

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -2,6 +2,10 @@
|
||||
"source_language": "en",
|
||||
"target_languages": ["zh", "ja"],
|
||||
|
||||
"ignore_files": [
|
||||
"en/self-host/configuration/environments.mdx"
|
||||
],
|
||||
|
||||
"processing_limits": {
|
||||
"max_files_per_run": 10,
|
||||
"max_openapi_files_per_run": 5
|
||||
|
||||
@@ -27,6 +27,9 @@ class PRAnalyzer:
|
||||
self.source_language = self.config.get('source_language', 'en')
|
||||
self.target_languages = self.config.get('target_languages', ['zh', 'ja'])
|
||||
|
||||
# Load and validate ignore files
|
||||
self.ignore_files = self._load_ignore_files()
|
||||
|
||||
def _load_config(self) -> Dict:
|
||||
"""Load translation configuration."""
|
||||
config_path = Path(__file__).parent / "config.json"
|
||||
@@ -35,6 +38,55 @@ class PRAnalyzer:
|
||||
return json.load(f)
|
||||
return {}
|
||||
|
||||
def _load_ignore_files(self) -> List[str]:
|
||||
"""Load and validate ignore_files configuration.
|
||||
|
||||
Validates that:
|
||||
- Each path starts with source language directory
|
||||
- No directory traversal (..)
|
||||
- Valid file extension (.md, .mdx)
|
||||
|
||||
Returns:
|
||||
List of validated ignore file paths
|
||||
"""
|
||||
ignore_files = self.config.get('ignore_files', [])
|
||||
if not ignore_files:
|
||||
return []
|
||||
|
||||
validated = []
|
||||
source_dir = self.get_language_directory(self.source_language)
|
||||
|
||||
for path in ignore_files:
|
||||
# Must start with source language directory
|
||||
if not path.startswith(f"{source_dir}/"):
|
||||
print(f"Warning: Ignore path must start with '{source_dir}/': {path} (skipping)")
|
||||
continue
|
||||
|
||||
# No directory traversal
|
||||
if ".." in path:
|
||||
print(f"Warning: Invalid ignore path (contains '..'): {path} (skipping)")
|
||||
continue
|
||||
|
||||
# Must have valid extension
|
||||
if not any(path.endswith(ext) for ext in ['.md', '.mdx']):
|
||||
print(f"Warning: Ignore path must end with .md or .mdx: {path} (skipping)")
|
||||
continue
|
||||
|
||||
validated.append(path)
|
||||
|
||||
return validated
|
||||
|
||||
def _is_file_ignored(self, file_path: str) -> bool:
|
||||
"""Check if a file should be ignored from translation.
|
||||
|
||||
Args:
|
||||
file_path: Path to check (e.g., 'en/guides/some-file.md')
|
||||
|
||||
Returns:
|
||||
True if file is in ignore list, False otherwise
|
||||
"""
|
||||
return file_path in self.ignore_files
|
||||
|
||||
def get_language_directory(self, lang_code: str) -> str:
|
||||
"""Get directory name for a language code from config."""
|
||||
if 'languages' in self.config and lang_code in self.config['languages']:
|
||||
@@ -184,16 +236,19 @@ class PRAnalyzer:
|
||||
if file == 'docs.json':
|
||||
categories['docs_json'].append(file)
|
||||
elif file.startswith(f'{source_dir}/'):
|
||||
if file.endswith(('.md', '.mdx')):
|
||||
# Check if file is in ignore list
|
||||
if self._is_file_ignored(file):
|
||||
categories['other'].append(file) # Treat as 'other' so it's not processed
|
||||
elif file.endswith(('.md', '.mdx')):
|
||||
categories['source'].append(file)
|
||||
elif self.is_openapi_file(file): # NEW
|
||||
elif self.is_openapi_file(file):
|
||||
categories['source_openapi'].append(file)
|
||||
else:
|
||||
categories['other'].append(file)
|
||||
elif any(file.startswith(f'{target_dir}/') for target_dir in target_dirs):
|
||||
if file.endswith(('.md', '.mdx')):
|
||||
categories['translation'].append(file)
|
||||
elif self.is_openapi_file(file): # NEW
|
||||
elif self.is_openapi_file(file):
|
||||
categories['translation_openapi'].append(file)
|
||||
else:
|
||||
categories['other'].append(file)
|
||||
@@ -462,6 +517,10 @@ class SyncPlanGenerator:
|
||||
docs_json_changed = True
|
||||
continue
|
||||
|
||||
# Skip ignored files
|
||||
if self.analyzer._is_file_ignored(filepath):
|
||||
continue
|
||||
|
||||
# Process source language markdown files
|
||||
if filepath.startswith('en/') and filepath.endswith(('.md', '.mdx')):
|
||||
file_size = self.get_file_size(filepath)
|
||||
|
||||
Reference in New Issue
Block a user