diff --git a/tools/letsgo.py b/tools/apply_docs_json.py similarity index 90% rename from tools/letsgo.py rename to tools/apply_docs_json.py index e17fa2df..3638ea89 100644 --- a/tools/letsgo.py +++ b/tools/apply_docs_json.py @@ -89,6 +89,48 @@ PLUGIN_DEV_EN = { ] } +# --- 日本語設定 (Japanese Configuration) --- +PLUGIN_DEV_JA = { + "DOCS_DIR": "plugin_dev_ja", # プラグイン開発ドキュメントディレクトリ + "LANGUAGE_CODE": "日本語", #注意:変数名は LANGUAGE_CODE ですが、docs.json の 'version' 値としてデプロイされます。 + "FILE_EXTENSION": ".ja.mdx", + "TARGET_TAB_NAME": "プラグイン開発", # 対象タブ名 + "FILENAME_PATTERN": re.compile(r'^(\d{4})-(.*?)\.ja\.mdx$'), # ファイル名照合パターン + "PWX_TO_GROUP_MAP": { + # --- PWX からグループ名へのマッピング(「プラグイン開発」タブに統一)--- + # (P, W, X) -> (tab_name, group_name, nested_group_name) + # Tab: プラグイン開発 + # Group: 概念と概要 + ('0', '1', '1'): ("プラグイン開発", "概念と概要", "概要"), + ('0', '1', '3'): ("プラグイン開発", "概念と概要", None), + # Group: 開発実践 + ('0', '2', '1'): ("プラグイン開発", "開発実践", "クイックスタート"), + ('0', '2', '2'): ("プラグイン開発", "開発実践", "Difyプラグインの開発"), + # Group: 貢献と公開 + ('0', '3', '1'): ("プラグイン開発", "貢献と公開", "行動規範と基準"), + ('0', '3', '2'): ("プラグイン開発", "貢献と公開", "公開と掲載"), + ('0', '3', '3'): ("プラグイン開発", "貢献と公開", "よくある質問 (FAQ)"), + # Group: 実践例とユースケース + ('0', '4', '3'): ("プラグイン開発", "実践例とユースケース", "開発例"), + # Group: 高度な開発 + ('9', '2', '2'): ("プラグイン開発", "高度な開発", "Extension と Agent"), + ('9', '2', '3'): ("プラグイン開発", "高度な開発", "Extension と Agent"), + ('9', '4', '3'): ("プラグイン開発", "高度な開発", "Extension と Agent"), + ('9', '2', '4'): ("プラグイン開発", "高度な開発", "リバースコール"), # Reverse Calling + # Group: リファレンスと仕様 + ('0', '4', '1'): ("プラグイン開発", "リファレンスと仕様", "コア仕様と機能"), + }, + "DESIRED_GROUP_ORDER": [ + "概念と概要", + "開発実践", + "貢献と公開", + "実践例とユースケース", + "高度な開発", + "リファレンスと仕様" # これが最後になるように確認 + ] +} + + # --- 辅助函数 --- def clear_tabs_if_refresh(navigation_data, version_code, target_tab_name, do_refresh): @@ -167,14 +209,18 @@ def extract_existing_pages(navigation_data, version_code, target_tab_name): return existing_pages, None, None # 返回三个值 # 在目标版本中查找目标 Tab - for tab 
in target_version_nav.get('tabs', []): - if isinstance(tab, dict) and tab.get('tab') == target_tab_name: - target_tab_nav = tab # 存储找到的 Tab 对象 - # 仅从目标 Tab 中提取页面 - for group in tab.get('groups', []): - if isinstance(group, dict): - _recursive_extract(group, existing_pages) - break # 找到目标 Tab 后即可退出循环 + if 'tabs' in target_version_nav and isinstance(target_version_nav['tabs'], list): + for tab in target_version_nav['tabs']: + if isinstance(tab, dict) and tab.get('tab') == target_tab_name: + target_tab_nav = tab # 存储找到的 Tab 对象 + # 仅从目标 Tab 中提取页面 + for group in tab.get('groups', []): + if isinstance(group, dict): + _recursive_extract(group, existing_pages) + break # 找到目标 Tab 后即可退出循环 + else: # 'tabs' might not exist or not be a list + target_version_nav['tabs'] = [] + if not target_tab_nav: print(f"警告: 在版本 '{version_code}' 中未找到 Tab '{target_tab_name}',无法提取现有页面。") @@ -594,8 +640,9 @@ if __name__ == "__main__": # 定义要处理的配置列表 CONFIGS_TO_PROCESS = [ PLUGIN_DEV_ZH, - PLUGIN_DEV_EN, # 取消注释以处理英文配置 + PLUGIN_DEV_EN, + PLUGIN_DEV_JA, ] # 调用主处理函数 - process_configurations(CONFIGS_TO_PROCESS, DOCS_JSON_PATH) + process_configurations(CONFIGS_TO_PROCESS, DOCS_JSON_PATH) \ No newline at end of file diff --git a/tools/rename_by_dimensions.py b/tools/rename_by_dimensions.py new file mode 100644 index 00000000..b9f504ce --- /dev/null +++ b/tools/rename_by_dimensions.py @@ -0,0 +1,448 @@ +import os +import yaml # pip install pyyaml +import re +import datetime +from pathlib import Path +import shutil + +class Config: + # --- Path Setup --- + BASE_DIR = Path(__file__).resolve().parent.parent + LANGUAGES = ["zh", "en", "ja"] # Languages to process + TIMESTAMP = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") + + # --- Directory Naming Templates --- + # Original directory for a language, e.g., "plugin_dev_zh" + # This will be renamed to become the source for processing. + ORIGINAL_LANG_DIR_TEMPLATE = "plugin_dev_{lang}" + + # Name template for the source directory after renaming the original. 
+ # e.g., "plugin_dev_zh_20231027_103000" + SOURCE_DIR_FROM_ORIGINAL_TEMPLATE = "plugin_dev_{lang}_{timestamp}" + + # Name template for an empty source if the original lang dir doesn't exist. + # e.g., "plugin_dev_zh_empty_source_20231027_103000" + EMPTY_SOURCE_DIR_TEMPLATE = "plugin_dev_{lang}_empty_source_{timestamp}" + + # Name template for the final output directory (will be newly created). + # e.g., "plugin_dev_zh" + TARGET_DIR_TEMPLATE = "plugin_dev_{lang}" + + # Prefix for archiving the TARGET_DIR if it unexpectedly exists before creation. + # e.g., "plugin_dev_zh_processed_archive_" + # Note: TARGET_DIR should ideally be empty or non-existent after source dir renaming. + # This archive is a safety net. + ARCHIVE_TARGET_PREFIX_TEMPLATE = "plugin_dev_{lang}_archive_" + + # --- PWXY Mappings --- + PRIMARY_TYPE_MAP = { + "conceptual": 1, + "implementation": 2, + "operational": 3, + "reference": 4, + } + DEFAULT_W = 0 + DETAIL_TYPE_MAPS = { + "conceptual": {"introduction": 1, "principles": 2, "architecture": 3}, + "implementation": {"basic": 1, "standard": 2, "high": 3, "advanced": 4}, + "operational": {"setup": 1, "deployment": 2, "maintenance": 3}, + "reference": {"core": 1, "configuration": 2, "examples": 3}, + } + DEFAULT_X = 0 + LEVEL_MAP = { + "beginner": 1, + "intermediate": 2, + "advanced": 3, + } + DEFAULT_Y = 0 + PRIORITY_NORMAL = 0 + PRIORITY_HIGH = 9 + PRIORITY_ADVANCED_LEVEL_KEY = "advanced" + PRIORITY_IMPLEMENTATION_PRIMARY_KEY = "implementation" + PRIORITY_IMPLEMENTATION_DETAIL_KEYS = {"high", "advanced"} + +# --- Helper Functions --- + +def extract_front_matter(content: str): + match = re.match(r"^\s*---\s*$(.*?)^---\s*$(.*)", content, re.DOTALL | re.MULTILINE) + if match: + yaml_str = match.group(1).strip() + markdown_content = match.group(2).strip() + try: + front_matter = yaml.safe_load(yaml_str) + if front_matter is None: # Handles empty YAML (--- \n ---) + return {}, markdown_content + return ( + front_matter if isinstance(front_matter, 
dict) else {} + ), markdown_content + except yaml.YAMLError as e: + print(f" [Error] YAML Parsing Failed: {e}") + return None, content # Indicate error + else: + return {}, content # No front matter found + +def sanitize_filename_part(part: str) -> str: + if not isinstance(part, str): + part = str(part) + part = part.lower() + part = part.replace("&", "and").replace("@", "at") + part = re.sub(r"\s+", "-", part) + part = re.sub(r"[^\w\-]+", "", part) + part = part.strip(".-_") + return part or "untitled" + +def _calculate_pwxy_and_warnings(front_matter: dict, config: Config) -> tuple[int, int, int, int, list[str]]: + """Calculates P, W, X, Y values and generates warnings for missing/unmapped data.""" + warnings_messages = [] + dimensions = front_matter.get("dimensions", {}) + type_info = dimensions.get("type", {}) + primary = type_info.get("primary") + detail = type_info.get("detail") + level = dimensions.get("level") + + P = config.PRIORITY_NORMAL + if level == config.PRIORITY_ADVANCED_LEVEL_KEY: + P = config.PRIORITY_HIGH + if ( + primary == config.PRIORITY_IMPLEMENTATION_PRIMARY_KEY + and detail in config.PRIORITY_IMPLEMENTATION_DETAIL_KEYS + ): + P = config.PRIORITY_HIGH + + W = config.PRIMARY_TYPE_MAP.get(primary, config.DEFAULT_W) + primary_detail_map = config.DETAIL_TYPE_MAPS.get(primary, {}) + X = primary_detail_map.get(detail, config.DEFAULT_X) + Y = config.LEVEL_MAP.get(level, config.DEFAULT_Y) + + if primary is None: + warnings_messages.append("Missing dimensions.type.primary") + elif W == config.DEFAULT_W: + warnings_messages.append(f"Unmapped primary type: '{primary}'. Using W={config.DEFAULT_W}") + if detail is None: + warnings_messages.append("Missing dimensions.type.detail") + elif X == config.DEFAULT_X and primary in config.DETAIL_TYPE_MAPS: + warnings_messages.append(f"Unmapped detail type: '{detail}' for primary '{primary}'. 
Using X={config.DEFAULT_X}") + elif primary not in config.DETAIL_TYPE_MAPS and primary is not None: + warnings_messages.append(f"No detail map defined for primary type: '{primary}'. Using X={config.DEFAULT_X}") + if level is None: + warnings_messages.append("Missing dimensions.level") + elif Y == config.DEFAULT_Y: + warnings_messages.append(f"Unmapped level: '{level}'. Using Y={config.DEFAULT_Y}") + + return P, W, X, Y, warnings_messages + +def _generate_filename_parts( + P: int, W: int, X: int, Y: int, + front_matter: dict, + original_filename_stem: str +) -> tuple[str | None, str, str, list[str]]: + """Generates padded prefix, sanitized title, language suffix, and any warnings.""" + warnings_messages = [] + + # Padded Prefix + prefix_str = f"{P}{W}{X}{Y}" + try: + numeric_prefix = int(prefix_str) + padded_prefix = f"{numeric_prefix:04d}" + except ValueError: + # This case should ideally not happen if P,W,X,Y are always numeric + warnings_messages.append(f"Could not form numeric prefix from P={P},W={W},X={X},Y={Y}. Using '0000'.") + padded_prefix = "0000" # Fallback, but indicates an issue + + # Sanitized Title + standard_title = front_matter.get("standard_title") + title_part_to_use = standard_title + if not title_part_to_use: + warnings_messages.append("Missing 'standard_title'. Using original filename stem as fallback.") + title_part_to_use = original_filename_stem + sanitized_title = sanitize_filename_part(title_part_to_use) + + # Language Suffix + lang_suffix = "" + language_fm = front_matter.get("language") # Language from frontmatter + if language_fm: + lang_code = str(language_fm).strip().lower() + if lang_code: + lang_suffix = f".{lang_code}" + else: + warnings_messages.append("Empty 'language' field in frontmatter. Omitting language suffix.") + else: + warnings_messages.append("Missing 'language' field in frontmatter. 
Omitting language suffix.") + + return padded_prefix, sanitized_title, lang_suffix, warnings_messages + +# --- Core Processing Functions --- + +def setup_paths_for_lang(lang: str, config: Config) -> tuple[Path | None, Path | None]: + """ + Sets up source and target paths for a given language. + Renames original lang_dir to be the source_dir for processing. + Returns (source_dir_path, target_dir_path) or (None, None) on critical error. + """ + original_lang_dir_name = config.ORIGINAL_LANG_DIR_TEMPLATE.format(lang=lang) + original_lang_dir_path = config.BASE_DIR / original_lang_dir_name + + target_dir_name = config.TARGET_DIR_TEMPLATE.format(lang=lang) + target_dir_path = config.BASE_DIR / target_dir_name + + source_dir_path: Path + source_dir_created_empty = False + + if original_lang_dir_path.exists(): + if not original_lang_dir_path.is_dir(): + print(f"[ERROR] Path '{original_lang_dir_path}' exists but is not a directory. Skipping language '{lang}'.") + return None, None + + source_dir_name = config.SOURCE_DIR_FROM_ORIGINAL_TEMPLATE.format(lang=lang, timestamp=config.TIMESTAMP) + source_dir_path = config.BASE_DIR / source_dir_name + try: + # Ensure no conflict if a previous timestamped dir exists (unlikely but possible) + if source_dir_path.exists(): + print(f"[WARNING] Timestamped source dir '{source_dir_path}' already exists. This might be from a rapid re-run or manual creation. Trying to use it.") + else: + original_lang_dir_path.rename(source_dir_path) + print(f"Using '{source_dir_path}' (renamed from '{original_lang_dir_path}') as source for '{lang}'.") + except OSError as e: + print(f"[ERROR] Failed to rename '{original_lang_dir_path}' to '{source_dir_path}': {e}. 
Skipping language '{lang}'.") + return None, None + else: + print(f"Warning: Original directory '{original_lang_dir_path}' not found for language '{lang}'.") + source_dir_name = config.EMPTY_SOURCE_DIR_TEMPLATE.format(lang=lang, timestamp=config.TIMESTAMP) + source_dir_path = config.BASE_DIR / source_dir_name + source_dir_path.mkdir(parents=True, exist_ok=True) + source_dir_created_empty = True + print(f"Created empty source directory: '{source_dir_path}' for '{lang}'.") + + return source_dir_path, target_dir_path, source_dir_created_empty, source_dir_name + + +def archive_and_create_target_dir(target_dir_path: Path, archive_prefix: str, timestamp: str) -> bool: + """ + Archives the target directory if it exists, then creates a new empty target directory. + Returns True on success, False on failure. + """ + if target_dir_path.exists(): + if target_dir_path.is_dir(): + archive_dir_name = f"{archive_prefix}{timestamp}" + archive_dir_path = target_dir_path.parent / archive_dir_name + try: + # shutil.move is more robust for renaming across different filesystems (though unlikely here) + # and can overwrite if archive_dir_path somehow exists (very unlikely) + if archive_dir_path.exists(): + print(f" [Warning] Archive destination '{archive_dir_path}' already exists. Removing it first.") + shutil.rmtree(archive_dir_path) # Or handle differently + shutil.move(str(target_dir_path), str(archive_dir_path)) + print(f" Archived existing target directory to: {archive_dir_path}") + except OSError as e: + print(f" [Error] Failed to archive existing target dir '{target_dir_path}': {e}") + print(" Aborting for this language to prevent data loss.") + return False + else: + print(f" [Error] Target path '{target_dir_path}' exists but is not a file/directory. 
Please remove/rename manually.") + print(" Aborting for this language.") + return False + try: + target_dir_path.mkdir(parents=True, exist_ok=False) # Should not exist now + print(f" Created new target directory: {target_dir_path}") + return True + except OSError as e: + print(f" [Error] Failed to create target directory '{target_dir_path}': {e}") + print(" Aborting for this language.") + return False + + +def process_single_mdx_file( + mdx_filepath: Path, + target_dir: Path, + config: Config +) -> dict: + """ + Processes a single MDX file: extracts metadata for new filename, + and copies the original file content to the new location. + Returns stats. + """ + stats = {"status": "processed", "warnings": [], "error_message": None} + relative_path = mdx_filepath.relative_to(config.BASE_DIR).as_posix() + file_warnings = [] + + try: + content = mdx_filepath.read_text(encoding="utf-8") + front_matter, _ = extract_front_matter(content) # markdown_content not needed for re-writing + + if front_matter is None: # YAML parsing error from extract_front_matter + stats["status"] = "error" + stats["error_message"] = "YAML Error in file." + print(f"\nProcessing: {relative_path}") + print(f" [Skipping] {stats['error_message']}") + return stats + + # Calculate PWXY and related warnings + P, W, X, Y, pwxy_warnings = _calculate_pwxy_and_warnings(front_matter, config) + file_warnings.extend(pwxy_warnings) + + # Generate filename parts and related warnings + padded_prefix, sanitized_title, lang_suffix, fname_warnings = _generate_filename_parts( + P, W, X, Y, front_matter, mdx_filepath.stem + ) + file_warnings.extend(fname_warnings) + + if padded_prefix is None: # Critical error in prefix generation + stats["status"] = "error" + stats["error_message"] = "Could not form numeric prefix." + print(f"\nProcessing: {relative_path}") + print(f" [Error] {stats['error_message']} P={P},W={W},X={X},Y={Y}. 
Skipping.") + return stats + + new_filename = f"{padded_prefix}-{sanitized_title}{lang_suffix}.mdx" + target_filepath = target_dir / new_filename + + if target_filepath.exists(): + stats["status"] = "skipped_exists" + print(f"\nProcessing: {relative_path}") + print(f" [Skipping] Target file already exists: {new_filename}") + return stats + + # Copy original file instead of rewriting content + try: + shutil.copy2(mdx_filepath, target_filepath) # copy2 preserves metadata + except Exception as copy_error: + stats["status"] = "error" + stats["error_message"] = f"Failed to copy file: {copy_error}" + print(f"\nProcessing: {relative_path}") + print(f" [Error] {stats['error_message']}") + return stats + + stats["warnings"] = file_warnings + if file_warnings: + print(f"\nProcessing: {relative_path} -> {new_filename}") + for warning_msg in file_warnings: + print(f" [Warning] {warning_msg}") + # No print needed for successful processing without warnings unless verbose mode + + except FileNotFoundError: + stats["status"] = "error" + stats["error_message"] = f"File not found during processing: {mdx_filepath}" + print(f"\nProcessing: {relative_path}") + print(f" [Error] {stats['error_message']}") + except Exception as e: + stats["status"] = "error" + stats["error_message"] = f"Unexpected error: {e}" + print(f"\nProcessing: {relative_path}") + print(f" [Error] Unexpected error processing file: {e}") + import traceback + traceback.print_exc() + return stats + + +def run_processing_for_language( + source_dir: Path, + target_dir: Path, + archive_prefix: str, + config: Config +) -> dict: + """Processes all MDX files in the source_dir and outputs them to target_dir.""" + print(f"Starting processing for source: {source_dir}") + print(f"Target directory: {target_dir}") + + lang_stats = { + "processed_count": 0, + "skipped_count": 0, + "error_count": 0, + "warning_files_count": 0, # Files with at least one warning + "status": "OK" + } + + if not source_dir.exists() or not 
source_dir.is_dir(): + print(f"[Error] Source directory '{source_dir}' does not exist or is not a directory.") + lang_stats["status"] = "SOURCE_DIR_ERROR" + return lang_stats + + # Archive existing target directory (if any) and create a new one + if not archive_and_create_target_dir(target_dir, archive_prefix, config.TIMESTAMP): + lang_stats["status"] = "TARGET_DIR_SETUP_ERROR" + return lang_stats # Abort if target dir setup fails + + mdx_files = list(source_dir.rglob("*.mdx")) + total_files = len(mdx_files) + print(f"Found {total_files} MDX files to process in '{source_dir}'.") + + if not mdx_files and source_dir.name.startswith(config.ORIGINAL_LANG_DIR_TEMPLATE.format(lang="")[:-1]): # Heuristic check if it was an original dir + pass # It's fine for an original directory to be empty. + + for i, mdx_filepath in enumerate(mdx_files): + result = process_single_mdx_file(mdx_filepath, target_dir, config) + + if result["status"] == "processed": + lang_stats["processed_count"] += 1 + if result["warnings"]: + lang_stats["warning_files_count"] +=1 + elif result["status"] == "skipped_exists": + lang_stats["skipped_count"] += 1 + elif result["status"] == "error": + lang_stats["error_count"] += 1 + + if (i + 1) % 10 == 0 or (i + 1) == total_files: + if total_files > 0 : # Avoid division by zero for empty source + print(f"Progress: {i+1}/{total_files} files evaluated.", end="\r") + + print("\n" + "-" * 20) # Newline after progress + print(f"Language Processing Summary ({source_dir.name}):") + print(f" Successfully processed: {lang_stats['processed_count']}") + print(f" Skipped (target exists): {lang_stats['skipped_count']}") + print(f" Files with warnings: {lang_stats['warning_files_count']}") + print(f" Errors encountered: {lang_stats['error_count']}") + print("-" * 20) + return lang_stats + +# --- Main Orchestration --- + +def main(): + config = Config() + print(f"Base directory: {config.BASE_DIR}") + print(f"Timestamp for this run: {config.TIMESTAMP}") + + overall_summary 
= {} + + for lang in config.LANGUAGES: + print(f"\n{'='*10} Processing Language: {lang.upper()} {'='*10}") + + source_dir_path, target_dir_path, source_created_empty, source_actual_name = setup_paths_for_lang(lang, config) + + if not source_dir_path or not target_dir_path: + overall_summary[lang] = {"status": "SETUP_ERROR", "message": f"Failed to setup paths for {lang}."} + continue # Skip to next language + + archive_prefix_for_lang = config.ARCHIVE_TARGET_PREFIX_TEMPLATE.format(lang=lang) + lang_results = run_processing_for_language(source_dir_path, target_dir_path, archive_prefix_for_lang, config) + overall_summary[lang] = lang_results + + # Clean up empty source directory if it was created for this run + if source_created_empty: + try: + # Check if it's truly empty (no files processed into it by mistake) + if not any(source_dir_path.iterdir()): + source_dir_path.rmdir() + print(f"Removed temporary empty source directory: {source_dir_path}") + else: + print(f"Note: Temporary empty source '{source_dir_path}' was not empty. 
Not removed.") + except OSError as e: + print(f"Note: Could not remove temporary empty source directory '{source_dir_path}': {e}") + + print("\n\n" + "=" * 20 + " Overall Summary " + "=" * 20) + for lang, summary in overall_summary.items(): + print(f"\nLanguage: {lang.upper()}") + if summary.get("status") == "OK": + print(f" Status: OK") + print(f" Processed: {summary.get('processed_count', 0)}") + print(f" Skipped: {summary.get('skipped_count', 0)}") + print(f" Files with Warnings: {summary.get('warning_files_count', 0)}") + print(f" Errors: {summary.get('error_count', 0)}") + else: + print(f" Status: {summary.get('status', 'UNKNOWN_ERROR')}") + if "message" in summary: + print(f" Message: {summary['message']}") + print("=" * (40 + len(" Overall Summary "))) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/tools/rename_en.py b/tools/rename_en.py deleted file mode 100644 index 45b36969..00000000 --- a/tools/rename_en.py +++ /dev/null @@ -1,352 +0,0 @@ -import os -import yaml -import re -import datetime - -# --- Path Setup --- -# BASE_DIR should be the project root, which is the parent of the 'tools' directory -BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - -# --- Configuration --- -timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") -plugin_dev_en_path = os.path.join(BASE_DIR, "plugin_dev_en") -if os.path.exists(plugin_dev_en_path): - plugin_dev_en_timestamp = f"plugin_dev_en_{timestamp}" - os.rename(plugin_dev_en_path, os.path.join(BASE_DIR, plugin_dev_en_timestamp)) - SOURCE_DIR_NAME = plugin_dev_en_timestamp - TARGET_DIR_NAME = "plugin_dev_en" -else: - print(f"Warning: 'plugin_dev_en' directory not found in {BASE_DIR}") - print("Creating a new 'plugin_dev_en' directory...") - SOURCE_DIR_NAME = "plugin_dev_en_empty_source" - TARGET_DIR_NAME = "plugin_dev_en" - os.makedirs( - os.path.join(BASE_DIR, SOURCE_DIR_NAME), exist_ok=True - ) - -ARCHIVE_PREFIX = "plugin_dev_en_new_archive_" # Prefix for 
archived directories - -# --- Mapping Configuration --- -# (Mappings remain the same as the previous version) -PRIMARY_TYPE_MAP = { - "conceptual": 1, - "implementation": 2, - "operational": 3, - "reference": 4, -} -DEFAULT_W = 0 -DETAIL_TYPE_MAPS = { - "conceptual": {"introduction": 1, "principles": 2, "architecture": 3}, - "implementation": {"basic": 1, "standard": 2, "high": 3, "advanced": 4}, - "operational": {"setup": 1, "deployment": 2, "maintenance": 3}, - "reference": {"core": 1, "configuration": 2, "examples": 3}, -} -DEFAULT_X = 0 -LEVEL_MAP = { - "beginner": 1, - "intermediate": 2, - "advanced": 3, -} -DEFAULT_Y = 0 -PRIORITY_NORMAL = 0 -PRIORITY_HIGH = 9 -PRIORITY_ADVANCED_LEVEL_KEY = "advanced" -PRIORITY_IMPLEMENTATION_PRIMARY_KEY = "implementation" -PRIORITY_IMPLEMENTATION_DETAIL_KEYS = {"high", "advanced"} - -# --- Configuration End --- - -# --- Helper Functions --- -# (extract_front_matter remains the same) - - -def extract_front_matter(content): - match = re.match(r"^\s*---\s*$(.*?)^---\s*$(.*)", content, re.DOTALL | re.MULTILINE) - if match: - yaml_str = match.group(1).strip() - markdown_content = match.group(2).strip() - try: - front_matter = yaml.safe_load(yaml_str) - if front_matter is None: - return {}, markdown_content - return ( - front_matter if isinstance(front_matter, dict) else {} - ), markdown_content - except yaml.YAMLError as e: - print(f" [Error] YAML Parsing Failed: {e}") - return None, content - else: - return {}, content - - -# (sanitize_filename_part remains mostly the same, ensures non-empty return) - - -def sanitize_filename_part(part): - if not isinstance(part, str): - part = str(part) - part = part.lower() - # Replace common problematic characters first - part = part.replace("&", "and").replace("@", "at") - part = re.sub(r"\s+", "-", part) # Whitespace to hyphen - # Keep letters, numbers, underscore, hyphen. Remove others. 
- part = re.sub(r"[^\w\-]+", "", part) - part = part.strip(".-_") # Remove leading/trailing separators - # Ensure it's not empty, provide a default if it becomes empty - return part or "untitled" - - -# --- Main Processing Function --- - - -def process_markdown_files(source_dir, target_dir): - """ - Processes mdx files, archives old target dir, uses PWXY-[title].en.mdx format. - """ - print("Starting processing...") - print(f"Source Directory: {source_dir}") - print(f"Target Directory: {target_dir}") - - # --- Archive Existing Target Directory --- - if os.path.exists(target_dir): - if os.path.isdir(target_dir): - timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") - archive_dir = os.path.join(BASE_DIR, f"{ARCHIVE_PREFIX}{timestamp}") - try: - os.rename(target_dir, archive_dir) - print(f"Archived existing target directory to: {archive_dir}") - except OSError as e: - print(f"[Error] Failed to archive existing target directory: {e}") - print("Aborting to prevent data loss.") - return # Stop execution if archiving fails - else: - print( - f"[Error] Target path '{target_dir}' exists but is not a directory. Please remove or rename it manually." 
- ) - print("Aborting.") - return - - # --- Create New Target Directory --- - try: - # Should not exist after archiving - os.makedirs(target_dir, exist_ok=False) - print(f"Created new target directory: {target_dir}") - except OSError as e: - print(f"[Error] Failed to create target directory '{target_dir}': {e}") - print("Aborting.") - return - - processed_count = 0 - skipped_count = 0 - error_count = 0 - warning_count = 0 # Counts files with at least one warning - - total_files = sum( - 1 - for root, _, files in os.walk(source_dir) - for file in files - if file.lower().endswith(".mdx") - ) - print(f"Found {total_files} MDX files to process") - - for root, _, files in os.walk(source_dir): - for filename in files: - if not filename.lower().endswith(".mdx"): - continue - - original_filepath = os.path.join(root, filename) - relative_path = os.path.relpath(original_filepath, BASE_DIR).replace( - os.sep, "/" - ) - - current_warnings = 0 - - try: - with open(original_filepath, "r", encoding="utf-8") as f: - content = f.read() - - front_matter, markdown_content = extract_front_matter(content) - - if front_matter is None: - print(f"\nProcessing: {relative_path}") - print(" [Skipping] YAML Error in file.") - error_count += 1 - continue - - # --- Extract Metadata (including new fields) --- - dimensions = front_matter.get("dimensions", {}) - type_info = dimensions.get("type", {}) - primary = type_info.get("primary") - detail = type_info.get("detail") - level = dimensions.get("level") - standard_title = front_matter.get("standard_title") - # language = front_matter.get("language") # No longer needed for filename - - # --- Determine P, W, X, Y (Logic remains the same) --- - P = PRIORITY_NORMAL - # (Priority logic based on level and implementation/detail) - if level == PRIORITY_ADVANCED_LEVEL_KEY: - P = PRIORITY_HIGH - if ( - primary == PRIORITY_IMPLEMENTATION_PRIMARY_KEY - and detail in PRIORITY_IMPLEMENTATION_DETAIL_KEYS - ): - P = PRIORITY_HIGH - - W = 
PRIMARY_TYPE_MAP.get(primary, DEFAULT_W) - primary_detail_map = DETAIL_TYPE_MAPS.get(primary, {}) - X = primary_detail_map.get(detail, DEFAULT_X) - Y = LEVEL_MAP.get(level, DEFAULT_Y) - - # --- Warnings for missing dimension data (same as before) --- - warnings_messages = [] - if primary is None: - current_warnings += 1 - warnings_messages.append( - " [Warning] Missing dimensions.type.primary" - ) - elif W == DEFAULT_W: - current_warnings += 1 - warnings_messages.append( - f" [Warning] Unmapped primary type: '{primary}'. Using W={DEFAULT_W}" - ) - if detail is None: - current_warnings += 1 - warnings_messages.append( - " [Warning] Missing dimensions.type.detail" - ) - elif X == DEFAULT_X and primary in DETAIL_TYPE_MAPS: - current_warnings += 1 - warnings_messages.append( - f" [Warning] Unmapped detail type: '{detail}' for primary '{primary}'. Using X={DEFAULT_X}" - ) - elif primary not in DETAIL_TYPE_MAPS and primary is not None: - current_warnings += 1 - warnings_messages.append( - f" [Warning] No detail map defined for primary type: '{primary}'. Using X={DEFAULT_X}" - ) - if level is None: - current_warnings += 1 - warnings_messages.append(" [Warning] Missing dimensions.level") - elif Y == DEFAULT_Y: - current_warnings += 1 - warnings_messages.append( - f" [Warning] Unmapped level: '{level}'. Using Y={DEFAULT_Y}" - ) - - # --- Construct New Filename using standard_title and hardcoded .en suffix --- - prefix_str = f"{P}{W}{X}{Y}" - try: - numeric_prefix = int(prefix_str) - padded_prefix = f"{numeric_prefix:04d}" - except ValueError: - print(f"\nProcessing: {relative_path}") - print( - f" [Error] Could not form numeric prefix from P={P}, W={W}, X={X}, Y={Y}. Using '0000'." - ) - error_count += 1 - continue # Skip file - - # Determine title part (use standard_title or fallback) - title_part_to_use = standard_title - if not title_part_to_use: - current_warnings += 1 - warnings_messages.append( - " [Warning] Missing 'standard_title'. 
Using original filename base as fallback." - ) - title_part_to_use = os.path.splitext(filename)[0] # Fallback - - sanitized_title = sanitize_filename_part(title_part_to_use) - - # --- Removed language suffix logic --- - # No longer check front_matter['language'] - - # Combine parts with hardcoded '.en.mdx' suffix - new_filename = f"{padded_prefix}-{sanitized_title}.en.mdx" - target_filepath = os.path.join(target_dir, new_filename) - - # --- Check for Collisions --- - if os.path.exists(target_filepath): - print(f"\nProcessing: {relative_path}") - print(f" [Skipping] Target file already exists: {new_filename}") - skipped_count += 1 - continue - - # --- Prepare New Content --- - try: - # Ensure language field is preserved if it exists, or add it if missing - if 'language' not in front_matter: - front_matter['language'] = 'en' - elif not front_matter['language']: # Handle empty language field - front_matter['language'] = 'en' - - new_yaml_str = yaml.dump( - front_matter, - allow_unicode=True, - default_flow_style=False, - sort_keys=False, - ) - except Exception as dump_error: - print(f"\nProcessing: {relative_path}") - print(f" [Error] Failed to dump updated YAML: {dump_error}") - error_count += 1 - continue - - new_content = f"---\n{new_yaml_str}---\n\n{markdown_content}" - - # --- Write New File --- - with open(target_filepath, "w", encoding="utf-8") as f: - f.write(new_content) - - if current_warnings > 0: - print(f"\nProcessing: {relative_path}") - for warning in warnings_messages: - print(warning) - warning_count += ( - 1 # Increment file warning count if this file had warnings - ) - - processed_count += 1 - if processed_count % 10 == 0 or processed_count == total_files: - print( - f"Progress: {processed_count}/{total_files} files processed", - end="\r", - ) - - except FileNotFoundError: - print(f"\nProcessing: {relative_path}") - print( - f" [Error] File not found during processing: {original_filepath}" - ) - error_count += 1 - except Exception as e: - 
import os
import re
import datetime

import yaml  # pip install pyyaml

# --- Path Setup --------------------------------------------------------------
# BASE_DIR is the project root: the parent of the 'tools' directory.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# --- Configuration -----------------------------------------------------------
# If 'plugin_dev_zh' already exists it is renamed with a timestamp and used as
# the source; otherwise an empty stub source directory is created so the
# script can still run end to end.
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
plugin_dev_zh_path = os.path.join(BASE_DIR, "plugin_dev_zh")
if not os.path.exists(plugin_dev_zh_path):
    print(f"Warning: 'plugin_dev_zh' directory not found in {BASE_DIR}")
    print("Creating a new 'plugin_dev_zh' directory...")
    SOURCE_DIR_NAME = "plugin_dev_zh_empty_source"
    TARGET_DIR_NAME = "plugin_dev_zh"
    os.makedirs(os.path.join(BASE_DIR, SOURCE_DIR_NAME), exist_ok=True)
else:
    plugin_dev_zh_timestamp = f"plugin_dev_zh_{timestamp}"
    os.rename(plugin_dev_zh_path, os.path.join(BASE_DIR, plugin_dev_zh_timestamp))
    SOURCE_DIR_NAME = plugin_dev_zh_timestamp
    TARGET_DIR_NAME = "plugin_dev_zh"

ARCHIVE_PREFIX = "plugin_dev_zh_new_archive_"  # Prefix for archived directories

# --- Mapping Configuration ----------------------------------------------------
# PWXY digits are derived from the YAML front matter:
#   W <- dimensions.type.primary, X <- dimensions.type.detail,
#   Y <- dimensions.level, P <- priority override (see the processing code).
PRIMARY_TYPE_MAP = {
    "conceptual": 1,
    "implementation": 2,
    "operational": 3,
    "reference": 4,
}
DEFAULT_W = 0
DETAIL_TYPE_MAPS = {
    "conceptual": {"introduction": 1, "principles": 2, "architecture": 3},
    "implementation": {"basic": 1, "standard": 2, "high": 3, "advanced": 4},
    "operational": {"setup": 1, "deployment": 2, "maintenance": 3},
    "reference": {"core": 1, "configuration": 2, "examples": 3},
}
DEFAULT_X = 0
LEVEL_MAP = {
    "beginner": 1,
    "intermediate": 2,
    "advanced": 3,
}
DEFAULT_Y = 0
PRIORITY_NORMAL = 0
PRIORITY_HIGH = 9
PRIORITY_ADVANCED_LEVEL_KEY = "advanced"
PRIORITY_IMPLEMENTATION_PRIMARY_KEY = "implementation"
PRIORITY_IMPLEMENTATION_DETAIL_KEYS = {"high", "advanced"}

# --- Helper Functions ---------------------------------------------------------

# Matches an optional leading '---' fenced front-matter block followed by the
# markdown body. Compiled once at module load.
_FRONT_MATTER_RE = re.compile(
    r"^\s*---\s*$(.*?)^---\s*$(.*)", re.DOTALL | re.MULTILINE
)


def extract_front_matter(content):
    """Split *content* into (front_matter_dict, markdown_body).

    Returns ({}, body) when there is no front-matter block or it is empty,
    (dict, body) on success, and (None, original_content) when the YAML
    between the '---' fences fails to parse.
    """
    match = _FRONT_MATTER_RE.match(content)
    if not match:
        return {}, content

    yaml_str = match.group(1).strip()
    markdown_content = match.group(2).strip()
    try:
        parsed = yaml.safe_load(yaml_str)
    except yaml.YAMLError as e:
        print(f" [Error] YAML Parsing Failed: {e}")
        return None, content
    if parsed is None:
        return {}, markdown_content
    return (parsed if isinstance(parsed, dict) else {}), markdown_content


def sanitize_filename_part(part):
    """Normalise *part* into a safe, non-empty filename fragment."""
    text = part if isinstance(part, str) else str(part)
    # Replace common problematic characters before collapsing whitespace.
    text = text.lower().replace("&", "and").replace("@", "at")
    text = re.sub(r"\s+", "-", text)      # whitespace -> hyphen
    text = re.sub(r"[^\w\-]+", "", text)  # keep word chars and hyphens only
    text = text.strip(".-_")              # trim leading/trailing separators
    return text or "untitled"
def _archive_existing_target(target_dir):
    """Move an existing target directory aside into a timestamped archive.

    Returns True when processing may continue (nothing existed, or the
    archive rename succeeded) and False when the caller must abort.
    """
    if not os.path.exists(target_dir):
        return True
    if not os.path.isdir(target_dir):
        print(
            f"[Error] Target path '{target_dir}' exists but is not a directory. Please remove or rename it manually."
        )
        print("Aborting.")
        return False
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    archive_dir = os.path.join(BASE_DIR, f"{ARCHIVE_PREFIX}{timestamp}")
    try:
        os.rename(target_dir, archive_dir)
        print(f"Archived existing target directory to: {archive_dir}")
        return True
    except OSError as e:
        print(f"[Error] Failed to archive existing target directory: {e}")
        print("Aborting to prevent data loss.")
        return False


def _compute_pwxy(primary, detail, level):
    """Map front-matter dimension values to the four PWXY digits."""
    P = PRIORITY_NORMAL
    if level == PRIORITY_ADVANCED_LEVEL_KEY:
        P = PRIORITY_HIGH
    if (
        primary == PRIORITY_IMPLEMENTATION_PRIMARY_KEY
        and detail in PRIORITY_IMPLEMENTATION_DETAIL_KEYS
    ):
        P = PRIORITY_HIGH
    W = PRIMARY_TYPE_MAP.get(primary, DEFAULT_W)
    X = DETAIL_TYPE_MAPS.get(primary, {}).get(detail, DEFAULT_X)
    Y = LEVEL_MAP.get(level, DEFAULT_Y)
    return P, W, X, Y


def _dimension_warnings(primary, detail, level, W, X, Y):
    """Return warning messages for missing or unmapped dimension metadata."""
    warnings = []
    if primary is None:
        warnings.append(" [Warning] Missing dimensions.type.primary")
    elif W == DEFAULT_W:
        warnings.append(
            f" [Warning] Unmapped primary type: '{primary}'. Using W={DEFAULT_W}"
        )
    if detail is None:
        warnings.append(" [Warning] Missing dimensions.type.detail")
    elif X == DEFAULT_X and primary in DETAIL_TYPE_MAPS:
        warnings.append(
            f" [Warning] Unmapped detail type: '{detail}' for primary '{primary}'. Using X={DEFAULT_X}"
        )
    elif primary not in DETAIL_TYPE_MAPS and primary is not None:
        warnings.append(
            f" [Warning] No detail map defined for primary type: '{primary}'. Using X={DEFAULT_X}"
        )
    if level is None:
        warnings.append(" [Warning] Missing dimensions.level")
    elif Y == DEFAULT_Y:
        warnings.append(f" [Warning] Unmapped level: '{level}'. Using Y={DEFAULT_Y}")
    return warnings


def process_markdown_files(source_dir, target_dir):
    """Rename and normalise MDX files from *source_dir* into *target_dir*.

    Each file's YAML front matter is mapped to a 4-digit PWXY prefix and the
    file is rewritten as '<PWXY>-<sanitized standard_title>[.<lang>].mdx'.
    An existing target directory is archived first, never overwritten.

    Side effects: creates *target_dir*, writes files into it, and prints a
    progress/summary report. Returns None.
    """
    print("Starting processing...")
    print(f"Source Directory: {source_dir}")
    print(f"Target Directory: {target_dir}")

    # --- Archive Existing Target Directory ---
    if not _archive_existing_target(target_dir):
        return

    # --- Create New Target Directory ---
    try:
        # Must not exist after archiving, hence exist_ok=False.
        os.makedirs(target_dir, exist_ok=False)
        print(f"Created new target directory: {target_dir}")
    except OSError as e:
        print(f"[Error] Failed to create target directory '{target_dir}': {e}")
        print("Aborting.")
        return

    processed_count = 0
    skipped_count = 0
    error_count = 0
    warning_count = 0  # counts FILES that produced at least one warning

    total_files = sum(
        1
        for _, _, files in os.walk(source_dir)
        for file in files
        if file.lower().endswith(".mdx")
    )
    print(f"Found {total_files} MDX files to process")

    for root, _, files in os.walk(source_dir):
        for filename in files:
            if not filename.lower().endswith(".mdx"):
                continue

            original_filepath = os.path.join(root, filename)
            relative_path = os.path.relpath(original_filepath, BASE_DIR).replace(
                os.sep, "/"
            )

            try:
                with open(original_filepath, "r", encoding="utf-8") as f:
                    content = f.read()

                front_matter, markdown_content = extract_front_matter(content)
                if front_matter is None:  # YAML parse failure sentinel
                    print(f"\nProcessing: {relative_path}")
                    print(" [Skipping] YAML Error in file.")
                    error_count += 1
                    continue

                # --- Extract Metadata ---
                dimensions = front_matter.get("dimensions", {})
                type_info = dimensions.get("type", {})
                primary = type_info.get("primary")
                detail = type_info.get("detail")
                level = dimensions.get("level")
                standard_title = front_matter.get("standard_title")
                language = front_matter.get("language")

                P, W, X, Y = _compute_pwxy(primary, detail, level)
                warnings_messages = _dimension_warnings(primary, detail, level, W, X, Y)

                # P/W/X/Y are always single-digit ints (map values or the 0
                # defaults), so this f-string is already the 4-char prefix.
                # The previous int()/ValueError round-trip was unreachable
                # dead code (and printed a misleading "Using '0000'" message
                # while actually skipping the file); it has been removed.
                padded_prefix = f"{P}{W}{X}{Y}"

                # Title part: prefer standard_title, fall back to filename base.
                title_part_to_use = standard_title
                if not title_part_to_use:
                    warnings_messages.append(
                        " [Warning] Missing 'standard_title'. Using original filename base as fallback."
                    )
                    title_part_to_use = os.path.splitext(filename)[0]

                sanitized_title = sanitize_filename_part(title_part_to_use)

                # Language suffix, e.g. '.zh' (omitted with a warning if absent).
                lang_suffix = ""
                if language:
                    lang_code = str(language).strip().lower()
                    if lang_code:
                        lang_suffix = f".{lang_code}"
                    else:
                        warnings_messages.append(
                            " [Warning] Empty 'language' field found. Omitting suffix."
                        )
                else:
                    warnings_messages.append(
                        " [Warning] Missing 'language' field. Omitting suffix."
                    )

                new_filename = f"{padded_prefix}-{sanitized_title}{lang_suffix}.mdx"
                target_filepath = os.path.join(target_dir, new_filename)

                # --- Check for Collisions ---
                if os.path.exists(target_filepath):
                    print(f"\nProcessing: {relative_path}")
                    print(f" [Skipping] Target file already exists: {new_filename}")
                    skipped_count += 1
                    continue

                # --- Prepare New Content ---
                try:
                    new_yaml_str = yaml.dump(
                        front_matter,
                        allow_unicode=True,
                        default_flow_style=False,
                        sort_keys=False,
                    )
                except Exception as dump_error:
                    print(f"\nProcessing: {relative_path}")
                    print(f" [Error] Failed to dump updated YAML: {dump_error}")
                    error_count += 1
                    continue

                new_content = f"---\n{new_yaml_str}---\n\n{markdown_content}"

                # --- Write New File ---
                with open(target_filepath, "w", encoding="utf-8") as f:
                    f.write(new_content)

                if warnings_messages:
                    print(f"\nProcessing: {relative_path}")
                    for warning in warnings_messages:
                        print(warning)
                    warning_count += 1  # one increment per file with warnings

                processed_count += 1
                if processed_count % 10 == 0 or processed_count == total_files:
                    print(
                        f"Progress: {processed_count}/{total_files} files processed",
                        end="\r",
                    )

            except FileNotFoundError:
                print(f"\nProcessing: {relative_path}")
                print(
                    f" [Error] File not found during processing: {original_filepath}"
                )
                error_count += 1
            except Exception as e:
                print(f"\nProcessing: {relative_path}")
                print(
                    f" [Error] Unexpected error processing file '{relative_path}': {e}"
                )
                import traceback

                traceback.print_exc()
                error_count += 1

    print("\n")  # newline after the '\r' progress counter
    # --- Final Report ---
    print("\n--- Processing Complete ---")
    print(f"Successfully processed: {processed_count} files")
    print(f"Skipped (target exists): {skipped_count} files")
    print(f"Files with warnings (missing/unmapped data): {warning_count}")
    print(f"Errors encountered: {error_count} files")
    print("-" * 27)


if __name__ == "__main__":
    SOURCE_PATH = os.path.join(BASE_DIR, SOURCE_DIR_NAME)
    TARGET_PATH = os.path.join(BASE_DIR, TARGET_DIR_NAME)
    process_markdown_files(SOURCE_PATH, TARGET_PATH)

    # Clean up the stub source directory if it was created as a fallback.
    if SOURCE_DIR_NAME == "plugin_dev_zh_empty_source" and os.path.exists(SOURCE_PATH):
        try:
            os.rmdir(SOURCE_PATH)
            print(f"Removed temporary source directory: {SOURCE_PATH}")
        except OSError as e:
            print(f"Note: Could not remove temporary directory: {e}")