From 10f1f354dbdb256b0a389b2875249cab0f7b3e8f Mon Sep 17 00:00:00 2001 From: Alter-xyz <88554920+alterxyz@users.noreply.github.com> Date: Sat, 17 May 2025 20:30:38 +0800 Subject: [PATCH] feat: auto fix reference --- tools/1_rename_by_dimensions.py | 161 +++++++++++++++++++++----------- 1 file changed, 104 insertions(+), 57 deletions(-) diff --git a/tools/1_rename_by_dimensions.py b/tools/1_rename_by_dimensions.py index 4df18ad3..0058681e 100644 --- a/tools/1_rename_by_dimensions.py +++ b/tools/1_rename_by_dimensions.py @@ -77,7 +77,8 @@ def sanitize_filename_part(part: str) -> str: part = part.lower() part = part.replace("&", "and").replace("@", "at") part = re.sub(r"\s+", "-", part) - part = re.sub(r"[^\w\-]+", "", part) + # Allow dots for language suffix in stem + part = re.sub(r"[^\w\-.]+", "", part) part = part.strip(".-_") return part or "untitled" @@ -213,9 +214,6 @@ def get_or_create_lang_dir(lang: str, config: Config) -> tuple[Path | None, bool return lang_dir_path, was_newly_created -# This function is now a general utility, not directly tied to the old finalization flow. -# It could be used if a pre-processing archive step is desired. - def archive_existing_directory(path_to_archive: Path, archive_prefix_template: str, lang: str, timestamp: str) -> bool: """ @@ -255,9 +253,15 @@ def process_single_mdx_file( """ Processes a single MDX file: extracts metadata, generates new filename, and renames the file in place. - Returns stats. + Returns stats, including old and new filename stems if renamed. """ - stats = {"status": "processed", "warnings": [], "error_message": None} + stats = { + "status": "processed", + "warnings": [], + "error_message": None, + "old_filename_stem_for_replace": None, + "new_filename_stem_for_replace": None, + } display_path = mdx_filepath.name if mdx_filepath.parent != config.BASE_DIR: try: @@ -284,8 +288,10 @@ def process_single_mdx_file( front_matter, config) file_warnings.extend(pwxy_warnings) + original_stem_for_title_fallback = mdx_filepath.stem # Used if standard_title is missing + padded_prefix, sanitized_title, lang_suffix, fname_warnings = _generate_filename_parts( - P, W, X, Y, front_matter, mdx_filepath.stem + P, W, X, Y, front_matter, original_stem_for_title_fallback ) file_warnings.extend(fname_warnings) @@ -297,10 +303,14 @@ def process_single_mdx_file( stats["status"] = "skipped_no_change" elif new_filepath.exists(): stats["status"] = "skipped_target_exists" - # Do not print here, summary will handle it. Let main loop print progress. else: try: + original_stem_before_rename = mdx_filepath.stem # Capture actual stem before rename mdx_filepath.rename(new_filepath) + stats["status"] = "processed" + # Store stems for content replacement phase + stats["old_filename_stem_for_replace"] = original_stem_before_rename + stats["new_filename_stem_for_replace"] = new_filepath.stem except Exception as rename_error: stats["status"] = "error" stats["error_message"] = f"Failed to rename file to '{new_filename}': {rename_error}" @@ -308,10 +318,12 @@ def process_single_mdx_file( return stats stats["warnings"] = file_warnings + action_taken = new_filepath != mdx_filepath and stats["status"] == "processed" + # Only print details if there are warnings or an actual change/error for this file - if file_warnings or (stats["status"] == "processed" and new_filepath != mdx_filepath) or stats["status"].startswith("error") or stats["status"] == "skipped_target_exists": + if file_warnings or action_taken or stats["status"].startswith("error") or stats["status"] == "skipped_target_exists": print( - f"\nProcessing: {display_path} -> {new_filename if new_filepath != mdx_filepath else '(no change)'}") + f"\nProcessing: {display_path} -> {new_filename if action_taken else '(no change or skipped)'}") for warning_msg in file_warnings: print(f" [Warning] {warning_msg}") if stats["status"] == "skipped_target_exists": @@ -339,7 +351,8 @@ def run_processing_for_language( lang_dir_path: Path, config: Config ) -> dict: - """Processes all MDX files in the lang_dir_path by renaming them in place.""" + """Processes all MDX files in the lang_dir_path by renaming them in place, + then updates internal content references.""" print(f"Starting in-place processing for: {lang_dir_path.name}") lang_stats = { @@ -349,8 +362,9 @@ def run_processing_for_language( "error_count": 0, "warning_files_count": 0, "status": "OK", - # For summary - "dir_path_str": str(lang_dir_path.relative_to(config.BASE_DIR)) + "dir_path_str": str(lang_dir_path.relative_to(config.BASE_DIR)), + "content_replacements_made_count": 0, + "content_replacement_errors_count": 0, } if not lang_dir_path.exists() or not lang_dir_path.is_dir(): @@ -359,16 +373,24 @@ def run_processing_for_language( lang_stats["status"] = "LANG_DIR_ERROR" return lang_stats - # Sort for consistent processing order + # --- Phase 1: Rename files --- + print(f"\n--- Phase 1: Renaming files in '{lang_dir_path.name}' ---") mdx_files = sorted(list(lang_dir_path.rglob("*.mdx"))) total_files = len(mdx_files) - print(f"Found {total_files} MDX files to process in '{lang_dir_path.name}'.") + print(f"Found {total_files} MDX files to process for renaming.") + + rename_mappings = [] # List to store (old_stem, new_stem) for content replacement for i, mdx_filepath in enumerate(mdx_files): result = process_single_mdx_file(mdx_filepath, config) if result["status"] == "processed": lang_stats["processed_count"] += 1 + # Check if stems were provided and different (meaning a rename happened) + old_stem = result.get("old_filename_stem_for_replace") + new_stem = result.get("new_filename_stem_for_replace") + if old_stem and new_stem and old_stem != new_stem: + rename_mappings.append((old_stem, new_stem)) elif result["status"] == "skipped_no_change": lang_stats["skipped_no_change_count"] += 1 elif result["status"] == "skipped_target_exists": @@ -376,32 +398,78 @@ def run_processing_for_language( elif result["status"] == "error": lang_stats["error_count"] += 1 - if result["warnings"]: # Count files with warnings regardless of status + if result["warnings"]: lang_stats["warning_files_count"] += 1 if total_files > 0: progress = (i + 1) / total_files * 100 print( - f"Progress for {lang_dir_path.name}: {i+1}/{total_files} files ({progress:.1f}%) evaluated.", end="\r") + f"Rename Progress ({lang_dir_path.name}): {i+1}/{total_files} files ({progress:.1f}%) evaluated.", end="\r") if total_files > 0: print() # Newline after progress bar + print("--- Phase 1: Renaming files complete. ---") + + # --- Phase 2: Update content references --- + if rename_mappings: + print(f"\n--- Phase 2: Updating content references in '{lang_dir_path.name}' ---") + print(f"Found {len(rename_mappings)} filename changes to propagate.") + # Re-glob for files, as their names might have changed. + # Also, we need to process all files, not just the renamed ones. + all_mdx_files_after_rename = sorted(list(lang_dir_path.rglob("*.mdx"))) + total_files_for_replacement = len(all_mdx_files_after_rename) + print(f"Scanning {total_files_for_replacement} .mdx files for content updates.") + + files_content_updated = 0 + for i, file_to_scan_path in enumerate(all_mdx_files_after_rename): + try: + original_content = file_to_scan_path.read_text(encoding="utf-8") + modified_content = original_content + file_actually_changed_by_replacement = False + + for old_stem, new_stem in rename_mappings: + if old_stem in modified_content: # Check if old_stem exists before replacing + temp_content = modified_content.replace(old_stem, new_stem) + if temp_content != modified_content: + modified_content = temp_content + file_actually_changed_by_replacement = True + + if file_actually_changed_by_replacement: + file_to_scan_path.write_text(modified_content, encoding="utf-8") + files_content_updated +=1 + print(f" Updated references in: {file_to_scan_path.relative_to(lang_dir_path)}") + except Exception as e: + print(f" [Error] Failed to update references in {file_to_scan_path.name}: {e}") + lang_stats["content_replacement_errors_count"] += 1 + + if total_files_for_replacement > 0: + progress = (i + 1) / total_files_for_replacement * 100 + print( + f"Content Update Progress ({lang_dir_path.name}): {i+1}/{total_files_for_replacement} files ({progress:.1f}%) scanned.", end="\r") + + if total_files_for_replacement > 0: + print() # Newline after progress bar + + lang_stats["content_replacements_made_count"] = files_content_updated + print(f"Content replacement phase: {files_content_updated} files had their content updated.") + print("--- Phase 2: Content references update complete. ---") + else: + print("\nNo renames occurred, skipping content reference update phase.") + print("-" * 20) print(f"Language Processing Summary ({lang_dir_path.name}):") - print( - f" Successfully processed (renamed): {lang_stats['processed_count']}") - # Clarified term - print( - f" Checked (filename no change): {lang_stats['skipped_no_change_count']}") - print( - f" Skipped (target filename exists): {lang_stats['skipped_target_exists_count']}") + print(f" Successfully processed (renamed): {lang_stats['processed_count']}") + print(f" Checked (filename no change): {lang_stats['skipped_no_change_count']}") + print(f" Skipped (target filename exists): {lang_stats['skipped_target_exists_count']}") print(f" Files with warnings: {lang_stats['warning_files_count']}") - print( - f" Errors encountered during file processing: {lang_stats['error_count']}") + print(f" Errors during file processing: {lang_stats['error_count']}") + if rename_mappings: # Only show if phase 2 ran + print(f" Files with content updated (references): {lang_stats['content_replacements_made_count']}") + print(f" Errors during content update: {lang_stats['content_replacement_errors_count']}") print("-" * 20) - if lang_stats["error_count"] > 0: + if lang_stats["error_count"] > 0 or lang_stats["content_replacement_errors_count"] > 0: lang_stats["status"] = "ERRORS_IN_PROCESSING" return lang_stats @@ -414,9 +482,7 @@ def main(): print(f"Timestamp for this run: {config.TIMESTAMP}") overall_summary = {} - # Store if the lang dir was newly created for cleanup decisions lang_dir_newly_created_flags = {} - # Store the Path object of the language directory for each lang lang_dirs_map = {} for lang in config.LANGUAGES: @@ -432,44 +498,22 @@ def main(): "status": "SETUP_ERROR", "message": f"Failed to get or create language directory for {lang}."} continue - # Optional: Add a pre-processing archive step if desired for non-Git backups - # For example: - # if current_lang_dir.exists() and any(current_lang_dir.iterdir()): # if dir exists and is not empty - # print(f"Attempting to archive '{current_lang_dir.name}' before processing...") - # if not archive_existing_directory(current_lang_dir, config.ARCHIVE_LANG_DIR_PREFIX_TEMPLATE, lang, config.TIMESTAMP): - # print(f" [CRITICAL ERROR] Archiving failed. Skipping processing for {lang}.") - # overall_summary[lang] = {"status": "PRE_ARCHIVE_ERROR", "message": f"Failed to archive existing directory {current_lang_dir.name}."} - # continue - # # After archiving, the original path is gone, so we need to re-create it to process into - # current_lang_dir, was_newly_created = get_or_create_lang_dir(lang, config) # This will re-create it empty - # lang_dir_newly_created_flags[lang] = True # Mark as newly created for potential cleanup - # lang_dirs_map[lang] = current_lang_dir - # if not current_lang_dir: - # overall_summary[lang] = {"status": "SETUP_ERROR_POST_ARCHIVE", "message": f"Failed to re-create lang directory for {lang} after archiving."} - # continue - lang_results = run_processing_for_language(current_lang_dir, config) overall_summary[lang] = lang_results - # --- Finalization for this language (mainly cleanup) --- - if current_lang_dir: # Should always be true if we reached here - # Processed, even if with errors + if current_lang_dir: if lang_results["status"] in ["OK", "ERRORS_IN_PROCESSING"]: - # If the directory was newly created by this script AND it's still empty after processing - # (e.g., no MDX files were found or created in it), then remove it. if was_newly_created and current_lang_dir.exists() and not any(current_lang_dir.iterdir()): try: current_lang_dir.rmdir() print( f" Removed empty newly created language directory: {current_lang_dir.name}") - lang_dirs_map[lang] = None # It's gone + lang_dirs_map[lang] = None lang_results["message"] = lang_results.get( "message", "") + " Empty newly created directory removed." except OSError as e: print( f" Note: Could not remove empty newly created directory '{current_lang_dir.name}': {e}") - # No renaming logic needed as we operate in-place. - # The status messages in lang_results already indicate success/failure of content processing. print("\n\n" + "=" * 20 + " Overall Script Summary " + "=" * 20) for lang_code in config.LANGUAGES: @@ -480,7 +524,7 @@ def main(): status = summary.get("status", "UNKNOWN") print(f" Status: {status}") - if "message" in summary: # Print setup/archive messages + if "message" in summary: print(f" Message: {summary['message']}") if status not in ["SETUP_ERROR", "SETUP_ERROR_POST_ARCHIVE", "PRE_ARCHIVE_ERROR", "LANG_DIR_ERROR"]: @@ -495,13 +539,16 @@ def main(): f" Files with Warnings: {summary.get('warning_files_count', 0)}") print( f" Errors during file processing: {summary.get('error_count', 0)}") + if summary.get('processed_count', 0) > 0 or "content_replacements_made_count" in summary : # Show only if relevant + print(f" Files with content updated (references): {summary.get('content_replacements_made_count',0)}") + print(f" Errors during content update: {summary.get('content_replacement_errors_count',0)}") + if lang_dir_path_obj and lang_dir_path_obj.exists(): print(f" Final directory location: {lang_dir_path_obj.name}") - # If it was new and now gone elif lang_dir_newly_created_flags.get(lang_code) and not lang_dir_path_obj: print(" Note: Empty newly created directory was removed as expected.") - elif not lang_dir_path_obj and status != "SETUP_ERROR": # If it wasn't a setup error but dir is None + elif not lang_dir_path_obj and status != "SETUP_ERROR": print( f" Note: Language directory '{config.LANG_DIR_TEMPLATE.format(lang=lang_code)}' may have been archived or removed.") @@ -510,4 +557,4 @@ def main(): if __name__ == "__main__": - main() + main() \ No newline at end of file