From 10f1f354dbdb256b0a389b2875249cab0f7b3e8f Mon Sep 17 00:00:00 2001
From: Alter-xyz <88554920+alterxyz@users.noreply.github.com>
Date: Sat, 17 May 2025 20:30:38 +0800
Subject: [PATCH] feat: auto fix reference

---
 tools/1_rename_by_dimensions.py | 161 +++++++++++++++++++++-----------
 1 file changed, 104 insertions(+), 57 deletions(-)

diff --git a/tools/1_rename_by_dimensions.py b/tools/1_rename_by_dimensions.py
index 4df18ad3..0058681e 100644
--- a/tools/1_rename_by_dimensions.py
+++ b/tools/1_rename_by_dimensions.py
@@ -77,7 +77,8 @@ def sanitize_filename_part(part: str) -> str:
     part = part.lower()
     part = part.replace("&", "and").replace("@", "at")
     part = re.sub(r"\s+", "-", part)
-    part = re.sub(r"[^\w\-]+", "", part)
+    # Keep interior dots (e.g. for a language suffix in the stem); leading/trailing dots are still stripped below
+    part = re.sub(r"[^\w\-.]+", "", part)
     part = part.strip(".-_")
     return part or "untitled"
 
@@ -213,9 +214,6 @@ def get_or_create_lang_dir(lang: str, config: Config) -> tuple[Path | None, bool
 
     return lang_dir_path, was_newly_created
 
-# This function is now a general utility, not directly tied to the old finalization flow.
-# It could be used if a pre-processing archive step is desired.
-
 
 def archive_existing_directory(path_to_archive: Path, archive_prefix_template: str, lang: str, timestamp: str) -> bool:
     """
@@ -255,9 +253,15 @@ def process_single_mdx_file(
     """
     Processes a single MDX file: extracts metadata, generates new filename,
     and renames the file in place.
-    Returns stats.
+    Returns stats, including old and new filename stems if renamed.
     """
-    stats = {"status": "processed", "warnings": [], "error_message": None}
+    stats = {
+        "status": "processed",
+        "warnings": [],
+        "error_message": None,
+        "old_filename_stem_for_replace": None,
+        "new_filename_stem_for_replace": None,
+    }
     display_path = mdx_filepath.name
     if mdx_filepath.parent != config.BASE_DIR:
         try:
@@ -284,8 +288,10 @@ def process_single_mdx_file(
             front_matter, config)
         file_warnings.extend(pwxy_warnings)
 
+        original_stem_for_title_fallback = mdx_filepath.stem # Used if standard_title is missing
+        
         padded_prefix, sanitized_title, lang_suffix, fname_warnings = _generate_filename_parts(
-            P, W, X, Y, front_matter, mdx_filepath.stem
+            P, W, X, Y, front_matter, original_stem_for_title_fallback
         )
         file_warnings.extend(fname_warnings)
 
@@ -297,10 +303,14 @@ def process_single_mdx_file(
             stats["status"] = "skipped_no_change"
         elif new_filepath.exists():
             stats["status"] = "skipped_target_exists"
-            # Do not print here, summary will handle it. Let main loop print progress.
         else:
             try:
+                original_stem_before_rename = mdx_filepath.stem # Capture actual stem before rename
                 mdx_filepath.rename(new_filepath)
+                stats["status"] = "processed"
+                # Store stems for content replacement phase
+                stats["old_filename_stem_for_replace"] = original_stem_before_rename
+                stats["new_filename_stem_for_replace"] = new_filepath.stem
             except Exception as rename_error:
                 stats["status"] = "error"
                 stats["error_message"] = f"Failed to rename file to '{new_filename}': {rename_error}"
@@ -308,10 +318,12 @@ def process_single_mdx_file(
                 return stats
 
         stats["warnings"] = file_warnings
+        action_taken = new_filepath != mdx_filepath and stats["status"] == "processed"
+        
         # Only print details if there are warnings or an actual change/error for this file
-        if file_warnings or (stats["status"] == "processed" and new_filepath != mdx_filepath) or stats["status"].startswith("error") or stats["status"] == "skipped_target_exists":
+        if file_warnings or action_taken or stats["status"].startswith("error") or stats["status"] == "skipped_target_exists":
             print(
-                f"\nProcessing: {display_path} -> {new_filename if new_filepath != mdx_filepath else '(no change)'}")
+                f"\nProcessing: {display_path} -> {new_filename if action_taken else '(no change or skipped)'}")
             for warning_msg in file_warnings:
                 print(f"  [Warning] {warning_msg}")
             if stats["status"] == "skipped_target_exists":
@@ -339,7 +351,8 @@ def run_processing_for_language(
     lang_dir_path: Path,
     config: Config
 ) -> dict:
-    """Processes all MDX files in the lang_dir_path by renaming them in place."""
+    """Processes all MDX files in the lang_dir_path by renaming them in place,
+       then updates internal content references."""
     print(f"Starting in-place processing for: {lang_dir_path.name}")
 
     lang_stats = {
@@ -349,8 +362,9 @@ def run_processing_for_language(
         "error_count": 0,
         "warning_files_count": 0,
         "status": "OK",
-        # For summary
-        "dir_path_str": str(lang_dir_path.relative_to(config.BASE_DIR))
+        "dir_path_str": str(lang_dir_path.relative_to(config.BASE_DIR)),
+        "content_replacements_made_count": 0,
+        "content_replacement_errors_count": 0,
     }
 
     if not lang_dir_path.exists() or not lang_dir_path.is_dir():
@@ -359,16 +373,24 @@ def run_processing_for_language(
         lang_stats["status"] = "LANG_DIR_ERROR"
         return lang_stats
 
-    # Sort for consistent processing order
+    # --- Phase 1: Rename files ---
+    print(f"\n--- Phase 1: Renaming files in '{lang_dir_path.name}' ---")
     mdx_files = sorted(list(lang_dir_path.rglob("*.mdx")))
     total_files = len(mdx_files)
-    print(f"Found {total_files} MDX files to process in '{lang_dir_path.name}'.")
+    print(f"Found {total_files} MDX files to process for renaming.")
+
+    rename_mappings = [] # List to store (old_stem, new_stem) for content replacement
 
     for i, mdx_filepath in enumerate(mdx_files):
         result = process_single_mdx_file(mdx_filepath, config)
 
         if result["status"] == "processed":
             lang_stats["processed_count"] += 1
+            # Check if stems were provided and different (meaning a rename happened)
+            old_stem = result.get("old_filename_stem_for_replace")
+            new_stem = result.get("new_filename_stem_for_replace")
+            if old_stem and new_stem and old_stem != new_stem:
+                rename_mappings.append((old_stem, new_stem))
         elif result["status"] == "skipped_no_change":
             lang_stats["skipped_no_change_count"] += 1
         elif result["status"] == "skipped_target_exists":
@@ -376,32 +398,78 @@ def run_processing_for_language(
         elif result["status"] == "error":
             lang_stats["error_count"] += 1
 
-        if result["warnings"]:  # Count files with warnings regardless of status
+        if result["warnings"]:
             lang_stats["warning_files_count"] += 1
 
         if total_files > 0:
             progress = (i + 1) / total_files * 100
             print(
-                f"Progress for {lang_dir_path.name}: {i+1}/{total_files} files ({progress:.1f}%) evaluated.", end="\r")
+                f"Rename Progress ({lang_dir_path.name}): {i+1}/{total_files} files ({progress:.1f}%) evaluated.", end="\r")
 
     if total_files > 0:
         print()  # Newline after progress bar
+    print("--- Phase 1: Renaming files complete. ---")
+
+    # --- Phase 2: Update content references ---
+    if rename_mappings:
+        print(f"\n--- Phase 2: Updating content references in '{lang_dir_path.name}' ---")
+        print(f"Found {len(rename_mappings)} filename changes to propagate.")
+        # Re-glob for files, as their names might have changed.
+        # Also, we need to process all files, not just the renamed ones.
+        all_mdx_files_after_rename = sorted(list(lang_dir_path.rglob("*.mdx")))
+        total_files_for_replacement = len(all_mdx_files_after_rename)
+        print(f"Scanning {total_files_for_replacement} .mdx files for content updates.")
+
+        files_content_updated = 0
+        for i, file_to_scan_path in enumerate(all_mdx_files_after_rename):
+            try:
+                original_content = file_to_scan_path.read_text(encoding="utf-8")
+                modified_content = original_content
+                file_actually_changed_by_replacement = False
+
+                for old_stem, new_stem in rename_mappings:
+                    if old_stem in modified_content: # Check if old_stem exists before replacing
+                        temp_content = modified_content.replace(old_stem, new_stem)
+                        if temp_content != modified_content:
+                            modified_content = temp_content
+                            file_actually_changed_by_replacement = True
+                
+                if file_actually_changed_by_replacement:
+                    file_to_scan_path.write_text(modified_content, encoding="utf-8")
+                    files_content_updated +=1
+                    print(f"  Updated references in: {file_to_scan_path.relative_to(lang_dir_path)}")
+            except Exception as e:
+                print(f"  [Error] Failed to update references in {file_to_scan_path.name}: {e}")
+                lang_stats["content_replacement_errors_count"] += 1
+            
+            if total_files_for_replacement > 0:
+                progress = (i + 1) / total_files_for_replacement * 100
+                print(
+                    f"Content Update Progress ({lang_dir_path.name}): {i+1}/{total_files_for_replacement} files ({progress:.1f}%) scanned.", end="\r")
+        
+        if total_files_for_replacement > 0:
+            print() # Newline after progress bar
+
+        lang_stats["content_replacements_made_count"] = files_content_updated
+        print(f"Content replacement phase: {files_content_updated} files had their content updated.")
+        print("--- Phase 2: Content references update complete. ---")
+    else:
+        print("\nNo renames occurred, skipping content reference update phase.")
+
 
     print("-" * 20)
     print(f"Language Processing Summary ({lang_dir_path.name}):")
-    print(
-        f"  Successfully processed (renamed): {lang_stats['processed_count']}")
-    # Clarified term
-    print(
-        f"  Checked (filename no change): {lang_stats['skipped_no_change_count']}")
-    print(
-        f"  Skipped (target filename exists): {lang_stats['skipped_target_exists_count']}")
+    print(f"  Successfully processed (renamed): {lang_stats['processed_count']}")
+    print(f"  Checked (filename no change): {lang_stats['skipped_no_change_count']}")
+    print(f"  Skipped (target filename exists): {lang_stats['skipped_target_exists_count']}")
     print(f"  Files with warnings: {lang_stats['warning_files_count']}")
-    print(
-        f"  Errors encountered during file processing: {lang_stats['error_count']}")
+    print(f"  Errors during file processing: {lang_stats['error_count']}")
+    if rename_mappings: # Only show if phase 2 ran
+        print(f"  Files with content updated (references): {lang_stats['content_replacements_made_count']}")
+        print(f"  Errors during content update: {lang_stats['content_replacement_errors_count']}")
     print("-" * 20)
 
-    if lang_stats["error_count"] > 0:
+    if lang_stats["error_count"] > 0 or lang_stats["content_replacement_errors_count"] > 0:
         lang_stats["status"] = "ERRORS_IN_PROCESSING"
     return lang_stats
 
@@ -414,9 +482,7 @@ def main():
     print(f"Timestamp for this run: {config.TIMESTAMP}")
 
     overall_summary = {}
-    # Store if the lang dir was newly created for cleanup decisions
     lang_dir_newly_created_flags = {}
-    # Store the Path object of the language directory for each lang
     lang_dirs_map = {}
 
     for lang in config.LANGUAGES:
@@ -432,44 +498,22 @@ def main():
                 "status": "SETUP_ERROR", "message": f"Failed to get or create language directory for {lang}."}
             continue
 
-        # Optional: Add a pre-processing archive step if desired for non-Git backups
-        # For example:
-        # if current_lang_dir.exists() and any(current_lang_dir.iterdir()): # if dir exists and is not empty
-        #     print(f"Attempting to archive '{current_lang_dir.name}' before processing...")
-        #     if not archive_existing_directory(current_lang_dir, config.ARCHIVE_LANG_DIR_PREFIX_TEMPLATE, lang, config.TIMESTAMP):
-        #         print(f"  [CRITICAL ERROR] Archiving failed. Skipping processing for {lang}.")
-        #         overall_summary[lang] = {"status": "PRE_ARCHIVE_ERROR", "message": f"Failed to archive existing directory {current_lang_dir.name}."}
-        #         continue
-        #     # After archiving, the original path is gone, so we need to re-create it to process into
-        #     current_lang_dir, was_newly_created = get_or_create_lang_dir(lang, config) # This will re-create it empty
-        #     lang_dir_newly_created_flags[lang] = True # Mark as newly created for potential cleanup
-        #     lang_dirs_map[lang] = current_lang_dir
-        #     if not current_lang_dir:
-        #         overall_summary[lang] = {"status": "SETUP_ERROR_POST_ARCHIVE", "message": f"Failed to re-create lang directory for {lang} after archiving."}
-        #         continue
-
         lang_results = run_processing_for_language(current_lang_dir, config)
         overall_summary[lang] = lang_results
 
-        # --- Finalization for this language (mainly cleanup) ---
-        if current_lang_dir:  # Should always be true if we reached here
-            # Processed, even if with errors
+        if current_lang_dir:
             if lang_results["status"] in ["OK", "ERRORS_IN_PROCESSING"]:
-                # If the directory was newly created by this script AND it's still empty after processing
-                # (e.g., no MDX files were found or created in it), then remove it.
                 if was_newly_created and current_lang_dir.exists() and not any(current_lang_dir.iterdir()):
                     try:
                         current_lang_dir.rmdir()
                         print(
                             f"  Removed empty newly created language directory: {current_lang_dir.name}")
-                        lang_dirs_map[lang] = None  # It's gone
+                        lang_dirs_map[lang] = None
                         lang_results["message"] = lang_results.get(
                             "message", "") + " Empty newly created directory removed."
                     except OSError as e:
                         print(
                             f"  Note: Could not remove empty newly created directory '{current_lang_dir.name}': {e}")
-            # No renaming logic needed as we operate in-place.
-            # The status messages in lang_results already indicate success/failure of content processing.
 
     print("\n\n" + "=" * 20 + " Overall Script Summary " + "=" * 20)
     for lang_code in config.LANGUAGES:
@@ -480,7 +524,7 @@ def main():
         status = summary.get("status", "UNKNOWN")
         print(f"  Status: {status}")
 
-        if "message" in summary:  # Print setup/archive messages
+        if "message" in summary:
             print(f"  Message: {summary['message']}")
 
         if status not in ["SETUP_ERROR", "SETUP_ERROR_POST_ARCHIVE", "PRE_ARCHIVE_ERROR", "LANG_DIR_ERROR"]:
@@ -495,13 +539,16 @@ def main():
                 f"  Files with Warnings: {summary.get('warning_files_count', 0)}")
             print(
                 f"  Errors during file processing: {summary.get('error_count', 0)}")
+            if summary.get('processed_count', 0) > 0 or "content_replacements_made_count" in summary : # Show only if relevant
+                print(f"  Files with content updated (references): {summary.get('content_replacements_made_count',0)}")
+                print(f"  Errors during content update: {summary.get('content_replacement_errors_count',0)}")
+
 
         if lang_dir_path_obj and lang_dir_path_obj.exists():
             print(f"  Final directory location: {lang_dir_path_obj.name}")
-        # If it was new and now gone
         elif lang_dir_newly_created_flags.get(lang_code) and not lang_dir_path_obj:
             print("  Note: Empty newly created directory was removed as expected.")
-        elif not lang_dir_path_obj and status != "SETUP_ERROR":  # If it wasn't a setup error but dir is None
+        elif not lang_dir_path_obj and status != "SETUP_ERROR":
             print(
                 f"  Note: Language directory '{config.LANG_DIR_TEMPLATE.format(lang=lang_code)}' may have been archived or removed.")
 
@@ -510,4 +557,4 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
+    main()
\ No newline at end of file