feat: add CI quality gate workflows for documentation (#571)

* feat: add CI quality gate workflows for documentation

Add three new GitHub Actions workflows to enforce documentation quality:

- build-test.yml: Validates Mintlify build succeeds on PRs
- quality-check.yml: Checks frontmatter, links, images, code blocks
- validate-docs-json.yml: Validates docs.json structure and file references

These workflows complement the existing translation automation by
providing quality gates that block PRs with documentation issues.

Fix: Remove continue-on-error to ensure workflows properly fail PRs.

* chore: remove build-test.yml (redundant with Mintlify App)
This commit is contained in:
yyh
2025-12-02 12:16:26 +08:00
committed by GitHub
parent af100ab333
commit c55d6e824a
2 changed files with 563 additions and 0 deletions

288
.github/workflows/quality-check.yml vendored Normal file
View File

@@ -0,0 +1,288 @@
name: Documentation Quality Check

on:
  pull_request:
    branches: [main, revamp]
    paths:
      - '**/*.md'
      - '**/*.mdx'

permissions:
  contents: read        # checkout
  pull-requests: write  # post the quality-report comment

jobs:
  quality-check:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # Full history so the PR base branch is available for `git diff` below.
          fetch-depth: 0

      - name: Setup Python
        # v5 runs on node20; setup-python@v4 (node16) is deprecated by GitHub.
        uses: actions/setup-python@v5
        with:
          python-version: '3.9'

      - name: Install dependencies
        run: |
          pip install pyyaml

      - name: Get changed MDX files
        id: get-files
        run: |
          # Files changed relative to the PR base branch, filtered to Markdown/MDX.
          # `|| echo ""` keeps the step alive when grep matches nothing.
          CHANGED_FILES=$(git diff --name-only origin/${{ github.base_ref }} HEAD | grep -E '\.(md|mdx)$' || echo "")
          if [ -z "$CHANGED_FILES" ]; then
            echo "No MDX files changed"
            echo "has_changes=false" >> "$GITHUB_OUTPUT"
            exit 0
          fi
          echo "$CHANGED_FILES" > /tmp/changed_files.txt
          echo "has_changes=true" >> "$GITHUB_OUTPUT"
          echo "Changed files:"
          echo "$CHANGED_FILES"
# Writes the checker to /tmp so it never ships with the docs content.
# The quoted 'EOFPYTHON' delimiter disables shell expansion inside the heredoc.
- name: Create quality check script
if: steps.get-files.outputs.has_changes == 'true'
run: |
cat > /tmp/quality_check.py << 'EOFPYTHON'
import re
import sys
from pathlib import Path
from typing import List, Dict, Tuple
import yaml
class DocumentationQualityChecker:
    """Static quality checks for Markdown/MDX documentation files.

    Each ``check_*`` method appends human-readable messages to
    ``self.errors`` (blocking) or ``self.warnings`` (advisory);
    ``run_checks()`` reports overall success as "no errors recorded".
    """

    # Frontmatter = a leading '---' delimited block followed by the body.
    FRONTMATTER_PATTERN = r'^---\s*\n(.*?)\n---\s*\n(.*)$'

    def __init__(self):
        self.errors = []    # blocking issues (fail the check)
        self.warnings = []  # advisory issues (reported, non-fatal)

    def extract_frontmatter(self, content: str) -> Tuple[Dict, str]:
        """Split content into (frontmatter dict, body text).

        Returns ({}, content) when frontmatter is absent or unparseable;
        check_frontmatter() reports parse failures explicitly.
        """
        match = re.match(self.FRONTMATTER_PATTERN, content, re.DOTALL)
        if not match:
            return {}, content
        try:
            return (yaml.safe_load(match.group(1)) or {}), match.group(2)
        except yaml.YAMLError:
            return {}, content

    def check_frontmatter(self, file_path: str, content: str):
        """Require parseable frontmatter with non-empty title/description."""
        match = re.match(self.FRONTMATTER_PATTERN, content, re.DOTALL)
        if not match:
            self.errors.append(f"{file_path}: Missing frontmatter")
            return
        try:
            frontmatter = yaml.safe_load(match.group(1)) or {}
        except yaml.YAMLError as exc:
            # Fix: invalid YAML used to be mis-reported as "Missing frontmatter".
            self.errors.append(f"{file_path}: Invalid YAML in frontmatter: {exc}")
            return
        if not frontmatter.get('title'):
            self.errors.append(f"{file_path}: Missing or empty 'title' in frontmatter")
        if not frontmatter.get('description'):
            self.errors.append(f"{file_path}: Missing or empty 'description' in frontmatter")

    def check_internal_links(self, file_path: str, content: str):
        """Warn on absolute internal links (leading '/'); external links pass."""
        _, body = self.extract_frontmatter(content)
        for _link_text, link_url in re.findall(r'\[([^\]]+)\]\(([^\)]+)\)', body):
            if link_url.startswith(('http://', 'https://', 'mailto:', '#')):
                continue
            if link_url.startswith('/'):
                self.warnings.append(
                    f"{file_path}: Absolute internal link detected: {link_url}. "
                    "Consider using relative paths for internal links."
                )

    def _warn_if_image_missing(self, file_path: str, img_path: str):
        """Shared helper for Markdown and HTML image references."""
        if img_path.startswith(('http://', 'https://', 'data:')):
            return
        resolved_path = Path(file_path).parent / img_path
        # Root-relative paths cannot be resolved from the file's directory,
        # so only flag relative paths that don't exist next to the document.
        if not resolved_path.exists() and not img_path.startswith('/'):
            self.warnings.append(
                f"{file_path}: Image path may not exist: {img_path}"
            )

    def check_image_paths(self, file_path: str, content: str):
        """Warn when a referenced local image file cannot be found."""
        _, body = self.extract_frontmatter(content)
        for _alt_text, img_path in re.findall(r'!\[([^\]]*)\]\(([^\)]+)\)', body):
            self._warn_if_image_missing(file_path, img_path)
        for img_path in re.findall(r'<img[^>]+src=["\']([^"\']+)["\']', body):
            self._warn_if_image_missing(file_path, img_path)

    def check_mintlify_components(self, file_path: str, content: str):
        """Warn on capitalized JSX tags that are not known Mintlify components."""
        _, body = self.extract_frontmatter(content)
        # Paired tags (group 1) or self-closing tags (group 4).
        component_pattern = r'<(\w+)([^>]*)>(.*?)</\1>|<(\w+)([^>]*)/>'
        known_components = {
            'Note', 'Info', 'Warning', 'Tip', 'Check', 'CodeGroup',
            'Code', 'Accordion', 'AccordionGroup', 'Card', 'CardGroup',
            'Steps', 'Step', 'Tabs', 'Tab', 'Frame', 'Icon'
        }
        for match in re.finditer(component_pattern, body, re.DOTALL):
            component_name = match.group(1) or match.group(4)
            # Lowercase tags are plain HTML, not Mintlify components.
            if component_name and component_name[0].isupper():
                if component_name not in known_components:
                    self.warnings.append(
                        f"{file_path}: Unknown Mintlify component: <{component_name}>"
                    )

    def check_code_blocks(self, file_path: str, content: str):
        """Warn on fenced code blocks that carry no language tag."""
        _, body = self.extract_frontmatter(content)
        code_blocks = re.findall(r'```([^\n]*)\n(.*?)```', body, re.DOTALL)
        for i, (language_tag, _code_content) in enumerate(code_blocks):
            if not language_tag.strip():
                self.warnings.append(
                    f"{file_path}: Code block #{i+1} missing language tag"
                )

    def check_file(self, file_path: str):
        """Run every check against one file; unreadable files become errors."""
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
        except Exception as e:
            self.errors.append(f"{file_path}: Could not read file: {e}")
            return
        self.check_frontmatter(file_path, content)
        self.check_internal_links(file_path, content)
        self.check_image_paths(file_path, content)
        self.check_mintlify_components(file_path, content)
        self.check_code_blocks(file_path, content)

    def run_checks(self, file_list: List[str]) -> bool:
        """Check every existing file in file_list; True when no errors found."""
        for file_path in file_list:
            if Path(file_path).exists():
                self.check_file(file_path)
        return len(self.errors) == 0
if __name__ == "__main__":
    # Driver: read the changed-file list written by the workflow, run every
    # check, print a report, and exit non-zero when blocking errors exist.
    with open('/tmp/changed_files.txt', 'r') as handle:
        targets = [entry.strip() for entry in handle if entry.strip()]

    checker = DocumentationQualityChecker()
    passed = checker.run_checks(targets)

    if checker.errors:
        print("ERRORS:")
        for message in checker.errors:
            print(f"  ❌ {message}")
    if checker.warnings:
        print("\nWARNINGS:")
        for message in checker.warnings:
            print(f"  ⚠️ {message}")

    if not passed:
        print(f"\n❌ Quality check failed with {len(checker.errors)} error(s)")
        sys.exit(1)
    if checker.warnings:
        print(f"\n✅ Quality check passed with {len(checker.warnings)} warning(s)")
    else:
        print("\n✅ All quality checks passed")
    sys.exit(0)
EOFPYTHON
- name: Run quality checks
if: steps.get-files.outputs.has_changes == 'true'
id: check
run: |
# tee preserves the log for the PR-comment step; PIPESTATUS[0] keeps the
# python exit code across the pipe (bash-only; bash is the default shell
# on ubuntu-latest runners).
python /tmp/quality_check.py 2>&1 | tee /tmp/quality_output.log
CHECK_EXIT_CODE=${PIPESTATUS[0]}
if [ $CHECK_EXIT_CODE -ne 0 ]; then
echo "check_failed=true" >> $GITHUB_OUTPUT
exit 1
else
echo "check_failed=false" >> $GITHUB_OUTPUT
fi
- name: Comment quality check results on PR
  # always() so results are posted even when the previous step failed the job.
  if: always() && steps.get-files.outputs.has_changes == 'true'
  uses: actions/github-script@v7
  with:
    script: |
      const fs = require('fs');
      // Hidden marker lets re-runs update one sticky comment instead of
      // posting a brand-new comment on every push (avoids PR comment spam).
      const marker = '<!-- documentation-quality-check -->';

      let qualityLog = '';
      try {
        qualityLog = fs.readFileSync('/tmp/quality_output.log', 'utf8');
      } catch (e) {
        qualityLog = 'Could not read quality check log';
      }

      const checkFailed = '${{ steps.check.outputs.check_failed }}' === 'true';
      const hasErrors = qualityLog.includes('ERRORS:');
      const hasWarnings = qualityLog.includes('WARNINGS:');

      let body;
      if (!hasErrors && !hasWarnings) {
        body = [
          marker,
          '## ✅ Documentation Quality Check Passed',
          '',
          'All documentation quality checks passed successfully!'
        ].join('\n');
      } else {
        const statusEmoji = checkFailed ? '❌' : '⚠️';
        const statusText = checkFailed ? 'Failed' : 'Passed with Warnings';
        body = [
          marker,
          `## ${statusEmoji} Documentation Quality Check ${statusText}`,
          '',
          qualityLog,
          '',
          checkFailed
            ? '**Action Required**: Please fix the errors listed above before merging.'
            : '**Warnings**: Consider addressing these warnings to improve documentation quality.',
          '',
          'Common fixes:',
          '- Add `title` and `description` to frontmatter in all MDX files',
          '- Use relative paths for internal links',
          '- Add language tags to code blocks (e.g., ```python, ```javascript)',
          '- Verify image paths are correct',
          '- Check Mintlify component syntax'
        ].join('\n');
      }

      // Update the existing sticky comment when present; otherwise create it.
      const { data: comments } = await github.rest.issues.listComments({
        owner: context.repo.owner,
        repo: context.repo.repo,
        issue_number: context.issue.number
      });
      const existing = comments.find(c => c.body && c.body.includes(marker));
      if (existing) {
        await github.rest.issues.updateComment({
          owner: context.repo.owner,
          repo: context.repo.repo,
          comment_id: existing.id,
          body
        });
      } else {
        await github.rest.issues.createComment({
          owner: context.repo.owner,
          repo: context.repo.repo,
          issue_number: context.issue.number,
          body
        });
      }

275
.github/workflows/validate-docs-json.yml vendored Normal file
View File

@@ -0,0 +1,275 @@
name: Validate docs.json

on:
  pull_request:
    branches: [main, revamp]
    paths:
      - 'docs.json'
      # The validator also reads the translation config, so re-validate
      # whenever that file changes.
      - 'tools/translate/config.json'

permissions:
  contents: read        # checkout
  pull-requests: write  # post the failure comment

jobs:
  validate-structure:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        # A shallow checkout is enough: this job never inspects git history.
        uses: actions/checkout@v4

      - name: Setup Python
        # v5 runs on node20; setup-python@v4 (node16) is deprecated by GitHub.
        uses: actions/setup-python@v5
        with:
          python-version: '3.9'
# Writes the validator to /tmp; quoted 'EOFPYTHON' disables shell expansion
# inside the heredoc body.
- name: Create validation script
run: |
cat > /tmp/validate_docs_json.py << 'EOFPYTHON'
import json
import sys
from pathlib import Path
from typing import Set, List, Dict, Any
class DocsJsonValidator:
    """Validates docs.json structure, file references, and language parity.

    Findings accumulate in ``self.errors`` (blocking) and ``self.warnings``
    (advisory); ``run_validation()`` returns True when no errors were recorded.
    """

    def __init__(self, repo_root: Path):
        self.repo_root = repo_root
        self.docs_json_path = repo_root / "docs.json"
        self.errors = []
        self.warnings = []
        # Language setup comes from the translation config; fall back to the
        # historical defaults (en -> zh/ja) when the config is unavailable.
        config = self._load_translation_config()
        self.source_lang = config.get("source_language", "en")
        self.target_langs = config.get("target_languages", ["zh", "ja"])
        self.all_langs = [self.source_lang] + self.target_langs
        # Map each language code to its content directory (defaults to the code).
        self.lang_dirs = {}
        configured = config.get("languages", {})
        for lang_code in self.all_langs:
            if lang_code in configured:
                self.lang_dirs[lang_code] = configured[lang_code]["directory"]
            else:
                self.lang_dirs[lang_code] = lang_code

    def _load_translation_config(self) -> Dict:
        """Load tools/translate/config.json relative to the repo root.

        Fix: the path was previously CWD-relative and a missing or malformed
        config crashed __init__ with a raw traceback instead of a clean report.
        """
        config_path = self.repo_root / "tools" / "translate" / "config.json"
        try:
            with open(config_path, "r", encoding="utf-8") as f:
                return json.load(f)
        except FileNotFoundError:
            self.warnings.append(
                f"Translation config not found at {config_path}; using default languages"
            )
            return {}
        except json.JSONDecodeError as e:
            self.errors.append(f"Invalid JSON in {config_path}: {e}")
            return {}

    def load_docs_json(self) -> Dict:
        """Parse docs.json; returns {} (and records an error) on failure."""
        try:
            with open(self.docs_json_path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except json.JSONDecodeError as e:
            self.errors.append(f"Invalid JSON syntax in docs.json: {e}")
            return {}
        except FileNotFoundError:
            self.errors.append("docs.json file not found")
            return {}

    def validate_json_schema(self, docs_data: Dict) -> bool:
        """Require the top-level fields Mintlify needs; False on first miss."""
        required_fields = ['name', 'navigation']
        for field in required_fields:
            if field not in docs_data:
                self.errors.append(f"Missing required field: {field}")
                return False
        return True

    def extract_file_paths(self, pages: Any, collected: Set[str]):
        """Recursively collect page path strings from a navigation subtree."""
        if isinstance(pages, str):
            collected.add(pages)
        elif isinstance(pages, dict):
            if 'pages' in pages:
                self.extract_file_paths(pages['pages'], collected)
            elif 'page' in pages:
                collected.add(pages['page'])
        elif isinstance(pages, list):
            for item in pages:
                self.extract_file_paths(item, collected)

    def get_language_section(self, docs_data: Dict, lang: str) -> Dict:
        """Find the navigation section for `lang`; {} when absent.

        Supports both 'versions[0].languages' and top-level 'languages' layouts.
        """
        nav = docs_data.get('navigation', {})
        if 'versions' in nav and len(nav['versions']) > 0:
            languages = nav['versions'][0].get('languages', [])
        elif 'languages' in nav:
            languages = nav['languages']
        else:
            return {}
        for lang_data in languages:
            if lang_data.get('language') == lang:
                return lang_data
        return {}

    def validate_file_existence(self, docs_data: Dict):
        """Error on navigation entries whose .md/.mdx file does not exist."""
        for lang in self.all_langs:
            lang_section = self.get_language_section(docs_data, lang)
            if not lang_section:
                continue
            file_paths = set()
            if 'dropdowns' in lang_section:
                for dropdown in lang_section['dropdowns']:
                    if 'pages' in dropdown:
                        self.extract_file_paths(dropdown['pages'], file_paths)
            for file_path in file_paths:
                mdx_path = self.repo_root / f"{file_path}.mdx"
                md_path = self.repo_root / f"{file_path}.md"
                if not mdx_path.exists() and not md_path.exists():
                    self.errors.append(f"Referenced file not found: {file_path} (.md or .mdx)")

    def validate_language_consistency(self, docs_data: Dict):
        """Warn when languages are missing or dropdown counts diverge."""
        sections = {}
        for lang in self.all_langs:
            lang_section = self.get_language_section(docs_data, lang)
            if lang_section:
                sections[lang] = lang_section
        if len(sections) < len(self.all_langs):
            missing = set(self.all_langs) - set(sections.keys())
            self.warnings.append(f"Missing language sections: {', '.join(missing)}")
        if len(sections) < 2:
            return  # nothing to compare
        source_section = sections.get(self.source_lang)
        if not source_section:
            return  # no baseline to compare against
        source_dropdowns = source_section.get('dropdowns', [])
        source_dropdown_count = len(source_dropdowns)
        for lang in self.target_langs:
            if lang not in sections:
                continue
            target_dropdowns = sections[lang].get('dropdowns', [])
            target_dropdown_count = len(target_dropdowns)
            if source_dropdown_count != target_dropdown_count:
                self.warnings.append(
                    f"Dropdown count mismatch between {self.source_lang} ({source_dropdown_count}) "
                    f"and {lang} ({target_dropdown_count})"
                )

    def validate_no_duplicate_paths(self, docs_data: Dict):
        """Error when the same page path appears twice within one language."""
        for lang in self.all_langs:
            lang_section = self.get_language_section(docs_data, lang)
            if not lang_section:
                continue
            file_paths = set()
            duplicates = []

            def check_duplicates(pages: Any):
                # Mirrors extract_file_paths but records repeat sightings.
                if isinstance(pages, str):
                    if pages in file_paths:
                        duplicates.append(pages)
                    file_paths.add(pages)
                elif isinstance(pages, dict):
                    if 'pages' in pages:
                        check_duplicates(pages['pages'])
                    elif 'page' in pages:
                        path = pages['page']
                        if path in file_paths:
                            duplicates.append(path)
                        file_paths.add(path)
                elif isinstance(pages, list):
                    for item in pages:
                        check_duplicates(item)

            if 'dropdowns' in lang_section:
                for dropdown in lang_section['dropdowns']:
                    if 'pages' in dropdown:
                        check_duplicates(dropdown['pages'])
            if duplicates:
                self.errors.append(f"Duplicate file paths in {lang} section: {', '.join(duplicates)}")

    def run_validation(self) -> bool:
        """Run all validations; True iff no errors were recorded."""
        docs_data = self.load_docs_json()
        if not docs_data:
            return False
        if not self.validate_json_schema(docs_data):
            return False
        self.validate_file_existence(docs_data)
        self.validate_language_consistency(docs_data)
        self.validate_no_duplicate_paths(docs_data)
        return len(self.errors) == 0
if __name__ == "__main__":
    # Driver: validate docs.json in the current working directory, print a
    # report, and exit non-zero when blocking errors were found.
    validator = DocsJsonValidator(Path.cwd())
    ok = validator.run_validation()

    if validator.errors:
        print("ERRORS:")
        for msg in validator.errors:
            print(f"  ❌ {msg}")
    if validator.warnings:
        print("\nWARNINGS:")
        for msg in validator.warnings:
            print(f"  ⚠️ {msg}")

    if not ok:
        print(f"\n❌ docs.json validation failed with {len(validator.errors)} error(s)")
        sys.exit(1)
    print("\n✅ docs.json validation passed")
    sys.exit(0)
EOFPYTHON
- name: Run validation
id: validate
run: |
# tee keeps the log for the PR-comment step; PIPESTATUS[0] preserves the
# python exit code across the pipe (bash is the default shell on
# ubuntu-latest runners).
python /tmp/validate_docs_json.py 2>&1 | tee /tmp/validation_output.log
VALIDATION_EXIT_CODE=${PIPESTATUS[0]}
if [ $VALIDATION_EXIT_CODE -ne 0 ]; then
echo "validation_failed=true" >> $GITHUB_OUTPUT
exit 1
else
echo "validation_failed=false" >> $GITHUB_OUTPUT
fi
# Posts a PR comment only when the validation step actually failed.
- name: Comment validation results on PR
if: failure() && steps.validate.outputs.validation_failed == 'true'
uses: actions/github-script@v7
with:
script: |
const fs = require('fs');
// Log was written by the previous step's tee; fall back to a stub if absent.
let validationLog = '';
try {
validationLog = fs.readFileSync('/tmp/validation_output.log', 'utf8');
} catch (e) {
validationLog = 'Could not read validation log';
}
// Embed the full validator output in a fenced block inside the comment.
const comment = `## ❌ docs.json Validation Failed
The docs.json file has structural issues that need to be fixed:
\`\`\`
${validationLog}
\`\`\`
Please ensure:
- All referenced files exist in the repository
- Language sections (en/zh/ja) are consistent
- No duplicate file paths within a language section
- Required fields are present (name, navigation)
- Valid JSON syntax
Refer to the [Mintlify docs.json schema](https://mintlify.com/docs.json) for more details.`;
// On pull_request events context.issue.number is the PR number.
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: context.issue.number,
body: comment
});