diff --git a/.github/workflows/quality-check.yml b/.github/workflows/quality-check.yml new file mode 100644 index 00000000..49fa04a7 --- /dev/null +++ b/.github/workflows/quality-check.yml @@ -0,0 +1,288 @@ +name: Documentation Quality Check + +on: + pull_request: + branches: [main, revamp] + paths: + - '**/*.md' + - '**/*.mdx' + +permissions: + contents: read + pull-requests: write + +jobs: + quality-check: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: '3.9' + + - name: Install dependencies + run: | + pip install pyyaml + + - name: Get changed MDX files + id: get-files + run: | + CHANGED_FILES=$(git diff --name-only origin/${{ github.base_ref }} HEAD | grep -E '\.(md|mdx)$' || echo "") + + if [ -z "$CHANGED_FILES" ]; then + echo "No MDX files changed" + echo "has_changes=false" >> $GITHUB_OUTPUT + exit 0 + fi + + echo "$CHANGED_FILES" > /tmp/changed_files.txt + echo "has_changes=true" >> $GITHUB_OUTPUT + echo "Changed files:" + echo "$CHANGED_FILES" + + - name: Create quality check script + if: steps.get-files.outputs.has_changes == 'true' + run: | + cat > /tmp/quality_check.py << 'EOFPYTHON' + import re + import sys + from pathlib import Path + from typing import List, Dict, Tuple + import yaml + + class DocumentationQualityChecker: + def __init__(self): + self.errors = [] + self.warnings = [] + + def extract_frontmatter(self, content: str) -> Tuple[Dict, str]: + frontmatter_pattern = r'^---\s*\n(.*?)\n---\s*\n(.*)$' + match = re.match(frontmatter_pattern, content, re.DOTALL) + + if not match: + return {}, content + + try: + frontmatter = yaml.safe_load(match.group(1)) + body = match.group(2) + return frontmatter or {}, body + except yaml.YAMLError as e: + return {}, content + + def check_frontmatter(self, file_path: str, content: str): + frontmatter, _ = self.extract_frontmatter(content) + + if not frontmatter: 
+ self.errors.append(f"{file_path}: Missing frontmatter") + return + + if 'title' not in frontmatter or not frontmatter['title']: + self.errors.append(f"{file_path}: Missing or empty 'title' in frontmatter") + + if 'description' not in frontmatter or not frontmatter['description']: + self.errors.append(f"{file_path}: Missing or empty 'description' in frontmatter") + + def check_internal_links(self, file_path: str, content: str): + _, body = self.extract_frontmatter(content) + + markdown_link_pattern = r'\[([^\]]+)\]\(([^\)]+)\)' + links = re.findall(markdown_link_pattern, body) + + for link_text, link_url in links: + if link_url.startswith(('http://', 'https://', 'mailto:', '#')): + continue + + if link_url.startswith('/'): + self.warnings.append( + f"{file_path}: Absolute internal link detected: {link_url}. " + "Consider using relative paths for internal links." + ) + + def check_image_paths(self, file_path: str, content: str): + _, body = self.extract_frontmatter(content) + + markdown_img_pattern = r'!\[([^\]]*)\]\(([^\)]+)\)' + markdown_matches = re.findall(markdown_img_pattern, body) + for alt_text, img_path in markdown_matches: + if img_path.startswith(('http://', 'https://', 'data:')): + continue + + resolved_path = Path(file_path).parent / img_path + + if not resolved_path.exists() and not img_path.startswith('/'): + self.warnings.append( + f"{file_path}: Image path may not exist: {img_path}" + ) + + html_img_pattern = r'<img[^>]+src=["\']([^"\']+)["\']' + html_matches = re.findall(html_img_pattern, body) + for img_path in html_matches: + if img_path.startswith(('http://', 'https://', 'data:')): + continue + + resolved_path = Path(file_path).parent / img_path + + if not resolved_path.exists() and not img_path.startswith('/'): + self.warnings.append( + f"{file_path}: Image path may not exist: {img_path}" + ) + + def check_mintlify_components(self, file_path: str, content: str): + _, body = self.extract_frontmatter(content) + + component_pattern =
r'<(\w+)([^>]*)>(.*?)</\1>|<(\w+)([^>]*)/>' + matches = re.finditer(component_pattern, body, re.DOTALL) + + known_components = { + 'Note', 'Info', 'Warning', 'Tip', 'Check', 'CodeGroup', + 'Code', 'Accordion', 'AccordionGroup', 'Card', 'CardGroup', + 'Steps', 'Step', 'Tabs', 'Tab', 'Frame', 'Icon' + } + + for match in matches: + component_name = match.group(1) or match.group(4) + + if component_name and component_name[0].isupper(): + if component_name not in known_components: + self.warnings.append( + f"{file_path}: Unknown Mintlify component: <{component_name}>" + ) + + def check_code_blocks(self, file_path: str, content: str): + _, body = self.extract_frontmatter(content) + + code_block_pattern = r'```([^\n]*)\n(.*?)```' + code_blocks = re.findall(code_block_pattern, body, re.DOTALL) + + for i, (language_tag, code_content) in enumerate(code_blocks): + if not language_tag.strip(): + self.warnings.append( + f"{file_path}: Code block #{i+1} missing language tag" + ) + + def check_file(self, file_path: str): + try: + with open(file_path, 'r', encoding='utf-8') as f: + content = f.read() + except Exception as e: + self.errors.append(f"{file_path}: Could not read file: {e}") + return + + self.check_frontmatter(file_path, content) + self.check_internal_links(file_path, content) + self.check_image_paths(file_path, content) + self.check_mintlify_components(file_path, content) + self.check_code_blocks(file_path, content) + + def run_checks(self, file_list: List[str]) -> bool: + for file_path in file_list: + if Path(file_path).exists(): + self.check_file(file_path) + + return len(self.errors) == 0 + + if __name__ == "__main__": + with open('/tmp/changed_files.txt', 'r') as f: + files = [line.strip() for line in f if line.strip()] + + checker = DocumentationQualityChecker() + success = checker.run_checks(files) + + if checker.errors: + print("ERRORS:") + for error in checker.errors: + print(f" ❌ {error}") + + if checker.warnings: + print("\nWARNINGS:") + for warning in
checker.warnings: + print(f" ⚠️ {warning}") + + if success: + if checker.warnings: + print(f"\n✅ Quality check passed with {len(checker.warnings)} warning(s)") + else: + print("\n✅ All quality checks passed") + sys.exit(0) + else: + print(f"\n❌ Quality check failed with {len(checker.errors)} error(s)") + sys.exit(1) + EOFPYTHON + + - name: Run quality checks + if: steps.get-files.outputs.has_changes == 'true' + id: check + run: | + python /tmp/quality_check.py 2>&1 | tee /tmp/quality_output.log + CHECK_EXIT_CODE=${PIPESTATUS[0]} + + if [ $CHECK_EXIT_CODE -ne 0 ]; then + echo "check_failed=true" >> $GITHUB_OUTPUT + exit 1 + else + echo "check_failed=false" >> $GITHUB_OUTPUT + fi + + - name: Comment quality check results on PR + if: always() && steps.get-files.outputs.has_changes == 'true' + uses: actions/github-script@v7 + with: + script: | + const fs = require('fs'); + let qualityLog = ''; + + try { + qualityLog = fs.readFileSync('/tmp/quality_output.log', 'utf8'); + } catch (e) { + qualityLog = 'Could not read quality check log'; + } + + const checkFailed = '${{ steps.check.outputs.check_failed }}' === 'true'; + const hasErrors = qualityLog.includes('ERRORS:'); + const hasWarnings = qualityLog.includes('WARNINGS:'); + + if (!hasErrors && !hasWarnings) { + const comment = `## ✅ Documentation Quality Check Passed + + All documentation quality checks passed successfully!`; + + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: comment + }); + return; + } + + let statusEmoji = checkFailed ? '❌' : '⚠️'; + let statusText = checkFailed ? 'Failed' : 'Passed with Warnings'; + + const comment = `## ${statusEmoji} Documentation Quality Check ${statusText} + + ${qualityLog} + + ${checkFailed ? ` + **Action Required**: Please fix the errors listed above before merging. + ` : ` + **Warnings**: Consider addressing these warnings to improve documentation quality. 
+ `} + + Common fixes: + - Add \`title\` and \`description\` to frontmatter in all MDX files + - Use relative paths for internal links + - Add language tags to code blocks (e.g., \`\`\`python, \`\`\`javascript) + - Verify image paths are correct + - Check Mintlify component syntax`; + + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: comment + }); diff --git a/.github/workflows/validate-docs-json.yml b/.github/workflows/validate-docs-json.yml new file mode 100644 index 00000000..99d0a6aa --- /dev/null +++ b/.github/workflows/validate-docs-json.yml @@ -0,0 +1,275 @@ +name: Validate docs.json + +on: + pull_request: + branches: [main, revamp] + paths: + - 'docs.json' + +permissions: + contents: read + pull-requests: write + +jobs: + validate-structure: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: '3.9' + + - name: Create validation script + run: | + cat > /tmp/validate_docs_json.py << 'EOFPYTHON' + import json + import sys + from pathlib import Path + from typing import Set, List, Dict, Any + + class DocsJsonValidator: + def __init__(self, repo_root: Path): + self.repo_root = repo_root + self.docs_json_path = repo_root / "docs.json" + self.errors = [] + self.warnings = [] + + with open("tools/translate/config.json", "r") as f: + config = json.load(f) + self.source_lang = config.get("source_language", "en") + self.target_langs = config.get("target_languages", ["zh", "ja"]) + self.all_langs = [self.source_lang] + self.target_langs + + self.lang_dirs = {} + for lang_code in self.all_langs: + if lang_code in config.get("languages", {}): + self.lang_dirs[lang_code] = config["languages"][lang_code]["directory"] + else: + self.lang_dirs[lang_code] = lang_code + + def load_docs_json(self) -> Dict: + try: + with 
open(self.docs_json_path, 'r', encoding='utf-8') as f: + return json.load(f) + except json.JSONDecodeError as e: + self.errors.append(f"Invalid JSON syntax in docs.json: {e}") + return {} + except FileNotFoundError: + self.errors.append("docs.json file not found") + return {} + + def validate_json_schema(self, docs_data: Dict) -> bool: + required_fields = ['name', 'navigation'] + for field in required_fields: + if field not in docs_data: + self.errors.append(f"Missing required field: {field}") + return False + return True + + def extract_file_paths(self, pages: Any, collected: Set[str]): + if isinstance(pages, str): + collected.add(pages) + elif isinstance(pages, dict): + if 'pages' in pages: + self.extract_file_paths(pages['pages'], collected) + elif 'page' in pages: + collected.add(pages['page']) + elif isinstance(pages, list): + for item in pages: + self.extract_file_paths(item, collected) + + def get_language_section(self, docs_data: Dict, lang: str) -> Dict: + nav = docs_data.get('navigation', {}) + + if 'versions' in nav and len(nav['versions']) > 0: + languages = nav['versions'][0].get('languages', []) + elif 'languages' in nav: + languages = nav['languages'] + else: + return {} + + for lang_data in languages: + if lang_data.get('language') == lang: + return lang_data + return {} + + def validate_file_existence(self, docs_data: Dict): + for lang in self.all_langs: + lang_section = self.get_language_section(docs_data, lang) + if not lang_section: + continue + + file_paths = set() + if 'dropdowns' in lang_section: + for dropdown in lang_section['dropdowns']: + if 'pages' in dropdown: + self.extract_file_paths(dropdown['pages'], file_paths) + + for file_path in file_paths: + mdx_path = self.repo_root / f"{file_path}.mdx" + md_path = self.repo_root / f"{file_path}.md" + + if not mdx_path.exists() and not md_path.exists(): + self.errors.append(f"Referenced file not found: {file_path} (.md or .mdx)") + + def validate_language_consistency(self, docs_data: Dict): + 
sections = {} + for lang in self.all_langs: + lang_section = self.get_language_section(docs_data, lang) + if lang_section: + sections[lang] = lang_section + + if len(sections) < len(self.all_langs): + missing = set(self.all_langs) - set(sections.keys()) + self.warnings.append(f"Missing language sections: {', '.join(missing)}") + + if len(sections) < 2: + return + + source_section = sections.get(self.source_lang) + if not source_section: + return + + source_dropdowns = source_section.get('dropdowns', []) + source_dropdown_count = len(source_dropdowns) + + for lang in self.target_langs: + if lang not in sections: + continue + + target_dropdowns = sections[lang].get('dropdowns', []) + target_dropdown_count = len(target_dropdowns) + + if source_dropdown_count != target_dropdown_count: + self.warnings.append( + f"Dropdown count mismatch between {self.source_lang} ({source_dropdown_count}) " + f"and {lang} ({target_dropdown_count})" + ) + + def validate_no_duplicate_paths(self, docs_data: Dict): + for lang in self.all_langs: + lang_section = self.get_language_section(docs_data, lang) + if not lang_section: + continue + + file_paths = set() + duplicates = [] + + def check_duplicates(pages: Any): + if isinstance(pages, str): + if pages in file_paths: + duplicates.append(pages) + file_paths.add(pages) + elif isinstance(pages, dict): + if 'pages' in pages: + check_duplicates(pages['pages']) + elif 'page' in pages: + path = pages['page'] + if path in file_paths: + duplicates.append(path) + file_paths.add(path) + elif isinstance(pages, list): + for item in pages: + check_duplicates(item) + + if 'dropdowns' in lang_section: + for dropdown in lang_section['dropdowns']: + if 'pages' in dropdown: + check_duplicates(dropdown['pages']) + + if duplicates: + self.errors.append(f"Duplicate file paths in {lang} section: {', '.join(duplicates)}") + + def run_validation(self) -> bool: + docs_data = self.load_docs_json() + if not docs_data: + return False + + if not 
self.validate_json_schema(docs_data): + return False + + self.validate_file_existence(docs_data) + self.validate_language_consistency(docs_data) + self.validate_no_duplicate_paths(docs_data) + + return len(self.errors) == 0 + + if __name__ == "__main__": + repo_root = Path.cwd() + validator = DocsJsonValidator(repo_root) + + success = validator.run_validation() + + if validator.errors: + print("ERRORS:") + for error in validator.errors: + print(f" ❌ {error}") + + if validator.warnings: + print("\nWARNINGS:") + for warning in validator.warnings: + print(f" ⚠️ {warning}") + + if success: + print("\n✅ docs.json validation passed") + sys.exit(0) + else: + print(f"\n❌ docs.json validation failed with {len(validator.errors)} error(s)") + sys.exit(1) + EOFPYTHON + + - name: Run validation + id: validate + run: | + python /tmp/validate_docs_json.py 2>&1 | tee /tmp/validation_output.log + VALIDATION_EXIT_CODE=${PIPESTATUS[0]} + + if [ $VALIDATION_EXIT_CODE -ne 0 ]; then + echo "validation_failed=true" >> $GITHUB_OUTPUT + exit 1 + else + echo "validation_failed=false" >> $GITHUB_OUTPUT + fi + + - name: Comment validation results on PR + if: failure() && steps.validate.outputs.validation_failed == 'true' + uses: actions/github-script@v7 + with: + script: | + const fs = require('fs'); + let validationLog = ''; + + try { + validationLog = fs.readFileSync('/tmp/validation_output.log', 'utf8'); + } catch (e) { + validationLog = 'Could not read validation log'; + } + + const comment = `## ❌ docs.json Validation Failed + + The docs.json file has structural issues that need to be fixed: + + \`\`\` + ${validationLog} + \`\`\` + + Please ensure: + - All referenced files exist in the repository + - Language sections (en/zh/ja) are consistent + - No duplicate file paths within a language section + - Required fields are present (name, navigation) + - Valid JSON syntax + + Refer to the [Mintlify docs.json schema](https://mintlify.com/docs.json) for more details.`; + + await 
github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: comment + });