feat: add CI quality gate workflows for documentation (#571)

* feat: add CI quality gate workflows for documentation

Add three new GitHub Actions workflows to enforce documentation quality:

- build-test.yml: Validates Mintlify build succeeds on PRs
- quality-check.yml: Checks frontmatter, links, images, code blocks
- validate-docs-json.yml: Validates docs.json structure and file references

These workflows complement the existing translation automation by
providing quality gates that block PRs with documentation issues.

Fix: Remove continue-on-error to ensure workflows properly fail PRs.

* chore: remove build-test.yml (redundant with Mintlify App)
This commit is contained in:
yyh
2025-12-02 12:16:26 +08:00
committed by GitHub
parent af100ab333
commit c55d6e824a
2 changed files with 563 additions and 0 deletions

288
.github/workflows/quality-check.yml vendored Normal file
View File

@@ -0,0 +1,288 @@
name: Documentation Quality Check

on:
  pull_request:
    branches: [main, revamp]
    paths:
      - '**/*.md'
      - '**/*.mdx'

permissions:
  contents: read        # checkout
  pull-requests: write  # post the quality-report comment

jobs:
  quality-check:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # Full history so the PR base branch is available for `git diff` below.
          fetch-depth: 0

      - name: Setup Python
        # v5 runs on node20; setup-python@v4 (node16) is deprecated by GitHub.
        uses: actions/setup-python@v5
        with:
          python-version: '3.9'

      - name: Install dependencies
        run: |
          pip install pyyaml

      - name: Get changed MDX files
        id: get-files
        run: |
          # Files changed relative to the PR base branch, filtered to Markdown/MDX.
          # `|| echo ""` keeps the step alive when grep matches nothing.
          CHANGED_FILES=$(git diff --name-only origin/${{ github.base_ref }} HEAD | grep -E '\.(md|mdx)$' || echo "")
          if [ -z "$CHANGED_FILES" ]; then
            echo "No MDX files changed"
            echo "has_changes=false" >> "$GITHUB_OUTPUT"
            exit 0
          fi
          echo "$CHANGED_FILES" > /tmp/changed_files.txt
          echo "has_changes=true" >> "$GITHUB_OUTPUT"
          echo "Changed files:"
          echo "$CHANGED_FILES"
# Writes the checker to /tmp so it never ships with the docs content.
# The quoted 'EOFPYTHON' delimiter disables shell expansion inside the heredoc.
- name: Create quality check script
if: steps.get-files.outputs.has_changes == 'true'
run: |
cat > /tmp/quality_check.py << 'EOFPYTHON'
import re
import sys
from pathlib import Path
from typing import List, Dict, Tuple
import yaml
class DocumentationQualityChecker:
    """Static quality checks for Markdown/MDX documentation files.

    Each ``check_*`` method appends human-readable messages to
    ``self.errors`` (blocking) or ``self.warnings`` (advisory);
    ``run_checks()`` reports overall success as "no errors recorded".
    """

    # Frontmatter = a leading '---' delimited block followed by the body.
    FRONTMATTER_PATTERN = r'^---\s*\n(.*?)\n---\s*\n(.*)$'

    def __init__(self):
        self.errors = []    # blocking issues (fail the check)
        self.warnings = []  # advisory issues (reported, non-fatal)

    def extract_frontmatter(self, content: str) -> Tuple[Dict, str]:
        """Split content into (frontmatter dict, body text).

        Returns ({}, content) when frontmatter is absent or unparseable;
        check_frontmatter() reports parse failures explicitly.
        """
        match = re.match(self.FRONTMATTER_PATTERN, content, re.DOTALL)
        if not match:
            return {}, content
        try:
            return (yaml.safe_load(match.group(1)) or {}), match.group(2)
        except yaml.YAMLError:
            return {}, content

    def check_frontmatter(self, file_path: str, content: str):
        """Require parseable frontmatter with non-empty title/description."""
        match = re.match(self.FRONTMATTER_PATTERN, content, re.DOTALL)
        if not match:
            self.errors.append(f"{file_path}: Missing frontmatter")
            return
        try:
            frontmatter = yaml.safe_load(match.group(1)) or {}
        except yaml.YAMLError as exc:
            # Fix: invalid YAML used to be mis-reported as "Missing frontmatter".
            self.errors.append(f"{file_path}: Invalid YAML in frontmatter: {exc}")
            return
        if not frontmatter.get('title'):
            self.errors.append(f"{file_path}: Missing or empty 'title' in frontmatter")
        if not frontmatter.get('description'):
            self.errors.append(f"{file_path}: Missing or empty 'description' in frontmatter")

    def check_internal_links(self, file_path: str, content: str):
        """Warn on absolute internal links (leading '/'); external links pass."""
        _, body = self.extract_frontmatter(content)
        for _link_text, link_url in re.findall(r'\[([^\]]+)\]\(([^\)]+)\)', body):
            if link_url.startswith(('http://', 'https://', 'mailto:', '#')):
                continue
            if link_url.startswith('/'):
                self.warnings.append(
                    f"{file_path}: Absolute internal link detected: {link_url}. "
                    "Consider using relative paths for internal links."
                )

    def _warn_if_image_missing(self, file_path: str, img_path: str):
        """Shared helper for Markdown and HTML image references."""
        if img_path.startswith(('http://', 'https://', 'data:')):
            return
        resolved_path = Path(file_path).parent / img_path
        # Root-relative paths cannot be resolved from the file's directory,
        # so only flag relative paths that don't exist next to the document.
        if not resolved_path.exists() and not img_path.startswith('/'):
            self.warnings.append(
                f"{file_path}: Image path may not exist: {img_path}"
            )

    def check_image_paths(self, file_path: str, content: str):
        """Warn when a referenced local image file cannot be found."""
        _, body = self.extract_frontmatter(content)
        for _alt_text, img_path in re.findall(r'!\[([^\]]*)\]\(([^\)]+)\)', body):
            self._warn_if_image_missing(file_path, img_path)
        for img_path in re.findall(r'<img[^>]+src=["\']([^"\']+)["\']', body):
            self._warn_if_image_missing(file_path, img_path)

    def check_mintlify_components(self, file_path: str, content: str):
        """Warn on capitalized JSX tags that are not known Mintlify components."""
        _, body = self.extract_frontmatter(content)
        # Paired tags (group 1) or self-closing tags (group 4).
        component_pattern = r'<(\w+)([^>]*)>(.*?)</\1>|<(\w+)([^>]*)/>'
        known_components = {
            'Note', 'Info', 'Warning', 'Tip', 'Check', 'CodeGroup',
            'Code', 'Accordion', 'AccordionGroup', 'Card', 'CardGroup',
            'Steps', 'Step', 'Tabs', 'Tab', 'Frame', 'Icon'
        }
        for match in re.finditer(component_pattern, body, re.DOTALL):
            component_name = match.group(1) or match.group(4)
            # Lowercase tags are plain HTML, not Mintlify components.
            if component_name and component_name[0].isupper():
                if component_name not in known_components:
                    self.warnings.append(
                        f"{file_path}: Unknown Mintlify component: <{component_name}>"
                    )

    def check_code_blocks(self, file_path: str, content: str):
        """Warn on fenced code blocks that carry no language tag."""
        _, body = self.extract_frontmatter(content)
        code_blocks = re.findall(r'```([^\n]*)\n(.*?)```', body, re.DOTALL)
        for i, (language_tag, _code_content) in enumerate(code_blocks):
            if not language_tag.strip():
                self.warnings.append(
                    f"{file_path}: Code block #{i+1} missing language tag"
                )

    def check_file(self, file_path: str):
        """Run every check against one file; unreadable files become errors."""
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
        except Exception as e:
            self.errors.append(f"{file_path}: Could not read file: {e}")
            return
        self.check_frontmatter(file_path, content)
        self.check_internal_links(file_path, content)
        self.check_image_paths(file_path, content)
        self.check_mintlify_components(file_path, content)
        self.check_code_blocks(file_path, content)

    def run_checks(self, file_list: List[str]) -> bool:
        """Check every existing file in file_list; True when no errors found."""
        for file_path in file_list:
            if Path(file_path).exists():
                self.check_file(file_path)
        return len(self.errors) == 0
if __name__ == "__main__":
    # Driver: read the changed-file list written by the workflow, run every
    # check, print a report, and exit non-zero when blocking errors exist.
    with open('/tmp/changed_files.txt', 'r') as handle:
        targets = [entry.strip() for entry in handle if entry.strip()]

    checker = DocumentationQualityChecker()
    passed = checker.run_checks(targets)

    if checker.errors:
        print("ERRORS:")
        for message in checker.errors:
            print(f"  ❌ {message}")
    if checker.warnings:
        print("\nWARNINGS:")
        for message in checker.warnings:
            print(f"  ⚠️ {message}")

    if not passed:
        print(f"\n❌ Quality check failed with {len(checker.errors)} error(s)")
        sys.exit(1)
    if checker.warnings:
        print(f"\n✅ Quality check passed with {len(checker.warnings)} warning(s)")
    else:
        print("\n✅ All quality checks passed")
    sys.exit(0)
EOFPYTHON
- name: Run quality checks
if: steps.get-files.outputs.has_changes == 'true'
id: check
run: |
# tee preserves the log for the PR-comment step; PIPESTATUS[0] keeps the
# python exit code across the pipe (bash-only; bash is the default shell
# on ubuntu-latest runners).
python /tmp/quality_check.py 2>&1 | tee /tmp/quality_output.log
CHECK_EXIT_CODE=${PIPESTATUS[0]}
if [ $CHECK_EXIT_CODE -ne 0 ]; then
echo "check_failed=true" >> $GITHUB_OUTPUT
exit 1
else
echo "check_failed=false" >> $GITHUB_OUTPUT
fi
- name: Comment quality check results on PR
  # always() so results are posted even when the previous step failed the job.
  if: always() && steps.get-files.outputs.has_changes == 'true'
  uses: actions/github-script@v7
  with:
    script: |
      const fs = require('fs');
      // Hidden marker lets re-runs update one sticky comment instead of
      // posting a brand-new comment on every push (avoids PR comment spam).
      const marker = '<!-- documentation-quality-check -->';

      let qualityLog = '';
      try {
        qualityLog = fs.readFileSync('/tmp/quality_output.log', 'utf8');
      } catch (e) {
        qualityLog = 'Could not read quality check log';
      }

      const checkFailed = '${{ steps.check.outputs.check_failed }}' === 'true';
      const hasErrors = qualityLog.includes('ERRORS:');
      const hasWarnings = qualityLog.includes('WARNINGS:');

      let body;
      if (!hasErrors && !hasWarnings) {
        body = [
          marker,
          '## ✅ Documentation Quality Check Passed',
          '',
          'All documentation quality checks passed successfully!'
        ].join('\n');
      } else {
        const statusEmoji = checkFailed ? '❌' : '⚠️';
        const statusText = checkFailed ? 'Failed' : 'Passed with Warnings';
        body = [
          marker,
          `## ${statusEmoji} Documentation Quality Check ${statusText}`,
          '',
          qualityLog,
          '',
          checkFailed
            ? '**Action Required**: Please fix the errors listed above before merging.'
            : '**Warnings**: Consider addressing these warnings to improve documentation quality.',
          '',
          'Common fixes:',
          '- Add `title` and `description` to frontmatter in all MDX files',
          '- Use relative paths for internal links',
          '- Add language tags to code blocks (e.g., ```python, ```javascript)',
          '- Verify image paths are correct',
          '- Check Mintlify component syntax'
        ].join('\n');
      }

      // Update the existing sticky comment when present; otherwise create it.
      const { data: comments } = await github.rest.issues.listComments({
        owner: context.repo.owner,
        repo: context.repo.repo,
        issue_number: context.issue.number
      });
      const existing = comments.find(c => c.body && c.body.includes(marker));
      if (existing) {
        await github.rest.issues.updateComment({
          owner: context.repo.owner,
          repo: context.repo.repo,
          comment_id: existing.id,
          body
        });
      } else {
        await github.rest.issues.createComment({
          owner: context.repo.owner,
          repo: context.repo.repo,
          issue_number: context.issue.number,
          body
        });
      }

275
.github/workflows/validate-docs-json.yml vendored Normal file
View File

@@ -0,0 +1,275 @@
name: Validate docs.json

on:
  pull_request:
    branches: [main, revamp]
    paths:
      - 'docs.json'
      # The validator also reads the translation config, so re-validate
      # whenever that file changes.
      - 'tools/translate/config.json'

permissions:
  contents: read        # checkout
  pull-requests: write  # post the failure comment

jobs:
  validate-structure:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        # A shallow checkout is enough: this job never inspects git history.
        uses: actions/checkout@v4

      - name: Setup Python
        # v5 runs on node20; setup-python@v4 (node16) is deprecated by GitHub.
        uses: actions/setup-python@v5
        with:
          python-version: '3.9'
# Writes the validator to /tmp; quoted 'EOFPYTHON' disables shell expansion
# inside the heredoc body.
- name: Create validation script
run: |
cat > /tmp/validate_docs_json.py << 'EOFPYTHON'
import json
import sys
from pathlib import Path
from typing import Set, List, Dict, Any
class DocsJsonValidator:
    """Validates docs.json structure, file references, and language parity.

    Findings accumulate in ``self.errors`` (blocking) and ``self.warnings``
    (advisory); ``run_validation()`` returns True when no errors were recorded.
    """

    def __init__(self, repo_root: Path):
        self.repo_root = repo_root
        self.docs_json_path = repo_root / "docs.json"
        self.errors = []
        self.warnings = []
        # Language setup comes from the translation config; fall back to the
        # historical defaults (en -> zh/ja) when the config is unavailable.
        config = self._load_translation_config()
        self.source_lang = config.get("source_language", "en")
        self.target_langs = config.get("target_languages", ["zh", "ja"])
        self.all_langs = [self.source_lang] + self.target_langs
        # Map each language code to its content directory (defaults to the code).
        self.lang_dirs = {}
        configured = config.get("languages", {})
        for lang_code in self.all_langs:
            if lang_code in configured:
                self.lang_dirs[lang_code] = configured[lang_code]["directory"]
            else:
                self.lang_dirs[lang_code] = lang_code

    def _load_translation_config(self) -> Dict:
        """Load tools/translate/config.json relative to the repo root.

        Fix: the path was previously CWD-relative and a missing or malformed
        config crashed __init__ with a raw traceback instead of a clean report.
        """
        config_path = self.repo_root / "tools" / "translate" / "config.json"
        try:
            with open(config_path, "r", encoding="utf-8") as f:
                return json.load(f)
        except FileNotFoundError:
            self.warnings.append(
                f"Translation config not found at {config_path}; using default languages"
            )
            return {}
        except json.JSONDecodeError as e:
            self.errors.append(f"Invalid JSON in {config_path}: {e}")
            return {}

    def load_docs_json(self) -> Dict:
        """Parse docs.json; returns {} (and records an error) on failure."""
        try:
            with open(self.docs_json_path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except json.JSONDecodeError as e:
            self.errors.append(f"Invalid JSON syntax in docs.json: {e}")
            return {}
        except FileNotFoundError:
            self.errors.append("docs.json file not found")
            return {}

    def validate_json_schema(self, docs_data: Dict) -> bool:
        """Require the top-level fields Mintlify needs; False on first miss."""
        required_fields = ['name', 'navigation']
        for field in required_fields:
            if field not in docs_data:
                self.errors.append(f"Missing required field: {field}")
                return False
        return True

    def extract_file_paths(self, pages: Any, collected: Set[str]):
        """Recursively collect page path strings from a navigation subtree."""
        if isinstance(pages, str):
            collected.add(pages)
        elif isinstance(pages, dict):
            if 'pages' in pages:
                self.extract_file_paths(pages['pages'], collected)
            elif 'page' in pages:
                collected.add(pages['page'])
        elif isinstance(pages, list):
            for item in pages:
                self.extract_file_paths(item, collected)

    def get_language_section(self, docs_data: Dict, lang: str) -> Dict:
        """Find the navigation section for `lang`; {} when absent.

        Supports both 'versions[0].languages' and top-level 'languages' layouts.
        """
        nav = docs_data.get('navigation', {})
        if 'versions' in nav and len(nav['versions']) > 0:
            languages = nav['versions'][0].get('languages', [])
        elif 'languages' in nav:
            languages = nav['languages']
        else:
            return {}
        for lang_data in languages:
            if lang_data.get('language') == lang:
                return lang_data
        return {}

    def validate_file_existence(self, docs_data: Dict):
        """Error on navigation entries whose .md/.mdx file does not exist."""
        for lang in self.all_langs:
            lang_section = self.get_language_section(docs_data, lang)
            if not lang_section:
                continue
            file_paths = set()
            if 'dropdowns' in lang_section:
                for dropdown in lang_section['dropdowns']:
                    if 'pages' in dropdown:
                        self.extract_file_paths(dropdown['pages'], file_paths)
            for file_path in file_paths:
                mdx_path = self.repo_root / f"{file_path}.mdx"
                md_path = self.repo_root / f"{file_path}.md"
                if not mdx_path.exists() and not md_path.exists():
                    self.errors.append(f"Referenced file not found: {file_path} (.md or .mdx)")

    def validate_language_consistency(self, docs_data: Dict):
        """Warn when languages are missing or dropdown counts diverge."""
        sections = {}
        for lang in self.all_langs:
            lang_section = self.get_language_section(docs_data, lang)
            if lang_section:
                sections[lang] = lang_section
        if len(sections) < len(self.all_langs):
            missing = set(self.all_langs) - set(sections.keys())
            self.warnings.append(f"Missing language sections: {', '.join(missing)}")
        if len(sections) < 2:
            return  # nothing to compare
        source_section = sections.get(self.source_lang)
        if not source_section:
            return  # no baseline to compare against
        source_dropdowns = source_section.get('dropdowns', [])
        source_dropdown_count = len(source_dropdowns)
        for lang in self.target_langs:
            if lang not in sections:
                continue
            target_dropdowns = sections[lang].get('dropdowns', [])
            target_dropdown_count = len(target_dropdowns)
            if source_dropdown_count != target_dropdown_count:
                self.warnings.append(
                    f"Dropdown count mismatch between {self.source_lang} ({source_dropdown_count}) "
                    f"and {lang} ({target_dropdown_count})"
                )

    def validate_no_duplicate_paths(self, docs_data: Dict):
        """Error when the same page path appears twice within one language."""
        for lang in self.all_langs:
            lang_section = self.get_language_section(docs_data, lang)
            if not lang_section:
                continue
            file_paths = set()
            duplicates = []

            def check_duplicates(pages: Any):
                # Mirrors extract_file_paths but records repeat sightings.
                if isinstance(pages, str):
                    if pages in file_paths:
                        duplicates.append(pages)
                    file_paths.add(pages)
                elif isinstance(pages, dict):
                    if 'pages' in pages:
                        check_duplicates(pages['pages'])
                    elif 'page' in pages:
                        path = pages['page']
                        if path in file_paths:
                            duplicates.append(path)
                        file_paths.add(path)
                elif isinstance(pages, list):
                    for item in pages:
                        check_duplicates(item)

            if 'dropdowns' in lang_section:
                for dropdown in lang_section['dropdowns']:
                    if 'pages' in dropdown:
                        check_duplicates(dropdown['pages'])
            if duplicates:
                self.errors.append(f"Duplicate file paths in {lang} section: {', '.join(duplicates)}")

    def run_validation(self) -> bool:
        """Run all validations; True iff no errors were recorded."""
        docs_data = self.load_docs_json()
        if not docs_data:
            return False
        if not self.validate_json_schema(docs_data):
            return False
        self.validate_file_existence(docs_data)
        self.validate_language_consistency(docs_data)
        self.validate_no_duplicate_paths(docs_data)
        return len(self.errors) == 0
if __name__ == "__main__":
    # Driver: validate docs.json in the current working directory, print a
    # report, and exit non-zero when blocking errors were found.
    validator = DocsJsonValidator(Path.cwd())
    ok = validator.run_validation()

    if validator.errors:
        print("ERRORS:")
        for msg in validator.errors:
            print(f"  ❌ {msg}")
    if validator.warnings:
        print("\nWARNINGS:")
        for msg in validator.warnings:
            print(f"  ⚠️ {msg}")

    if not ok:
        print(f"\n❌ docs.json validation failed with {len(validator.errors)} error(s)")
        sys.exit(1)
    print("\n✅ docs.json validation passed")
    sys.exit(0)
EOFPYTHON
- name: Run validation
id: validate
run: |
# tee keeps the log for the PR-comment step; PIPESTATUS[0] preserves the
# python exit code across the pipe (bash is the default shell on
# ubuntu-latest runners).
python /tmp/validate_docs_json.py 2>&1 | tee /tmp/validation_output.log
VALIDATION_EXIT_CODE=${PIPESTATUS[0]}
if [ $VALIDATION_EXIT_CODE -ne 0 ]; then
echo "validation_failed=true" >> $GITHUB_OUTPUT
exit 1
else
echo "validation_failed=false" >> $GITHUB_OUTPUT
fi
# Posts a PR comment only when the validation step actually failed.
- name: Comment validation results on PR
if: failure() && steps.validate.outputs.validation_failed == 'true'
uses: actions/github-script@v7
with:
script: |
const fs = require('fs');
// Log was written by the previous step's tee; fall back to a stub if absent.
let validationLog = '';
try {
validationLog = fs.readFileSync('/tmp/validation_output.log', 'utf8');
} catch (e) {
validationLog = 'Could not read validation log';
}
// Embed the full validator output in a fenced block inside the comment.
const comment = `## ❌ docs.json Validation Failed
The docs.json file has structural issues that need to be fixed:
\`\`\`
${validationLog}
\`\`\`
Please ensure:
- All referenced files exist in the repository
- Language sections (en/zh/ja) are consistent
- No duplicate file paths within a language section
- Required fields are present (name, navigation)
- Valid JSON syntax
Refer to the [Mintlify docs.json schema](https://mintlify.com/docs.json) for more details.`;
// On pull_request events context.issue.number is the PR number.
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: context.issue.number,
body: comment
});