# Mirror of https://github.com/langgenius/dify-docs.git
# Synced 2026-03-27 13:28:32 +07:00
# 299 lines, 11 KiB, YAML
name: Analyze Documentation Changes

# Trigger on PR open / update / reopen, restricted to PRs that touch the
# navigation file (docs.json) or a Markdown/MDX page under one of the
# en, ja-jp or zh-hans trees.
on:
  pull_request:
    types:
      - opened
      - synchronize
      - reopened
    paths:
      - 'docs.json'
      - 'en/**/*.md'
      - 'en/**/*.mdx'
      - 'ja-jp/**/*.md'
      - 'ja-jp/**/*.mdx'
      - 'zh-hans/**/*.md'
      - 'zh-hans/**/*.mdx'

# The workflow token is limited to read-only access on repository contents
# and pull requests.
permissions:
  contents: read
  pull-requests: read

jobs:
  # Categorizes the PR with tools/translate/pr_analyzer.py and, for
  # English-only PRs, writes a sync plan under /tmp and uploads it as an
  # artifact.
  analyze:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout PR
        uses: actions/checkout@v4
        with:
          # Full history so the base and head commits can be diffed.
          fetch-depth: 0

      - name: Set up Python
        # v5 takes the same inputs as v4 but runs on the Node 20 runtime,
        # matching the other actions used in this job.
        uses: actions/setup-python@v5
        with:
          python-version: '3.9'

      # Outputs: pr_type, should_skip on success; error, error_message when
      # the analyzer exits non-zero.
      - name: Categorize and validate PR changes
        id: categorize
        run: |
          echo "Categorizing PR changes..."

          # Get base and head commits
          BASE_SHA="${{ github.event.pull_request.base.sha }}"
          HEAD_SHA="${{ github.event.pull_request.head.sha }}"

          echo "Base SHA: $BASE_SHA"
          echo "Head SHA: $HEAD_SHA"

          # Run PR analyzer
          cd tools/translate

          # The analyzer is invoked as the `if` condition on purpose: the
          # default run shell is `bash -e`, so a bare failing command would
          # abort the script before the error branch could record anything.
          if python pr_analyzer.py "$BASE_SHA" "$HEAD_SHA" > /tmp/pr_analysis_output.txt 2>&1; then
            # Successful analysis
            # NOTE(review): the file also contains the analyzer's stderr
            # (2>&1) and is executed as shell here - confirm the analyzer
            # only ever prints key=value lines on success.
            source /tmp/pr_analysis_output.txt
            echo "PR categorization successful"
            echo "PR Type: $pr_type"
            echo "Should Skip: $should_skip"

            # Set GitHub outputs
            echo "pr_type=$pr_type" >> "$GITHUB_OUTPUT"
            echo "should_skip=$should_skip" >> "$GITHUB_OUTPUT"

            if [ "$should_skip" = "true" ]; then
              if [ "$pr_type" = "translation" ]; then
                echo "✅ Translation-only PR detected. Skipping automation (direct review process)."
              elif [ "$pr_type" = "none" ]; then
                echo "✅ No relevant documentation changes detected. Skipping workflow."
              fi
              exit 0
            fi
          else
            # Analysis failed - likely mixed PR
            echo "PR categorization failed - likely mixed content PR"

            # `|| true` keeps a no-match grep from aborting under `-e`; the
            # default is applied separately because the pipeline's exit
            # status is cut's, which succeeds even on empty input.
            ERROR_MESSAGE=$(grep "error_message=" /tmp/pr_analysis_output.txt | cut -d'=' -f2- || true)
            ERROR_MESSAGE="${ERROR_MESSAGE:-Mixed content PR detected}"

            echo "error=mixed_pr" >> "$GITHUB_OUTPUT"
            {
              echo "error_message<<EOF"
              echo "$ERROR_MESSAGE"
              echo "EOF"
            } >> "$GITHUB_OUTPUT"
            exit 1
          fi

      # Writes /tmp/analysis.json, /tmp/changed_files.txt,
      # /tmp/file_analysis.txt, /tmp/docs_json_changed.txt and
      # /tmp/structure_changes.json; sets has_changes=true on success.
      - name: Analyze English changes for translation
        if: steps.categorize.outputs.pr_type == 'english'
        id: analyze
        env:
          # PR metadata is handed over through the environment rather than
          # expanded into the script text, so a crafted title or login cannot
          # be interpreted by the shell or corrupt the generated JSON.
          PR_NUMBER: ${{ github.event.pull_request.number }}
          PR_TITLE: ${{ github.event.pull_request.title }}
          PR_AUTHOR: ${{ github.event.pull_request.user.login }}
          PR_REPOSITORY: ${{ github.repository }}
          PR_REF: ${{ github.ref }}
        run: |
          echo "Analyzing English changes for automatic translation..."

          BASE_SHA="${{ github.event.pull_request.base.sha }}"
          HEAD_SHA="${{ github.event.pull_request.head.sha }}"

          # Get all changed files (not just English ones for file analysis)
          CHANGED_FILES=$(git diff --name-only "$BASE_SHA" "$HEAD_SHA")

          # Count changes for security limits
          FILE_COUNT=$(echo "$CHANGED_FILES" | wc -l)
          echo "Changed files count: $FILE_COUNT"

          # Security check: Limit number of files
          MAX_FILES=50
          if [ "$FILE_COUNT" -gt "$MAX_FILES" ]; then
            echo "Error: Too many files changed ($FILE_COUNT > $MAX_FILES)"
            echo "error=too_many_files" >> "$GITHUB_OUTPUT"
            exit 1
          fi

          # Make the computed values visible to the Python snippets below.
          export BASE_SHA HEAD_SHA FILE_COUNT

          # Create analysis report. json.dump does the escaping, so titles
          # containing quotes or backslashes still produce valid JSON.
          python3 - <<'EOF'
          import json
          import os
          import time

          report = {
              "pr_number": int(os.environ["PR_NUMBER"]),
              "pr_title": os.environ["PR_TITLE"],
              "pr_author": os.environ["PR_AUTHOR"],
              "base_sha": os.environ["BASE_SHA"],
              "head_sha": os.environ["HEAD_SHA"],
              "file_count": int(os.environ["FILE_COUNT"]),
              "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
              "repository": os.environ["PR_REPOSITORY"],
              "ref": os.environ["PR_REF"],
              "pr_type": "english",
          }

          with open("/tmp/analysis.json", "w") as f:
              json.dump(report, f, indent=2)
              f.write("\n")
          EOF

          # Save changed files list
          echo "$CHANGED_FILES" > /tmp/changed_files.txt

          # Analyze file types and sizes for English files that need translation
          > /tmp/file_analysis.txt

          # Security check: File size limit (10MB)
          MAX_SIZE=$((10 * 1024 * 1024))

          while IFS= read -r file; do
            if [[ "$file" =~ ^en/.*\.(md|mdx)$ ]] && [ -f "$file" ]; then
              # Try the BSD form first, then the GNU form; fall back to 0.
              SIZE=$(stat -f%z "$file" 2>/dev/null || stat -c%s "$file" 2>/dev/null || echo "0")
              echo "$file|$SIZE" >> /tmp/file_analysis.txt

              if [ "$SIZE" -gt "$MAX_SIZE" ]; then
                echo "Error: File $file exceeds size limit ($SIZE > $MAX_SIZE)"
                echo "error=file_too_large" >> "$GITHUB_OUTPUT"
                exit 1
              fi
            fi
          done <<< "$CHANGED_FILES"

          # Check for docs.json changes
          if echo "$CHANGED_FILES" | grep -q '^docs\.json$'; then
            echo "true" > /tmp/docs_json_changed.txt

            # Use PR analyzer's docs.json analysis
            cd tools/translate
            python3 - <<'EOF'
          import json
          import os
          import sys

          sys.path.append('.')
          from pr_analyzer import PRAnalyzer

          analyzer = PRAnalyzer(os.environ["BASE_SHA"], os.environ["HEAD_SHA"])
          docs_changes = analyzer.analyze_docs_json_changes()

          structure_changes = {
              "structure_changed": docs_changes["any_docs_json_changes"],
              "navigation_modified": docs_changes["english_section"],
              "languages_affected": ["zh-hans", "ja-jp"] if docs_changes["english_section"] else []
          }

          with open("/tmp/structure_changes.json", "w") as f:
              json.dump(structure_changes, f, indent=2)
          EOF
          else
            echo "false" > /tmp/docs_json_changed.txt
            echo '{"structure_changed": false, "navigation_modified": false, "languages_affected": []}' > /tmp/structure_changes.json
          fi

          echo "has_changes=true" >> "$GITHUB_OUTPUT"
          echo "Analysis complete"

      # Re-checks every path recorded in /tmp/file_analysis.txt before it is
      # handed on for translation.
      - name: Validate file paths
        if: steps.analyze.outputs.has_changes == 'true'
        run: |
          echo "Validating English file paths for translation..."

          # Security: Validate English files that will be translated
          while IFS='|' read -r file size; do
            if [ -n "$file" ]; then
              # Check for directory traversal attempts
              if echo "$file" | grep -q '\.\./'; then
                echo "Error: Invalid file path detected: $file"
                exit 1
              fi

              # Check file extension for English files
              if ! echo "$file" | grep -qE '\.(md|mdx)$'; then
                echo "Error: Invalid file type for translation: $file"
                exit 1
              fi

              # Check path starts with en/ (only English files need translation)
              if ! echo "$file" | grep -qE '^en/'; then
                echo "Error: Non-English file in translation list: $file"
                exit 1
              fi
            fi
          done < /tmp/file_analysis.txt

          echo "All English file paths validated for translation"

      # Combines the files written by the analyze step into
      # /tmp/sync_plan.json.
      - name: Create analysis summary
        if: steps.analyze.outputs.has_changes == 'true'
        run: |
          echo "Creating analysis summary for English changes..."

          # Create a comprehensive analysis summary
          python3 - <<'EOF'
          import json
          import os

          # Load analysis data
          with open("/tmp/analysis.json") as f:
              analysis = json.load(f)

          # Load file analysis (English files to translate)
          files_to_sync = []
          with open("/tmp/file_analysis.txt") as f:
              for line in f:
                  if line.strip():
                      # Split on the last "|" only: the size is always the
                      # final field, while a path may itself contain "|".
                      file_path, size = line.strip().rsplit("|", 1)
                      files_to_sync.append({
                          "path": file_path,
                          "size": int(size),
                          "type": "mdx" if file_path.endswith(".mdx") else "md"
                      })

          # Add docs.json if it changed
          with open("/tmp/docs_json_changed.txt") as f:
              docs_json_changed = f.read().strip() == "true"

          if docs_json_changed:
              # Get docs.json size (from repo root)
              docs_json_size = os.path.getsize("docs.json")
              files_to_sync.append({
                  "path": "docs.json",
                  "size": docs_json_size,
                  "type": "json"
              })

          # Load structure changes
          with open("/tmp/structure_changes.json") as f:
              structure_changes = json.load(f)

          # Create sync plan
          sync_plan = {
              "metadata": analysis,
              "files_to_sync": files_to_sync,
              "structure_changes": structure_changes,
              "target_languages": ["zh-hans", "ja-jp"],
              "sync_required": len(files_to_sync) > 0 or structure_changes.get("structure_changed", False)
          }

          # Save sync plan
          with open("/tmp/sync_plan.json", "w") as f:
              json.dump(sync_plan, f, indent=2)

          print(f"English sync plan created: {len(files_to_sync)} files to translate")
          if structure_changes.get("structure_changed"):
              print("Documentation structure changes detected")
          EOF

      - name: Upload analysis artifacts
        if: steps.analyze.outputs.has_changes == 'true'
        uses: actions/upload-artifact@v4
        with:
          name: docs-sync-analysis-${{ github.event.pull_request.number }}
          path: |
            /tmp/analysis.json
            /tmp/changed_files.txt
            /tmp/file_analysis.txt
            /tmp/sync_plan.json
            /tmp/docs_json_changed.txt
            /tmp/structure_changes.json
          retention-days: 1

      # Best-effort PR comment carrying the analyzer's error message.
      # NOTE(review): the workflow token only has `pull-requests: read`, so
      # the comment call is presumably rejected and the catch branch logs the
      # message instead - confirm this is the intended behaviour.
      - name: Report mixed PR error
        if: failure() && steps.categorize.outputs.error == 'mixed_pr'
        uses: actions/github-script@v7
        continue-on-error: true
        env:
          # Passed via the environment so backticks or `${` sequences in the
          # message cannot break out of (or inject into) the script below.
          ERROR_MESSAGE: ${{ steps.categorize.outputs.error_message }}
        with:
          script: |
            // An empty comment body would be rejected, so fall back to a
            // generic message when the analyzer provided none.
            const errorMessage = process.env.ERROR_MESSAGE || 'Mixed content PR detected';

            try {
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: context.issue.number,
                body: errorMessage
              });
              console.log('Posted mixed PR error message to PR');
            } catch (error) {
              console.log('Could not comment on PR:', error.message);
              console.log('Error message would have been:');
              console.log(errorMessage);
            }