# Mirror of https://github.com/langgenius/dify-docs.git
# Synced 2026-03-27 13:28:32 +07:00
#
# Context from the last upstream commit:
# When a PR is force-pushed, github.event.before points to an orphaned commit
# that isn't fetched by actions/checkout. This caused the workflow to fail with
# "Error: command not found" (exit code 127) because:
#   1. pr_analyzer.py's git diff failed on the inaccessible commit
#   2. Error messages went to stdout, breaking the `source` command
# That fix:
#   - Validates COMPARE_BASE accessibility before use via `git cat-file`
#   - Falls back to the merge-base strategy if the commit is orphaned
#   - Sends error messages to stderr for defensive error handling
# Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
name: Analyze Documentation Changes

on:
  pull_request:
    branches: [main, revamp]
    types: [opened, synchronize, reopened]
    paths:
      # IMPORTANT: These paths should match the language directories defined in tools/translate/config.json
      # Currently configured for: en (source), cn, jp (targets)
      # If you add/remove languages in config.json, update these paths accordingly
      - 'docs.json'
      - 'en/**/*.md'
      - 'en/**/*.mdx'
      - 'en/**/openapi*.json'
      - 'zh/**/*.md'
      - 'zh/**/*.mdx'
      - 'zh/**/openapi*.json'
      - 'ja/**/*.md'
      - 'ja/**/*.mdx'
      - 'ja/**/openapi*.json'
      - 'versions/**/*.md'
      - 'versions/**/*.mdx'

permissions:
  contents: read
  pull-requests: read

jobs:
  analyze:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout PR
        uses: actions/checkout@v4
        with:
          # Full history is required: later steps run `git merge-base` and
          # `git diff` against commits that may be far behind the PR head.
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.9'

      # Decides which commit range the rest of the workflow analyzes and
      # publishes it as step outputs: compare_base, compare_head, is_incremental.
      - name: Determine comparison range
        id: determine-range
        env:
          GH_TOKEN: ${{ github.token }}
        run: |
          echo "Determining comparison range..."

          PR_NUMBER="${{ github.event.pull_request.number }}"
          EVENT_ACTION="${{ github.event.action }}"
          EVENT_BEFORE="${{ github.event.before }}"
          PR_BASE="${{ github.event.pull_request.base.sha }}"
          PR_HEAD="${{ github.event.pull_request.head.sha }}"

          if [ "$EVENT_ACTION" = "synchronize" ]; then
            echo "🔄 Synchronize event - detecting incremental changes"

            # Try to get last processed commit from the open translation PR
            TRANSLATION_PR=$(gh pr list \
              --search "head:docs-sync-pr-${PR_NUMBER} state:open" \
              --json number \
              --jq '.[0].number // empty' 2>/dev/null || echo "")

            LAST_PROCESSED=""
            if [ -n "$TRANSLATION_PR" ]; then
              echo "Found translation PR #${TRANSLATION_PR}"

              # Extract last processed commit from comments (reverse order to get latest)
              LAST_PROCESSED=$(gh pr view "$TRANSLATION_PR" \
                --json comments \
                --jq '.comments | reverse | .[] | .body' 2>/dev/null \
                | grep -oP 'Last-Processed-Commit: \K[a-f0-9]+' \
                | head -1 || echo "")

              if [ -n "$LAST_PROCESSED" ]; then
                echo "✅ Found tracked commit in translation PR: $LAST_PROCESSED"
              fi
            fi

            # Use tracked commit if available, otherwise fall back to github.event.before
            if [ -n "$LAST_PROCESSED" ]; then
              COMPARE_BASE="$LAST_PROCESSED"
              echo "Using last processed commit: $COMPARE_BASE"
            elif [ -n "$EVENT_BEFORE" ] && [ "$EVENT_BEFORE" != "0000000000000000000000000000000000000000" ]; then
              COMPARE_BASE="$EVENT_BEFORE"
              echo "Using github.event.before: $COMPARE_BASE"
            else
              # Fallback to PR base (first push after PR creation)
              COMPARE_BASE="$PR_BASE"
              echo "⚠️ No previous commit found, using PR base: $COMPARE_BASE"
            fi

            COMPARE_HEAD="$PR_HEAD"
            IS_INCREMENTAL="true"
          else
            echo "🆕 New PR event - analyzing full changes"

            # Use merge-base to find where the branch diverged from main.
            # This allows stale branches to trigger automation without false
            # "mixed content" errors.
            MERGE_BASE=$(git merge-base "$PR_BASE" "$PR_HEAD")
            echo "Branch diverged from main at: $MERGE_BASE"

            COMPARE_BASE="$MERGE_BASE"
            COMPARE_HEAD="$PR_HEAD"
            IS_INCREMENTAL="false"
          fi

          # Validate COMPARE_BASE is accessible (handles force-push orphaned commits)
          if ! git cat-file -e "${COMPARE_BASE}^{commit}" 2>/dev/null; then
            echo "⚠️ Commit $COMPARE_BASE not accessible (likely force-pushed away)"
            echo "Falling back to merge-base strategy"
            COMPARE_BASE=$(git merge-base "$PR_BASE" "$PR_HEAD")
            if ! git cat-file -e "${COMPARE_BASE}^{commit}" 2>/dev/null; then
              echo "❌ Error: Could not determine valid comparison base"
              exit 1
            fi
            IS_INCREMENTAL="false"
            echo "Using merge-base: $COMPARE_BASE"
          fi

          echo "compare_base=$COMPARE_BASE" >> "$GITHUB_OUTPUT"
          echo "compare_head=$COMPARE_HEAD" >> "$GITHUB_OUTPUT"
          echo "is_incremental=$IS_INCREMENTAL" >> "$GITHUB_OUTPUT"

          echo "📊 Comparison range: $COMPARE_BASE...$COMPARE_HEAD"

      # Classifies the PR (source / translation / none / mixed) via
      # tools/translate/pr_analyzer.py and exposes pr_type / should_skip.
      - name: Categorize and validate PR changes
        id: categorize
        run: |
          echo "Categorizing PR changes..."

          # Get comparison range from previous step
          BASE_SHA="${{ steps.determine-range.outputs.compare_base }}"
          HEAD_SHA="${{ steps.determine-range.outputs.compare_head }}"

          echo "Base SHA: $BASE_SHA"
          echo "Head SHA: $HEAD_SHA"

          # Run PR analyzer (stderr goes to console, stdout to file for sourcing).
          # The analyzer is invoked inside the `if` condition on purpose:
          # `run:` scripts execute under `bash -e`, so a bare
          # `python ...; if [ $? -eq 0 ]` would abort the step on failure and
          # the mixed-PR error branch below would be unreachable.
          cd tools/translate
          if python pr_analyzer.py "$BASE_SHA" "$HEAD_SHA" > /tmp/pr_analysis_output.txt; then
            # Successful analysis — the output file contains shell
            # assignments (pr_type=..., should_skip=...) meant to be sourced.
            source /tmp/pr_analysis_output.txt
            echo "PR categorization successful"
            echo "PR Type: $pr_type"
            echo "Should Skip: $should_skip"

            # Set GitHub outputs
            echo "pr_type=$pr_type" >> "$GITHUB_OUTPUT"
            echo "should_skip=$should_skip" >> "$GITHUB_OUTPUT"

            if [ "$should_skip" = "true" ]; then
              if [ "$pr_type" = "translation" ]; then
                echo "✅ Translation-only PR detected. Skipping automation (direct review process)."
              elif [ "$pr_type" = "none" ]; then
                echo "✅ No relevant documentation changes detected. Skipping workflow."
              fi
              exit 0
            fi
          else
            # Analysis failed - likely mixed PR
            echo "PR categorization failed - likely mixed content PR"
            ERROR_MESSAGE=$(grep "error_message=" /tmp/pr_analysis_output.txt | cut -d'=' -f2- || echo "Mixed content PR detected")
            echo "error=mixed_pr" >> "$GITHUB_OUTPUT"
            echo "error_message<<EOF" >> "$GITHUB_OUTPUT"
            echo "$ERROR_MESSAGE" >> "$GITHUB_OUTPUT"
            echo "EOF" >> "$GITHUB_OUTPUT"
            exit 1
          fi

      # For source-language PRs: enumerate changed files, enforce size/count
      # limits, and write the analysis fixtures consumed by later steps.
      - name: Analyze source language changes for translation
        if: steps.categorize.outputs.pr_type == 'source'
        id: analyze
        env:
          # Untrusted PR metadata is passed via the environment instead of
          # being interpolated into the script (script-injection hardening —
          # a crafted PR title must not be able to run shell/Python code).
          PR_TITLE: ${{ github.event.pull_request.title }}
          PR_AUTHOR: ${{ github.event.pull_request.user.login }}
        run: |
          echo "Analyzing source language changes for automatic translation..."

          # Use comparison range from determine-range step
          BASE_SHA="${{ steps.determine-range.outputs.compare_base }}"
          HEAD_SHA="${{ steps.determine-range.outputs.compare_head }}"
          IS_INCREMENTAL="${{ steps.determine-range.outputs.is_incremental }}"

          echo "Comparison: $BASE_SHA...$HEAD_SHA"
          echo "Incremental: $IS_INCREMENTAL"

          # Get all changed files (not just English ones for file analysis)
          CHANGED_FILES=$(git diff --name-only "$BASE_SHA" "$HEAD_SHA")

          # Count changes for security limits
          FILE_COUNT=$(echo "$CHANGED_FILES" | wc -l)
          echo "Changed files count: $FILE_COUNT"

          # Security check: Limit number of files
          MAX_FILES=50
          if [ "$FILE_COUNT" -gt "$MAX_FILES" ]; then
            echo "Error: Too many files changed ($FILE_COUNT > $MAX_FILES)"
            echo "error=too_many_files" >> "$GITHUB_OUTPUT"
            exit 1
          fi

          # Create analysis report. JSON is assembled in Python so the
          # untrusted title/author are properly escaped (a `"` in the PR
          # title would otherwise corrupt a heredoc-built JSON document).
          export BASE_SHA HEAD_SHA IS_INCREMENTAL FILE_COUNT
          python3 - <<EOF
          import json
          import os

          analysis = {
              "pr_number": ${{ github.event.pull_request.number }},
              "pr_title": os.environ.get("PR_TITLE", ""),
              "pr_author": os.environ.get("PR_AUTHOR", ""),
              "base_sha": os.environ["BASE_SHA"],
              "head_sha": os.environ["HEAD_SHA"],
              "is_incremental": os.environ["IS_INCREMENTAL"] == "true",
              "event_action": "${{ github.event.action }}",
              "file_count": int(os.environ["FILE_COUNT"]),
              "timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
              "repository": "${{ github.repository }}",
              "ref": "${{ github.ref }}",
              "pr_type": "source",
          }
          with open("/tmp/analysis.json", "w") as f:
              json.dump(analysis, f, indent=2)
          EOF

          # Save changed files list
          echo "$CHANGED_FILES" > /tmp/changed_files.txt

          # Analyze file types and sizes for source language files that need translation
          > /tmp/file_analysis.txt
          > /tmp/openapi_analysis.txt
          while IFS= read -r file; do
            if [[ "$file" =~ ^en/.*\.(md|mdx)$ ]] && [ -f "$file" ]; then
              # `stat -f%z` is the BSD form, `-c%s` the GNU form; both are
              # tried so the script is portable across runner images.
              SIZE=$(stat -f%z "$file" 2>/dev/null || stat -c%s "$file" 2>/dev/null || echo "0")
              echo "$file|$SIZE|markdown" >> /tmp/file_analysis.txt

              # Security check: File size limit (10MB)
              MAX_SIZE=$((10 * 1024 * 1024))
              if [ "$SIZE" -gt "$MAX_SIZE" ]; then
                echo "Error: File $file exceeds size limit ($SIZE > $MAX_SIZE)"
                echo "error=file_too_large" >> "$GITHUB_OUTPUT"
                exit 1
              fi
            elif [[ "$file" =~ ^en/.*/openapi.*\.json$ ]] && [ -f "$file" ]; then
              SIZE=$(stat -f%z "$file" 2>/dev/null || stat -c%s "$file" 2>/dev/null || echo "0")
              echo "$file|$SIZE|openapi_json" >> /tmp/openapi_analysis.txt

              # Security check: File size limit for OpenAPI JSON (10MB)
              MAX_SIZE=$((10 * 1024 * 1024))
              if [ "$SIZE" -gt "$MAX_SIZE" ]; then
                echo "Error: OpenAPI file $file exceeds size limit ($SIZE > $MAX_SIZE)"
                echo "error=file_too_large" >> "$GITHUB_OUTPUT"
                exit 1
              fi
            fi
          done <<< "$CHANGED_FILES"

          # Check for docs.json changes
          if echo "$CHANGED_FILES" | grep -q '^docs\.json$'; then
            echo "true" > /tmp/docs_json_changed.txt

            # Use PR analyzer's docs.json analysis
            cd tools/translate
            python3 - <<EOF
          import json
          import sys

          sys.path.append('.')
          from pr_analyzer import PRAnalyzer

          analyzer = PRAnalyzer("$BASE_SHA", "$HEAD_SHA")
          docs_changes = analyzer.analyze_docs_json_changes()

          structure_changes = {
              "structure_changed": docs_changes["any_docs_json_changes"],
              "navigation_modified": docs_changes["source_section"],
              "languages_affected": analyzer.config["target_languages"] if docs_changes["source_section"] else []
          }

          with open("/tmp/structure_changes.json", "w") as f:
              json.dump(structure_changes, f, indent=2)
          EOF
          else
            echo "false" > /tmp/docs_json_changed.txt
            echo '{"structure_changed": false, "navigation_modified": false, "languages_affected": []}' > /tmp/structure_changes.json
          fi

          echo "has_changes=true" >> "$GITHUB_OUTPUT"
          echo "Analysis complete"

      # Defensive validation of the file lists produced above: rejects path
      # traversal, wrong extensions, and non-source-language entries.
      - name: Validate file paths
        if: steps.analyze.outputs.has_changes == 'true'
        run: |
          echo "Validating source language file paths for translation..."

          # Security: Validate source language files that will be translated.
          # Lines are `file|size|type`; size/type are unused here.
          while IFS='|' read -r file size file_type; do
            if [ -n "$file" ]; then
              # Check for directory traversal attempts
              if echo "$file" | grep -q '\.\./'; then
                echo "Error: Invalid file path detected: $file"
                exit 1
              fi

              # Check file extension for source language files
              if ! echo "$file" | grep -qE '\.(md|mdx)$'; then
                echo "Error: Invalid file type for translation: $file"
                exit 1
              fi

              # Check path starts with en/ (only source language files need translation)
              if ! echo "$file" | grep -qE '^en/'; then
                echo "Error: Non-source-language file in translation list: $file"
                exit 1
              fi
            fi
          done < /tmp/file_analysis.txt

          # Validate OpenAPI JSON files
          if [ -f "/tmp/openapi_analysis.txt" ] && [ -s "/tmp/openapi_analysis.txt" ]; then
            while IFS='|' read -r file size file_type; do
              if [ -n "$file" ]; then
                # Check for directory traversal
                if echo "$file" | grep -q '\.\./'; then
                  echo "Error: Invalid file path: $file"
                  exit 1
                fi

                # Check file extension
                if ! echo "$file" | grep -qE '\.json$'; then
                  echo "Error: Invalid OpenAPI file type: $file"
                  exit 1
                fi

                # Check path starts with en/
                if ! echo "$file" | grep -qE '^en/'; then
                  echo "Error: Non-source-language OpenAPI file in translation list: $file"
                  exit 1
                fi

                # Check pattern match (configurable via openapi*.json)
                if ! echo "$file" | grep -qE 'openapi.*\.json$'; then
                  echo "Error: File doesn't match OpenAPI pattern: $file"
                  exit 1
                fi
              fi
            done < /tmp/openapi_analysis.txt
          fi

          echo "All source language file paths validated for translation"

      # Builds the sync plan (and the backward-compatible analysis.json)
      # consumed by the execute workflow via the uploaded artifact.
      - name: Create analysis summary
        if: steps.analyze.outputs.has_changes == 'true'
        env:
          # Untrusted PR metadata, read by the Python heredoc via os.environ
          # (script-injection hardening — see the analyze step).
          PR_TITLE: ${{ github.event.pull_request.title }}
          PR_AUTHOR: ${{ github.event.pull_request.user.login }}
        run: |
          echo "Creating analysis summary for source language changes..."

          BASE_SHA="${{ steps.determine-range.outputs.compare_base }}"
          HEAD_SHA="${{ steps.determine-range.outputs.compare_head }}"
          PR_NUMBER=${{ github.event.pull_request.number }}
          IS_INCREMENTAL="${{ steps.determine-range.outputs.is_incremental }}"

          # Use SyncPlanGenerator for consistent logic across workflows
          cd tools/translate
          python3 - <<EOF
          import json
          import os
          import sys

          sys.path.append('.')
          from pr_analyzer import SyncPlanGenerator

          # Generate sync plan using centralized logic
          generator = SyncPlanGenerator("$BASE_SHA", "$HEAD_SHA")
          sync_plan = generator.generate_sync_plan()

          # Add PR metadata to sync plan. Title/author come from the
          # environment so untrusted text cannot break out of this script.
          sync_plan["metadata"].update({
              "pr_number": $PR_NUMBER,
              "pr_title": os.environ.get("PR_TITLE", ""),
              "pr_author": os.environ.get("PR_AUTHOR", ""),
              "event_action": "${{ github.event.action }}",
              "is_incremental": "$IS_INCREMENTAL" == "true",
              "file_count": len(sync_plan["files_to_sync"]) + len(sync_plan["openapi_files_to_sync"]),
              "timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
              "repository": "${{ github.repository }}",
              "ref": "${{ github.ref }}",
              "pr_type": "source",
          })

          # Save analysis.json (for backward compatibility with execute workflow)
          with open("/tmp/analysis.json", "w") as f:
              json.dump(sync_plan["metadata"], f, indent=2)

          # Save sync plan
          with open("/tmp/sync_plan.json", "w") as f:
              json.dump(sync_plan, f, indent=2)

          print("Source language sync plan created:")
          print(f"  - {len(sync_plan['files_to_sync'])} markdown files to translate")
          print(f"  - {len(sync_plan['openapi_files_to_sync'])} OpenAPI JSON files to translate")
          if sync_plan['structure_changes'].get('structure_changed'):
              print("  - Documentation structure changes detected")
          EOF

      - name: Upload analysis artifacts
        if: steps.analyze.outputs.has_changes == 'true'
        uses: actions/upload-artifact@v4
        with:
          name: docs-sync-analysis-${{ github.run_id }}
          path: |
            /tmp/analysis.json
            /tmp/changed_files.txt
            /tmp/file_analysis.txt
            /tmp/openapi_analysis.txt
            /tmp/sync_plan.json
            /tmp/docs_json_changed.txt
            /tmp/structure_changes.json
          retention-days: 1