mirror of
https://github.com/langgenius/dify-docs.git
synced 2026-03-27 13:28:32 +07:00
test: internal contributor workflow test
- Add test documentation file - Update docs.json navigation - Testing two-workflow pattern for internal PRs
This commit is contained in:
125
.github/workflow-config.yml
vendored
Normal file
125
.github/workflow-config.yml
vendored
Normal file
@@ -0,0 +1,125 @@
|
||||
# GitHub Actions Workflow Configuration
|
||||
# Configuration for documentation synchronization workflows
|
||||
|
||||
# Security settings
|
||||
security:
|
||||
# Require manual approval for external PRs
|
||||
require_approval_for_forks: true
|
||||
|
||||
# Maximum files allowed per PR
|
||||
max_files_per_pr: 50
|
||||
|
||||
# Maximum file size in MB
|
||||
max_file_size_mb: 10
|
||||
|
||||
# Allowed file extensions
|
||||
allowed_extensions:
|
||||
- .md
|
||||
- .mdx
|
||||
- .json
|
||||
|
||||
# Trusted contributors (GitHub usernames)
|
||||
trusted_contributors:
|
||||
- guchenhe@gmail.com
|
||||
# Add more trusted contributors here
|
||||
|
||||
# Rate limiting
|
||||
rate_limits:
|
||||
# Maximum sync operations per hour per PR author
|
||||
max_syncs_per_hour: 5
|
||||
|
||||
# Maximum API calls per sync operation
|
||||
max_api_calls_per_sync: 100
|
||||
|
||||
# Translation settings
|
||||
translation:
|
||||
# Target languages
|
||||
target_languages:
|
||||
- zh-hans
|
||||
- ja-jp
|
||||
|
||||
# Maximum files to translate in a single operation
|
||||
max_files_per_batch: 10
|
||||
|
||||
# Timeout for translation operations (seconds)
|
||||
translation_timeout: 300
|
||||
|
||||
# Branch settings
|
||||
branches:
|
||||
# Branches that trigger automatic sync
|
||||
auto_sync_branches:
|
||||
- main
|
||||
- revamp
|
||||
|
||||
# Branch protection for external PRs
|
||||
require_branch_protection: true
|
||||
|
||||
# Prefix for sync branches
|
||||
sync_branch_prefix: "docs-sync-pr-"
|
||||
|
||||
# Notification settings
|
||||
notifications:
|
||||
# Comment on PRs with sync status
|
||||
comment_on_pr: true
|
||||
|
||||
# Include translation preview links
|
||||
include_preview_links: true
|
||||
|
||||
# Notify on sync failures
|
||||
notify_on_failure: true
|
||||
|
||||
# Artifact settings
|
||||
artifacts:
|
||||
# Retention period for analysis artifacts (days)
|
||||
retention_days: 1
|
||||
|
||||
# Maximum artifact size (MB)
|
||||
max_artifact_size_mb: 50
|
||||
|
||||
# Approval workflow
|
||||
approval:
|
||||
# Required approver associations for external PRs
|
||||
required_approver_associations:
|
||||
- OWNER
|
||||
- MEMBER
|
||||
- COLLABORATOR
|
||||
|
||||
# Require review from code owners
|
||||
require_code_owner_review: false
|
||||
|
||||
# Auto-approve for trusted contributors
|
||||
auto_approve_trusted: true
|
||||
|
||||
# Dry run mode (for testing)
|
||||
dry_run:
|
||||
# Enable dry run mode (no actual changes made)
|
||||
enabled: false
|
||||
|
||||
# Show what would be changed
|
||||
show_diff: true
|
||||
|
||||
# Monitoring and logging
|
||||
monitoring:
|
||||
# Log all operations
|
||||
enable_logging: true
|
||||
|
||||
# Include security events in logs
|
||||
log_security_events: true
|
||||
|
||||
# Monitor API usage
|
||||
monitor_api_usage: true
|
||||
|
||||
# Emergency settings
|
||||
emergency:
|
||||
# Disable all workflows
|
||||
disable_workflows: false
|
||||
|
||||
# Disable external PR processing only
|
||||
disable_external_prs: false
|
||||
|
||||
# Emergency contact (GitHub username)
|
||||
emergency_contact: "guchenhe@gmail.com"
|
||||
|
||||
# Version info
|
||||
version: "1.0.0"
|
||||
updated: "2024-08-22"
|
||||
121
.github/workflows/sync_docs.yml
vendored
121
.github/workflows/sync_docs.yml
vendored
@@ -1,121 +0,0 @@
|
||||
name: Sync Documentation Structure
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- revamp
|
||||
paths:
|
||||
- 'docs.json'
|
||||
- 'en/**/*.md'
|
||||
- 'en/**/*.mdx'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
since_commit:
|
||||
description: 'Git commit to compare against (default: HEAD~1)'
|
||||
required: false
|
||||
default: 'HEAD~1'
|
||||
|
||||
jobs:
|
||||
sync-docs:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0 # Fetch all history for git diff
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.9'
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
cd tools/translate
|
||||
pip install httpx aiofiles python-dotenv
|
||||
|
||||
- name: Check for documentation changes
|
||||
id: check-changes
|
||||
run: |
|
||||
# Determine the commit to compare against
|
||||
if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
|
||||
SINCE_COMMIT="${{ github.event.inputs.since_commit }}"
|
||||
else
|
||||
SINCE_COMMIT="HEAD~1"
|
||||
fi
|
||||
|
||||
echo "Checking for changes since: $SINCE_COMMIT"
|
||||
|
||||
# Check if there are any English doc changes
|
||||
if git diff --name-only $SINCE_COMMIT HEAD | grep -E '^(docs\.json|en/.*\.(md|mdx))$'; then
|
||||
echo "has_changes=true" >> $GITHUB_OUTPUT
|
||||
echo "since_commit=$SINCE_COMMIT" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "has_changes=false" >> $GITHUB_OUTPUT
|
||||
echo "No documentation changes detected"
|
||||
fi
|
||||
|
||||
- name: Run documentation synchronization
|
||||
if: steps.check-changes.outputs.has_changes == 'true'
|
||||
env:
|
||||
DIFY_API_KEY: ${{ secrets.DIFY_API_KEY }}
|
||||
run: |
|
||||
cd tools/translate
|
||||
echo "Starting documentation synchronization..."
|
||||
echo "Since commit: ${{ steps.check-changes.outputs.since_commit }}"
|
||||
|
||||
python sync_and_translate.py "$DIFY_API_KEY" "${{ steps.check-changes.outputs.since_commit }}"
|
||||
|
||||
- name: Check for sync results
|
||||
if: steps.check-changes.outputs.has_changes == 'true'
|
||||
id: check-sync-results
|
||||
run: |
|
||||
# Check if there are any changes to commit
|
||||
if [[ -n $(git status --porcelain) ]]; then
|
||||
echo "has_sync_changes=true" >> $GITHUB_OUTPUT
|
||||
echo "Sync created changes to commit"
|
||||
else
|
||||
echo "has_sync_changes=false" >> $GITHUB_OUTPUT
|
||||
echo "No changes from sync"
|
||||
fi
|
||||
|
||||
- name: Commit and push synchronized changes
|
||||
if: steps.check-sync-results.outputs.has_sync_changes == 'true'
|
||||
run: |
|
||||
git config --global user.name 'github-actions[bot]'
|
||||
git config --global user.email 'github-actions[bot]@users.noreply.github.com'
|
||||
|
||||
# Add all changes
|
||||
git add .
|
||||
|
||||
# Create commit message
|
||||
COMMIT_MSG="docs: auto-sync documentation structure and translations
|
||||
|
||||
🤖 Generated with [Claude Code](https://claude.ai/code)
|
||||
|
||||
Co-Authored-By: Claude <noreply@anthropic.com>"
|
||||
|
||||
git commit -m "$COMMIT_MSG"
|
||||
|
||||
# Push to the current branch
|
||||
echo "Pushing to branch: ${{ github.ref_name }}"
|
||||
git push origin HEAD:${{ github.ref_name }}
|
||||
|
||||
echo "✓ Documentation synchronization completed and pushed"
|
||||
|
||||
- name: Summary
|
||||
if: always()
|
||||
run: |
|
||||
if [[ "${{ steps.check-changes.outputs.has_changes }}" == "true" ]]; then
|
||||
if [[ "${{ steps.check-sync-results.outputs.has_sync_changes }}" == "true" ]]; then
|
||||
echo "✅ Documentation synchronization completed successfully"
|
||||
else
|
||||
echo "ℹ️ Documentation synchronization ran but no changes were needed"
|
||||
fi
|
||||
else
|
||||
echo "ℹ️ No documentation changes detected, synchronization skipped"
|
||||
fi
|
||||
289
.github/workflows/sync_docs_analyze.yml
vendored
Normal file
289
.github/workflows/sync_docs_analyze.yml
vendored
Normal file
@@ -0,0 +1,289 @@
|
||||
name: Analyze Documentation Changes
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened]
|
||||
paths:
|
||||
- 'docs.json'
|
||||
- 'en/**/*.md'
|
||||
- 'en/**/*.mdx'
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: read
|
||||
|
||||
jobs:
|
||||
analyze:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout PR
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.9'
|
||||
|
||||
- name: Analyze documentation changes
|
||||
id: analyze
|
||||
run: |
|
||||
echo "Analyzing documentation changes..."
|
||||
|
||||
# Get base and head commits
|
||||
BASE_SHA="${{ github.event.pull_request.base.sha }}"
|
||||
HEAD_SHA="${{ github.event.pull_request.head.sha }}"
|
||||
|
||||
echo "Base SHA: $BASE_SHA"
|
||||
echo "Head SHA: $HEAD_SHA"
|
||||
|
||||
# Detect changed files
|
||||
CHANGED_FILES=$(git diff --name-only $BASE_SHA $HEAD_SHA | grep -E '^(docs\.json|en/.*\.(md|mdx))$' || true)
|
||||
|
||||
if [ -z "$CHANGED_FILES" ]; then
|
||||
echo "No documentation changes detected"
|
||||
echo "has_changes=false" >> $GITHUB_OUTPUT
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "has_changes=true" >> $GITHUB_OUTPUT
|
||||
|
||||
# Count changes for security limits
|
||||
FILE_COUNT=$(echo "$CHANGED_FILES" | wc -l)
|
||||
echo "Changed files count: $FILE_COUNT"
|
||||
|
||||
# Security check: Limit number of files
|
||||
MAX_FILES=50
|
||||
if [ "$FILE_COUNT" -gt "$MAX_FILES" ]; then
|
||||
echo "Error: Too many files changed ($FILE_COUNT > $MAX_FILES)"
|
||||
echo "error=too_many_files" >> $GITHUB_OUTPUT
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Create analysis report
|
||||
cat > /tmp/analysis.json <<EOF
|
||||
{
|
||||
"pr_number": ${{ github.event.pull_request.number }},
|
||||
"pr_title": "${{ github.event.pull_request.title }}",
|
||||
"pr_author": "${{ github.event.pull_request.user.login }}",
|
||||
"base_sha": "$BASE_SHA",
|
||||
"head_sha": "$HEAD_SHA",
|
||||
"file_count": $FILE_COUNT,
|
||||
"timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
|
||||
"repository": "${{ github.repository }}",
|
||||
"ref": "${{ github.ref }}"
|
||||
}
|
||||
EOF
|
||||
|
||||
# Save changed files list
|
||||
echo "$CHANGED_FILES" > /tmp/changed_files.txt
|
||||
|
||||
# Analyze file types and sizes
|
||||
> /tmp/file_analysis.txt
|
||||
while IFS= read -r file; do
|
||||
if [ -f "$file" ]; then
|
||||
SIZE=$(stat -f%z "$file" 2>/dev/null || stat -c%s "$file" 2>/dev/null || echo "0")
|
||||
echo "$file|$SIZE" >> /tmp/file_analysis.txt
|
||||
|
||||
# Security check: File size limit (10MB)
|
||||
MAX_SIZE=$((10 * 1024 * 1024))
|
||||
if [ "$SIZE" -gt "$MAX_SIZE" ]; then
|
||||
echo "Error: File $file exceeds size limit ($SIZE > $MAX_SIZE)"
|
||||
echo "error=file_too_large" >> $GITHUB_OUTPUT
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
done <<< "$CHANGED_FILES"
|
||||
|
||||
echo "Analysis complete"
|
||||
|
||||
- name: Check for docs.json structure changes
|
||||
if: steps.analyze.outputs.has_changes == 'true'
|
||||
run: |
|
||||
# Check if docs.json was modified
|
||||
if git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} | grep -q '^docs\.json$'; then
|
||||
echo "docs.json structure changes detected"
|
||||
echo "true" > /tmp/docs_json_changed.txt
|
||||
|
||||
# Extract English documentation structure changes
|
||||
python3 - <<'EOF'
|
||||
import json
|
||||
import subprocess
|
||||
|
||||
def get_docs_structure(sha):
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["git", "show", f"{sha}:docs.json"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=True
|
||||
)
|
||||
return json.loads(result.stdout)
|
||||
except:
|
||||
return None
|
||||
|
||||
base_sha = "${{ github.event.pull_request.base.sha }}"
|
||||
head_sha = "${{ github.event.pull_request.head.sha }}"
|
||||
|
||||
base_docs = get_docs_structure(base_sha)
|
||||
head_docs = get_docs_structure(head_sha)
|
||||
|
||||
changes = {
|
||||
"structure_changed": base_docs != head_docs if base_docs and head_docs else False,
|
||||
"navigation_modified": False,
|
||||
"languages_affected": []
|
||||
}
|
||||
|
||||
if base_docs and head_docs:
|
||||
# Check navigation changes
|
||||
base_nav = base_docs.get("navigation", {})
|
||||
head_nav = head_docs.get("navigation", {})
|
||||
|
||||
if base_nav != head_nav:
|
||||
changes["navigation_modified"] = True
|
||||
|
||||
# Identify affected languages
|
||||
for lang_data in head_nav.get("languages", []):
|
||||
if lang_data.get("language") == "en":
|
||||
changes["languages_affected"] = ["zh-Hans", "jp"]
|
||||
break
|
||||
|
||||
with open("/tmp/structure_changes.json", "w") as f:
|
||||
json.dump(changes, f, indent=2)
|
||||
EOF
|
||||
else
|
||||
echo "No docs.json changes"
|
||||
echo "false" > /tmp/docs_json_changed.txt
|
||||
fi
|
||||
|
||||
- name: Validate file paths
|
||||
if: steps.analyze.outputs.has_changes == 'true'
|
||||
run: |
|
||||
# Security: Validate all file paths
|
||||
while IFS= read -r file; do
|
||||
# Check for directory traversal attempts
|
||||
if echo "$file" | grep -q '\.\./'; then
|
||||
echo "Error: Invalid file path detected: $file"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check file extension
|
||||
if ! echo "$file" | grep -qE '\.(md|mdx|json)$'; then
|
||||
echo "Error: Invalid file type: $file"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check path starts with allowed directories
|
||||
if ! echo "$file" | grep -qE '^(en/|docs\.json$)'; then
|
||||
echo "Error: File outside allowed directories: $file"
|
||||
exit 1
|
||||
fi
|
||||
done < /tmp/changed_files.txt
|
||||
|
||||
echo "All file paths validated"
|
||||
|
||||
- name: Create analysis summary
|
||||
if: steps.analyze.outputs.has_changes == 'true'
|
||||
run: |
|
||||
# Create a comprehensive analysis summary
|
||||
python3 - <<'EOF'
|
||||
import json
|
||||
import os
|
||||
|
||||
# Load analysis data
|
||||
with open("/tmp/analysis.json") as f:
|
||||
analysis = json.load(f)
|
||||
|
||||
# Load file analysis
|
||||
files_to_sync = []
|
||||
with open("/tmp/file_analysis.txt") as f:
|
||||
for line in f:
|
||||
if line.strip():
|
||||
file_path, size = line.strip().split("|")
|
||||
files_to_sync.append({
|
||||
"path": file_path,
|
||||
"size": int(size),
|
||||
"type": "mdx" if file_path.endswith(".mdx") else "md" if file_path.endswith(".md") else "json"
|
||||
})
|
||||
|
||||
# Load structure changes if exists
|
||||
structure_changes = {}
|
||||
if os.path.exists("/tmp/structure_changes.json"):
|
||||
with open("/tmp/structure_changes.json") as f:
|
||||
structure_changes = json.load(f)
|
||||
|
||||
# Create sync plan
|
||||
sync_plan = {
|
||||
"metadata": analysis,
|
||||
"files_to_sync": files_to_sync,
|
||||
"structure_changes": structure_changes,
|
||||
"target_languages": ["zh-hans", "ja-jp"],
|
||||
"sync_required": len(files_to_sync) > 0 or structure_changes.get("structure_changed", False)
|
||||
}
|
||||
|
||||
# Save sync plan
|
||||
with open("/tmp/sync_plan.json", "w") as f:
|
||||
json.dump(sync_plan, f, indent=2)
|
||||
|
||||
print(f"Sync plan created: {len(files_to_sync)} files to sync")
|
||||
if structure_changes.get("structure_changed"):
|
||||
print("Documentation structure changes detected")
|
||||
EOF
|
||||
|
||||
- name: Upload analysis artifacts
|
||||
if: steps.analyze.outputs.has_changes == 'true'
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: docs-sync-analysis-${{ github.event.pull_request.number }}
|
||||
path: |
|
||||
/tmp/analysis.json
|
||||
/tmp/changed_files.txt
|
||||
/tmp/file_analysis.txt
|
||||
/tmp/sync_plan.json
|
||||
/tmp/docs_json_changed.txt
|
||||
/tmp/structure_changes.json
|
||||
retention-days: 1
|
||||
|
||||
- name: Comment on PR with analysis
|
||||
if: steps.analyze.outputs.has_changes == 'true'
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
const fs = require('fs');
|
||||
const syncPlan = JSON.parse(fs.readFileSync('/tmp/sync_plan.json', 'utf8'));
|
||||
|
||||
const fileCount = syncPlan.files_to_sync.length;
|
||||
const structureChanged = syncPlan.structure_changes.structure_changed || false;
|
||||
|
||||
let comment = '## 📋 Documentation Sync Analysis\n\n';
|
||||
comment += `Found **${fileCount}** documentation file(s) that need synchronization.\n\n`;
|
||||
|
||||
if (fileCount > 0) {
|
||||
comment += '### Files to Sync:\n';
|
||||
syncPlan.files_to_sync.forEach(file => {
|
||||
const sizeKB = (file.size / 1024).toFixed(2);
|
||||
comment += `- \`${file.path}\` (${sizeKB} KB)\n`;
|
||||
});
|
||||
comment += '\n';
|
||||
}
|
||||
|
||||
if (structureChanged) {
|
||||
comment += '### Structure Changes:\n';
|
||||
comment += '- Documentation navigation structure will be updated\n';
|
||||
comment += '- Target languages: Chinese (zh-hans), Japanese (ja-jp)\n\n';
|
||||
}
|
||||
|
||||
comment += '### Next Steps:\n';
|
||||
comment += '1. A maintainer will review and approve the synchronization\n';
|
||||
comment += '2. Once approved, translations will be generated automatically\n';
|
||||
comment += '3. Synchronized files will be added to a new branch for review\n\n';
|
||||
|
||||
comment += '_This analysis was performed automatically. No code from your PR was executed._';
|
||||
|
||||
await github.rest.issues.createComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: context.issue.number,
|
||||
body: comment
|
||||
});
|
||||
434
.github/workflows/sync_docs_execute.yml
vendored
Normal file
434
.github/workflows/sync_docs_execute.yml
vendored
Normal file
@@ -0,0 +1,434 @@
|
||||
name: Execute Documentation Sync
|
||||
|
||||
on:
|
||||
workflow_run:
|
||||
workflows: ["Analyze Documentation Changes"]
|
||||
types:
|
||||
- completed
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
pull-requests: write
|
||||
actions: read
|
||||
|
||||
jobs:
|
||||
execute-sync:
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event.workflow_run.conclusion == 'success'
|
||||
steps:
|
||||
- name: Check workflow source
|
||||
id: check-source
|
||||
run: |
|
||||
echo "Checking workflow source..."
|
||||
echo "Event: ${{ github.event.workflow_run.event }}"
|
||||
echo "Repository: ${{ github.event.workflow_run.repository.full_name }}"
|
||||
echo "Head Repository: ${{ github.event.workflow_run.head_repository.full_name }}"
|
||||
echo "Head Branch: ${{ github.event.workflow_run.head_branch }}"
|
||||
|
||||
# Security check: Only process PRs from the same repository or trusted forks
|
||||
if [[ "${{ github.event.workflow_run.event }}" != "pull_request" ]]; then
|
||||
echo "Not a pull request event, skipping"
|
||||
echo "should_process=false" >> $GITHUB_OUTPUT
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Check if this is from a fork
|
||||
IS_FORK="false"
|
||||
if [[ "${{ github.event.workflow_run.repository.full_name }}" != "${{ github.event.workflow_run.head_repository.full_name }}" ]]; then
|
||||
IS_FORK="true"
|
||||
fi
|
||||
|
||||
echo "is_fork=$IS_FORK" >> $GITHUB_OUTPUT
|
||||
echo "should_process=true" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Download analysis artifacts
|
||||
if: steps.check-source.outputs.should_process == 'true'
|
||||
uses: actions/github-script@v7
|
||||
id: download-artifacts
|
||||
with:
|
||||
script: |
|
||||
const artifacts = await github.rest.actions.listWorkflowRunArtifacts({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
run_id: ${{ github.event.workflow_run.id }}
|
||||
});
|
||||
|
||||
const matchArtifact = artifacts.data.artifacts.find(artifact => {
|
||||
return artifact.name.startsWith('docs-sync-analysis-');
|
||||
});
|
||||
|
||||
if (!matchArtifact) {
|
||||
console.log('No analysis artifacts found');
|
||||
return false;
|
||||
}
|
||||
|
||||
const download = await github.rest.actions.downloadArtifact({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
artifact_id: matchArtifact.id,
|
||||
archive_format: 'zip'
|
||||
});
|
||||
|
||||
const fs = require('fs');
|
||||
fs.writeFileSync('/tmp/artifacts.zip', Buffer.from(download.data));
|
||||
|
||||
// Extract PR number from artifact name
|
||||
const prNumber = matchArtifact.name.split('-').pop();
|
||||
core.setOutput('pr_number', prNumber);
|
||||
core.setOutput('artifact_found', 'true');
|
||||
|
||||
return true;
|
||||
|
||||
- name: Extract and validate artifacts
|
||||
if: steps.download-artifacts.outputs.artifact_found == 'true'
|
||||
id: extract-artifacts
|
||||
run: |
|
||||
echo "Extracting artifacts..."
|
||||
|
||||
# Create secure temporary directory
|
||||
WORK_DIR=$(mktemp -d /tmp/sync-XXXXXX)
|
||||
echo "work_dir=$WORK_DIR" >> $GITHUB_OUTPUT
|
||||
|
||||
# Extract to temporary directory
|
||||
cd "$WORK_DIR"
|
||||
unzip /tmp/artifacts.zip
|
||||
|
||||
# Validate extracted files
|
||||
REQUIRED_FILES="analysis.json sync_plan.json changed_files.txt"
|
||||
for file in $REQUIRED_FILES; do
|
||||
if [ ! -f "$file" ]; then
|
||||
echo "Error: Required file $file not found"
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
|
||||
# Validate JSON structure
|
||||
python3 -c "
|
||||
import json
|
||||
import sys
|
||||
|
||||
try:
|
||||
with open('analysis.json') as f:
|
||||
analysis = json.load(f)
|
||||
with open('sync_plan.json') as f:
|
||||
sync_plan = json.load(f)
|
||||
|
||||
# Validate required fields
|
||||
assert 'pr_number' in analysis
|
||||
assert 'files_to_sync' in sync_plan
|
||||
assert 'target_languages' in sync_plan
|
||||
|
||||
print('Artifacts validated successfully')
|
||||
except Exception as e:
|
||||
print(f'Validation error: {e}')
|
||||
sys.exit(1)
|
||||
"
|
||||
|
||||
# Extract PR number and other metadata
|
||||
PR_NUMBER=$(python3 -c "import json; print(json.load(open('analysis.json'))['pr_number'])")
|
||||
echo "pr_number=$PR_NUMBER" >> $GITHUB_OUTPUT
|
||||
|
||||
# Check if sync is required
|
||||
SYNC_REQUIRED=$(python3 -c "import json; print(str(json.load(open('sync_plan.json'))['sync_required']).lower())")
|
||||
echo "sync_required=$SYNC_REQUIRED" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Checkout base repository
|
||||
if: steps.extract-artifacts.outputs.sync_required == 'true'
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Set up Python
|
||||
if: steps.extract-artifacts.outputs.sync_required == 'true'
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.9'
|
||||
|
||||
- name: Install dependencies
|
||||
if: steps.extract-artifacts.outputs.sync_required == 'true'
|
||||
run: |
|
||||
cd tools/translate
|
||||
pip install httpx aiofiles python-dotenv
|
||||
|
||||
- name: Check for manual approval requirement
|
||||
if: steps.extract-artifacts.outputs.sync_required == 'true' && steps.check-source.outputs.is_fork == 'true'
|
||||
id: check-approval
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
const prNumber = ${{ steps.extract-artifacts.outputs.pr_number }};
|
||||
|
||||
// Get PR details
|
||||
const pr = await github.rest.pulls.get({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
pull_number: prNumber
|
||||
});
|
||||
|
||||
const author = pr.data.user.login;
|
||||
const authorAssociation = pr.data.author_association;
|
||||
|
||||
// Check if author is trusted
|
||||
const trustedAssociations = ['OWNER', 'MEMBER', 'COLLABORATOR'];
|
||||
const trustedContributors = process.env.TRUSTED_CONTRIBUTORS?.split(',') || [];
|
||||
|
||||
const isTrusted = trustedAssociations.includes(authorAssociation) ||
|
||||
trustedContributors.includes(author);
|
||||
|
||||
if (!isTrusted) {
|
||||
// Check for approval from maintainer
|
||||
const reviews = await github.rest.pulls.listReviews({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
pull_number: prNumber
|
||||
});
|
||||
|
||||
const hasApproval = reviews.data.some(review =>
|
||||
review.state === 'APPROVED' &&
|
||||
trustedAssociations.includes(review.author_association)
|
||||
);
|
||||
|
||||
if (!hasApproval) {
|
||||
console.log('PR requires manual approval from a maintainer');
|
||||
core.setOutput('needs_approval', 'true');
|
||||
|
||||
// Comment on PR
|
||||
await github.rest.issues.createComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: prNumber,
|
||||
body: '⏸️ **Documentation sync is pending approval**\n\n' +
|
||||
'This PR requires approval from a maintainer before automatic synchronization can proceed.\n\n' +
|
||||
'Once approved, the documentation will be automatically translated and synchronized.'
|
||||
});
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
core.setOutput('needs_approval', 'false');
|
||||
|
||||
- name: Execute safe synchronization
|
||||
if: steps.extract-artifacts.outputs.sync_required == 'true' && steps.check-approval.outputs.needs_approval != 'true'
|
||||
id: sync
|
||||
env:
|
||||
DIFY_API_KEY: ${{ secrets.DIFY_API_KEY }}
|
||||
run: |
|
||||
echo "Executing documentation synchronization..."
|
||||
|
||||
WORK_DIR="${{ steps.extract-artifacts.outputs.work_dir }}"
|
||||
PR_NUMBER="${{ steps.extract-artifacts.outputs.pr_number }}"
|
||||
|
||||
# Create a new branch for the sync results
|
||||
SYNC_BRANCH="docs-sync-pr-${PR_NUMBER}"
|
||||
git checkout -b "$SYNC_BRANCH"
|
||||
|
||||
# Run synchronization with security constraints
|
||||
cd tools/translate
|
||||
|
||||
# Create a secure sync script
|
||||
cat > secure_sync.py <<'EOF'
|
||||
import json
|
||||
import sys
|
||||
import os
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
|
||||
# Add parent directory to path
|
||||
sys.path.append(os.path.dirname(__file__))
|
||||
from sync_and_translate import DocsSynchronizer
|
||||
|
||||
async def secure_sync():
|
||||
work_dir = sys.argv[1]
|
||||
|
||||
# Load sync plan
|
||||
with open(f"{work_dir}/sync_plan.json") as f:
|
||||
sync_plan = json.load(f)
|
||||
|
||||
# Security: Only sync files from the approved list
|
||||
files_to_sync = sync_plan.get("files_to_sync", [])
|
||||
|
||||
# Validate file paths again
|
||||
for file_info in files_to_sync:
|
||||
file_path = file_info["path"]
|
||||
|
||||
# Security checks
|
||||
if ".." in file_path or file_path.startswith("/"):
|
||||
print(f"Security error: Invalid path {file_path}")
|
||||
return False
|
||||
|
||||
if not file_path.startswith("en/"):
|
||||
print(f"Security error: File outside en/ directory: {file_path}")
|
||||
return False
|
||||
|
||||
# Initialize synchronizer
|
||||
api_key = os.environ.get("DIFY_API_KEY")
|
||||
if not api_key:
|
||||
print("Error: DIFY_API_KEY not set")
|
||||
return False
|
||||
|
||||
synchronizer = DocsSynchronizer(api_key)
|
||||
|
||||
# Perform limited sync
|
||||
results = {
|
||||
"translated": [],
|
||||
"failed": [],
|
||||
"skipped": []
|
||||
}
|
||||
|
||||
for file_info in files_to_sync[:10]: # Limit to 10 files
|
||||
file_path = file_info["path"]
|
||||
print(f"Processing: {file_path}")
|
||||
|
||||
try:
|
||||
# Only translate if file exists and is safe
|
||||
if os.path.exists(f"../../{file_path}"):
|
||||
for target_lang in ["zh-hans", "ja-jp"]:
|
||||
target_path = file_path.replace("en/", f"{target_lang}/")
|
||||
success = await synchronizer.translate_file_with_notice(
|
||||
file_path,
|
||||
target_path,
|
||||
target_lang
|
||||
)
|
||||
if success:
|
||||
results["translated"].append(target_path)
|
||||
else:
|
||||
results["failed"].append(target_path)
|
||||
else:
|
||||
results["skipped"].append(file_path)
|
||||
except Exception as e:
|
||||
print(f"Error processing {file_path}: {e}")
|
||||
results["failed"].append(file_path)
|
||||
|
||||
# Handle docs.json structure sync if needed
|
||||
if sync_plan.get("structure_changes", {}).get("structure_changed"):
|
||||
print("Syncing docs.json structure...")
|
||||
try:
|
||||
sync_log = synchronizer.sync_docs_json_structure()
|
||||
print("\n".join(sync_log))
|
||||
except Exception as e:
|
||||
print(f"Error syncing structure: {e}")
|
||||
|
||||
# Save results
|
||||
with open("/tmp/sync_results.json", "w") as f:
|
||||
json.dump(results, f, indent=2)
|
||||
|
||||
return len(results["failed"]) == 0
|
||||
|
||||
if __name__ == "__main__":
|
||||
success = asyncio.run(secure_sync())
|
||||
sys.exit(0 if success else 1)
|
||||
EOF
|
||||
|
||||
# Run the secure sync
|
||||
python secure_sync.py "$WORK_DIR"
|
||||
SYNC_EXIT_CODE=$?
|
||||
|
||||
echo "sync_exit_code=$SYNC_EXIT_CODE" >> $GITHUB_OUTPUT
|
||||
|
||||
# Check for changes
|
||||
if [[ -n $(git status --porcelain) ]]; then
|
||||
echo "has_changes=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "has_changes=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Commit sync results
|
||||
if: steps.sync.outputs.has_changes == 'true'
|
||||
id: commit
|
||||
run: |
|
||||
PR_NUMBER="${{ steps.extract-artifacts.outputs.pr_number }}"
|
||||
SYNC_BRANCH="docs-sync-pr-${PR_NUMBER}"
|
||||
|
||||
git config user.name 'github-actions[bot]'
|
||||
git config user.email 'github-actions[bot]@users.noreply.github.com'
|
||||
|
||||
git add .
|
||||
git commit -m "docs: sync translations for PR #${PR_NUMBER}
|
||||
|
||||
Auto-generated translations for documentation changes.
|
||||
Review these changes carefully before merging.
|
||||
|
||||
🤖 Generated with GitHub Actions"
|
||||
|
||||
# Push the branch
|
||||
git push origin "$SYNC_BRANCH" --force
|
||||
|
||||
echo "branch_name=$SYNC_BRANCH" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Comment on PR with results
|
||||
if: steps.extract-artifacts.outputs.sync_required == 'true' && steps.check-approval.outputs.needs_approval != 'true'
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
const fs = require('fs');
|
||||
const prNumber = ${{ steps.extract-artifacts.outputs.pr_number }};
|
||||
const hasChanges = '${{ steps.sync.outputs.has_changes }}' === 'true';
|
||||
const branchName = '${{ steps.commit.outputs.branch_name }}';
|
||||
|
||||
let comment = '## ✅ Documentation Synchronization Complete\n\n';
|
||||
|
||||
if (hasChanges) {
|
||||
// Load sync results if available
|
||||
let results = { translated: [], failed: [], skipped: [] };
|
||||
try {
|
||||
results = JSON.parse(fs.readFileSync('/tmp/sync_results.json', 'utf8'));
|
||||
} catch (e) {
|
||||
console.log('Could not load sync results');
|
||||
}
|
||||
|
||||
comment += `Translations have been generated and pushed to branch: \`${branchName}\`\n\n`;
|
||||
|
||||
if (results.translated.length > 0) {
|
||||
comment += `### ✅ Successfully Translated (${results.translated.length}):\n`;
|
||||
results.translated.slice(0, 10).forEach(file => {
|
||||
comment += `- \`${file}\`\n`;
|
||||
});
|
||||
if (results.translated.length > 10) {
|
||||
comment += `- ... and ${results.translated.length - 10} more\n`;
|
||||
}
|
||||
comment += '\n';
|
||||
}
|
||||
|
||||
if (results.failed.length > 0) {
|
||||
comment += `### ⚠️ Failed Translations (${results.failed.length}):\n`;
|
||||
results.failed.forEach(file => {
|
||||
comment += `- \`${file}\`\n`;
|
||||
});
|
||||
comment += '\n';
|
||||
}
|
||||
|
||||
comment += '### Next Steps:\n';
|
||||
comment += '1. Review the generated translations in the sync branch\n';
|
||||
comment += '2. Make any necessary adjustments\n';
|
||||
comment += '3. Merge the sync branch into your PR branch if satisfied\n\n';
|
||||
|
||||
comment += `[View changes](https://github.com/${{ github.repository }}/compare/${{ github.event.workflow_run.head_branch }}...${branchName})`;
|
||||
} else {
|
||||
comment += 'No changes were needed. All documentation is already in sync.';
|
||||
}
|
||||
|
||||
await github.rest.issues.createComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: prNumber,
|
||||
body: comment
|
||||
});
|
||||
|
||||
handle-failure:
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event.workflow_run.conclusion == 'failure'
|
||||
steps:
|
||||
- name: Report analysis failure
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
// Try to extract PR number from workflow run
|
||||
const workflowRun = context.payload.workflow_run;
|
||||
|
||||
console.log('Analysis workflow failed');
|
||||
console.log('Attempting to notify PR if possible...');
|
||||
|
||||
// This is a best-effort attempt to notify
|
||||
// In practice, you might want to store PR number differently
|
||||
@@ -31,7 +31,8 @@
|
||||
"en/documentation/pages/getting-started/introduction",
|
||||
"en/documentation/pages/getting-started/quick-start",
|
||||
"en/documentation/pages/getting-started/key-concepts",
|
||||
"en/documentation/pages/getting-started/faq"
|
||||
"en/documentation/pages/getting-started/faq",
|
||||
"en/documentation/pages/getting-started/test-internal"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
281
docs/SECURE_WORKFLOW_GUIDE.md
Normal file
281
docs/SECURE_WORKFLOW_GUIDE.md
Normal file
@@ -0,0 +1,281 @@
|
||||
# Secure Documentation Workflow Guide
|
||||
|
||||
This guide explains how the secure two-workflow pattern works for handling documentation synchronization from external PRs (forked repositories).
|
||||
|
||||
## Overview
|
||||
|
||||
The secure workflow system uses a **two-workflow pattern** to safely handle documentation changes from external contributors while maintaining security:
|
||||
|
||||
1. **Analysis Workflow** (`sync_docs_analyze.yml`) - Analyzes changes in an unprivileged environment
|
||||
2. **Execution Workflow** (`sync_docs_execute.yml`) - Executes translations with full permissions after validation
|
||||
|
||||
## Security Architecture
|
||||
|
||||
### Two-Workflow Pattern
|
||||
|
||||
```mermaid
|
||||
graph TD
|
||||
A[External PR] --> B[Analysis Workflow]
|
||||
B --> C[Create Analysis Artifacts]
|
||||
C --> D[Execution Workflow]
|
||||
D --> E{Manual Approval Required?}
|
||||
E -->|Yes| F[Wait for Approval]
|
||||
E -->|No| G[Execute Sync]
|
||||
F --> G
|
||||
G --> H[Comment on PR with Results]
|
||||
```
|
||||
|
||||
### Security Principles
|
||||
|
||||
1. **Isolation**: Untrusted code runs in `pull_request` context (no secrets)
|
||||
2. **Validation**: All inputs are validated before processing
|
||||
3. **Least Privilege**: Each workflow has minimal required permissions
|
||||
4. **Manual Approval**: External PRs require maintainer approval
|
||||
5. **Rate Limiting**: API calls and file operations are limited
|
||||
|
||||
## Workflow Details
|
||||
|
||||
### 1. Analysis Workflow (`sync_docs_analyze.yml`)
|
||||
|
||||
**Trigger**: `pull_request` events for `docs.json` and `en/**/*.{md,mdx}` files
|
||||
|
||||
**Permissions**: `contents: read`, `pull-requests: read`
|
||||
|
||||
**Security Features**:
|
||||
- No access to secrets or API keys
|
||||
- Validates file paths for directory traversal
|
||||
- Limits file count and size
|
||||
- Creates artifacts with analysis results
|
||||
- Comments on PR with preview
|
||||
|
||||
**Process**:
|
||||
1. Checkout PR code (safe - no secrets available)
|
||||
2. Analyze changed files
|
||||
3. Validate file paths and extensions
|
||||
4. Create sync plan
|
||||
5. Upload artifacts
|
||||
6. Comment on PR with analysis
|
||||
|
||||
### 2. Execution Workflow (`sync_docs_execute.yml`)
|
||||
|
||||
**Trigger**: `workflow_run` completion of analysis workflow
|
||||
|
||||
**Permissions**: `contents: write`, `pull-requests: write`, `actions: read`
|
||||
|
||||
**Security Features**:
|
||||
- Downloads and validates artifacts
|
||||
- Checks contributor trust level
|
||||
- Requires manual approval for external PRs
|
||||
- Limits translation operations
|
||||
- Creates isolated branch for results
|
||||
|
||||
**Process**:
|
||||
1. Download analysis artifacts
|
||||
2. Validate artifact integrity
|
||||
3. Check approval requirements
|
||||
4. Execute secure synchronization
|
||||
5. Create sync branch with results
|
||||
6. Comment on PR with links
|
||||
|
||||
## Security Features
|
||||
|
||||
### Input Validation
|
||||
|
||||
All file paths are validated against:
|
||||
- Directory traversal patterns (`../`, absolute paths)
|
||||
- Allowed file extensions (`.md`, `.mdx`, `.json`)
|
||||
- Allowed directories (`en/`, `zh-hans/`, `ja-jp/`)
|
||||
- File size limits (10MB per file)
|
||||
- File count limits (50 files per PR)
|
||||
|
||||
### Contributor Trust Levels
|
||||
|
||||
1. **Trusted**: OWNER, MEMBER, COLLABORATOR - Auto-approved
|
||||
2. **Listed**: Users in `TRUSTED_CONTRIBUTORS` - Auto-approved
|
||||
3. **External**: Fork contributors - Requires manual approval
|
||||
|
||||
### Rate Limiting
|
||||
|
||||
- Maximum 10 files translated per operation
|
||||
- API call limits enforced
|
||||
- Artifact size limits (50MB)
|
||||
- Processing timeouts (5 minutes)
|
||||
|
||||
## Configuration
|
||||
|
||||
### Environment Variables
|
||||
|
||||
```yaml
|
||||
DIFY_API_KEY: ${{ secrets.DIFY_API_KEY }} # Translation API key
|
||||
TRUSTED_CONTRIBUTORS: "user1,user2,user3" # Comma-separated trusted users
|
||||
```
|
||||
|
||||
### Workflow Configuration
|
||||
|
||||
Edit `.github/workflow-config.yml` to customize:
|
||||
|
||||
```yaml
|
||||
security:
|
||||
require_approval_for_forks: true
|
||||
max_files_per_pr: 50
|
||||
max_file_size_mb: 10
|
||||
trusted_contributors:
|
||||
- your-trusted-user
|
||||
|
||||
translation:
|
||||
max_files_per_batch: 10
|
||||
translation_timeout: 300
|
||||
```
|
||||
|
||||
## Usage for Maintainers
|
||||
|
||||
### Approving External PRs
|
||||
|
||||
1. External contributor creates PR
|
||||
2. Analysis workflow runs automatically
|
||||
3. PR gets comment with analysis results
|
||||
4. **Maintainer reviews the analysis**
|
||||
5. **Maintainer approves the PR** (GitHub review system)
|
||||
6. Execution workflow runs automatically
|
||||
7. Results are posted to sync branch
|
||||
|
||||
### Manual Workflow Dispatch
|
||||
|
||||
For internal changes, you can trigger manually:
|
||||
|
||||
```bash
|
||||
# Via GitHub UI: Actions > Sync Documentation Structure > Run workflow
|
||||
# Or via CLI:
|
||||
gh workflow run sync_docs.yml -f since_commit=HEAD~5
|
||||
```
|
||||
|
||||
### Emergency Controls
|
||||
|
||||
In `.github/workflow-config.yml`:
|
||||
|
||||
```yaml
|
||||
emergency:
|
||||
disable_workflows: true # Disable all workflows
|
||||
disable_external_prs: true # Disable only external PR processing
|
||||
```
|
||||
|
||||
## Development and Testing
|
||||
|
||||
### Local Testing
|
||||
|
||||
Test security features locally:
|
||||
|
||||
```bash
|
||||
cd tools/translate
|
||||
python test_security.py
|
||||
```
|
||||
|
||||
### Validation Tools
|
||||
|
||||
- `security_validator.py` - Input validation and sanitization
|
||||
- `test_security.py` - Security test suite
|
||||
- `sync_and_translate.py` - Enhanced with security checks
|
||||
|
||||
### Adding New Security Rules
|
||||
|
||||
1. Update `security_validator.py` with new validation rules
|
||||
2. Add test cases to `test_security.py`
|
||||
3. Update workflow configuration if needed
|
||||
4. Test locally before deploying
|
||||
|
||||
## Monitoring and Alerts
|
||||
|
||||
### What to Monitor
|
||||
|
||||
- Failed approvals or validations
|
||||
- Unusual file patterns or sizes
|
||||
- API rate limit hits
|
||||
- Security validation failures
|
||||
|
||||
### Log Analysis
|
||||
|
||||
Check GitHub Actions logs for:
|
||||
- `Security error:` messages
|
||||
- `Validation error:` messages
|
||||
- Failed artifact downloads
|
||||
- Approval requirement triggers
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Common Issues
|
||||
|
||||
1. **"Needs Approval" Status**
|
||||
- External PRs require maintainer approval
|
||||
- Add contributor to trusted list or approve PR
|
||||
|
||||
2. **"Security Validation Failed"**
|
||||
- Check file paths for dangerous patterns
|
||||
- Verify file extensions are allowed
|
||||
- Check file size limits
|
||||
|
||||
3. **"Artifact Not Found"**
|
||||
- Analysis workflow may have failed
|
||||
- Check analysis workflow logs
|
||||
- Re-run analysis if needed
|
||||
|
||||
4. **Translation Failures**
|
||||
- Check DIFY_API_KEY configuration
|
||||
- Verify API rate limits
|
||||
- Check file content for issues
|
||||
|
||||
### Getting Help
|
||||
|
||||
- Check workflow logs in GitHub Actions
|
||||
- Review security test results locally
|
||||
- Contact repository maintainers
|
||||
- Open GitHub issue with details
|
||||
|
||||
## Best Practices
|
||||
|
||||
### For Contributors
|
||||
|
||||
1. **Keep PRs focused** - Limit to necessary documentation changes
|
||||
2. **Use standard paths** - Follow existing directory structure
|
||||
3. **Test locally** - Verify markdown renders correctly
|
||||
4. **Be patient** - External PRs require approval
|
||||
|
||||
### For Maintainers
|
||||
|
||||
1. **Review analysis carefully** - Check file changes before approval
|
||||
2. **Monitor for abuse** - Watch for suspicious patterns
|
||||
3. **Keep trusted list updated** - Add regular contributors
|
||||
4. **Test configuration changes** - Validate workflow updates
|
||||
|
||||
### Security Checklist
|
||||
|
||||
- [ ] Workflows use minimal required permissions
|
||||
- [ ] External PRs require approval
|
||||
- [ ] File validation is comprehensive
|
||||
- [ ] API keys are properly secured
|
||||
- [ ] Rate limits are enforced
|
||||
- [ ] Artifacts are validated
|
||||
- [ ] Emergency controls are in place
|
||||
|
||||
## Updates and Maintenance
|
||||
|
||||
### Regular Tasks
|
||||
|
||||
- Review and update trusted contributors list
|
||||
- Monitor security logs for patterns
|
||||
- Update validation rules as needed
|
||||
- Test workflows after GitHub Actions updates
|
||||
- Review and rotate API keys
|
||||
|
||||
### Version Updates
|
||||
|
||||
When updating the workflow:
|
||||
|
||||
1. Test changes in a fork first
|
||||
2. Update version in `workflow-config.yml`
|
||||
3. Update documentation
|
||||
4. Notify team of changes
|
||||
5. Monitor first few PRs carefully
|
||||
|
||||
---
|
||||
|
||||
For questions or issues, contact the repository maintainers or open a GitHub issue.
|
||||
55
en/documentation/pages/getting-started/test-internal.mdx
Normal file
55
en/documentation/pages/getting-started/test-internal.mdx
Normal file
@@ -0,0 +1,55 @@
|
||||
---
|
||||
title: "Test Internal Workflow"
|
||||
description: "Testing documentation sync for internal contributors"
|
||||
icon: "flask"
|
||||
---
|
||||
|
||||
This is a test document to verify the internal contributor workflow for automatic documentation synchronization.
|
||||
|
||||
## Testing Features
|
||||
|
||||
<AccordionGroup>
|
||||
<Accordion title="Workflow Testing">
|
||||
This document tests the two-workflow pattern:
|
||||
- Analysis workflow (read-only)
|
||||
- Execution workflow (with permissions)
|
||||
</Accordion>
|
||||
|
||||
<Accordion title="Auto-Approval">
|
||||
Internal contributors should be auto-approved since they're in the trusted list.
|
||||
</Accordion>
|
||||
|
||||
<Accordion title="Translation Generation">
|
||||
This content should be automatically translated to:
|
||||
- Chinese (zh-hans)
|
||||
- Japanese (ja-jp)
|
||||
</Accordion>
|
||||
</AccordionGroup>
|
||||
|
||||
## Expected Results
|
||||
|
||||
<CardGroup cols={2}>
|
||||
<Card title="Sync Branch" icon="code-branch">
|
||||
A new branch `docs-sync-pr-XX` should be created
|
||||
</Card>
|
||||
<Card title="PR Comment" icon="comment">
|
||||
Automated comment with translation results
|
||||
</Card>
|
||||
</CardGroup>
|
||||
|
||||
<Note>
|
||||
This is a test file created on {new Date().toISOString()}
|
||||
</Note>
|
||||
|
||||
## Code Example
|
||||
|
||||
```python
|
||||
def test_workflow():
|
||||
"""Test the documentation sync workflow"""
|
||||
return "Testing internal contributor flow"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
Test conducted by: Internal contributor
|
||||
Branch: test/internal-docs-sync
|
||||
384
tools/translate/security_validator.py
Normal file
384
tools/translate/security_validator.py
Normal file
@@ -0,0 +1,384 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Security validation utilities for documentation synchronization.
|
||||
Provides input validation, path sanitization, and security checks.
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Any, Tuple
|
||||
import hashlib
|
||||
import hmac
|
||||
|
||||
class SecurityValidator:
    """Validates and sanitizes inputs for documentation synchronization.

    Provides path, content, JSON, and sync-plan validation plus small
    integrity helpers. All checks fail closed: suspicious input is
    rejected rather than silently repaired. Instances hold only the
    resolved base directory, so one validator can be shared freely.
    """

    # Security constants
    MAX_FILE_SIZE_MB = 10
    MAX_FILES_PER_SYNC = 50
    MAX_PATH_LENGTH = 255
    MAX_CONTENT_LENGTH = 1024 * 1024 * 10  # 10MB

    # Allowed file extensions
    ALLOWED_EXTENSIONS = {'.md', '.mdx', '.json'}

    # Allowed base directories
    ALLOWED_BASE_DIRS = {'en', 'zh-hans', 'ja-jp'}

    # Dangerous patterns to block (matched case-insensitively against
    # paths and JSON string values)
    DANGEROUS_PATTERNS = [
        r'\.\.',            # Directory traversal
        r'^/',              # Absolute paths
        r'^~',              # Home directory
        r'\$\{',            # Variable expansion
        r'`',               # Command substitution
        r'<script',         # Script tags
        r'javascript:',     # JavaScript protocol
        r'data:text/html',  # Data URLs with HTML
    ]

    def __init__(self, base_dir: Path):
        """
        Initialize the security validator.

        Args:
            base_dir: The base directory all validated paths must stay within.
        """
        self.base_dir = Path(base_dir).resolve()

    def validate_file_path(self, file_path: str) -> Tuple[bool, Optional[str]]:
        """
        Validate a file path for security issues.

        Checks length, dangerous patterns, absolute paths, extension,
        allowed top-level directory (with ``docs.json`` as the one allowed
        root-level file), and that the resolved path stays inside base_dir.

        Args:
            file_path: The file path to validate, relative to base_dir.

        Returns:
            Tuple of (is_valid, error_message); error_message is None when valid.
        """
        # Check path length
        if len(file_path) > self.MAX_PATH_LENGTH:
            return False, f"Path too long: {len(file_path)} > {self.MAX_PATH_LENGTH}"

        # Check for dangerous patterns
        for pattern in self.DANGEROUS_PATTERNS:
            if re.search(pattern, file_path, re.IGNORECASE):
                return False, f"Dangerous pattern detected: {pattern}"

        # Parse path
        path = Path(file_path)

        # Check for absolute path (belt-and-braces with the '^/' pattern above;
        # also catches platform-specific absolute forms Path understands)
        if path.is_absolute():
            return False, "Absolute paths not allowed"

        # Check file extension
        if path.suffix not in self.ALLOWED_EXTENSIONS:
            return False, f"File extension not allowed: {path.suffix}"

        # Check if path starts with an allowed directory
        parts = path.parts
        if not parts:
            return False, "Empty path"

        # 'docs.json' at the repository root is the only allowed top-level file
        if parts[0] not in self.ALLOWED_BASE_DIRS and file_path != 'docs.json':
            return False, f"Path must start with allowed directory: {self.ALLOWED_BASE_DIRS}"

        # Resolve and check that the path cannot escape the base directory
        try:
            full_path = (self.base_dir / path).resolve()
            if not full_path.is_relative_to(self.base_dir):
                return False, "Path escapes base directory"
        except (ValueError, RuntimeError) as e:
            return False, f"Invalid path: {e}"

        return True, None

    def validate_file_content(self, content: str) -> Tuple[bool, Optional[str]]:
        """
        Validate file content for security issues.

        Rejects oversized content and common HTML/JS injection vectors.
        Note that markdown backticks are deliberately allowed here, unlike
        in paths, since code spans are legitimate documentation content.

        Args:
            content: The file content to validate.

        Returns:
            Tuple of (is_valid, error_message).
        """
        # Check content length
        if len(content) > self.MAX_CONTENT_LENGTH:
            return False, f"Content too large: {len(content)} > {self.MAX_CONTENT_LENGTH}"

        # Script-injection patterns; DOTALL so multi-line <script> blocks match
        dangerous_content_patterns = [
            r'<script[^>]*>.*?</script>',  # Script tags
            r'on\w+\s*=\s*["\']',          # Event handlers
            r'javascript:',                # JavaScript protocol
            r'data:text/html',             # Data URLs with HTML
        ]

        for pattern in dangerous_content_patterns:
            if re.search(pattern, content, re.IGNORECASE | re.DOTALL):
                # Include the offending pattern so callers can see what tripped
                return False, f"Dangerous content pattern detected: {pattern}"

        return True, None

    def validate_json_structure(self, json_data: Dict[str, Any]) -> Tuple[bool, Optional[str]]:
        """
        Validate JSON structure for security issues.

        Recursively walks the structure, limiting nesting depth to 10 and
        checking every string value against DANGEROUS_PATTERNS.

        Args:
            json_data: The JSON data to validate.

        Returns:
            Tuple of (is_valid, error_message).
        """
        def check_value(value: Any, depth: int = 0) -> Optional[str]:
            """Recursively check JSON values; return an error string or None."""
            if depth > 10:
                return "JSON nesting too deep"

            if isinstance(value, str):
                # Check for dangerous patterns in string values
                for pattern in self.DANGEROUS_PATTERNS:
                    if re.search(pattern, value, re.IGNORECASE):
                        return f"Dangerous pattern in JSON value: {pattern}"
            elif isinstance(value, dict):
                for k, v in value.items():
                    if not isinstance(k, str):
                        return "Non-string key in JSON"
                    error = check_value(v, depth + 1)
                    if error:
                        return error
            elif isinstance(value, list):
                for item in value:
                    error = check_value(item, depth + 1)
                    if error:
                        return error

            return None

        error = check_value(json_data)
        if error:
            return False, error

        return True, None

    def validate_sync_plan(self, sync_plan: Dict[str, Any]) -> Tuple[bool, Optional[str]]:
        """
        Validate a synchronization plan.

        Requires the 'files_to_sync', 'target_languages', and 'metadata'
        fields, enforces the file-count and file-size limits, validates
        every file path, and restricts target languages to zh-hans/ja-jp.

        Args:
            sync_plan: The sync plan to validate.

        Returns:
            Tuple of (is_valid, error_message).
        """
        # Check required fields
        required_fields = ['files_to_sync', 'target_languages', 'metadata']
        for field in required_fields:
            if field not in sync_plan:
                return False, f"Missing required field: {field}"

        # Validate file count
        files = sync_plan.get('files_to_sync', [])
        if len(files) > self.MAX_FILES_PER_SYNC:
            return False, f"Too many files: {len(files)} > {self.MAX_FILES_PER_SYNC}"

        # Validate each file entry
        for file_info in files:
            if not isinstance(file_info, dict):
                return False, "Invalid file info structure"

            file_path = file_info.get('path')
            if not file_path:
                return False, "File path missing in sync plan"

            valid, error = self.validate_file_path(file_path)
            if not valid:
                return False, f"Invalid file path in sync plan: {error}"

            # Validate file size if present (size is in bytes)
            if 'size' in file_info:
                max_size = self.MAX_FILE_SIZE_MB * 1024 * 1024
                if file_info['size'] > max_size:
                    return False, f"File too large: {file_path}"

        # Validate target languages
        valid_languages = {'zh-hans', 'ja-jp'}
        target_langs = sync_plan.get('target_languages', [])
        for lang in target_langs:
            if lang not in valid_languages:
                return False, f"Invalid target language: {lang}"

        return True, None

    def sanitize_path(self, file_path: str) -> Optional[str]:
        """
        Sanitize a file path by removing dangerous elements.

        Strips whitespace and null bytes, normalizes separators, collapses
        duplicate slashes, then re-validates the result.

        Args:
            file_path: The file path to sanitize.

        Returns:
            Sanitized path, or None if the path still fails validation.
        """
        # Remove leading/trailing whitespace
        file_path = file_path.strip()

        # Remove any null bytes
        file_path = file_path.replace('\x00', '')

        # Normalize path separators
        file_path = file_path.replace('\\', '/')

        # Collapse duplicate slashes
        while '//' in file_path:
            file_path = file_path.replace('//', '/')

        # Validate the sanitized path
        valid, _ = self.validate_file_path(file_path)
        if not valid:
            return None

        return file_path

    def create_safe_temp_dir(self) -> Path:
        """
        Create a safe temporary directory for operations.

        The directory gets an unguessable random suffix and, where the
        platform supports it, owner-only (0o700) permissions.

        Returns:
            Path to the temporary directory.
        """
        import tempfile
        import secrets

        # Create temp dir with a cryptographically random suffix
        suffix = secrets.token_hex(8)
        temp_dir = Path(tempfile.mkdtemp(suffix=f'-sync-{suffix}'))

        # Set restrictive permissions; best-effort since some platforms
        # (e.g. Windows) do not fully support chmod
        try:
            os.chmod(temp_dir, 0o700)
        except OSError:
            pass

        return temp_dir

    def calculate_file_hash(self, file_path: Path) -> str:
        """
        Calculate the SHA-256 hash of a file.

        Args:
            file_path: Path to the file.

        Returns:
            Hex digest of the file hash.
        """
        sha256_hash = hashlib.sha256()
        with open(file_path, "rb") as f:
            # Read in 4 KiB chunks to keep memory bounded for large files
            for byte_block in iter(lambda: f.read(4096), b""):
                sha256_hash.update(byte_block)
        return sha256_hash.hexdigest()

    def verify_artifact_integrity(self, artifact_data: bytes, expected_hash: Optional[str] = None) -> bool:
        """
        Verify the integrity of an artifact.

        Args:
            artifact_data: The raw artifact bytes.
            expected_hash: Optional expected SHA-256 hex digest.

        Returns:
            True if the artifact is valid.
        """
        if expected_hash:
            actual_hash = hashlib.sha256(artifact_data).hexdigest()
            # Constant-time comparison to avoid timing side channels
            return hmac.compare_digest(actual_hash, expected_hash)

        # Basic size check if no hash was provided
        return len(artifact_data) < self.MAX_CONTENT_LENGTH

    def is_trusted_contributor(self, username: str, trusted_list: Optional[List[str]] = None) -> bool:
        """
        Check if a user is a trusted contributor.

        Args:
            username: GitHub username.
            trusted_list: Optional list of trusted usernames; when omitted,
                nobody is trusted (the list should be configured by callers).

        Returns:
            True if the user is trusted.
        """
        if not trusted_list:
            # Default trusted contributors (should be configured)
            trusted_list = []

        return username in trusted_list

    def rate_limit_check(self, identifier: str, max_requests: int = 10, window_seconds: int = 60) -> bool:
        """
        Simple rate limiting check (would need persistent storage in production).

        Args:
            identifier: Unique identifier (e.g. PR number).
            max_requests: Maximum requests allowed in the window.
            window_seconds: Time window in seconds.

        Returns:
            True if within the rate limit.
        """
        # Placeholder - a real implementation would use Redis or similar
        # persistent storage keyed by `identifier`. For now, always allow.
        return True
|
||||
|
||||
|
||||
def create_validator(base_dir: Optional[Path] = None) -> SecurityValidator:
    """Build a SecurityValidator rooted at *base_dir*.

    Args:
        base_dir: Optional base directory. When omitted, the repository
            root (three levels above this module) is used.

    Returns:
        SecurityValidator instance.
    """
    root = base_dir if base_dir is not None else Path(__file__).parent.parent.parent
    return SecurityValidator(root)
|
||||
|
||||
|
||||
# Example usage and tests
if __name__ == "__main__":
    # Smoke-test the validator against a mix of valid and malicious inputs.
    # Output is human-readable; this is a demo, not an automated test suite.
    validator = create_validator()

    # Test path validation: each entry is annotated with the expected outcome.
    test_paths = [
        "en/docs/test.md",  # Valid
        "../../../etc/passwd",  # Invalid - directory traversal
        "/etc/passwd",  # Invalid - absolute path
        "en/test.exe",  # Invalid - wrong extension
        "zh-hans/docs/test.mdx",  # Valid
        "docs.json",  # Valid - special case
    ]

    print("Path Validation Tests:")
    for path in test_paths:
        valid, error = validator.validate_file_path(path)
        status = "✓" if valid else "✗"
        print(f" {status} {path}: {error if error else 'Valid'}")

    print("\nContent Validation Tests:")
    # Content checks target HTML/JS injection vectors, not markdown syntax.
    test_contents = [
        "# Normal markdown content",  # Valid
        "<script>alert('xss')</script>",  # Invalid - script tag
        "Normal text with onclick='alert()'",  # Invalid - event handler
    ]

    for content in test_contents:
        valid, error = validator.validate_file_content(content)
        status = "✓" if valid else "✗"
        # Truncate long inputs so one test fits on one output line
        preview = content[:30] + "..." if len(content) > 30 else content
        print(f" {status} {preview}: {error if error else 'Valid'}")
||||
@@ -2,6 +2,7 @@
|
||||
"""
|
||||
Documentation Auto-Sync System
|
||||
Synchronizes English documentation structure and content to Chinese and Japanese versions.
|
||||
With enhanced security for handling external PRs.
|
||||
"""
|
||||
|
||||
import json
|
||||
@@ -17,6 +18,14 @@ import tempfile
|
||||
# Import the existing translation function
|
||||
from main import translate_text, load_md_mdx
|
||||
|
||||
# Import security validator
|
||||
try:
|
||||
from security_validator import SecurityValidator, create_validator
|
||||
except ImportError:
|
||||
# Fallback if security module not available
|
||||
SecurityValidator = None
|
||||
create_validator = None
|
||||
|
||||
# --- Configuration ---
|
||||
SCRIPT_DIR = Path(__file__).resolve().parent
|
||||
BASE_DIR = SCRIPT_DIR.parent.parent
|
||||
@@ -44,12 +53,39 @@ LANGUAGES = {
|
||||
TARGET_LANGUAGES = ["zh-hans", "ja-jp"]
|
||||
|
||||
class DocsSynchronizer:
|
||||
def __init__(self, dify_api_key: str):
|
||||
def __init__(self, dify_api_key: str, enable_security: bool = False):
|
||||
self.dify_api_key = dify_api_key
|
||||
self.base_dir = BASE_DIR
|
||||
self.docs_json_path = DOCS_JSON_PATH
|
||||
self.enable_security = enable_security
|
||||
|
||||
# Initialize security validator if enabled
|
||||
self.validator = None
|
||||
if enable_security and create_validator:
|
||||
self.validator = create_validator(self.base_dir)
|
||||
self.config = self.load_config()
|
||||
self.notices = self.load_notices()
|
||||
|
||||
def validate_file_path(self, file_path: str) -> Tuple[bool, Optional[str]]:
|
||||
"""Validate file path for security if security is enabled"""
|
||||
if not self.enable_security or not self.validator:
|
||||
return True, None
|
||||
|
||||
return self.validator.validate_file_path(file_path)
|
||||
|
||||
def validate_sync_plan(self, sync_plan: Dict[str, Any]) -> Tuple[bool, Optional[str]]:
|
||||
"""Validate synchronization plan for security if security is enabled"""
|
||||
if not self.enable_security or not self.validator:
|
||||
return True, None
|
||||
|
||||
return self.validator.validate_sync_plan(sync_plan)
|
||||
|
||||
def sanitize_path(self, file_path: str) -> Optional[str]:
|
||||
"""Sanitize file path if security is enabled"""
|
||||
if not self.enable_security or not self.validator:
|
||||
return file_path
|
||||
|
||||
return self.validator.sanitize_path(file_path)
|
||||
|
||||
def load_config(self) -> Dict[str, Any]:
|
||||
"""Load configuration file with language mappings"""
|
||||
@@ -142,6 +178,24 @@ class DocsSynchronizer:
|
||||
async def translate_file_with_notice(self, en_file_path: str, target_file_path: str, target_lang: str) -> bool:
|
||||
"""Translate a file and add AI notice at the top"""
|
||||
try:
|
||||
# Security validation
|
||||
if self.enable_security:
|
||||
# Validate source path
|
||||
valid, error = self.validate_file_path(en_file_path)
|
||||
if not valid:
|
||||
print(f"Security error - invalid source path {en_file_path}: {error}")
|
||||
return False
|
||||
|
||||
# Validate target path
|
||||
valid, error = self.validate_file_path(target_file_path)
|
||||
if not valid:
|
||||
print(f"Security error - invalid target path {target_file_path}: {error}")
|
||||
return False
|
||||
|
||||
# Sanitize paths
|
||||
en_file_path = self.sanitize_path(en_file_path) or en_file_path
|
||||
target_file_path = self.sanitize_path(target_file_path) or target_file_path
|
||||
|
||||
print(f"Translating {en_file_path} to {target_file_path}")
|
||||
|
||||
# Ensure target directory exists
|
||||
@@ -496,6 +550,85 @@ class DocsSynchronizer:
|
||||
|
||||
print("=== Synchronization Complete ===")
|
||||
return results
|
||||
|
||||
async def secure_sync_from_plan(self, sync_plan: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Execute synchronization from a validated sync plan (for external PRs)
|
||||
"""
|
||||
print("=== Starting Secure Documentation Synchronization ===")
|
||||
|
||||
# Validate sync plan
|
||||
if self.enable_security:
|
||||
valid, error = self.validate_sync_plan(sync_plan)
|
||||
if not valid:
|
||||
return {"errors": [f"Invalid sync plan: {error}"]}
|
||||
|
||||
results = {
|
||||
"translated": [],
|
||||
"failed": [],
|
||||
"skipped": [],
|
||||
"structure_synced": False,
|
||||
"errors": []
|
||||
}
|
||||
|
||||
try:
|
||||
# Process files from sync plan
|
||||
files_to_sync = sync_plan.get("files_to_sync", [])
|
||||
|
||||
# Limit number of files for security
|
||||
max_files = 10 if self.enable_security else len(files_to_sync)
|
||||
files_to_process = files_to_sync[:max_files]
|
||||
|
||||
for file_info in files_to_process:
|
||||
file_path = file_info.get("path")
|
||||
if not file_path:
|
||||
continue
|
||||
|
||||
# Additional security validation per file
|
||||
if self.enable_security:
|
||||
valid, error = self.validate_file_path(file_path)
|
||||
if not valid:
|
||||
results["errors"].append(f"Invalid file path {file_path}: {error}")
|
||||
continue
|
||||
|
||||
print(f"Processing: {file_path}")
|
||||
|
||||
# Check if source file exists
|
||||
if not (self.base_dir / file_path).exists():
|
||||
results["skipped"].append(file_path)
|
||||
continue
|
||||
|
||||
# Translate to target languages
|
||||
for target_lang in TARGET_LANGUAGES:
|
||||
target_path = self.convert_path_to_target_language(file_path, target_lang)
|
||||
try:
|
||||
success = await self.translate_file_with_notice(
|
||||
file_path, target_path, target_lang
|
||||
)
|
||||
if success:
|
||||
results["translated"].append(target_path)
|
||||
else:
|
||||
results["failed"].append(target_path)
|
||||
except Exception as e:
|
||||
print(f"Error translating {file_path} to {target_lang}: {e}")
|
||||
results["failed"].append(target_path)
|
||||
|
||||
# Handle structure changes
|
||||
structure_changes = sync_plan.get("structure_changes", {})
|
||||
if structure_changes.get("structure_changed"):
|
||||
print("Syncing documentation structure...")
|
||||
try:
|
||||
sync_log = self.sync_docs_json_structure()
|
||||
results["structure_synced"] = True
|
||||
print("Structure sync completed")
|
||||
except Exception as e:
|
||||
results["errors"].append(f"Structure sync failed: {e}")
|
||||
|
||||
except Exception as e:
|
||||
results["errors"].append(f"Critical error: {e}")
|
||||
|
||||
print("=== Secure Synchronization Complete ===")
|
||||
return results
|
||||
|
||||
async def main():
|
||||
"""Main entry point"""
|
||||
|
||||
196
tools/translate/test_security.py
Normal file
196
tools/translate/test_security.py
Normal file
@@ -0,0 +1,196 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Test the security features of the documentation sync system"""
|
||||
|
||||
import json
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from security_validator import SecurityValidator, create_validator
|
||||
from sync_and_translate import DocsSynchronizer
|
||||
|
||||
def test_security_validator():
    """Exercise SecurityValidator path, content, and sync-plan checks.

    Raises:
        AssertionError: if any check does not match its expected outcome.
            (Previously the function only printed FAIL markers and always
            returned successfully, so regressions went unnoticed.)
    """
    print("=== Testing Security Validator ===")

    failures = []  # human-readable descriptions of every failed check

    # Create temp directory for testing
    with tempfile.TemporaryDirectory() as temp_dir:
        validator = SecurityValidator(Path(temp_dir))

        # Path validation cases: (path, expected validity, description)
        test_paths = [
            ("en/docs/test.md", True, "Valid path"),
            ("../../../etc/passwd", False, "Directory traversal"),
            ("/etc/passwd", False, "Absolute path"),
            ("en/test.exe", False, "Invalid extension"),
            ("docs.json", True, "Special case"),
            ("zh-hans/test.mdx", True, "Valid target path"),
        ]

        print("Path Validation Tests:")
        for path, should_be_valid, description in test_paths:
            valid, error = validator.validate_file_path(path)
            status = "✓" if valid == should_be_valid else "✗"
            result = "PASS" if valid == should_be_valid else "FAIL"
            print(f"  {status} {path}: {result} - {description}")
            if error and not should_be_valid:
                print(f"    Error: {error}")
            if valid != should_be_valid:
                failures.append(f"path validation: {path} ({description})")

        # Content validation cases: (content, expected validity)
        print("\nContent Validation Tests:")
        test_contents = [
            ("# Normal markdown", True),
            ("<script>alert('xss')</script>", False),
            ("Normal text with onclick='bad()'", False),
            ("Valid content with [link](./test.md)", True),
        ]

        for content, should_be_valid in test_contents:
            valid, error = validator.validate_file_content(content)
            status = "✓" if valid == should_be_valid else "✗"
            result = "PASS" if valid == should_be_valid else "FAIL"
            preview = content[:30] + "..." if len(content) > 30 else content
            print(f"  {status} {preview}: {result}")
            if valid != should_be_valid:
                failures.append(f"content validation: {preview}")

        # Sync plan validation
        print("\nSync Plan Validation Tests:")

        # A minimal plan that should be accepted
        valid_plan = {
            "files_to_sync": [
                {"path": "en/test.md", "size": 1000}
            ],
            "target_languages": ["zh-hans", "ja-jp"],
            "metadata": {"pr_number": 123}
        }

        valid, error = validator.validate_sync_plan(valid_plan)
        status = "✓" if valid else "✗"
        print(f"  {status} Valid sync plan: {'PASS' if valid else 'FAIL'}")
        if not valid:
            failures.append("sync plan: valid plan rejected")

        # A plan exceeding the per-PR file limit, which should be rejected
        invalid_plan = {
            "files_to_sync": [{"path": f"en/test{i}.md", "size": 1000} for i in range(60)],
            "target_languages": ["zh-hans"],
            "metadata": {"pr_number": 123}
        }

        valid, error = validator.validate_sync_plan(invalid_plan)
        status = "✓" if not valid else "✗"
        print(f"  {status} Invalid sync plan (too many files): {'PASS' if not valid else 'FAIL'}")
        if error:
            print(f"    Error: {error}")
        if valid:
            failures.append("sync plan: oversized plan accepted")

    # Fail loudly instead of only printing FAIL markers.
    if failures:
        raise AssertionError("Security validator checks failed: " + "; ".join(failures))
|
||||
|
||||
def test_secure_synchronizer():
    """Exercise DocsSynchronizer path validation with security enabled.

    Raises:
        AssertionError: if any path validation result does not match the
            expected outcome. (Previously the function only printed FAIL
            markers and always returned successfully.)
    """
    print("\n=== Testing Secure Synchronizer ===")

    # Initialize with security enabled
    sync = DocsSynchronizer("test-key", enable_security=True)

    print("Synchronizer Security Tests:")

    # Path validation cases: (path, expected validity)
    test_cases = [
        ("en/docs/test.md", True),
        ("../../../etc/passwd", False),
        ("malicious/../path", False),
        ("docs.json", True),
    ]

    failures = []
    for path, should_be_valid in test_cases:
        valid, error = sync.validate_file_path(path)
        status = "✓" if valid == should_be_valid else "✗"
        result = "PASS" if valid == should_be_valid else "FAIL"
        print(f"  {status} {path}: {result}")
        if error and not should_be_valid:
            print(f"    Error: {error}")
        if valid != should_be_valid:
            failures.append(path)

    # Fail loudly instead of only printing FAIL markers.
    if failures:
        raise AssertionError(f"Synchronizer path validation failed for: {failures}")
|
||||
|
||||
def create_test_sync_plan():
    """Build a fixed, fully-populated sync plan fixture for validation tests."""
    # Assemble each section separately, then compose the final plan.
    metadata = {
        "pr_number": 123,
        "pr_title": "Test PR",
        "pr_author": "test-user",
        "base_sha": "abc123",
        "head_sha": "def456",
        "file_count": 1,
        "timestamp": "2024-08-22T10:00:00Z",
        "repository": "test/repo",
        "ref": "refs/pull/123/head",
    }

    files = [
        {
            "path": "en/documentation/pages/getting-started/test.mdx",
            "size": 2048,
            "type": "mdx",
        }
    ]

    structure = {
        "structure_changed": False,
        "navigation_modified": False,
        "languages_affected": [],
    }

    return {
        "metadata": metadata,
        "files_to_sync": files,
        "structure_changes": structure,
        "target_languages": ["zh-hans", "ja-jp"],
        "sync_required": True,
    }
|
||||
|
||||
def test_artifact_simulation():
    """Test the artifact handling simulation"""
    print("\n=== Testing Artifact Simulation ===")

    # Work inside a throwaway directory that mimics a CI artifact folder.
    with tempfile.TemporaryDirectory() as workdir:
        workdir_path = Path(workdir)

        # Build the same artifact set a real sync run would produce.
        plan = create_test_sync_plan()
        artifact_contents = {
            "analysis.json": plan["metadata"],
            "sync_plan.json": plan,
            "changed_files.txt": "en/documentation/pages/getting-started/test.mdx\n",
            "file_analysis.txt": "en/documentation/pages/getting-started/test.mdx|2048\n",
        }

        for name, payload in artifact_contents.items():
            target = workdir_path / name
            if isinstance(payload, dict):
                target.write_text(json.dumps(payload, indent=2))
            else:
                target.write_text(payload)

        # NOTE(review): the validator is rooted at the temp directory's
        # *parent*, mirroring the original code — confirm that is intentional.
        validator = SecurityValidator(workdir_path.parent)

        # Validate the generated sync plan artifact.
        ok, problem = validator.validate_sync_plan(plan)
        marker = "✓" if ok else "✗"
        print(f"  {marker} Sync plan validation: {'PASS' if ok else 'FAIL'}")
        if problem:
            print(f"    Error: {problem}")

    print("  ✓ Artifact simulation completed successfully")
|
||||
|
||||
def main():
    """Run all security test suites.

    Exits with a nonzero status when any suite raises. Previously a failed
    run printed the error but still exited 0, so CI would treat it as a pass.
    """
    try:
        test_security_validator()
        test_secure_synchronizer()
        test_artifact_simulation()

        print("\n=== Test Summary ===")
        print("✓ Security validation tests completed")
        print("✓ Synchronizer security tests completed")
        print("✓ Artifact handling tests completed")
        print("\n🎉 All security tests passed!")

    except Exception as e:
        print(f"\n❌ Test failed with error: {e}")
        import traceback
        traceback.print_exc()
        # Propagate the failure to the caller / CI via a nonzero exit status.
        raise SystemExit(1)
|
||||
|
||||
# Script entry point: run the full security test suite when executed directly.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user