From cfa99e63e3d0ee3a67dac3f3733534fe0d3f71c4 Mon Sep 17 00:00:00 2001
From: David Karlsson <35727626+dvdksn@users.noreply.github.com>
Date: Fri, 20 Feb 2026 10:23:02 +0100
Subject: [PATCH] ci: freshness agent + nightly repo scan

Signed-off-by: David Karlsson <35727626+dvdksn@users.noreply.github.com>
---
 .github/agents/docs-scanner.yaml        | 117 ++++++++++++++++++++++++
 .github/workflows/nightly-docs-scan.yml |  71 ++++++++++++++
 2 files changed, 188 insertions(+)
 create mode 100644 .github/agents/docs-scanner.yaml
 create mode 100644 .github/workflows/nightly-docs-scan.yml

diff --git a/.github/agents/docs-scanner.yaml b/.github/agents/docs-scanner.yaml
new file mode 100644
index 0000000000..db44357186
--- /dev/null
+++ b/.github/agents/docs-scanner.yaml
@@ -0,0 +1,117 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/docker/cagent/refs/heads/main/cagent-schema.json
+models:
+  claude-sonnet:
+    provider: anthropic
+    model: claude-sonnet-4-5
+    max_tokens: 8192
+    temperature: 0.3
+
+agents:
+  root:
+    model: claude-sonnet  # refers to the entry under `models` above
+    description: Daily documentation freshness scanner for Docker docs
+    add_prompt_files:
+      - STYLE.md
+    instruction: |
+      You are an experienced technical writer reviewing Docker documentation
+      (https://docs.docker.com/) for freshness issues. The docs are maintained
+      in this repository under content/. Your job is to read a subsection of
+      the docs, identify genuine quality problems, and file GitHub issues for
+      the ones worth fixing.
+
+      ## Setup
+
+      1. Call `get_memories` to find which subsection to scan next
+      2. Discover the structure: `list_directory content/`, then drill down
+         through `content/manuals/` to find a leaf subsection not recently
+         scanned. Skip: content/reference/, content/languages/, content/tags/,
+         content/includes/
+      3. Call `directory_tree` on that subsection and read all its files
+      4. File issues for what you find (max 3 per run)
+      5. Call `add_memory` with `scanned: <subsection path> YYYY-MM-DD`
+
+      ## What good issues look like
+
+      You're looking for things a reader would actually notice as wrong or
+      confusing. Good issues are specific, verifiable, and actionable. The
+      kinds of things worth filing:
+
+      - **Stale framing**: content that describes a completed migration,
+        rollout, or transition as if it's still in progress ("is transitioning
+        to", "will replace", "ongoing integration")
+      - **Time-relative language**: "currently", "recently", "coming soon",
+        "new in X.Y" — STYLE.md prohibits these because they go stale silently
+      - **Cross-reference drift**: an internal link whose surrounding context
+        no longer matches what the linked page actually covers; a linked
+        heading that no longer exists
+      - **Sibling contradictions**: two pages in the same directory that give
+        conflicting information about the same feature or procedure
+      - **Missing deprecation notices**: a page describing a feature you know
+        is deprecated or removed, with no notice pointing users elsewhere
+
+      ## What not to file
+
+      - Broken links (htmltest catches these)
+      - Style and formatting issues (Vale and markdownlint catch these)
+      - Anything that is internally consistent — if the front matter, badges,
+        and prose all agree, the page is accurate even if it mentions beta
+        status or platform limitations
+      - Suspicions you can't support with text from the file
+
+      ## Filing issues
+
+      Check for duplicates first:
+      ```bash
+      FILE_PATH="path/to/file.md"
+      gh issue list --label "agent/generated" --state open --search "in:body \"$FILE_PATH\""
+      ```
+
+      Then create:
+      ```bash
+      ISSUE_TITLE="[docs-scanner] Brief description"
+      cat << 'EOF' | gh issue create \
+        --title "$ISSUE_TITLE" \
+        --label "agent/generated" \
+        --body-file -
+      **File:** `path/to/file.md`
+
+      ### Issue
+
+      What's wrong, with an exact quote from the file:
+
+      > quoted text
+
+      ### Suggested fix
+
+      What should change.
+
+      ---
+      *Found by nightly documentation freshness scanner*
+      EOF
+      ```
+
+      ## Output
+
+      ```
+      SCAN COMPLETE
+      Subsection: content/manuals/desktop/features/
+      Files checked: N
+      Issues created: N
+      - #123: [docs-scanner] Issue title
+      ```
+
+    toolsets:
+      - type: filesystem
+        tools:
+          - read_file
+          - read_multiple_files
+          - list_directory
+          - directory_tree
+      - type: memory
+        path: .cache/scanner-memory.db  # same file the nightly workflow restores/saves via actions/cache
+      - type: shell
+
+permissions:
+  allow:
+    - shell:cmd=gh issue list --*
+    - shell:cmd=gh issue create --*
diff --git a/.github/workflows/nightly-docs-scan.yml b/.github/workflows/nightly-docs-scan.yml
new file mode 100644
index 0000000000..2e6eccc14e
--- /dev/null
+++ b/.github/workflows/nightly-docs-scan.yml
@@ -0,0 +1,71 @@
+name: Nightly Documentation Scan
+
+on:
+  schedule:
+    # Run every day at 3am UTC
+    - cron: "0 3 * * *"
+  workflow_dispatch:
+    inputs:
+      dry-run:
+        description: "Report issues but do not create them"
+        type: boolean
+        default: false
+
+permissions:
+  contents: read
+  issues: write
+
+concurrency:
+  group: nightly-docs-scan
+  cancel-in-progress: false  # let a running scan finish; a newly triggered run waits instead
+
+jobs:
+  scan:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30  # must exceed the agent timeout below (1200, presumably seconds) so the save step still gets time
+    env:
+      HAS_APP_SECRETS: ${{ secrets.CAGENT_REVIEWER_APP_ID != '' }}  # exposed as env so the step-level `if:` below can test it
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v5
+        with:
+          fetch-depth: 1
+
+      - name: Ensure cache directory exists
+        run: mkdir -p "${{ github.workspace }}/.cache"
+
+      - name: Restore scanner memory
+        uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
+        with:
+          path: ${{ github.workspace }}/.cache/scanner-memory.db
+          key: docs-scanner-memory-${{ github.repository }}-${{ github.run_id }}-${{ github.run_attempt }}  # never an exact hit; restore-keys picks the newest prior save
+          restore-keys: |
+            docs-scanner-memory-${{ github.repository }}-
+
+      - name: Generate GitHub App token
+        if: env.HAS_APP_SECRETS == 'true'
+        id: app-token
+        continue-on-error: true  # on failure the scan step falls back to github.token
+        uses: tibdex/github-app-token@3beb63f4bd073e61482598c45c71c1019b59b73a # v2
+        with:
+          app_id: ${{ secrets.CAGENT_REVIEWER_APP_ID }}
+          private_key: ${{ secrets.CAGENT_REVIEWER_APP_PRIVATE_KEY }}
+
+      - name: Run documentation scan
+        uses: docker/cagent-action@latest  # NOTE(review): unpinned; pin to a commit SHA like the other third-party actions
+        env:
+          GH_TOKEN: ${{ steps.app-token.outputs.token || github.token }}
+        with:
+          agent: ${{ github.workspace }}/.github/agents/docs-scanner.yaml
+          prompt: "${{ inputs['dry-run'] && 'DRY RUN MODE: Do not create any GitHub issues. Report what you would create but skip the gh issue create commands.' || 'Run the nightly documentation scan as described in your instructions.' }}"
+          anthropic-api-key: ${{ secrets.ANTHROPIC_API_KEY }}
+          github-token: ${{ steps.app-token.outputs.token || github.token }}
+          timeout: 1200
+
+      - name: Save scanner memory
+        uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
+        if: always()
+        with:
+          path: ${{ github.workspace }}/.cache/scanner-memory.db
+          key: docs-scanner-memory-${{ github.repository }}-${{ github.run_id }}-${{ github.run_attempt }}  # run_attempt keeps the key unique on re-runs; existing cache keys cannot be overwritten