From 9f09e7800e7d757ff602901be2a5f05df970d237 Mon Sep 17 00:00:00 2001 From: David Karlsson <35727626+dvdksn@users.noreply.github.com> Date: Thu, 26 Feb 2026 13:31:51 +0100 Subject: [PATCH] ci: force freshness agent to finally move on Signed-off-by: David Karlsson <35727626+dvdksn@users.noreply.github.com> --- .github/agents/docs-scanner.yaml | 47 +++++++++++++++++++------ .github/workflows/nightly-docs-scan.yml | 18 ++++++---- 2 files changed, 48 insertions(+), 17 deletions(-) diff --git a/.github/agents/docs-scanner.yaml b/.github/agents/docs-scanner.yaml index 8e974b509b..29eab681e8 100644 --- a/.github/agents/docs-scanner.yaml +++ b/.github/agents/docs-scanner.yaml @@ -21,16 +21,42 @@ agents: ## Setup - 1. Call `get_memories` to get the list of already-scanned paths. - Each entry has the form `scanned: <path> YYYY-MM-DD`. - 2. Use `list_directory` to explore `content/manuals/` and find a leaf - directory (no subdirectories) whose path does NOT appear in memory. - Skip: content/reference/, content/languages/, content/tags/, - content/includes/. If all leaves have been scanned, pick the one - with the oldest date. - 3. Call `directory_tree` on that leaf and read all its files - 4. File issues for what you find (max 3 per run) - 5. Call `add_memory` with `scanned: <path> YYYY-MM-DD` + 1. Read `.cache/scan-history.json` using `read_file`. + This file tracks every previously scanned directory as a JSON object: + ```json + { + "scanned": { + "content/manuals/desktop/networking/": "2026-02-24", + "content/manuals/build/cache/": "2026-02-23" + } + } + ``` + If the file does not exist or is empty, treat it as `{"scanned": {}}`. + + 2. Call `get_memories` to load any learned patterns from previous scans + (false positives to skip, codebase context, human feedback). + + 3. Use `list_directory` to explore `content/manuals/` and find all leaf + directories (no subdirectories).
Skip these top-level paths entirely: + content/reference/, content/languages/, content/tags/, + content/includes/. + + 4. Pick a leaf directory to scan: + - FIRST CHOICE: a directory that does NOT appear in scan-history.json + - FALLBACK: if every leaf directory has been scanned, pick the one + with the OLDEST date in scan-history.json + + 5. Call `directory_tree` on the selected leaf and read all its files. + + 6. Analyze and file issues for what you find (max 3 per run). + + 7. After scanning, update `.cache/scan-history.json` using `write_file`. + Read the current content, add or update the scanned path with today's + date (YYYY-MM-DD), and write the full updated JSON back. + + 8. If you learn anything useful for future scans (false positive patterns, + codebase context), call `add_memory` to store it. Do NOT use + `add_memory` for scan tracking — that is what scan-history.json is for. ## What good issues look like @@ -107,6 +133,7 @@ agents: tools: - read_file - read_multiple_files + - write_file - list_directory - directory_tree - type: memory diff --git a/.github/workflows/nightly-docs-scan.yml b/.github/workflows/nightly-docs-scan.yml index 2e6eccc14e..b492290043 100644 --- a/.github/workflows/nightly-docs-scan.yml +++ b/.github/workflows/nightly-docs-scan.yml @@ -35,13 +35,15 @@ jobs: - name: Ensure cache directory exists run: mkdir -p "${{ github.workspace }}/.cache" - - name: Restore scanner memory + - name: Restore scanner state uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 with: - path: ${{ github.workspace }}/.cache/scanner-memory.db - key: docs-scanner-memory-${{ github.repository }}-${{ github.run_id }} + path: | + ${{ github.workspace }}/.cache/scanner-memory.db + ${{ github.workspace }}/.cache/scan-history.json + key: docs-scanner-state-${{ github.repository }}-${{ github.run_id }} restore-keys: | - docs-scanner-memory-${{ github.repository }}- + docs-scanner-state-${{ github.repository }}- - name: Generate GitHub 
App token if: env.HAS_APP_SECRETS == 'true' @@ -63,9 +65,11 @@ jobs: github-token: ${{ steps.app-token.outputs.token || github.token }} timeout: 1200 - - name: Save scanner memory + - name: Save scanner state uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 if: always() with: - path: ${{ github.workspace }}/.cache/scanner-memory.db - key: docs-scanner-memory-${{ github.repository }}-${{ github.run_id }} + path: | + ${{ github.workspace }}/.cache/scanner-memory.db + ${{ github.workspace }}/.cache/scan-history.json + key: docs-scanner-state-${{ github.repository }}-${{ github.run_id }}