From 8b42ad08e51ff7d310f9b745331996a290af4c2f Mon Sep 17 00:00:00 2001
From: Peter Steinberger <steipete@gmail.com>
Date: Thu, 26 Mar 2026 21:50:41 +0000
Subject: [PATCH] perf: speed up shared extension test batches

---
 docs/help/testing.md                     |   9 +-
 scripts/test-planner/planner.mjs         |  57 ++++++++-
 scripts/test-planner/runtime-profile.mjs |  17 ++-
 test/scripts/test-parallel.test.ts       | 156 +++++++++++++++++++++++
 test/vitest-config.test.ts               |  22 ++++
 5 files changed, 254 insertions(+), 7 deletions(-)

diff --git a/docs/help/testing.md b/docs/help/testing.md
index fa5be3734e8..8965ddfdd96 100644
--- a/docs/help/testing.md
+++ b/docs/help/testing.md
@@ -55,8 +55,13 @@ Think of the suites as “increasing realism” (and increasing flakiness/cost):
   - Should be fast and stable
 - Scheduler note:
   - `pnpm test` now keeps a small checked-in behavioral manifest for true pool/isolation overrides and a separate timing snapshot for the slowest unit files.
-  - Extension-only local runs now also use a checked-in extensions timing snapshot so the shared extensions lane can split into a few measured batches instead of one oversized run.
+  - Extension-only local runs now also use a checked-in extensions timing snapshot plus a slightly coarser shared batch target on high-memory hosts, so the shared extensions lane avoids spawning an extra batch when two measured shared runs are enough.
+  - High-memory local extension shared batches also run with a slightly higher worker cap than before, which shortened the two remaining shared extension batches without changing the isolated extension lanes.
   - High-memory local channel runs now reuse the checked-in channel timing snapshot to split the shared channels lane into a few measured batches instead of one long shared worker.
+  - High-memory local channel shared batches also run with a slightly lower worker cap than shared unit batches, which helped targeted channel reruns avoid CPU oversubscription once isolated channel lanes are already in flight.
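+  - Targeted local channel reruns now start splitting shared channel work a bit earlier (the default batch target drops from 12s to 11s; override it with `OPENCLAW_TEST_TARGETED_CHANNELS_BATCH_TARGET_MS`), which keeps medium-sized targeted reruns from leaving one oversized shared channel batch on the critical path.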
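+  - Targeted local unit reruns also split medium-sized shared unit selections (more than four files whose estimated total exceeds the 12s default target, tunable via `OPENCLAW_TEST_TARGETED_UNIT_BATCH_TARGET_MS`) into measured batches, which helps large focused reruns overlap instead of waiting behind one long shared unit lane.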
+  - High-memory local multi-surface runs also use slightly coarser shared `unit-fast` batches so the mixed planner spends less time spinning up extra shared unit workers before the later surfaces can overlap.
   - Shared unit, extension, channel, and gateway runs all stay on Vitest `forks`.
   - The wrapper keeps measured fork-isolated exceptions and heavy singleton lanes explicit in `test/fixtures/test-parallel.behavior.json`.
   - The wrapper peels the heaviest measured files into dedicated lanes instead of relying on a growing hand-maintained exclusion list.
@@ -85,7 +90,7 @@ Think of the suites as “increasing realism” (and increasing flakiness/cost):
   - `pnpm test:changed` runs the wrapper with `--changed origin/main`.
   - `pnpm test:changed:max` keeps the same changed-file filter but uses the wrapper's aggressive local planner profile.
   - `pnpm test:max` exposes that same planner profile for a full local run.
-  - On Node 25, the normal local profile keeps top-level lane parallelism off; `pnpm test:max` re-enables it. On Node 22/24 LTS, normal local runs can also use top-level lane parallelism.
+  - On supported local Node versions, including Node 25, the normal profile can use top-level lane parallelism. `pnpm test:max` still pushes the planner harder when you want a more aggressive local run.
   - The base Vitest config marks the wrapper manifests/config files as `forceRerunTriggers` so changed-mode reruns stay correct when scheduler inputs change.
   - Vitest's filesystem module cache is now enabled by default for Node-side test reruns.
   - Opt out with `OPENCLAW_VITEST_FS_MODULE_CACHE=0` or `OPENCLAW_VITEST_FS_MODULE_CACHE=false` if you suspect stale transform cache behavior.
diff --git a/scripts/test-planner/planner.mjs b/scripts/test-planner/planner.mjs
index ada727d267d..f881c81ce23 100644
--- a/scripts/test-planner/planner.mjs
+++ b/scripts/test-planner/planner.mjs
@@ -255,6 +255,19 @@ const splitFilesByBalancedDurationBudget = (files, targetDurationMs, estimateDur
   );
 };
 
+// Unit-fast batch target; high-memory local multi-surface runs get at least 75s batches.
+const resolveUnitFastBatchTargetMs = ({ context, selectedSurfaceSet, unitOnlyRun }) => {
+  const { executionBudget, runtime } = context;
+  const budgetTargetMs = executionBudget.unitFastBatchTargetMs;
+  const spansSeveralSurfaces = !unitOnlyRun && selectedSurfaceSet.size > 1;
+  // Unit-only, single-surface, CI, and non-high-memory runs keep the budget's own target.
+  if (!spansSeveralSurfaces || runtime.isCI || runtime.memoryBand !== "high") {
+    return budgetTargetMs;
+  }
+  // Only ever coarsen the target: a budget value above the floor is kept as-is.
+  return Math.max(budgetTargetMs, 75_000);
+};
+
 const resolveMaxWorkersForUnit = (unit, context) => {
   const overrideWorkers = Number.parseInt(context.env.OPENCLAW_TEST_WORKERS ?? "", 10);
   const resolvedOverride =
@@ -272,6 +285,9 @@ const resolveMaxWorkersForUnit = (unit, context) => {
   if (unit.surface === "extensions") {
     return budget.extensionWorkers;
   }
+  if (unit.surface === "channels") {
+    return budget.channelSharedWorkers ?? budget.unitSharedWorkers;
+  }
   if (unit.surface === "gateway") {
     return budget.gatewayWorkers;
   }
@@ -416,7 +432,11 @@ const buildDefaultUnits = (context, request) => {
     1,
     parseEnvNumber(env, "OPENCLAW_TEST_UNIT_FAST_LANES", defaultUnitFastLaneCount),
   );
-  const defaultUnitFastBatchTargetMs = executionBudget.unitFastBatchTargetMs;
+  const defaultUnitFastBatchTargetMs = resolveUnitFastBatchTargetMs({
+    context,
+    selectedSurfaceSet,
+    unitOnlyRun,
+  });
   const unitFastBatchTargetMs = parseEnvNumber(
     env,
     "OPENCLAW_TEST_UNIT_FAST_BATCH_TARGET_MS",
@@ -806,10 +826,19 @@ const buildTargetedUnits = (context, request) => {
     return [];
   }
   const unitMemoryIsolatedFiles = request.unitMemoryIsolatedFiles ?? [];
+  const estimateUnitDurationMs = (file) =>
+    context.unitTimingManifest.files[file]?.durationMs ??
+    context.unitTimingManifest.defaultDurationMs;
   const estimateChannelDurationMs = (file) =>
     context.channelTimingManifest.files[file]?.durationMs ??
     context.channelTimingManifest.defaultDurationMs;
-  const defaultTargetedChannelsBatchTargetMs = 12_000;
+  const defaultTargetedUnitBatchTargetMs = 12_000;
+  const targetedUnitBatchTargetMs = parseEnvNumber(
+    context.env,
+    "OPENCLAW_TEST_TARGETED_UNIT_BATCH_TARGET_MS",
+    defaultTargetedUnitBatchTargetMs,
+  );
+  const defaultTargetedChannelsBatchTargetMs = 11_000;
   const targetedChannelsBatchTargetMs = parseEnvNumber(
     context.env,
     "OPENCLAW_TEST_TARGETED_CHANNELS_BATCH_TARGET_MS",
@@ -855,6 +884,28 @@ const buildTargetedUnits = (context, request) => {
         ),
       );
     }
+    if (
+      classification.surface === "unit" &&
+      uniqueFilters.length > 4 &&
+      targetedUnitBatchTargetMs > 0
+    ) {
+      const estimatedTotalDurationMs = uniqueFilters.reduce(
+        (totalMs, file) => totalMs + estimateUnitDurationMs(file),
+        0,
+      );
+      if (estimatedTotalDurationMs > targetedUnitBatchTargetMs) {
+        return splitFilesByBalancedDurationBudget(
+          uniqueFilters,
+          targetedUnitBatchTargetMs,
+          estimateUnitDurationMs,
+        ).map((batch, batchIndex) =>
+          createExecutionUnit(context, {
+            ...createTargetedUnit(context, classification, batch),
+            id: `unit-batch-${String(batchIndex + 1)}`,
+          }),
+        );
+      }
+    }
     if (
       classification.surface === "channels" &&
       uniqueFilters.length > 4 &&
@@ -865,7 +916,7 @@ const buildTargetedUnits = (context, request) => {
         0,
       );
       if (estimatedTotalDurationMs > targetedChannelsBatchTargetMs) {
-        return splitFilesByDurationBudget(
+        return splitFilesByBalancedDurationBudget(
           uniqueFilters,
           targetedChannelsBatchTargetMs,
           estimateChannelDurationMs,
diff --git a/scripts/test-planner/runtime-profile.mjs b/scripts/test-planner/runtime-profile.mjs
index f31d49c0aac..79977570f4d 100644
--- a/scripts/test-planner/runtime-profile.mjs
+++ b/scripts/test-planner/runtime-profile.mjs
@@ -91,6 +91,7 @@ const LOCAL_MEMORY_BUDGETS = {
   constrained: {
     vitestCap: 2,
     unitShared: 2,
+    channelsShared: 2,
     unitIsolated: 1,
     unitHeavy: 1,
     extensions: 1,
@@ -107,6 +108,7 @@ const LOCAL_MEMORY_BUDGETS = {
   moderate: {
     vitestCap: 3,
     unitShared: 3,
+    channelsShared: 3,
     unitIsolated: 1,
     unitHeavy: 1,
     extensions: 2,
@@ -123,6 +125,7 @@ const LOCAL_MEMORY_BUDGETS = {
   mid: {
     vitestCap: 4,
     unitShared: 4,
+    channelsShared: 4,
     unitIsolated: 1,
     unitHeavy: 1,
     extensions: 3,
@@ -139,9 +142,10 @@ const LOCAL_MEMORY_BUDGETS = {
   high: {
     vitestCap: 6,
     unitShared: 6,
+    channelsShared: 5,
     unitIsolated: 2,
     unitHeavy: 2,
-    extensions: 4,
+    extensions: 5,
     gateway: 3,
     topLevelNoIsolate: 14,
     topLevelIsolated: 4,
@@ -160,6 +164,7 @@ const withIntentBudgetAdjustments = (budget, intentProfile, cpuCount) => {
       ...budget,
       vitestMaxWorkers: 1,
       unitSharedWorkers: 1,
+      channelSharedWorkers: 1,
       unitIsolatedWorkers: 1,
       unitHeavyWorkers: 1,
       extensionWorkers: 1,
@@ -182,6 +187,11 @@ const withIntentBudgetAdjustments = (budget, intentProfile, cpuCount) => {
       ...budget,
       vitestMaxWorkers: clamp(Math.max(budget.vitestMaxWorkers, Math.min(8, cpuCount)), 1, 16),
       unitSharedWorkers: clamp(Math.max(budget.unitSharedWorkers, Math.min(8, cpuCount)), 1, 16),
+      channelSharedWorkers: clamp(
+        Math.max(budget.channelSharedWorkers ?? budget.unitSharedWorkers, Math.min(6, cpuCount)),
+        1,
+        16,
+      ),
       unitIsolatedWorkers: clamp(Math.max(budget.unitIsolatedWorkers, Math.min(4, cpuCount)), 1, 4),
       unitHeavyWorkers: clamp(Math.max(budget.unitHeavyWorkers, Math.min(4, cpuCount)), 1, 4),
       extensionWorkers: clamp(Math.max(budget.extensionWorkers, Math.min(6, cpuCount)), 1, 6),
@@ -263,6 +273,7 @@ export function resolveExecutionBudget(runtimeCapabilities) {
     return {
       vitestMaxWorkers: runtime.isWindows ? 2 : runtime.isMacOS ? 1 : 3,
       unitSharedWorkers: macCiWorkers,
+      channelSharedWorkers: macCiWorkers,
       unitIsolatedWorkers: macCiWorkers,
       unitHeavyWorkers: macCiWorkers,
       extensionWorkers: macCiWorkers,
@@ -286,6 +297,7 @@ export function resolveExecutionBudget(runtimeCapabilities) {
   const baseBudget = {
     vitestMaxWorkers: Math.min(cpuCount, bandBudget.vitestCap),
     unitSharedWorkers: Math.min(cpuCount, bandBudget.unitShared),
+    channelSharedWorkers: Math.min(cpuCount, bandBudget.channelsShared ?? bandBudget.unitShared),
     unitIsolatedWorkers: Math.min(cpuCount, bandBudget.unitIsolated),
     unitHeavyWorkers: Math.min(cpuCount, bandBudget.unitHeavy),
     extensionWorkers: Math.min(cpuCount, bandBudget.extensions),
@@ -301,13 +313,14 @@ export function resolveExecutionBudget(runtimeCapabilities) {
     unitFastLaneCount: 1,
     unitFastBatchTargetMs: bandBudget.unitFastBatchTargetMs,
     channelsBatchTargetMs: bandBudget.channelsBatchTargetMs ?? 0,
-    extensionsBatchTargetMs: 240_000,
+    extensionsBatchTargetMs: 300_000,
   };
 
   const loadAdjustedBudget = {
     ...baseBudget,
     vitestMaxWorkers: scaleForLoad(baseBudget.vitestMaxWorkers, runtime.loadBand),
     unitSharedWorkers: scaleForLoad(baseBudget.unitSharedWorkers, runtime.loadBand),
+    channelSharedWorkers: scaleForLoad(baseBudget.channelSharedWorkers, runtime.loadBand),
     unitHeavyWorkers: scaleForLoad(baseBudget.unitHeavyWorkers, runtime.loadBand),
     extensionWorkers: scaleForLoad(baseBudget.extensionWorkers, runtime.loadBand),
     gatewayWorkers: scaleForLoad(baseBudget.gatewayWorkers, runtime.loadBand),
diff --git a/test/scripts/test-parallel.test.ts b/test/scripts/test-parallel.test.ts
index fee39f0bfe2..d84279bfe12 100644
--- a/test/scripts/test-parallel.test.ts
+++ b/test/scripts/test-parallel.test.ts
@@ -13,6 +13,7 @@ import {
   hasFatalTestRunOutput,
   resolveTestRunExitCode,
 } from "../../scripts/test-parallel-utils.mjs";
+import { loadTestCatalog } from "../../scripts/test-planner/catalog.mjs";
 
 const clearPlannerShardEnv = (env) => {
   const nextEnv = { ...env };
@@ -25,6 +26,39 @@ const clearPlannerShardEnv = (env) => {
   return nextEnv;
 };
 
+// Collects up to 100 catalog files on `surface` that the catalog does not classify as
+// isolated, in catalog order.
+const collectSharedProxyFiles = (surface) => {
+  const testCatalog = loadTestCatalog();
+  const sharedFiles = [];
+  for (const candidate of testCatalog.allKnownTestFiles) {
+    const { surface: fileSurface, isolated } = testCatalog.classifyTestFile(candidate);
+    if (fileSurface === surface && !isolated) {
+      sharedFiles.push(candidate);
+    }
+  }
+  return sharedFiles.slice(0, 100);
+};
+
+// Shared (non-isolated) samples used as stand-ins for a targeted rerun selection.
+const sharedTargetedChannelProxyFiles = collectSharedProxyFiles("channels");
+const sharedTargetedUnitProxyFiles = collectSharedProxyFiles("unit");
+
+// Targeted channel selection: the shared sample plus four explicitly listed extension tests.
+const targetedChannelProxyFiles = [
+  ...sharedTargetedChannelProxyFiles,
+  "extensions/discord/src/monitor/message-handler.preflight.acp-bindings.test.ts",
+  "extensions/discord/src/monitor/monitor.agent-components.test.ts",
+  "extensions/telegram/src/bot.create-telegram-bot.test.ts",
+  "extensions/whatsapp/src/monitor-inbox.streams-inbound-messages.test.ts",
+];
+
+// Targeted unit selection: the shared sample plus one explicitly listed CLI test file.
+const targetedUnitProxyFiles = [
+  ...sharedTargetedUnitProxyFiles,
+  "src/cli/qr-dashboard.integration.test.ts",
+];
+
 describe("scripts/test-parallel fatal output guard", () => {
   it("fails a zero exit when V8 reports an out-of-memory fatal", () => {
     const output = [
@@ -203,6 +237,32 @@ describe("scripts/test-parallel lane planning", () => {
     expect(output).toMatch(/extensions(?:-batch-1)? filters=all maxWorkers=/);
   });
 
+  it("uses fewer shared extension batches on high-memory local hosts", () => {
+    const repoRoot = path.resolve(import.meta.dirname, "../..");
+    const output = execFileSync(
+      "node",
+      ["scripts/test-parallel.mjs", "--plan", "--surface", "extensions"],
+      {
+        cwd: repoRoot,
+        env: {
+          ...clearPlannerShardEnv(process.env),
+          CI: "",
+          GITHUB_ACTIONS: "",
+          RUNNER_OS: "macOS",
+          OPENCLAW_TEST_HOST_CPU_COUNT: "12",
+          OPENCLAW_TEST_HOST_MEMORY_GIB: "128",
+          OPENCLAW_TEST_LOAD_AWARE: "0",
+        },
+        encoding: "utf8",
+      },
+    );
+
+    // Two shared batches, each planned with maxWorkers=5, and no third batch in the plan.
+    expect(output).toContain("extensions-batch-1 filters=all maxWorkers=5");
+    expect(output).toContain("extensions-batch-2 filters=all maxWorkers=5");
+    expect(output).not.toContain("extensions-batch-3");
+  });
+
   it("starts isolated channel lanes before shared extension batches on high-memory local hosts", () => {
     const repoRoot = path.resolve(import.meta.dirname, "../..");
     const output = execFileSync(
@@ -240,6 +300,102 @@ describe("scripts/test-parallel lane planning", () => {
     expect(firstChannelIsolated).toBeGreaterThanOrEqual(0);
     expect(firstExtensionBatch).toBeGreaterThan(firstChannelIsolated);
     expect(firstChannelBatch).toBeGreaterThan(firstExtensionBatch);
+    expect(output).toContain("channels-batch-1 filters=all maxWorkers=5");
+  });
+
+  it("uses coarser unit-fast batching for high-memory local multi-surface runs", () => {
+    // Planner inputs: CI flags blanked, macOS runner, 12 CPUs / 128 GiB host values.
+    const plannerEnv = {
+      ...clearPlannerShardEnv(process.env),
+      CI: "",
+      GITHUB_ACTIONS: "",
+      RUNNER_OS: "macOS",
+      OPENCLAW_TEST_HOST_CPU_COUNT: "12",
+      OPENCLAW_TEST_HOST_MEMORY_GIB: "128",
+      OPENCLAW_TEST_LOAD_AWARE: "0",
+    };
+    const surfaceArgs = ["unit", "extensions", "channels"].flatMap((surface) => [
+      "--surface",
+      surface,
+    ]);
+    const repoRoot = path.resolve(import.meta.dirname, "../..");
+
+    // Run the wrapper with `--plan` and capture its stdout as text.
+    const planOutput = execFileSync(
+      "node",
+      ["scripts/test-parallel.mjs", "--plan", ...surfaceArgs],
+      {
+        cwd: repoRoot,
+        env: plannerEnv,
+        encoding: "utf8",
+      },
+    );
+
+    // A fourth unit-fast batch must be planned, and a fifth must not.
+    expect(planOutput).toContain("unit-fast-batch-4");
+    expect(planOutput).not.toContain("unit-fast-batch-5");
+  });
+
+  it("uses earlier targeted channel batching on high-memory local hosts", () => {
+    // Plan a targeted channels run: one `--files <path>` pair per proxy file.
+    const plannerArgs = [
+      "scripts/test-parallel.mjs",
+      "--plan",
+      "--surface",
+      "channels",
+      ...targetedChannelProxyFiles.flatMap((file) => ["--files", file]),
+    ];
+    const plannerEnv = {
+      ...clearPlannerShardEnv(process.env),
+      CI: "",
+      GITHUB_ACTIONS: "",
+      RUNNER_OS: "macOS",
+      OPENCLAW_TEST_HOST_CPU_COUNT: "12",
+      OPENCLAW_TEST_HOST_MEMORY_GIB: "128",
+      OPENCLAW_TEST_LOAD_AWARE: "0",
+    };
+    const repoRoot = path.resolve(import.meta.dirname, "../..");
+
+    const planOutput = execFileSync("node", plannerArgs, {
+      cwd: repoRoot,
+      env: plannerEnv,
+      encoding: "utf8",
+    });
+
+    expect(planOutput).toContain("channels-batch-1 filters=49");
+    expect(planOutput).toContain("channels-batch-2 filters=51");
+    expect(planOutput).not.toContain("channels-batch-3");
+  });
+
+  it("uses targeted unit batching on high-memory local hosts", () => {
+    // Plan a targeted unit run: one `--files <path>` pair per proxy file.
+    const plannerArgs = [
+      "scripts/test-parallel.mjs",
+      "--plan",
+      "--surface",
+      "unit",
+      ...targetedUnitProxyFiles.flatMap((file) => ["--files", file]),
+    ];
+    const plannerEnv = {
+      ...clearPlannerShardEnv(process.env),
+      CI: "",
+      GITHUB_ACTIONS: "",
+      RUNNER_OS: "macOS",
+      OPENCLAW_TEST_HOST_CPU_COUNT: "12",
+      OPENCLAW_TEST_HOST_MEMORY_GIB: "128",
+      OPENCLAW_TEST_LOAD_AWARE: "0",
+    };
+    const repoRoot = path.resolve(import.meta.dirname, "../..");
+
+    const planOutput = execFileSync("node", plannerArgs, {
+      cwd: repoRoot,
+      env: plannerEnv,
+      encoding: "utf8",
+    });
+
+    expect(planOutput).toContain("unit-batch-1 filters=50");
+    expect(planOutput).toContain("unit-batch-2 filters=49");
+    expect(planOutput).not.toContain("unit-batch-3");
   });
 
   it("explains targeted file ownership and execution policy", () => {
diff --git a/test/vitest-config.test.ts b/test/vitest-config.test.ts
index 5ef78957e30..d7106f379e7 100644
--- a/test/vitest-config.test.ts
+++ b/test/vitest-config.test.ts
@@ -172,7 +172,29 @@ describe("resolveLocalVitestMaxWorkers", () => {
     expect(runtime.memoryBand).toBe("high");
     expect(runtime.loadBand).toBe("idle");
     expect(budget.channelsBatchTargetMs).toBe(30_000);
+    expect(budget.channelSharedWorkers).toBe(5);
     expect(budget.deferredRunConcurrency).toBe(8);
     expect(budget.topLevelParallelLimitNoIsolate).toBe(14);
   });
+
+  it("uses a coarser shared extension batch target on high-memory local hosts", () => {
+    // Resolver inputs: macOS runner env plus 16 CPUs, 128 GiB of memory, 0.2 load averages.
+    const capabilities = resolveRuntimeCapabilities(
+      { RUNNER_OS: "macOS" },
+      {
+        cpuCount: 16,
+        totalMemoryBytes: 128 * 1024 ** 3,
+        platform: "darwin",
+        mode: "local",
+        loadAverage: [0.2, 0.2, 0.2],
+      },
+    );
+    const executionBudget = resolveExecutionBudget(capabilities);
+
+    // Check the detected bands first, then the extension budget values resolved for them.
+    expect(capabilities.memoryBand).toBe("high");
+    expect(capabilities.loadBand).toBe("idle");
+    expect(executionBudget.extensionsBatchTargetMs).toBe(300_000);
+    expect(executionBudget.extensionWorkers).toBe(5);
+  });
 });