mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-27 09:21:35 +07:00
perf: speed up shared extension test batches
This commit is contained in:
@@ -55,8 +55,13 @@ Think of the suites as “increasing realism” (and increasing flakiness/cost):
|
||||
- Should be fast and stable
|
||||
- Scheduler note:
|
||||
- `pnpm test` now keeps a small checked-in behavioral manifest for true pool/isolation overrides and a separate timing snapshot for the slowest unit files.
|
||||
- Extension-only local runs now also use a checked-in extensions timing snapshot so the shared extensions lane can split into a few measured batches instead of one oversized run.
|
||||
- Extension-only local runs now also use a checked-in extensions timing snapshot plus a slightly coarser shared batch target on high-memory hosts, so the shared extensions lane avoids spawning an extra batch when two measured shared runs are enough.
|
||||
- High-memory local extension shared batches also run with a slightly higher worker cap than before, which shortened the two remaining shared extension batches without changing the isolated extension lanes.
|
||||
- High-memory local channel runs now reuse the checked-in channel timing snapshot to split the shared channels lane into a few measured batches instead of one long shared worker.
|
||||
- High-memory local channel shared batches also run with a slightly lower worker cap than shared unit batches, which helped targeted channel reruns avoid CPU oversubscription once isolated channel lanes are already in flight.
|
||||
- Targeted local channel reruns now start splitting shared channel work a bit earlier, which keeps medium-sized targeted reruns from leaving one oversized shared channel batch on the critical path.
|
||||
- Targeted local unit reruns also split medium-sized shared unit selections into measured batches, which helps large focused reruns overlap instead of waiting behind one long shared unit lane.
|
||||
- High-memory local multi-surface runs also use slightly coarser shared `unit-fast` batches so the mixed planner spends less time spinning up extra shared unit workers before the later surfaces can overlap.
|
||||
- Shared unit, extension, channel, and gateway runs all stay on Vitest `forks`.
|
||||
- The wrapper keeps measured fork-isolated exceptions and heavy singleton lanes explicit in `test/fixtures/test-parallel.behavior.json`.
|
||||
- The wrapper peels the heaviest measured files into dedicated lanes instead of relying on a growing hand-maintained exclusion list.
|
||||
@@ -85,7 +90,7 @@ Think of the suites as “increasing realism” (and increasing flakiness/cost):
|
||||
- `pnpm test:changed` runs the wrapper with `--changed origin/main`.
|
||||
- `pnpm test:changed:max` keeps the same changed-file filter but uses the wrapper's aggressive local planner profile.
|
||||
- `pnpm test:max` exposes that same planner profile for a full local run.
|
||||
- On Node 25, the normal local profile keeps top-level lane parallelism off; `pnpm test:max` re-enables it. On Node 22/24 LTS, normal local runs can also use top-level lane parallelism.
|
||||
- On supported local Node versions, including Node 25, the normal profile can use top-level lane parallelism. `pnpm test:max` still pushes the planner harder when you want a more aggressive local run.
|
||||
- The base Vitest config marks the wrapper manifests/config files as `forceRerunTriggers` so changed-mode reruns stay correct when scheduler inputs change.
|
||||
- Vitest's filesystem module cache is now enabled by default for Node-side test reruns.
|
||||
- Opt out with `OPENCLAW_VITEST_FS_MODULE_CACHE=0` or `OPENCLAW_VITEST_FS_MODULE_CACHE=false` if you suspect stale transform cache behavior.
|
||||
|
||||
@@ -255,6 +255,19 @@ const splitFilesByBalancedDurationBudget = (files, targetDurationMs, estimateDur
|
||||
);
|
||||
};
|
||||
|
||||
/**
 * Resolves the duration budget (in ms) used to split shared `unit-fast` work into batches.
 *
 * The execution budget's `unitFastBatchTargetMs` is returned as-is unless ALL of the
 * following hold, in which case the target is raised to at least 75 000 ms so that
 * mixed-surface runs produce fewer, coarser shared unit batches:
 *   - the run is not unit-only,
 *   - more than one surface is selected,
 *   - the run is not on CI,
 *   - the host is in the "high" memory band.
 *
 * @param {object} options
 * @param {object} options.context - Planner context; reads `executionBudget.unitFastBatchTargetMs`,
 *   `runtime.isCI` and `runtime.memoryBand`.
 * @param {{ size: number }} options.selectedSurfaceSet - Selected surfaces (only `.size` is read).
 * @param {boolean} options.unitOnlyRun - True when only the unit surface is being run.
 * @returns {number} Batch target in milliseconds.
 */
const resolveUnitFastBatchTargetMs = ({ context, selectedSurfaceSet, unitOnlyRun }) => {
  // Floor applied only to high-memory local multi-surface runs; a larger configured
  // default still wins because the floor is combined with Math.max.
  const multiSurfaceFloorMs = 75_000;
  const defaultTargetMs = context.executionBudget.unitFastBatchTargetMs;

  const isHighMemoryLocalMultiSurfaceRun =
    !unitOnlyRun &&
    selectedSurfaceSet.size > 1 &&
    !context.runtime.isCI &&
    context.runtime.memoryBand === "high";

  return isHighMemoryLocalMultiSurfaceRun
    ? Math.max(defaultTargetMs, multiSurfaceFloorMs)
    : defaultTargetMs;
};
|
||||
|
||||
const resolveMaxWorkersForUnit = (unit, context) => {
|
||||
const overrideWorkers = Number.parseInt(context.env.OPENCLAW_TEST_WORKERS ?? "", 10);
|
||||
const resolvedOverride =
|
||||
@@ -272,6 +285,9 @@ const resolveMaxWorkersForUnit = (unit, context) => {
|
||||
if (unit.surface === "extensions") {
|
||||
return budget.extensionWorkers;
|
||||
}
|
||||
if (unit.surface === "channels") {
|
||||
return budget.channelSharedWorkers ?? budget.unitSharedWorkers;
|
||||
}
|
||||
if (unit.surface === "gateway") {
|
||||
return budget.gatewayWorkers;
|
||||
}
|
||||
@@ -416,7 +432,11 @@ const buildDefaultUnits = (context, request) => {
|
||||
1,
|
||||
parseEnvNumber(env, "OPENCLAW_TEST_UNIT_FAST_LANES", defaultUnitFastLaneCount),
|
||||
);
|
||||
const defaultUnitFastBatchTargetMs = executionBudget.unitFastBatchTargetMs;
|
||||
const defaultUnitFastBatchTargetMs = resolveUnitFastBatchTargetMs({
|
||||
context,
|
||||
selectedSurfaceSet,
|
||||
unitOnlyRun,
|
||||
});
|
||||
const unitFastBatchTargetMs = parseEnvNumber(
|
||||
env,
|
||||
"OPENCLAW_TEST_UNIT_FAST_BATCH_TARGET_MS",
|
||||
@@ -806,10 +826,19 @@ const buildTargetedUnits = (context, request) => {
|
||||
return [];
|
||||
}
|
||||
const unitMemoryIsolatedFiles = request.unitMemoryIsolatedFiles ?? [];
|
||||
const estimateUnitDurationMs = (file) =>
|
||||
context.unitTimingManifest.files[file]?.durationMs ??
|
||||
context.unitTimingManifest.defaultDurationMs;
|
||||
const estimateChannelDurationMs = (file) =>
|
||||
context.channelTimingManifest.files[file]?.durationMs ??
|
||||
context.channelTimingManifest.defaultDurationMs;
|
||||
const defaultTargetedChannelsBatchTargetMs = 12_000;
|
||||
const defaultTargetedUnitBatchTargetMs = 12_000;
|
||||
const targetedUnitBatchTargetMs = parseEnvNumber(
|
||||
context.env,
|
||||
"OPENCLAW_TEST_TARGETED_UNIT_BATCH_TARGET_MS",
|
||||
defaultTargetedUnitBatchTargetMs,
|
||||
);
|
||||
const defaultTargetedChannelsBatchTargetMs = 11_000;
|
||||
const targetedChannelsBatchTargetMs = parseEnvNumber(
|
||||
context.env,
|
||||
"OPENCLAW_TEST_TARGETED_CHANNELS_BATCH_TARGET_MS",
|
||||
@@ -855,6 +884,28 @@ const buildTargetedUnits = (context, request) => {
|
||||
),
|
||||
);
|
||||
}
|
||||
if (
|
||||
classification.surface === "unit" &&
|
||||
uniqueFilters.length > 4 &&
|
||||
targetedUnitBatchTargetMs > 0
|
||||
) {
|
||||
const estimatedTotalDurationMs = uniqueFilters.reduce(
|
||||
(totalMs, file) => totalMs + estimateUnitDurationMs(file),
|
||||
0,
|
||||
);
|
||||
if (estimatedTotalDurationMs > targetedUnitBatchTargetMs) {
|
||||
return splitFilesByBalancedDurationBudget(
|
||||
uniqueFilters,
|
||||
targetedUnitBatchTargetMs,
|
||||
estimateUnitDurationMs,
|
||||
).map((batch, batchIndex) =>
|
||||
createExecutionUnit(context, {
|
||||
...createTargetedUnit(context, classification, batch),
|
||||
id: `unit-batch-${String(batchIndex + 1)}`,
|
||||
}),
|
||||
);
|
||||
}
|
||||
}
|
||||
if (
|
||||
classification.surface === "channels" &&
|
||||
uniqueFilters.length > 4 &&
|
||||
@@ -865,7 +916,7 @@ const buildTargetedUnits = (context, request) => {
|
||||
0,
|
||||
);
|
||||
if (estimatedTotalDurationMs > targetedChannelsBatchTargetMs) {
|
||||
return splitFilesByDurationBudget(
|
||||
return splitFilesByBalancedDurationBudget(
|
||||
uniqueFilters,
|
||||
targetedChannelsBatchTargetMs,
|
||||
estimateChannelDurationMs,
|
||||
|
||||
@@ -91,6 +91,7 @@ const LOCAL_MEMORY_BUDGETS = {
|
||||
constrained: {
|
||||
vitestCap: 2,
|
||||
unitShared: 2,
|
||||
channelsShared: 2,
|
||||
unitIsolated: 1,
|
||||
unitHeavy: 1,
|
||||
extensions: 1,
|
||||
@@ -107,6 +108,7 @@ const LOCAL_MEMORY_BUDGETS = {
|
||||
moderate: {
|
||||
vitestCap: 3,
|
||||
unitShared: 3,
|
||||
channelsShared: 3,
|
||||
unitIsolated: 1,
|
||||
unitHeavy: 1,
|
||||
extensions: 2,
|
||||
@@ -123,6 +125,7 @@ const LOCAL_MEMORY_BUDGETS = {
|
||||
mid: {
|
||||
vitestCap: 4,
|
||||
unitShared: 4,
|
||||
channelsShared: 4,
|
||||
unitIsolated: 1,
|
||||
unitHeavy: 1,
|
||||
extensions: 3,
|
||||
@@ -139,9 +142,10 @@ const LOCAL_MEMORY_BUDGETS = {
|
||||
high: {
|
||||
vitestCap: 6,
|
||||
unitShared: 6,
|
||||
channelsShared: 5,
|
||||
unitIsolated: 2,
|
||||
unitHeavy: 2,
|
||||
extensions: 4,
|
||||
extensions: 5,
|
||||
gateway: 3,
|
||||
topLevelNoIsolate: 14,
|
||||
topLevelIsolated: 4,
|
||||
@@ -160,6 +164,7 @@ const withIntentBudgetAdjustments = (budget, intentProfile, cpuCount) => {
|
||||
...budget,
|
||||
vitestMaxWorkers: 1,
|
||||
unitSharedWorkers: 1,
|
||||
channelSharedWorkers: 1,
|
||||
unitIsolatedWorkers: 1,
|
||||
unitHeavyWorkers: 1,
|
||||
extensionWorkers: 1,
|
||||
@@ -182,6 +187,11 @@ const withIntentBudgetAdjustments = (budget, intentProfile, cpuCount) => {
|
||||
...budget,
|
||||
vitestMaxWorkers: clamp(Math.max(budget.vitestMaxWorkers, Math.min(8, cpuCount)), 1, 16),
|
||||
unitSharedWorkers: clamp(Math.max(budget.unitSharedWorkers, Math.min(8, cpuCount)), 1, 16),
|
||||
channelSharedWorkers: clamp(
|
||||
Math.max(budget.channelSharedWorkers ?? budget.unitSharedWorkers, Math.min(6, cpuCount)),
|
||||
1,
|
||||
16,
|
||||
),
|
||||
unitIsolatedWorkers: clamp(Math.max(budget.unitIsolatedWorkers, Math.min(4, cpuCount)), 1, 4),
|
||||
unitHeavyWorkers: clamp(Math.max(budget.unitHeavyWorkers, Math.min(4, cpuCount)), 1, 4),
|
||||
extensionWorkers: clamp(Math.max(budget.extensionWorkers, Math.min(6, cpuCount)), 1, 6),
|
||||
@@ -263,6 +273,7 @@ export function resolveExecutionBudget(runtimeCapabilities) {
|
||||
return {
|
||||
vitestMaxWorkers: runtime.isWindows ? 2 : runtime.isMacOS ? 1 : 3,
|
||||
unitSharedWorkers: macCiWorkers,
|
||||
channelSharedWorkers: macCiWorkers,
|
||||
unitIsolatedWorkers: macCiWorkers,
|
||||
unitHeavyWorkers: macCiWorkers,
|
||||
extensionWorkers: macCiWorkers,
|
||||
@@ -286,6 +297,7 @@ export function resolveExecutionBudget(runtimeCapabilities) {
|
||||
const baseBudget = {
|
||||
vitestMaxWorkers: Math.min(cpuCount, bandBudget.vitestCap),
|
||||
unitSharedWorkers: Math.min(cpuCount, bandBudget.unitShared),
|
||||
channelSharedWorkers: Math.min(cpuCount, bandBudget.channelsShared ?? bandBudget.unitShared),
|
||||
unitIsolatedWorkers: Math.min(cpuCount, bandBudget.unitIsolated),
|
||||
unitHeavyWorkers: Math.min(cpuCount, bandBudget.unitHeavy),
|
||||
extensionWorkers: Math.min(cpuCount, bandBudget.extensions),
|
||||
@@ -301,13 +313,14 @@ export function resolveExecutionBudget(runtimeCapabilities) {
|
||||
unitFastLaneCount: 1,
|
||||
unitFastBatchTargetMs: bandBudget.unitFastBatchTargetMs,
|
||||
channelsBatchTargetMs: bandBudget.channelsBatchTargetMs ?? 0,
|
||||
extensionsBatchTargetMs: 240_000,
|
||||
extensionsBatchTargetMs: 300_000,
|
||||
};
|
||||
|
||||
const loadAdjustedBudget = {
|
||||
...baseBudget,
|
||||
vitestMaxWorkers: scaleForLoad(baseBudget.vitestMaxWorkers, runtime.loadBand),
|
||||
unitSharedWorkers: scaleForLoad(baseBudget.unitSharedWorkers, runtime.loadBand),
|
||||
channelSharedWorkers: scaleForLoad(baseBudget.channelSharedWorkers, runtime.loadBand),
|
||||
unitHeavyWorkers: scaleForLoad(baseBudget.unitHeavyWorkers, runtime.loadBand),
|
||||
extensionWorkers: scaleForLoad(baseBudget.extensionWorkers, runtime.loadBand),
|
||||
gatewayWorkers: scaleForLoad(baseBudget.gatewayWorkers, runtime.loadBand),
|
||||
|
||||
@@ -13,6 +13,7 @@ import {
|
||||
hasFatalTestRunOutput,
|
||||
resolveTestRunExitCode,
|
||||
} from "../../scripts/test-parallel-utils.mjs";
|
||||
import { loadTestCatalog } from "../../scripts/test-planner/catalog.mjs";
|
||||
|
||||
const clearPlannerShardEnv = (env) => {
|
||||
const nextEnv = { ...env };
|
||||
@@ -25,6 +26,39 @@ const clearPlannerShardEnv = (env) => {
|
||||
return nextEnv;
|
||||
};
|
||||
|
||||
// Upper bound on how many catalog files are sampled per surface for the targeted-run proxies.
const SHARED_PROXY_FILE_LIMIT = 100;

/**
 * Returns up to SHARED_PROXY_FILE_LIMIT known test files, in catalog order, that the
 * catalog classifies as belonging to `surface` and as not isolated.
 *
 * @param {string} surface - Surface name to match against `classification.surface`.
 * @returns {string[]} Matching test file paths.
 */
const collectSharedProxyFiles = (surface) => {
  const catalog = loadTestCatalog();
  return catalog.allKnownTestFiles
    .filter((file) => {
      const classification = catalog.classifyTestFile(file);
      return classification.surface === surface && !classification.isolated;
    })
    .slice(0, SHARED_PROXY_FILE_LIMIT);
};

// Non-isolated channel test files used as a stand-in for a medium-sized targeted rerun.
const sharedTargetedChannelProxyFiles = collectSharedProxyFiles("channels");

// Non-isolated unit test files used as a stand-in for a medium-sized targeted rerun.
const sharedTargetedUnitProxyFiles = collectSharedProxyFiles("unit");

// Shared channel sample plus a fixed set of explicitly named channel test files.
// NOTE(review): the named files are presumably ones the catalog does not place in the
// shared channels lane — confirm against the behavior manifest.
const targetedChannelProxyFiles = [
  ...sharedTargetedChannelProxyFiles,
  "extensions/discord/src/monitor/message-handler.preflight.acp-bindings.test.ts",
  "extensions/discord/src/monitor/monitor.agent-components.test.ts",
  "extensions/telegram/src/bot.create-telegram-bot.test.ts",
  "extensions/whatsapp/src/monitor-inbox.streams-inbound-messages.test.ts",
];

// Shared unit sample plus one explicitly named unit test file.
const targetedUnitProxyFiles = [
  ...sharedTargetedUnitProxyFiles,
  "src/cli/qr-dashboard.integration.test.ts",
];
|
||||
|
||||
describe("scripts/test-parallel fatal output guard", () => {
|
||||
it("fails a zero exit when V8 reports an out-of-memory fatal", () => {
|
||||
const output = [
|
||||
@@ -203,6 +237,32 @@ describe("scripts/test-parallel lane planning", () => {
|
||||
expect(output).toMatch(/extensions(?:-batch-1)? filters=all maxWorkers=/);
|
||||
});
|
||||
|
||||
it("uses fewer shared extension batches on high-memory local hosts", () => {
  const repoRoot = path.resolve(import.meta.dirname, "../..");
  // Plan-only invocation restricted to the extensions surface.
  // NOTE(review): the env overrides presumably make the planner see a non-CI macOS host
  // with 12 CPUs / 128 GiB and load-awareness off — confirm against the planner's env parsing.
  const output = execFileSync(
    "node",
    ["scripts/test-parallel.mjs", "--plan", "--surface", "extensions"],
    {
      cwd: repoRoot,
      env: {
        ...clearPlannerShardEnv(process.env),
        CI: "",
        GITHUB_ACTIONS: "",
        RUNNER_OS: "macOS",
        OPENCLAW_TEST_HOST_CPU_COUNT: "12",
        OPENCLAW_TEST_HOST_MEMORY_GIB: "128",
        OPENCLAW_TEST_LOAD_AWARE: "0",
      },
      encoding: "utf8",
    },
  );

  // Two shared batches, each planned with 5 workers, and no third batch.
  // (A bare `extensions-batch-2` check is implied by the second assertion.)
  expect(output).toContain("extensions-batch-1 filters=all maxWorkers=5");
  expect(output).toContain("extensions-batch-2 filters=all maxWorkers=5");
  expect(output).not.toContain("extensions-batch-3");
});
|
||||
|
||||
it("starts isolated channel lanes before shared extension batches on high-memory local hosts", () => {
|
||||
const repoRoot = path.resolve(import.meta.dirname, "../..");
|
||||
const output = execFileSync(
|
||||
@@ -240,6 +300,102 @@ describe("scripts/test-parallel lane planning", () => {
|
||||
expect(firstChannelIsolated).toBeGreaterThanOrEqual(0);
|
||||
expect(firstExtensionBatch).toBeGreaterThan(firstChannelIsolated);
|
||||
expect(firstChannelBatch).toBeGreaterThan(firstExtensionBatch);
|
||||
expect(output).toContain("channels-batch-1 filters=all maxWorkers=5");
|
||||
});
|
||||
|
||||
it("uses coarser unit-fast batching for high-memory local multi-surface runs", () => {
  // Select three surfaces so the planner treats this as a multi-surface run.
  const surfaceArgs = ["unit", "extensions", "channels"].flatMap((surface) => [
    "--surface",
    surface,
  ]);
  // NOTE(review): overrides presumably describe a non-CI macOS host with 12 CPUs / 128 GiB
  // and load-awareness disabled — confirm against the planner's env parsing.
  const plannerEnv = {
    ...clearPlannerShardEnv(process.env),
    CI: "",
    GITHUB_ACTIONS: "",
    RUNNER_OS: "macOS",
    OPENCLAW_TEST_HOST_CPU_COUNT: "12",
    OPENCLAW_TEST_HOST_MEMORY_GIB: "128",
    OPENCLAW_TEST_LOAD_AWARE: "0",
  };

  const output = execFileSync("node", ["scripts/test-parallel.mjs", "--plan", ...surfaceArgs], {
    cwd: path.resolve(import.meta.dirname, "../.."),
    env: plannerEnv,
    encoding: "utf8",
  });

  // The plan reaches a fourth unit-fast batch but never a fifth.
  expect(output).toContain("unit-fast-batch-4");
  expect(output).not.toContain("unit-fast-batch-5");
});
|
||||
|
||||
it("uses earlier targeted channel batching on high-memory local hosts", () => {
  // One `--files <path>` pair per targeted channel proxy file.
  const fileArgs = [];
  for (const file of targetedChannelProxyFiles) {
    fileArgs.push("--files", file);
  }
  // NOTE(review): overrides presumably describe a non-CI macOS host with 12 CPUs / 128 GiB
  // and load-awareness disabled — confirm against the planner's env parsing.
  const plannerEnv = {
    ...clearPlannerShardEnv(process.env),
    CI: "",
    GITHUB_ACTIONS: "",
    RUNNER_OS: "macOS",
    OPENCLAW_TEST_HOST_CPU_COUNT: "12",
    OPENCLAW_TEST_HOST_MEMORY_GIB: "128",
    OPENCLAW_TEST_LOAD_AWARE: "0",
  };

  const output = execFileSync(
    "node",
    ["scripts/test-parallel.mjs", "--plan", "--surface", "channels", ...fileArgs],
    {
      cwd: path.resolve(import.meta.dirname, "../.."),
      env: plannerEnv,
      encoding: "utf8",
    },
  );

  // Shared channel work is split into exactly two batches of the expected sizes.
  expect(output).toContain("channels-batch-1 filters=49");
  expect(output).toContain("channels-batch-2 filters=51");
  expect(output).not.toContain("channels-batch-3");
});
|
||||
|
||||
it("uses targeted unit batching on high-memory local hosts", () => {
  // One `--files <path>` pair per targeted unit proxy file.
  const fileArgs = [];
  for (const file of targetedUnitProxyFiles) {
    fileArgs.push("--files", file);
  }
  // NOTE(review): overrides presumably describe a non-CI macOS host with 12 CPUs / 128 GiB
  // and load-awareness disabled — confirm against the planner's env parsing.
  const plannerEnv = {
    ...clearPlannerShardEnv(process.env),
    CI: "",
    GITHUB_ACTIONS: "",
    RUNNER_OS: "macOS",
    OPENCLAW_TEST_HOST_CPU_COUNT: "12",
    OPENCLAW_TEST_HOST_MEMORY_GIB: "128",
    OPENCLAW_TEST_LOAD_AWARE: "0",
  };

  const output = execFileSync(
    "node",
    ["scripts/test-parallel.mjs", "--plan", "--surface", "unit", ...fileArgs],
    {
      cwd: path.resolve(import.meta.dirname, "../.."),
      env: plannerEnv,
      encoding: "utf8",
    },
  );

  // Shared unit work is split into exactly two batches of the expected sizes.
  expect(output).toContain("unit-batch-1 filters=50");
  expect(output).toContain("unit-batch-2 filters=49");
  expect(output).not.toContain("unit-batch-3");
});
|
||||
|
||||
it("explains targeted file ownership and execution policy", () => {
|
||||
|
||||
@@ -172,7 +172,29 @@ describe("resolveLocalVitestMaxWorkers", () => {
|
||||
expect(runtime.memoryBand).toBe("high");
|
||||
expect(runtime.loadBand).toBe("idle");
|
||||
expect(budget.channelsBatchTargetMs).toBe(30_000);
|
||||
expect(budget.channelSharedWorkers).toBe(5);
|
||||
expect(budget.deferredRunConcurrency).toBe(8);
|
||||
expect(budget.topLevelParallelLimitNoIsolate).toBe(14);
|
||||
});
|
||||
|
||||
it("uses a coarser shared extension batch target on high-memory local hosts", () => {
  // Environment and host facts handed to the runtime resolver: a local 16-CPU,
  // 128 GiB darwin machine with a low load average.
  const hostEnv = { RUNNER_OS: "macOS" };
  const hostFacts = {
    cpuCount: 16,
    totalMemoryBytes: 128 * 1024 ** 3,
    platform: "darwin",
    mode: "local",
    loadAverage: [0.2, 0.2, 0.2],
  };

  const runtime = resolveRuntimeCapabilities(hostEnv, hostFacts);
  const budget = resolveExecutionBudget(runtime);

  // Sanity-check the resolved bands before checking the budget derived from them.
  expect(runtime.memoryBand).toBe("high");
  expect(runtime.loadBand).toBe("idle");

  // The extension lane gets the coarser batch target and the raised worker cap.
  expect(budget.extensionsBatchTargetMs).toBe(300_000);
  expect(budget.extensionWorkers).toBe(5);
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user