refactor: unify gateway handshake timeout wiring

This commit is contained in:
Peter Steinberger
2026-03-24 22:53:06 -07:00
parent 258a214bcb
commit f5408d82d2
13 changed files with 140 additions and 87 deletions

View File

@@ -281,7 +281,7 @@ async function connectGatewayClient(params: {
url: params.url,
token: params.token,
password: params.password,
connectDelayMs: 0,
connectChallengeTimeoutMs: 0,
clientName: GATEWAY_CLIENT_NAMES.TEST,
clientDisplayName: "android-live-test",
clientVersion: "dev",

View File

@@ -22,6 +22,7 @@ import {
} from "../utils/message-channel.js";
import { VERSION } from "../version.js";
import { buildDeviceAuthPayloadV3 } from "./device-auth.js";
import { resolveConnectChallengeTimeoutMs } from "./handshake-timeouts.js";
import { isLoopbackHost, isSecureWebSocketUrl } from "./net.js";
import {
ConnectErrorDetailCodes,
@@ -77,6 +78,8 @@ class GatewayClientRequestError extends Error {
export type GatewayClientOptions = {
url?: string; // ws://127.0.0.1:18789
connectChallengeTimeoutMs?: number;
/** @deprecated Use connectChallengeTimeoutMs. */
connectDelayMs?: number;
tickWatchMinIntervalMs?: number;
requestTimeoutMs?: number;
@@ -119,9 +122,29 @@ export function describeGatewayCloseCode(code: number): string | undefined {
return GATEWAY_CLOSE_CODE_HINTS[code];
}
function readConnectChallengeTimeoutOverride(
opts: Pick<GatewayClientOptions, "connectChallengeTimeoutMs" | "connectDelayMs">,
): number | undefined {
if (
typeof opts.connectChallengeTimeoutMs === "number" &&
Number.isFinite(opts.connectChallengeTimeoutMs)
) {
return opts.connectChallengeTimeoutMs;
}
if (typeof opts.connectDelayMs === "number" && Number.isFinite(opts.connectDelayMs)) {
return opts.connectDelayMs;
}
return undefined;
}
export function resolveGatewayClientConnectChallengeTimeoutMs(
opts: Pick<GatewayClientOptions, "connectChallengeTimeoutMs" | "connectDelayMs">,
): number {
return resolveConnectChallengeTimeoutMs(readConnectChallengeTimeoutOverride(opts));
}
const FORCE_STOP_TERMINATE_GRACE_MS = 250;
const STOP_AND_WAIT_TIMEOUT_MS = 1_000;
const DEFAULT_CONNECT_CHALLENGE_TIMEOUT_MS = 5_000;
type PendingStop = {
ws: WebSocket;
@@ -238,7 +261,7 @@ export class GatewayClient {
return;
}
}
this.queueConnect();
this.beginPreauthHandshake();
});
ws.on("message", (data) => this.handleMessage(rawDataToString(data)));
ws.on("close", (code, reason) => {
@@ -328,10 +351,7 @@ export class GatewayClient {
clearInterval(this.tickTimer);
this.tickTimer = null;
}
if (this.connectTimer) {
clearTimeout(this.connectTimer);
this.connectTimer = null;
}
this.clearConnectChallengeTimeout();
if (this.pendingStop) {
this.flushPendingErrors(new Error("gateway client stopped"));
return this.pendingStop.promise;
@@ -387,10 +407,7 @@ export class GatewayClient {
return;
}
this.connectSent = true;
if (this.connectTimer) {
clearTimeout(this.connectTimer);
this.connectTimer = null;
}
this.clearConnectChallengeTimeout();
const role = this.opts.role ?? "operator";
const {
authToken,
@@ -695,17 +712,22 @@ export class GatewayClient {
}
}
private queueConnect() {
private beginPreauthHandshake() {
this.connectNonce = null;
this.connectSent = false;
const rawConnectDelayMs = this.opts.connectDelayMs;
const connectChallengeTimeoutMs =
typeof rawConnectDelayMs === "number" && Number.isFinite(rawConnectDelayMs)
? Math.max(250, Math.min(10_000, rawConnectDelayMs))
: DEFAULT_CONNECT_CHALLENGE_TIMEOUT_MS;
this.armConnectChallengeTimeout();
}
private clearConnectChallengeTimeout() {
if (this.connectTimer) {
clearTimeout(this.connectTimer);
this.connectTimer = null;
}
}
private armConnectChallengeTimeout() {
const connectChallengeTimeoutMs = resolveGatewayClientConnectChallengeTimeoutMs(this.opts);
this.clearConnectChallengeTimeout();
this.connectTimer = setTimeout(() => {
if (this.connectSent || this.ws?.readyState !== WebSocket.OPEN) {
return;

View File

@@ -3,7 +3,12 @@ import { createServer } from "node:net";
import { afterEach, describe, expect, test, vi } from "vitest";
import { WebSocket, WebSocketServer } from "ws";
import { rawDataToString } from "../infra/ws.js";
import { GatewayClient } from "./client.js";
import { GatewayClient, resolveGatewayClientConnectChallengeTimeoutMs } from "./client.js";
import {
DEFAULT_PREAUTH_HANDSHAKE_TIMEOUT_MS,
MAX_CONNECT_CHALLENGE_TIMEOUT_MS,
MIN_CONNECT_CHALLENGE_TIMEOUT_MS,
} from "./handshake-timeouts.js";
// Find a free localhost port for ad-hoc WS servers.
async function getFreePort(): Promise<number> {
@@ -36,47 +41,22 @@ describe("GatewayClient", () => {
}
});
test("uses a 5s default connect-challenge timeout", async () => {
vi.useFakeTimers();
try {
const onConnectError = vi.fn();
const close = vi.fn();
const client = new GatewayClient({ onConnectError });
(
client as unknown as {
ws:
| WebSocket
| {
readyState: number;
send: () => void;
close: (code: number, reason: string) => void;
};
queueConnect: () => void;
}
).ws = {
readyState: WebSocket.OPEN,
send: vi.fn(),
close,
};
(
client as unknown as {
queueConnect: () => void;
}
).queueConnect();
await vi.advanceTimersByTimeAsync(4_999);
expect(onConnectError).not.toHaveBeenCalled();
expect(close).not.toHaveBeenCalled();
await vi.advanceTimersByTimeAsync(1);
expect(onConnectError).toHaveBeenCalledWith(
expect.objectContaining({ message: "gateway connect challenge timeout" }),
);
expect(close).toHaveBeenCalledWith(1008, "connect challenge timeout");
} finally {
vi.useRealTimers();
}
test("prefers connectChallengeTimeoutMs and still honors the legacy alias", () => {
expect(resolveGatewayClientConnectChallengeTimeoutMs({})).toBe(
DEFAULT_PREAUTH_HANDSHAKE_TIMEOUT_MS,
);
expect(resolveGatewayClientConnectChallengeTimeoutMs({ connectDelayMs: 0 })).toBe(
MIN_CONNECT_CHALLENGE_TIMEOUT_MS,
);
expect(resolveGatewayClientConnectChallengeTimeoutMs({ connectDelayMs: 20_000 })).toBe(
MAX_CONNECT_CHALLENGE_TIMEOUT_MS,
);
expect(
resolveGatewayClientConnectChallengeTimeoutMs({
connectDelayMs: 2_000,
connectChallengeTimeoutMs: 5_000,
}),
).toBe(5_000);
});
test("closes on missing ticks", async () => {
@@ -112,7 +92,7 @@ describe("GatewayClient", () => {
const closed = new Promise<{ code: number; reason: string }>((resolve) => {
const client = new GatewayClient({
url: `ws://127.0.0.1:${port}`,
connectDelayMs: 0,
connectChallengeTimeoutMs: 0,
tickWatchMinIntervalMs: 5,
onClose: (code, reason) => resolve({ code, reason }),
});
@@ -363,7 +343,7 @@ r1USnb+wUdA7Zoj/mQ==
}, 2000);
client = new GatewayClient({
url: `wss://127.0.0.1:${port}`,
connectDelayMs: 0,
connectChallengeTimeoutMs: 0,
tlsFingerprint: "deadbeef",
onConnectError: (err) => {
clearTimeout(timeout);

View File

@@ -0,0 +1,37 @@
import { describe, expect, test } from "vitest";
import {
clampConnectChallengeTimeoutMs,
DEFAULT_PREAUTH_HANDSHAKE_TIMEOUT_MS,
getPreauthHandshakeTimeoutMsFromEnv,
MAX_CONNECT_CHALLENGE_TIMEOUT_MS,
MIN_CONNECT_CHALLENGE_TIMEOUT_MS,
resolveConnectChallengeTimeoutMs,
} from "./handshake-timeouts.js";
describe("gateway handshake timeouts", () => {
test("defaults connect challenge timeout to the shared pre-auth handshake timeout", () => {
expect(resolveConnectChallengeTimeoutMs()).toBe(DEFAULT_PREAUTH_HANDSHAKE_TIMEOUT_MS);
});
test("clamps connect challenge timeouts into the supported range", () => {
expect(clampConnectChallengeTimeoutMs(0)).toBe(MIN_CONNECT_CHALLENGE_TIMEOUT_MS);
expect(clampConnectChallengeTimeoutMs(2_000)).toBe(2_000);
expect(clampConnectChallengeTimeoutMs(20_000)).toBe(MAX_CONNECT_CHALLENGE_TIMEOUT_MS);
});
test("prefers OPENCLAW_HANDSHAKE_TIMEOUT_MS and falls back on the test-only env", () => {
expect(
getPreauthHandshakeTimeoutMsFromEnv({
OPENCLAW_HANDSHAKE_TIMEOUT_MS: "75",
OPENCLAW_TEST_HANDSHAKE_TIMEOUT_MS: "20",
}),
).toBe(75);
expect(
getPreauthHandshakeTimeoutMsFromEnv({
OPENCLAW_HANDSHAKE_TIMEOUT_MS: "",
OPENCLAW_TEST_HANDSHAKE_TIMEOUT_MS: "20",
VITEST: "1",
}),
).toBe(20);
});
});

View File

@@ -0,0 +1,28 @@
export const DEFAULT_PREAUTH_HANDSHAKE_TIMEOUT_MS = 10_000;
export const MIN_CONNECT_CHALLENGE_TIMEOUT_MS = 250;
export const MAX_CONNECT_CHALLENGE_TIMEOUT_MS = DEFAULT_PREAUTH_HANDSHAKE_TIMEOUT_MS;
export function clampConnectChallengeTimeoutMs(timeoutMs: number): number {
return Math.max(
MIN_CONNECT_CHALLENGE_TIMEOUT_MS,
Math.min(MAX_CONNECT_CHALLENGE_TIMEOUT_MS, timeoutMs),
);
}
export function resolveConnectChallengeTimeoutMs(timeoutMs?: number | null): number {
return typeof timeoutMs === "number" && Number.isFinite(timeoutMs)
? clampConnectChallengeTimeoutMs(timeoutMs)
: DEFAULT_PREAUTH_HANDSHAKE_TIMEOUT_MS;
}
export function getPreauthHandshakeTimeoutMsFromEnv(env: NodeJS.ProcessEnv = process.env): number {
const configuredTimeout =
env.OPENCLAW_HANDSHAKE_TIMEOUT_MS || (env.VITEST && env.OPENCLAW_TEST_HANDSHAKE_TIMEOUT_MS);
if (configuredTimeout) {
const parsed = Number(configuredTimeout);
if (Number.isFinite(parsed) && parsed > 0) {
return parsed;
}
}
return DEFAULT_PREAUTH_HANDSHAKE_TIMEOUT_MS;
}

View File

@@ -21,20 +21,6 @@ export const __setMaxChatHistoryMessagesBytesForTest = (value?: number) => {
maxChatHistoryMessagesBytes = value;
}
};
export const DEFAULT_HANDSHAKE_TIMEOUT_MS = 10_000;
export const getHandshakeTimeoutMs = () => {
// User-facing env var (works in all environments); test-only var gated behind VITEST
const envKey =
process.env.OPENCLAW_HANDSHAKE_TIMEOUT_MS ||
(process.env.VITEST && process.env.OPENCLAW_TEST_HANDSHAKE_TIMEOUT_MS);
if (envKey) {
const parsed = Number(envKey);
if (Number.isFinite(parsed) && parsed > 0) {
return parsed;
}
}
return DEFAULT_HANDSHAKE_TIMEOUT_MS;
};
export const TICK_INTERVAL_MS = 30_000;
export const HEALTH_REFRESH_INTERVAL_MS = 60_000;
export const DEDUPE_TTL_MS = 5 * 60_000;

View File

@@ -6,7 +6,7 @@ import {
createSignedDevice,
expectHelloOkServerVersion,
getFreePort,
getHandshakeTimeoutMs,
getPreauthHandshakeTimeoutMsFromEnv,
GATEWAY_CLIENT_MODES,
GATEWAY_CLIENT_NAMES,
NODE_CLIENT,
@@ -81,7 +81,7 @@ export function registerDefaultAuthTokenSuite(): void {
process.env.OPENCLAW_TEST_HANDSHAKE_TIMEOUT_MS = "20";
try {
const ws = await openWs(port);
const handshakeTimeoutMs = getHandshakeTimeoutMs();
const handshakeTimeoutMs = getPreauthHandshakeTimeoutMsFromEnv();
const closed = await waitForWsClose(ws, handshakeTimeoutMs + 500);
expect(closed).toBe(true);
} finally {
@@ -99,9 +99,9 @@ export function registerDefaultAuthTokenSuite(): void {
process.env.OPENCLAW_HANDSHAKE_TIMEOUT_MS = "75";
process.env.OPENCLAW_TEST_HANDSHAKE_TIMEOUT_MS = "20";
try {
expect(getHandshakeTimeoutMs()).toBe(75);
expect(getPreauthHandshakeTimeoutMsFromEnv()).toBe(75);
process.env.OPENCLAW_HANDSHAKE_TIMEOUT_MS = "";
expect(getHandshakeTimeoutMs()).toBe(20);
expect(getPreauthHandshakeTimeoutMsFromEnv()).toBe(20);
} finally {
if (prevHandshakeTimeout === undefined) {
delete process.env.OPENCLAW_HANDSHAKE_TIMEOUT_MS;

View File

@@ -390,6 +390,6 @@ export {
writeTrustedProxyControlUiConfig,
};
export { ConnectErrorDetailCodes } from "./protocol/connect-error-details.js";
export { getHandshakeTimeoutMs } from "./server-constants.js";
export { getPreauthHandshakeTimeoutMsFromEnv } from "./handshake-timeouts.js";
export { PROTOCOL_VERSION } from "./protocol/index.js";
export { GATEWAY_CLIENT_MODES, GATEWAY_CLIENT_NAMES } from "../utils/message-channel.js";

View File

@@ -149,7 +149,7 @@ async function connectApprovedNode(params: {
const client = new GatewayClient({
url: `ws://127.0.0.1:${params.port}`,
connectDelayMs: 2_000,
connectChallengeTimeoutMs: 2_000,
token: "secret",
role: "node",
clientName: GATEWAY_CLIENT_NAMES.NODE_HOST,

View File

@@ -216,7 +216,7 @@ describe("node.invoke approval bypass", () => {
url: `ws://127.0.0.1:${port}`,
// Keep challenge timeout realistic in tests; 0 maps to a 250ms timeout and can
// trigger reconnect backoff loops under load.
connectDelayMs: 2_000,
connectChallengeTimeoutMs: 2_000,
token: "secret",
role: "node",
clientName: GATEWAY_CLIENT_NAMES.NODE_HOST,

View File

@@ -8,8 +8,8 @@ import { truncateUtf16Safe } from "../../utils.js";
import { isWebchatClient } from "../../utils/message-channel.js";
import type { AuthRateLimiter } from "../auth-rate-limit.js";
import type { ResolvedGatewayAuth } from "../auth.js";
import { getPreauthHandshakeTimeoutMsFromEnv } from "../handshake-timeouts.js";
import { isLoopbackAddress } from "../net.js";
import { getHandshakeTimeoutMs } from "../server-constants.js";
import type { GatewayRequestContext, GatewayRequestHandlers } from "../server-methods/types.js";
import { formatError } from "../server-utils.js";
import { logWs } from "../ws-log.js";
@@ -265,7 +265,7 @@ export function attachGatewayWsConnectionHandler(params: AttachGatewayWsConnecti
close();
});
const handshakeTimeoutMs = getHandshakeTimeoutMs();
const handshakeTimeoutMs = getPreauthHandshakeTimeoutMsFromEnv();
const handshakeTimer = setTimeout(() => {
if (!client) {
handshakeState = "failed";

View File

@@ -43,7 +43,7 @@ export async function connectGatewayClient(params: {
instanceId?: string;
deviceIdentity?: DeviceIdentity;
onEvent?: (evt: { event?: string; payload?: unknown }) => void;
connectDelayMs?: number;
connectChallengeTimeoutMs?: number;
timeoutMs?: number;
timeoutMessage?: string;
}) {
@@ -78,7 +78,7 @@ export async function connectGatewayClient(params: {
const client = new GatewayClient({
url: params.url,
token: params.token,
connectDelayMs: params.connectDelayMs ?? 0,
connectChallengeTimeoutMs: params.connectChallengeTimeoutMs ?? 0,
clientName: params.clientName ?? GATEWAY_CLIENT_NAMES.TEST,
clientDisplayName: params.clientDisplayName ?? "vitest",
clientVersion: params.clientVersion ?? "dev",

View File

@@ -312,7 +312,7 @@ async function connectStatusClient(
const client = new GatewayClient({
url: `ws://127.0.0.1:${inst.port}`,
connectDelayMs: 0,
connectChallengeTimeoutMs: 0,
token: inst.gatewayToken,
clientName: GATEWAY_CLIENT_NAMES.CLI,
clientDisplayName: `status-${inst.name}`,