fix: improve onboarding install diagnostics

This commit is contained in:
Peter Steinberger
2026-03-13 21:43:13 +00:00
parent eea41f308e
commit c659f6c959
4 changed files with 329 additions and 14 deletions

View File

@@ -995,6 +995,7 @@ SHARP_IGNORE_GLOBAL_LIBVIPS="${SHARP_IGNORE_GLOBAL_LIBVIPS:-1}"
NPM_LOGLEVEL="${OPENCLAW_NPM_LOGLEVEL:-error}"
NPM_SILENT_FLAG="--silent"
VERBOSE="${OPENCLAW_VERBOSE:-0}"
VERIFY_INSTALL="${OPENCLAW_VERIFY_INSTALL:-0}"
OPENCLAW_BIN=""
PNPM_CMD=()
HELP=0
@@ -1016,6 +1017,7 @@ Options:
--no-git-update Skip git pull for existing checkout
--no-onboard Skip onboarding (non-interactive)
--no-prompt Disable prompts (required in CI/automation)
--verify Run a post-install smoke verify
--dry-run Print what would happen (no changes)
--verbose Print debug output (set -x, npm verbose)
--help, -h Show this help
@@ -1027,6 +1029,7 @@ Environment variables:
OPENCLAW_GIT_DIR=...
OPENCLAW_GIT_UPDATE=0|1
OPENCLAW_NO_PROMPT=1
OPENCLAW_VERIFY_INSTALL=1
OPENCLAW_DRY_RUN=1
OPENCLAW_NO_ONBOARD=1
OPENCLAW_VERBOSE=1
@@ -1036,6 +1039,7 @@ Environment variables:
Examples:
curl -fsSL --proto '=https' --tlsv1.2 https://openclaw.ai/install.sh | bash
curl -fsSL --proto '=https' --tlsv1.2 https://openclaw.ai/install.sh | bash -s -- --no-onboard
curl -fsSL --proto '=https' --tlsv1.2 https://openclaw.ai/install.sh | bash -s -- --no-onboard --verify
curl -fsSL --proto '=https' --tlsv1.2 https://openclaw.ai/install.sh | bash -s -- --install-method git --no-onboard
EOF
}
@@ -1059,6 +1063,10 @@ parse_args() {
VERBOSE=1
shift
;;
--verify)
VERIFY_INSTALL=1
shift
;;
--no-prompt)
NO_PROMPT=1
shift
@@ -2196,7 +2204,38 @@ refresh_gateway_service_if_loaded() {
return 0
fi
run_quiet_step "Probing gateway service" "$claw" gateway status --probe --deep || true
run_quiet_step "Probing gateway service" "$claw" gateway status --deep || true
}
# Optional post-install smoke check. Runs only when VERIFY_INSTALL is "1"
# (set by --verify or OPENCLAW_VERIFY_INSTALL=1).
# Returns 0 when verification is disabled, returns 1 as soon as a check fails,
# and otherwise finishes by reporting success.
verify_installation() {
    [[ "${VERIFY_INSTALL}" == "1" ]] || return 0

    ui_stage "Verifying installation"

    # Prefer the binary recorded in OPENCLAW_BIN; otherwise try to resolve one now.
    local claw="${OPENCLAW_BIN:-}"
    [[ -n "$claw" ]] || claw="$(resolve_openclaw_bin || true)"

    if [[ -z "$claw" ]]; then
        ui_error "Install verify failed: openclaw not on PATH yet"
        warn_openclaw_not_found
        return 1
    fi

    if ! run_quiet_step "Checking OpenClaw version" "$claw" --version; then
        return 1
    fi

    # The deep gateway probe is only attempted when the gateway daemon is loaded.
    if ! is_gateway_daemon_loaded "$claw"; then
        ui_info "Gateway service not loaded; skipping gateway deep probe"
    elif ! run_quiet_step "Checking gateway service" "$claw" gateway status --deep; then
        ui_error "Install verify failed: gateway service unhealthy"
        ui_info "Run: openclaw gateway status --deep"
        return 1
    fi

    ui_success "Install verify complete"
}
# Main installation flow
@@ -2485,6 +2524,10 @@ main() {
fi
fi
if ! verify_installation; then
exit 1
fi
if [[ "$should_open_dashboard" == "true" ]]; then
maybe_open_dashboard
fi

View File

@@ -14,6 +14,19 @@ const gatewayClientCalls: Array<{
}> = [];
const ensureWorkspaceAndSessionsMock = vi.fn(async (..._args: unknown[]) => {});
const installGatewayDaemonNonInteractiveMock = vi.hoisted(() => vi.fn(async () => {}));
// Stand-in gateway service handed out by the mocked resolveGatewayService().
// It reports a loaded service whose runtime is "running"/"active" with pid 4242.
// Created through vi.hoisted so it can be referenced from vi.mock factories.
const gatewayServiceMock = vi.hoisted(() => ({
label: "LaunchAgent",
loadedText: "loaded",
isLoaded: vi.fn(async () => true),
readRuntime: vi.fn(async () => ({
status: "running",
state: "active",
pid: 4242,
})),
}));
// Stub for readLastGatewayErrorLine that always resolves to one fixed error line,
// so tests can assert on the text surfaced as the last gateway error.
const readLastGatewayErrorLineMock = vi.hoisted(() =>
vi.fn(async () => "Gateway failed to start: required secrets are unavailable."),
);
let waitForGatewayReachableMock:
| ((params: { url: string; token?: string; password?: string; deadlineMs?: number }) => Promise<{
ok: boolean;
@@ -64,6 +77,14 @@ vi.mock("./onboard-non-interactive/local/daemon-install.js", () => ({
installGatewayDaemonNonInteractive: installGatewayDaemonNonInteractiveMock,
}));
// Swap the daemon service resolver so code under test receives gatewayServiceMock.
vi.mock("../daemon/service.js", () => ({
resolveGatewayService: () => gatewayServiceMock,
}));
// Swap the gateway log reader for the stub that returns a fixed error line.
vi.mock("../daemon/diagnostics.js", () => ({
readLastGatewayErrorLine: readLastGatewayErrorLineMock,
}));
const { runNonInteractiveOnboarding } = await import("./onboard-non-interactive.js");
const { resolveConfigPath: resolveStateConfigPath } = await import("../config/paths.js");
const { resolveConfigPath } = await import("../config/config.js");
@@ -134,6 +155,9 @@ describe("onboard (non-interactive): gateway and remote auth", () => {
// Per-test cleanup: drop any reachability override and clear the recorded calls
// on the shared mocks so one test's invocations do not leak into the next.
afterEach(() => {
waitForGatewayReachableMock = undefined;
installGatewayDaemonNonInteractiveMock.mockClear();
gatewayServiceMock.isLoaded.mockClear();
gatewayServiceMock.readRuntime.mockClear();
readLastGatewayErrorLineMock.mockClear();
});
it("writes gateway token auth into config", async () => {
@@ -376,6 +400,73 @@ describe("onboard (non-interactive): gateway and remote auth", () => {
});
}, 60_000);
it("emits structured JSON diagnostics when daemon health fails", async () => {
  // Shape of the JSON failure report this test reads back from runtime.error.
  type FailureReport = {
    ok: boolean;
    phase: string;
    installDaemon: boolean;
    detail?: string;
    gateway?: { wsUrl?: string };
    hints?: string[];
    diagnostics?: {
      service?: {
        label?: string;
        loaded?: boolean;
        runtimeStatus?: string;
        pid?: number;
      };
      lastGatewayError?: string;
    };
  };

  await withStateDir("state-local-daemon-health-json-fail-", async (stateDir) => {
    // Force the gateway reachability probe to fail with a websocket close detail.
    waitForGatewayReachableMock = vi.fn(async () => ({
      ok: false,
      detail: "gateway closed (1006 abnormal closure (no close frame)): no close reason",
    }));

    // Runtime double: remembers the error text, then throws it so the run rejects.
    let errorPayload = "";
    const capturingRuntime = {
      log: () => {},
      error: (message: string) => {
        errorPayload = message;
        throw new Error(message);
      },
      exit: (_code: number) => {
        throw new Error("exit should not be reached after runtime.error");
      },
    };

    const onboardingRun = runNonInteractiveOnboarding(
      {
        nonInteractive: true,
        mode: "local",
        workspace: path.join(stateDir, "openclaw"),
        authChoice: "skip",
        skipSkills: true,
        skipHealth: false,
        installDaemon: true,
        gatewayBind: "loopback",
        json: true,
      },
      capturingRuntime,
    );
    await expect(onboardingRun).rejects.toThrow(/"phase": "gateway-health"/);

    const report = JSON.parse(errorPayload) as FailureReport;

    // Top-level failure fields.
    expect(report.ok).toBe(false);
    expect(report.phase).toBe("gateway-health");
    expect(report.installDaemon).toBe(true);
    expect(report.detail).toContain("1006 abnormal closure");
    expect(report.gateway?.wsUrl).toContain("ws://127.0.0.1:");
    expect(report.hints).toContain("Run `openclaw gateway status --deep` for more detail.");

    // Service diagnostics come from the mocked gateway service and log reader.
    const serviceReport = report.diagnostics?.service;
    expect(serviceReport?.label).toBe("LaunchAgent");
    expect(serviceReport?.loaded).toBe(true);
    expect(serviceReport?.runtimeStatus).toBe("running");
    expect(serviceReport?.pid).toBe(4242);
    expect(report.diagnostics?.lastGatewayError).toContain("required secrets are unavailable");
  });
}, 60_000);
it("auto-generates token auth when binding LAN and persists the token", async () => {
if (process.platform === "win32") {
// Windows runner occasionally drops the temp config write in this flow; skip to keep CI green.

View File

@@ -15,13 +15,84 @@ import {
import type { OnboardOptions } from "../onboard-types.js";
import { inferAuthChoiceFromFlags } from "./local/auth-choice-inference.js";
import { applyNonInteractiveGatewayConfig } from "./local/gateway-config.js";
import { logNonInteractiveOnboardingJson } from "./local/output.js";
import {
logNonInteractiveOnboardingFailure,
logNonInteractiveOnboardingJson,
} from "./local/output.js";
import { applyNonInteractiveSkillsConfig } from "./local/skills-config.js";
import { resolveNonInteractiveWorkspaceDir } from "./local/workspace.js";
const INSTALL_DAEMON_HEALTH_DEADLINE_MS = 45_000;
const ATTACH_EXISTING_GATEWAY_HEALTH_DEADLINE_MS = 15_000;
/** Best-effort facts gathered about the gateway service after a failed health probe. */
type GatewayHealthFailureSnapshot = {
  service?: {
    label: string;
    loaded: boolean;
    loadedText: string;
    runtimeStatus?: string;
    state?: string;
    pid?: number;
    lastExitStatus?: number;
    lastExitReason?: string;
  };
  lastGatewayError?: string;
  inspectError?: string;
};

/**
 * Gathers whatever can be learned about the gateway service and its last logged
 * error. Nothing here throws: a failing step is recorded in `inspectError`
 * (multiple notes joined with "; ") and the remaining steps still run.
 *
 * @returns The collected snapshot, or `undefined` when no service info, no last
 *   error line, and no inspection failure were recorded.
 */
async function collectGatewayHealthFailureDiagnostics(): Promise<
  GatewayHealthFailureSnapshot | undefined
> {
  const snapshot: GatewayHealthFailureSnapshot = {};
  const failureNotes: string[] = [];

  // Step 1: service load state and runtime details.
  try {
    const { resolveGatewayService } = await import("../../daemon/service.js");
    const service = resolveGatewayService();
    const env = process.env as Record<string, string | undefined>;
    // Each probe degrades to a neutral value on rejection instead of aborting the step.
    const loadedProbe = service.isLoaded({ env }).catch(() => false);
    const runtimeProbe = service.readRuntime(env).catch(() => undefined);
    const [loaded, runtime] = await Promise.all([loadedProbe, runtimeProbe]);
    snapshot.service = {
      label: service.label,
      loaded,
      loadedText: service.loadedText,
      runtimeStatus: runtime?.status,
      state: runtime?.state,
      pid: runtime?.pid,
      lastExitStatus: runtime?.lastExitStatus,
      lastExitReason: runtime?.lastExitReason,
    };
  } catch (err) {
    failureNotes.push(`service diagnostics failed: ${String(err)}`);
  }

  // Step 2: most recent gateway error line.
  try {
    const { readLastGatewayErrorLine } = await import("../../daemon/diagnostics.js");
    const lastLine = await readLastGatewayErrorLine(process.env);
    snapshot.lastGatewayError = lastLine ?? undefined;
  } catch (err) {
    failureNotes.push(`log diagnostics failed: ${String(err)}`);
  }

  if (failureNotes.length > 0) {
    snapshot.inspectError = failureNotes.join("; ");
  }

  const hasFindings = Boolean(
    snapshot.service || snapshot.lastGatewayError || snapshot.inspectError,
  );
  return hasFindings ? snapshot : undefined;
}
export async function runNonInteractiveOnboardingLocal(params: {
opts: OnboardOptions;
runtime: RuntimeEnv;
@@ -115,24 +186,33 @@ export async function runNonInteractiveOnboardingLocal(params: {
: ATTACH_EXISTING_GATEWAY_HEALTH_DEADLINE_MS,
});
if (!probe.ok) {
const message = [
`Gateway did not become reachable at ${links.wsUrl}.`,
probe.detail ? `Last probe: ${probe.detail}` : undefined,
!opts.installDaemon
const diagnostics = opts.installDaemon
? await collectGatewayHealthFailureDiagnostics()
: undefined;
logNonInteractiveOnboardingFailure({
opts,
runtime,
mode,
phase: "gateway-health",
message: `Gateway did not become reachable at ${links.wsUrl}.`,
detail: probe.detail,
gateway: {
wsUrl: links.wsUrl,
httpUrl: links.httpUrl,
},
installDaemon: Boolean(opts.installDaemon),
daemonRuntime: opts.installDaemon ? daemonRuntimeRaw : undefined,
diagnostics,
hints: !opts.installDaemon
? [
"Non-interactive local onboarding only waits for an already-running gateway unless you pass --install-daemon.",
`Fix: start \`${formatCliCommand("openclaw gateway run")}\`, re-run with \`--install-daemon\`, or use \`--skip-health\`.`,
process.platform === "win32"
? "Native Windows managed gateway install tries Scheduled Tasks first and falls back to a per-user Startup-folder login item when task creation is denied."
: undefined,
]
.filter(Boolean)
.join("\n")
: undefined,
]
.filter(Boolean)
.join("\n");
runtime.error(message);
].filter((value): value is string => Boolean(value))
: [`Run \`${formatCliCommand("openclaw gateway status --deep")}\` for more detail.`],
});
runtime.exit(1);
return;
}

View File

@@ -1,6 +1,21 @@
import type { RuntimeEnv } from "../../../runtime.js";
import type { OnboardOptions } from "../../onboard-types.js";
/**
 * Optional diagnostics attached to a non-interactive onboarding failure report.
 * Every top-level field is optional; the failure logger only renders the parts
 * that are present.
 */
type GatewayHealthFailureDiagnostics = {
// Gateway service details, rendered as the "Service:" and "Runtime:" lines.
service?: {
label: string;
loaded: boolean;
// Text shown next to the label when `loaded` is true ("not loaded" is shown otherwise).
loadedText: string;
// The runtime summary is only produced when this is set.
runtimeStatus?: string;
state?: string;
pid?: number;
lastExitStatus?: number;
lastExitReason?: string;
};
// Rendered as the "Last gateway error:" line.
lastGatewayError?: string;
// Rendered as the "Diagnostics warning:" line.
inspectError?: string;
};
export function logNonInteractiveOnboardingJson(params: {
opts: OnboardOptions;
runtime: RuntimeEnv;
@@ -24,6 +39,7 @@ export function logNonInteractiveOnboardingJson(params: {
params.runtime.log(
JSON.stringify(
{
ok: true,
mode: params.mode,
workspace: params.workspaceDir,
authChoice: params.authChoice,
@@ -38,3 +54,88 @@ export function logNonInteractiveOnboardingJson(params: {
),
);
}
/**
 * Renders the gateway service runtime as a single comma-separated line, e.g.
 * "running, pid 4242, state active".
 *
 * @param diagnostics Collected failure diagnostics, if any.
 * @returns The summary, or `undefined` when there is no service entry or the
 *   service has no (non-empty) runtime status.
 */
function formatGatewayRuntimeSummary(
  diagnostics: GatewayHealthFailureDiagnostics | undefined,
): string | undefined {
  const runtime = diagnostics?.service;
  const status = runtime?.runtimeStatus;
  if (!runtime || !status) {
    return undefined;
  }

  const { pid, state, lastExitStatus, lastExitReason } = runtime;
  // Numeric fields are kept even when 0; string fields are dropped when empty.
  const segments: Array<string | undefined> = [
    status,
    typeof pid === "number" ? `pid ${pid}` : undefined,
    state ? `state ${state}` : undefined,
    typeof lastExitStatus === "number" ? `last exit ${lastExitStatus}` : undefined,
    lastExitReason ? `reason ${lastExitReason}` : undefined,
  ];
  return segments.filter((segment): segment is string => segment !== undefined).join(", ");
}
/**
 * Reports a non-interactive onboarding failure through `runtime.error`.
 *
 * With `opts.json` set, a single pretty-printed JSON document (`ok: false`) is
 * emitted. Otherwise a newline-separated, human-readable message is emitted that
 * contains only the sections for which data is present.
 *
 * @param params.message Headline describing the failure.
 * @param params.detail Extra probe detail, shown as "Last probe: …" in text mode.
 * @param params.hints Follow-up suggestions; falsy entries are dropped.
 * @param params.diagnostics Optional service/log diagnostics to include.
 */
export function logNonInteractiveOnboardingFailure(params: {
  opts: OnboardOptions;
  runtime: RuntimeEnv;
  mode: "local" | "remote";
  phase: string;
  message: string;
  detail?: string;
  hints?: string[];
  gateway?: {
    wsUrl?: string;
    httpUrl?: string;
  };
  installDaemon?: boolean;
  daemonRuntime?: string;
  diagnostics?: GatewayHealthFailureDiagnostics;
}): void {
  const { opts, runtime, diagnostics } = params;
  const hints = (params.hints ?? []).filter(Boolean);
  const gatewayRuntime = formatGatewayRuntimeSummary(diagnostics);

  if (opts.json) {
    // Key order here is the order consumers see in the emitted JSON.
    const payload = {
      ok: false,
      mode: params.mode,
      phase: params.phase,
      message: params.message,
      detail: params.detail,
      gateway: params.gateway,
      installDaemon: Boolean(params.installDaemon),
      daemonRuntime: params.daemonRuntime,
      diagnostics,
      hints: hints.length > 0 ? hints : undefined,
    };
    runtime.error(JSON.stringify(payload, null, 2));
    return;
  }

  const sections: string[] = [];
  const addSection = (text: string | undefined): void => {
    // Empty and missing sections are left out entirely.
    if (text) {
      sections.push(text);
    }
  };

  addSection(params.message);
  if (params.detail) {
    addSection(`Last probe: ${params.detail}`);
  }
  const service = diagnostics?.service;
  if (service) {
    const loadState = service.loaded ? service.loadedText : "not loaded";
    addSection(`Service: ${service.label} (${loadState})`);
  }
  if (gatewayRuntime) {
    addSection(`Runtime: ${gatewayRuntime}`);
  }
  if (diagnostics?.lastGatewayError) {
    addSection(`Last gateway error: ${diagnostics.lastGatewayError}`);
  }
  if (diagnostics?.inspectError) {
    addSection(`Diagnostics warning: ${diagnostics.inspectError}`);
  }
  if (hints.length > 0) {
    addSection(hints.join("\n"));
  }

  runtime.error(sections.join("\n"));
}