diff --git a/CHANGELOG.md b/CHANGELOG.md index 32467dc6031..a49a78997d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ Docs: https://docs.openclaw.ai - CLI/containers: add `--container` and `OPENCLAW_CONTAINER` to run `openclaw` commands inside a running Docker or Podman OpenClaw container. (#52651) Thanks @sallyom. - Discord/auto threads: add optional `autoThreadName: "generated"` naming so new auto-created threads can be renamed asynchronously with concise LLM-generated titles while keeping the existing message-based naming as the default. (#43366) Thanks @davidguttman. - Slack/interactive replies: restore rich reply parity for direct deliveries, auto-render simple trailing `Options:` lines as buttons/selects, improve Slack interactive setup defaults, and isolate reply controls from plugin interactive handlers. (#53389) Thanks @vincentkoc. +- Gateway/OpenAI compatibility: add `/v1/models` and `/v1/embeddings`, and forward explicit model overrides through `/v1/chat/completions` and `/v1/responses` for broader client and RAG compatibility. Thanks @vincentkoc. ### Fixes diff --git a/docs/gateway/index.md b/docs/gateway/index.md index f64de55f32a..fffebcc89f5 100644 --- a/docs/gateway/index.md +++ b/docs/gateway/index.md @@ -70,11 +70,35 @@ Default mode is `gateway.reload.mode="hybrid"`. - One always-on process for routing, control plane, and channel connections. - Single multiplexed port for: - WebSocket control/RPC - - HTTP APIs (OpenAI-compatible, Responses, tools invoke) + - HTTP APIs: OpenAI-compatible (`/v1/models`, `/v1/embeddings`, `/v1/chat/completions`, `/v1/responses`) and tools invoke (`/tools/invoke`) - Control UI and hooks - Default bind mode: `loopback`. - Auth is required by default (`gateway.auth.token` / `gateway.auth.password`, or `OPENCLAW_GATEWAY_TOKEN` / `OPENCLAW_GATEWAY_PASSWORD`).
+## OpenAI-compatible endpoints + +OpenClaw’s highest-leverage compatibility surface is now: + +- `GET /v1/models` +- `GET /v1/models/{id}` +- `POST /v1/embeddings` +- `POST /v1/chat/completions` +- `POST /v1/responses` + +Why this set matters: + +- Most Open WebUI, LobeChat, and LibreChat integrations probe `/v1/models` first. +- Many RAG and memory pipelines expect `/v1/embeddings`. +- Agent-native clients increasingly prefer `/v1/responses`. + +Planning note: + +- Keep `/v1/models` as a flat `provider/model` list for client compatibility. +- Treat agent and sub-agent selection as separate OpenClaw routing concerns, not pseudo-model entries. +- When you need agent-scoped filtering, pass `x-openclaw-agent-id` on both model-list and request calls. + +All of these run on the main Gateway port and use the same trusted operator auth boundary as the rest of the Gateway HTTP API. + ### Port and bind precedence | Setting | Resolution order | diff --git a/docs/gateway/openai-http-api.md b/docs/gateway/openai-http-api.md index 722b3fdf706..bf541d282fc 100644 --- a/docs/gateway/openai-http-api.md +++ b/docs/gateway/openai-http-api.md @@ -14,6 +14,13 @@ This endpoint is **disabled by default**. Enable it in config first. - `POST /v1/chat/completions` - Same port as the Gateway (WS + HTTP multiplex): `http://<gateway-host>:<port>/v1/chat/completions` +When the Gateway’s OpenAI-compatible HTTP surface is enabled, it also serves: + +- `GET /v1/models` +- `GET /v1/models/{id}` +- `POST /v1/embeddings` +- `POST /v1/responses` + Under the hood, requests are executed as a normal Gateway agent run (same codepath as `openclaw agent`), so routing/permissions/config match your Gateway. ## Authentication @@ -55,6 +62,12 @@ Or target a specific OpenClaw agent by header: Advanced: - `x-openclaw-session-key: <sessionKey>` to fully control session routing. +- `x-openclaw-message-channel: <channel>` to set the synthetic ingress channel context for channel-aware prompts and policies.
+ +For `/v1/models` and `/v1/embeddings`, `x-openclaw-agent-id` is still useful: + +- `/v1/models` uses it for agent-scoped model filtering where relevant. +- `/v1/embeddings` uses it to resolve agent-specific memory-search embedding config. ## Enabling the endpoint @@ -94,6 +107,51 @@ By default the endpoint is **stateless per request** (a new session key is gener If the request includes an OpenAI `user` string, the Gateway derives a stable session key from it, so repeated calls can share an agent session. +## Why this surface matters + +This is the highest-leverage compatibility set for self-hosted frontends and tooling: + +- Most Open WebUI, LobeChat, and LibreChat setups expect `/v1/models`. +- Many RAG systems expect `/v1/embeddings`. +- Existing OpenAI chat clients can usually start with `/v1/chat/completions`. +- Agent-native clients increasingly prefer `/v1/responses`. + +## Model list and agent routing + +<AccordionGroup> + <Accordion title="What does `/v1/models` return?"> + A flat OpenAI-style model list. + + The returned ids are canonical `provider/model` values such as `openai/gpt-5.4`. + These ids are meant to be passed back directly as the OpenAI `model` field. + </Accordion> + + <Accordion title="Does `/v1/models` list agents or sub-agents?"> + No. + + `/v1/models` lists model choices, not execution topology. Agents and sub-agents are OpenClaw routing concerns, so they are selected separately with `x-openclaw-agent-id` or the `openclaw:<agentId>` / `agent:<agentId>` model aliases on chat and responses requests. + </Accordion> + + <Accordion title="How does agent-scoped filtering work?"> + Send `x-openclaw-agent-id: <agentId>` when you want the model list for a specific agent. + + OpenClaw filters the model list against that agent's allowed models and fallbacks when configured. If no allowlist is configured, the endpoint returns the full catalog. + </Accordion> + + <Accordion title="How do sub-agents pick a model?"> + Sub-agent model choice is resolved at spawn time from OpenClaw agent config. + + That means sub-agent model selection does not create extra `/v1/models` entries. Keep the compatibility list flat, and treat agent and sub-agent selection as separate OpenClaw-native routing behavior.
+ </Accordion> + + <Accordion title="What should clients do in practice?"> + Use `/v1/models` to populate the normal model picker. + + If your client or integration also knows which OpenClaw agent it wants, set `x-openclaw-agent-id` when listing models and when sending chat, responses, or embeddings requests. That keeps the picker aligned with the target agent's allowed model set. + </Accordion> + +</AccordionGroup> + ## Streaming (SSE) Set `stream: true` to receive Server-Sent Events (SSE): @@ -130,3 +188,36 @@ curl -N http://127.0.0.1:18789/v1/chat/completions \ "messages": [{"role":"user","content":"hi"}] }' ``` + +List models: + +```bash +curl -sS http://127.0.0.1:18789/v1/models \ + -H 'Authorization: Bearer YOUR_TOKEN' +``` + +Fetch one model: + +```bash +curl -sS http://127.0.0.1:18789/v1/models/openai%2Fgpt-5.4 \ + -H 'Authorization: Bearer YOUR_TOKEN' +``` + +Create embeddings: + +```bash +curl -sS http://127.0.0.1:18789/v1/embeddings \ + -H 'Authorization: Bearer YOUR_TOKEN' \ + -H 'Content-Type: application/json' \ + -H 'x-openclaw-agent-id: main' \ + -d '{ + "model": "openai/text-embedding-3-small", + "input": ["alpha", "beta"] + }' +``` + +Notes: + +- `/v1/models` returns canonical ids in `provider/model` form so they can be passed back directly as OpenAI `model` values. +- `/v1/models` stays flat on purpose: it does not enumerate agents or sub-agents as pseudo-model ids. +- `/v1/embeddings` supports `input` as a string or array of strings.
diff --git a/docs/gateway/openresponses-http-api.md b/docs/gateway/openresponses-http-api.md index 8305da62ee5..58d883bdd04 100644 --- a/docs/gateway/openresponses-http-api.md +++ b/docs/gateway/openresponses-http-api.md @@ -26,9 +26,19 @@ Operational behavior matches [OpenAI Chat Completions](/gateway/openai-http-api) - treat the endpoint as full operator access for the gateway instance - select agents with `model: "openclaw:<agentId>"`, `model: "agent:<agentId>"`, or `x-openclaw-agent-id` - use `x-openclaw-session-key` for explicit session routing +- use `x-openclaw-message-channel` when you want a non-default synthetic ingress channel context Enable or disable this endpoint with `gateway.http.endpoints.responses.enabled`. +The same compatibility surface also includes: + +- `GET /v1/models` +- `GET /v1/models/{id}` +- `POST /v1/embeddings` +- `POST /v1/chat/completions` + +For the canonical explanation of how model listing, agent routing, and sub-agent model selection fit together, see [OpenAI Chat Completions](/gateway/openai-http-api#model-list-and-agent-routing). + ## Session behavior By default the endpoint is **stateless per request** (a new session key is generated each call). @@ -54,9 +64,12 @@ Accepted but **currently ignored**: - `reasoning` - `metadata` - `store` -- `previous_response_id` - `truncation` +Supported: + +- `previous_response_id`: OpenClaw reuses the earlier response session when the request stays within the same agent/user/requested-session scope.
+ ## Items (input) ### `message` diff --git a/src/gateway/embeddings-http.test.ts b/src/gateway/embeddings-http.test.ts new file mode 100644 index 00000000000..9318b26590d --- /dev/null +++ b/src/gateway/embeddings-http.test.ts @@ -0,0 +1,176 @@ +import { afterAll, beforeAll, describe, expect, it, vi } from "vitest"; +import { resolveAgentDir } from "../agents/agent-scope.js"; +import { getFreePort, installGatewayTestHooks } from "./test-helpers.js"; + +installGatewayTestHooks({ scope: "suite" }); + +let startGatewayServer: typeof import("./server.js").startGatewayServer; +let createEmbeddingProviderMock: ReturnType; +let enabledServer: Awaited>; +let enabledPort: number; + +beforeAll(async () => { + vi.resetModules(); + createEmbeddingProviderMock = vi.fn(async (options: { provider: string; model: string }) => ({ + provider: { + id: options.provider, + model: options.model, + embedQuery: async () => [0.1, 0.2], + embedBatch: async (texts: string[]) => + texts.map((_text, index) => [index + 0.1, index + 0.2]), + }, + })); + vi.doMock("../memory/embeddings.js", async () => { + const actual = + await vi.importActual("../memory/embeddings.js"); + return { + ...actual, + createEmbeddingProvider: createEmbeddingProviderMock, + }; + }); + ({ startGatewayServer } = await import("./server.js")); + enabledPort = await getFreePort(); + enabledServer = await startServer(enabledPort, { openAiChatCompletionsEnabled: true }); +}); + +afterAll(async () => { + await enabledServer.close({ reason: "embeddings http enabled suite done" }); + vi.resetModules(); +}); + +async function startServer(port: number, opts?: { openAiChatCompletionsEnabled?: boolean }) { + return await startGatewayServer(port, { + host: "127.0.0.1", + auth: { mode: "token", token: "secret" }, + controlUiEnabled: false, + openAiChatCompletionsEnabled: opts?.openAiChatCompletionsEnabled ?? 
false, + }); +} + +async function postEmbeddings(body: unknown, headers?: Record) { + return await fetch(`http://127.0.0.1:${enabledPort}/v1/embeddings`, { + method: "POST", + headers: { + authorization: "Bearer secret", + "content-type": "application/json", + ...headers, + }, + body: JSON.stringify(body), + }); +} + +describe("OpenAI-compatible embeddings HTTP API (e2e)", () => { + it("embeds string and array inputs", async () => { + const single = await postEmbeddings({ + model: "text-embedding-3-small", + input: "hello", + }); + expect(single.status).toBe(200); + const singleJson = (await single.json()) as { + object?: string; + data?: Array<{ object?: string; embedding?: number[]; index?: number }>; + }; + expect(singleJson.object).toBe("list"); + expect(singleJson.data?.[0]?.object).toBe("embedding"); + expect(singleJson.data?.[0]?.embedding).toEqual([0.1, 0.2]); + + const batch = await postEmbeddings({ + model: "text-embedding-3-small", + input: ["a", "b"], + }); + expect(batch.status).toBe(200); + const batchJson = (await batch.json()) as { + data?: Array<{ embedding?: number[]; index?: number }>; + }; + expect(batchJson.data).toEqual([ + { object: "embedding", index: 0, embedding: [0.1, 0.2] }, + { object: "embedding", index: 1, embedding: [1.1, 1.2] }, + ]); + + const qualified = await postEmbeddings({ + model: "openai/text-embedding-3-small", + input: "hello again", + }); + expect(qualified.status).toBe(200); + const qualifiedJson = (await qualified.json()) as { model?: string }; + expect(qualifiedJson.model).toBe("openai/text-embedding-3-small"); + const lastCall = createEmbeddingProviderMock.mock.calls.at(-1)?.[0] as + | { provider?: string; model?: string } + | undefined; + expect(lastCall).toMatchObject({ + provider: "openai", + model: "text-embedding-3-small", + }); + }); + + it("supports base64 encoding and agent-scoped auth/config resolution", async () => { + const res = await postEmbeddings( + { + model: "text-embedding-3-small", + input: "hello", 
+ encoding_format: "base64", + }, + { "x-openclaw-agent-id": "beta" }, + ); + expect(res.status).toBe(200); + const json = (await res.json()) as { data?: Array<{ embedding?: string }> }; + expect(typeof json.data?.[0]?.embedding).toBe("string"); + expect(createEmbeddingProviderMock).toHaveBeenCalled(); + const lastCall = createEmbeddingProviderMock.mock.calls.at(-1)?.[0] as + | { provider?: string; model?: string; fallback?: string; agentDir?: string } + | undefined; + expect(lastCall?.model).toBe("text-embedding-3-small"); + expect(lastCall?.fallback).toBe("none"); + expect(lastCall?.agentDir).toBe(resolveAgentDir({}, "beta")); + }); + + it("rejects invalid input shapes", async () => { + const res = await postEmbeddings({ + model: "text-embedding-3-small", + input: [{ nope: true }], + }); + expect(res.status).toBe(400); + const json = (await res.json()) as { error?: { type?: string } }; + expect(json.error?.type).toBe("invalid_request_error"); + }); + + it("rejects disallowed provider-prefixed model overrides", async () => { + const res = await postEmbeddings({ + model: "ollama/nomic-embed-text", + input: "hello", + }); + expect(res.status).toBe(400); + const json = (await res.json()) as { error?: { type?: string; message?: string } }; + expect(json.error).toEqual({ + type: "invalid_request_error", + message: "This agent does not allow that embedding provider on `/v1/embeddings`.", + }); + }); + + it("rejects oversized batches", async () => { + const res = await postEmbeddings({ + model: "text-embedding-3-small", + input: Array.from({ length: 129 }, () => "x"), + }); + expect(res.status).toBe(400); + const json = (await res.json()) as { error?: { type?: string; message?: string } }; + expect(json.error).toEqual({ + type: "invalid_request_error", + message: "Too many inputs (max 128).", + }); + }); + + it("sanitizes provider failures", async () => { + createEmbeddingProviderMock.mockRejectedValueOnce(new Error("secret upstream failure")); + const res = await 
postEmbeddings({ + model: "text-embedding-3-small", + input: "hello", + }); + expect(res.status).toBe(500); + const json = (await res.json()) as { error?: { type?: string; message?: string } }; + expect(json.error).toEqual({ + type: "api_error", + message: "internal error", + }); + }); +}); diff --git a/src/gateway/embeddings-http.ts b/src/gateway/embeddings-http.ts new file mode 100644 index 00000000000..222bdc8244a --- /dev/null +++ b/src/gateway/embeddings-http.ts @@ -0,0 +1,252 @@ +import { Buffer } from "node:buffer"; +import type { IncomingMessage, ServerResponse } from "node:http"; +import { resolveAgentDir } from "../agents/agent-scope.js"; +import { resolveMemorySearchConfig } from "../agents/memory-search.js"; +import { loadConfig } from "../config/config.js"; +import { logWarn } from "../logger.js"; +import { + createEmbeddingProvider, + type EmbeddingProviderOptions, + type EmbeddingProviderId, + type EmbeddingProviderRequest, +} from "../memory/embeddings.js"; +import type { AuthRateLimiter } from "./auth-rate-limit.js"; +import type { ResolvedGatewayAuth } from "./auth.js"; +import { sendJson } from "./http-common.js"; +import { handleGatewayPostJsonEndpoint } from "./http-endpoint-helpers.js"; +import { resolveAgentIdFromHeader } from "./http-utils.js"; + +type OpenAiEmbeddingsHttpOptions = { + auth: ResolvedGatewayAuth; + maxBodyBytes?: number; + trustedProxies?: string[]; + allowRealIpFallback?: boolean; + rateLimiter?: AuthRateLimiter; +}; + +type EmbeddingsRequest = { + model?: unknown; + input?: unknown; + encoding_format?: unknown; + dimensions?: unknown; + user?: unknown; +}; + +const DEFAULT_EMBEDDINGS_BODY_BYTES = 5 * 1024 * 1024; +const MAX_EMBEDDING_INPUTS = 128; +const MAX_EMBEDDING_INPUT_CHARS = 8_192; +const MAX_EMBEDDING_TOTAL_CHARS = 65_536; +const SAFE_AUTO_EXPLICIT_PROVIDERS = new Set([ + "openai", + "gemini", + "voyage", + "mistral", +]); + +function coerceRequest(value: unknown): EmbeddingsRequest { + return value && typeof value 
=== "object" ? (value as EmbeddingsRequest) : {}; +} + +function resolveInputTexts(input: unknown): string[] | null { + if (typeof input === "string") { + return [input]; + } + if (!Array.isArray(input)) { + return null; + } + if (input.every((entry) => typeof entry === "string")) { + return input; + } + return null; +} + +function encodeEmbeddingBase64(embedding: number[]): string { + const float32 = Float32Array.from(embedding); + return Buffer.from(float32.buffer).toString("base64"); +} + +function validateInputTexts(texts: string[]): string | undefined { + if (texts.length > MAX_EMBEDDING_INPUTS) { + return `Too many inputs (max ${MAX_EMBEDDING_INPUTS}).`; + } + let totalChars = 0; + for (const text of texts) { + if (text.length > MAX_EMBEDDING_INPUT_CHARS) { + return `Input too long (max ${MAX_EMBEDDING_INPUT_CHARS} chars).`; + } + totalChars += text.length; + if (totalChars > MAX_EMBEDDING_TOTAL_CHARS) { + return `Total input too large (max ${MAX_EMBEDDING_TOTAL_CHARS} chars).`; + } + } + return undefined; +} + +function resolveEmbeddingsTarget(params: { + requestModel: string; + configuredProvider: EmbeddingProviderRequest; +}): { provider: EmbeddingProviderRequest; model: string } | { errorMessage: string } { + const raw = params.requestModel.trim(); + const slash = raw.indexOf("/"); + if (slash === -1) { + return { provider: params.configuredProvider, model: raw }; + } + + const provider = raw.slice(0, slash).trim().toLowerCase() as EmbeddingProviderRequest; + const model = raw.slice(slash + 1).trim(); + if (!model) { + return { errorMessage: "Unsupported embedding model reference." 
}; + } + + if (params.configuredProvider === "auto") { + if (provider === "auto") { + return { provider: "auto", model }; + } + if (SAFE_AUTO_EXPLICIT_PROVIDERS.has(provider)) { + return { provider, model }; + } + return { + errorMessage: "This agent does not allow that embedding provider on `/v1/embeddings`.", + }; + } + + if (provider !== params.configuredProvider) { + return { + errorMessage: "This agent does not allow that embedding provider on `/v1/embeddings`.", + }; + } + + return { provider: params.configuredProvider, model }; +} + +export async function handleOpenAiEmbeddingsHttpRequest( + req: IncomingMessage, + res: ServerResponse, + opts: OpenAiEmbeddingsHttpOptions, +): Promise { + const handled = await handleGatewayPostJsonEndpoint(req, res, { + pathname: "/v1/embeddings", + auth: opts.auth, + trustedProxies: opts.trustedProxies, + allowRealIpFallback: opts.allowRealIpFallback, + rateLimiter: opts.rateLimiter, + maxBodyBytes: opts.maxBodyBytes ?? DEFAULT_EMBEDDINGS_BODY_BYTES, + }); + if (handled === false) { + return false; + } + if (!handled) { + return true; + } + + const payload = coerceRequest(handled.body); + const requestModel = typeof payload.model === "string" ? payload.model.trim() : ""; + if (!requestModel) { + sendJson(res, 400, { + error: { message: "Missing `model`.", type: "invalid_request_error" }, + }); + return true; + } + + const texts = resolveInputTexts(payload.input); + if (!texts) { + sendJson(res, 400, { + error: { + message: "`input` must be a string or an array of strings.", + type: "invalid_request_error", + }, + }); + return true; + } + const inputError = validateInputTexts(texts); + if (inputError) { + sendJson(res, 400, { + error: { message: inputError, type: "invalid_request_error" }, + }); + return true; + } + + const cfg = loadConfig(); + const agentId = resolveAgentIdFromHeader(req) ?? 
"main"; + const agentDir = resolveAgentDir(cfg, agentId); + const memorySearch = resolveMemorySearchConfig(cfg, agentId); + const configuredProvider = (memorySearch?.provider ?? "openai") as EmbeddingProviderRequest; + const target = resolveEmbeddingsTarget({ + requestModel, + configuredProvider, + }); + if ("errorMessage" in target) { + sendJson(res, 400, { + error: { + message: target.errorMessage, + type: "invalid_request_error", + }, + }); + return true; + } + + const options: EmbeddingProviderOptions = { + config: cfg, + agentDir, + provider: target.provider, + model: target.model, + // Public HTTP embeddings should fail closed rather than silently mixing + // vector spaces across fallback providers/models. + fallback: "none", + local: memorySearch?.local, + remote: memorySearch?.remote + ? { + baseUrl: memorySearch.remote.baseUrl, + apiKey: memorySearch.remote.apiKey, + headers: memorySearch.remote.headers, + } + : undefined, + outputDimensionality: + typeof payload.dimensions === "number" && payload.dimensions > 0 + ? Math.floor(payload.dimensions) + : memorySearch?.outputDimensionality, + }; + + try { + const result = await createEmbeddingProvider(options); + if (!result.provider) { + sendJson(res, 503, { + error: { + message: result.providerUnavailableReason ?? "Embeddings provider unavailable.", + type: "api_error", + }, + }); + return true; + } + + const embeddings = await result.provider.embedBatch(texts); + const encodingFormat = payload.encoding_format === "base64" ? "base64" : "float"; + + sendJson(res, 200, { + object: "list", + data: embeddings.map((embedding, index) => ({ + object: "embedding", + index, + embedding: encodingFormat === "base64" ? encodeEmbeddingBase64(embedding) : embedding, + })), + model: + requestModel.includes("/") || target.provider === "auto" + ? 
requestModel + : `${target.provider}/${target.model}`, + usage: { + prompt_tokens: 0, + total_tokens: 0, + }, + }); + } catch (err) { + logWarn(`openai-compat: embeddings request failed: ${String(err)}`); + sendJson(res, 500, { + error: { + message: "internal error", + type: "api_error", + }, + }); + } + + return true; +} diff --git a/src/gateway/http-utils.ts b/src/gateway/http-utils.ts index f3ffa8af7da..7e61c25d5dc 100644 --- a/src/gateway/http-utils.ts +++ b/src/gateway/http-utils.ts @@ -1,7 +1,16 @@ import { randomUUID } from "node:crypto"; import type { IncomingMessage } from "node:http"; +import { + buildAllowedModelSet, + isCliProvider, + modelKey, + parseModelRef, + resolveDefaultModelForAgent, +} from "../agents/model-selection.js"; +import { loadConfig } from "../config/config.js"; import { buildAgentMainSessionKey, normalizeAgentId } from "../routing/session-key.js"; import { normalizeMessageChannel } from "../utils/message-channel.js"; +import { loadGatewayModelCatalog } from "./server-model-catalog.js"; export function getHeader(req: IncomingMessage, name: string): string | undefined { const raw = req.headers[name.toLowerCase()]; @@ -50,6 +59,51 @@ export function resolveAgentIdFromModel(model: string | undefined): string | und return normalizeAgentId(agentId); } +export async function resolveOpenAiCompatModelOverride(params: { + agentId: string; + model: string | undefined; +}): Promise<{ modelOverride?: string; errorMessage?: string }> { + const model = params.model; + const raw = model?.trim(); + if (!raw) { + return {}; + } + if (raw.toLowerCase() === "openclaw") { + return {}; + } + if (resolveAgentIdFromModel(raw)) { + return {}; + } + + const cfg = loadConfig(); + const defaultModelRef = resolveDefaultModelForAgent({ cfg, agentId: params.agentId }); + const defaultProvider = defaultModelRef.provider; + const parsed = parseModelRef(raw, defaultProvider); + if (!parsed) { + return { errorMessage: "Invalid `model`." 
}; + } + + const catalog = await loadGatewayModelCatalog(); + const allowed = buildAllowedModelSet({ + cfg, + catalog, + defaultProvider, + agentId: params.agentId, + }); + const normalized = modelKey(parsed.provider, parsed.model); + if ( + !isCliProvider(parsed.provider, cfg) && + !allowed.allowAny && + !allowed.allowedKeys.has(normalized) + ) { + return { + errorMessage: `Model '${normalized}' is not allowed for agent '${params.agentId}'.`, + }; + } + + return { modelOverride: raw }; +} + export function resolveAgentIdForRequest(params: { req: IncomingMessage; model: string | undefined; diff --git a/src/gateway/models-http.test.ts b/src/gateway/models-http.test.ts new file mode 100644 index 00000000000..4a70b08463a --- /dev/null +++ b/src/gateway/models-http.test.ts @@ -0,0 +1,76 @@ +import { afterAll, beforeAll, describe, expect, it } from "vitest"; +import { getFreePort, installGatewayTestHooks } from "./test-helpers.js"; + +installGatewayTestHooks({ scope: "suite" }); + +let startGatewayServer: typeof import("./server.js").startGatewayServer; +let enabledServer: Awaited>; +let enabledPort: number; + +beforeAll(async () => { + ({ startGatewayServer } = await import("./server.js")); + enabledPort = await getFreePort(); + enabledServer = await startServer(enabledPort, { openAiChatCompletionsEnabled: true }); +}); + +afterAll(async () => { + await enabledServer.close({ reason: "models http enabled suite done" }); +}); + +async function startServer(port: number, opts?: { openAiChatCompletionsEnabled?: boolean }) { + return await startGatewayServer(port, { + host: "127.0.0.1", + auth: { mode: "token", token: "secret" }, + controlUiEnabled: false, + openAiChatCompletionsEnabled: opts?.openAiChatCompletionsEnabled ?? 
false, + }); +} + +async function getModels(pathname: string, headers?: Record) { + return await fetch(`http://127.0.0.1:${enabledPort}${pathname}`, { + headers: { + authorization: "Bearer secret", + ...headers, + }, + }); +} + +describe("OpenAI-compatible models HTTP API (e2e)", () => { + it("serves /v1/models when compatibility endpoints are enabled", async () => { + const res = await getModels("/v1/models"); + expect(res.status).toBe(200); + const json = (await res.json()) as { object?: string; data?: Array<{ id?: string }> }; + expect(json.object).toBe("list"); + expect(Array.isArray(json.data)).toBe(true); + expect((json.data?.length ?? 0) > 0).toBe(true); + expect( + json.data?.every((entry) => typeof entry.id === "string" && entry.id?.includes("/")), + ).toBe(true); + }); + + it("serves /v1/models/{id}", async () => { + const list = (await (await getModels("/v1/models")).json()) as { + data?: Array<{ id?: string }>; + }; + const firstId = list.data?.[0]?.id; + expect(typeof firstId).toBe("string"); + const res = await getModels(`/v1/models/${encodeURIComponent(firstId!)}`); + expect(res.status).toBe(200); + const json = (await res.json()) as { id?: string; object?: string }; + expect(json.object).toBe("model"); + expect(json.id).toBe(firstId); + }); + + it("rejects when disabled", async () => { + const port = await getFreePort(); + const server = await startServer(port, { openAiChatCompletionsEnabled: false }); + try { + const res = await fetch(`http://127.0.0.1:${port}/v1/models`, { + headers: { authorization: "Bearer secret" }, + }); + expect(res.status).toBe(404); + } finally { + await server.close({ reason: "models disabled test done" }); + } + }); +}); diff --git a/src/gateway/models-http.ts b/src/gateway/models-http.ts new file mode 100644 index 00000000000..6d954e324ea --- /dev/null +++ b/src/gateway/models-http.ts @@ -0,0 +1,138 @@ +import type { IncomingMessage, ServerResponse } from "node:http"; +import { DEFAULT_PROVIDER } from 
"../agents/defaults.js"; +import type { ModelCatalogEntry } from "../agents/model-catalog.js"; +import { buildAllowedModelSet, modelKey, parseModelRef } from "../agents/model-selection.js"; +import { loadConfig } from "../config/config.js"; +import type { AuthRateLimiter } from "./auth-rate-limit.js"; +import type { ResolvedGatewayAuth } from "./auth.js"; +import { authorizeGatewayBearerRequestOrReply } from "./http-auth-helpers.js"; +import { sendInvalidRequest, sendJson, sendMethodNotAllowed } from "./http-common.js"; +import { resolveAgentIdForRequest } from "./http-utils.js"; +import { loadGatewayModelCatalog } from "./server-model-catalog.js"; + +type OpenAiModelsHttpOptions = { + auth: ResolvedGatewayAuth; + trustedProxies?: string[]; + allowRealIpFallback?: boolean; + rateLimiter?: AuthRateLimiter; +}; + +type OpenAiModelObject = { + id: string; + object: "model"; + created: number; + owned_by: string; + permission: []; + input?: ModelCatalogEntry["input"]; + context_window?: number; + reasoning?: boolean; +}; + +function toOpenAiModel(entry: ModelCatalogEntry): OpenAiModelObject { + return { + id: modelKey(entry.provider, entry.id), + object: "model", + created: 0, + owned_by: entry.provider, + permission: [], + ...(entry.input ? { input: entry.input } : {}), + ...(typeof entry.contextWindow === "number" ? { context_window: entry.contextWindow } : {}), + ...(typeof entry.reasoning === "boolean" ? 
{ reasoning: entry.reasoning } : {}), + }; +} + +async function authorizeRequest( + req: IncomingMessage, + res: ServerResponse, + opts: OpenAiModelsHttpOptions, +): Promise { + return await authorizeGatewayBearerRequestOrReply({ + req, + res, + auth: opts.auth, + trustedProxies: opts.trustedProxies, + allowRealIpFallback: opts.allowRealIpFallback, + rateLimiter: opts.rateLimiter, + }); +} + +async function loadAllowedCatalog(req: IncomingMessage): Promise { + const cfg = loadConfig(); + const catalog = await loadGatewayModelCatalog(); + const agentId = resolveAgentIdForRequest({ req, model: undefined }); + const { allowedCatalog } = buildAllowedModelSet({ + cfg, + catalog, + defaultProvider: DEFAULT_PROVIDER, + agentId, + }); + return allowedCatalog.length > 0 ? allowedCatalog : catalog; +} + +function resolveRequestPath(req: IncomingMessage): string { + return new URL(req.url ?? "/", `http://${req.headers.host || "localhost"}`).pathname; +} + +export async function handleOpenAiModelsHttpRequest( + req: IncomingMessage, + res: ServerResponse, + opts: OpenAiModelsHttpOptions, +): Promise { + const requestPath = resolveRequestPath(req); + if (requestPath !== "/v1/models" && !requestPath.startsWith("/v1/models/")) { + return false; + } + + if (req.method !== "GET") { + sendMethodNotAllowed(res, "GET"); + return true; + } + + if (!(await authorizeRequest(req, res, opts))) { + return true; + } + + const catalog = await loadAllowedCatalog(req); + if (requestPath === "/v1/models") { + sendJson(res, 200, { + object: "list", + data: catalog.map(toOpenAiModel), + }); + return true; + } + + const encodedId = requestPath.slice("/v1/models/".length); + if (!encodedId) { + sendInvalidRequest(res, "Missing model id."); + return true; + } + + let decodedId: string; + try { + decodedId = decodeURIComponent(encodedId); + } catch { + sendInvalidRequest(res, "Invalid model id encoding."); + return true; + } + + const parsed = parseModelRef(decodedId, DEFAULT_PROVIDER); + if (!parsed) 
{ + sendInvalidRequest(res, "Invalid model id."); + return true; + } + + const key = modelKey(parsed.provider, parsed.model); + const entry = catalog.find((item) => modelKey(item.provider, item.id) === key); + if (!entry) { + sendJson(res, 404, { + error: { + message: `Model '${decodedId}' not found.`, + type: "invalid_request_error", + }, + }); + return true; + } + + sendJson(res, 200, toOpenAiModel(entry)); + return true; +} diff --git a/src/gateway/openai-http.test.ts b/src/gateway/openai-http.test.ts index 82130807a1b..e9358f779c1 100644 --- a/src/gateway/openai-http.test.ts +++ b/src/gateway/openai-http.test.ts @@ -1,3 +1,5 @@ +import fs from "node:fs/promises"; +import path from "node:path"; import { afterAll, beforeAll, describe, expect, it } from "vitest"; import { HISTORY_CONTEXT_MARKER } from "../auto-reply/reply/history.js"; import { CURRENT_MESSAGE_MARKER } from "../auto-reply/reply/mentions.js"; @@ -45,6 +47,15 @@ async function startServer(port: number, opts?: { openAiChatCompletionsEnabled?: }); } +async function writeGatewayConfig(config: Record) { + const configPath = process.env.OPENCLAW_CONFIG_PATH; + if (!configPath) { + throw new Error("OPENCLAW_CONFIG_PATH is required for gateway config tests"); + } + await fs.mkdir(path.dirname(configPath), { recursive: true }); + await fs.writeFile(configPath, JSON.stringify(config, null, 2), "utf-8"); +} + async function postChatCompletions(port: number, body: unknown, headers?: Record) { const res = await fetch(`http://127.0.0.1:${port}/v1/chat/completions`, { method: "POST", @@ -244,6 +255,54 @@ describe("OpenAI-compatible HTTP API (e2e)", () => { await res.text(); } + { + mockAgentOnce([{ text: "hello" }]); + const res = await postChatCompletions(port, { + model: "openai/gpt-5.4", + messages: [{ role: "user", content: "hi" }], + }); + expect(res.status).toBe(200); + const opts = (agentCommand.mock.calls[0] as unknown[] | undefined)?.[0]; + expect((opts as { model?: string } | 
undefined)?.model).toBe("openai/gpt-5.4"); + await res.text(); + } + + { + await writeGatewayConfig({ + agents: { + defaults: { + model: { primary: "openai/gpt-5.4" }, + models: { + "openai/gpt-5.4": {}, + }, + }, + }, + }); + mockAgentOnce([{ text: "hello" }]); + const res = await postChatCompletions(port, { + model: "gpt-5.4", + messages: [{ role: "user", content: "hi" }], + }); + expect(res.status).toBe(200); + const opts = (agentCommand.mock.calls[0] as unknown[] | undefined)?.[0]; + expect((opts as { model?: string } | undefined)?.model).toBe("gpt-5.4"); + await res.text(); + await writeGatewayConfig({}); + } + + { + agentCommand.mockClear(); + const res = await postChatCompletions(port, { + model: "openai/", + messages: [{ role: "user", content: "hi" }], + }); + expect(res.status).toBe(400); + const json = (await res.json()) as { error?: { type?: string; message?: string } }; + expect(json.error?.type).toBe("invalid_request_error"); + expect(json.error?.message).toBe("Invalid `model`."); + expect(agentCommand).toHaveBeenCalledTimes(0); + } + { mockAgentOnce([{ text: "hello" }]); const res = await postChatCompletions(port, { diff --git a/src/gateway/openai-http.ts b/src/gateway/openai-http.ts index 5809da5bcee..783658e1d11 100644 --- a/src/gateway/openai-http.ts +++ b/src/gateway/openai-http.ts @@ -27,7 +27,7 @@ import type { AuthRateLimiter } from "./auth-rate-limit.js"; import type { ResolvedGatewayAuth } from "./auth.js"; import { sendJson, setSseHeaders, writeDone } from "./http-common.js"; import { handleGatewayPostJsonEndpoint } from "./http-endpoint-helpers.js"; -import { resolveGatewayRequestContext } from "./http-utils.js"; +import { resolveGatewayRequestContext, resolveOpenAiCompatModelOverride } from "./http-utils.js"; import { normalizeInputHostnameAllowlist } from "./input-allowlist.js"; type OpenAiHttpOptions = { @@ -102,6 +102,7 @@ function writeSse(res: ServerResponse, data: unknown) { function buildAgentCommandInput(params: { prompt: { 
message: string; extraSystemPrompt?: string; images?: ImageContent[] }; + modelOverride?: string; sessionKey: string; runId: string; messageChannel: string; @@ -110,6 +111,7 @@ function buildAgentCommandInput(params: { message: params.prompt.message, extraSystemPrompt: params.prompt.extraSystemPrompt, images: params.prompt.images, + model: params.modelOverride, sessionKey: params.sessionKey, runId: params.runId, deliver: false as const, @@ -432,7 +434,7 @@ export async function handleOpenAiHttpRequest( const model = typeof payload.model === "string" ? payload.model : "openclaw"; const user = typeof payload.user === "string" ? payload.user : undefined; - const { sessionKey, messageChannel } = resolveGatewayRequestContext({ + const { agentId, sessionKey, messageChannel } = resolveGatewayRequestContext({ req, model, user, @@ -440,6 +442,16 @@ export async function handleOpenAiHttpRequest( defaultMessageChannel: "webchat", useMessageChannelHeader: true, }); + const { modelOverride, errorMessage: modelError } = await resolveOpenAiCompatModelOverride({ + agentId, + model, + }); + if (modelError) { + sendJson(res, 400, { + error: { message: modelError, type: "invalid_request_error" }, + }); + return true; + } const activeTurnContext = resolveActiveTurnContext(payload.messages); const prompt = buildAgentPrompt(payload.messages, activeTurnContext.activeUserMessageIndex); let images: ImageContent[] = []; @@ -474,6 +486,7 @@ export async function handleOpenAiHttpRequest( extraSystemPrompt: prompt.extraSystemPrompt, images: images.length > 0 ? 
images : undefined, }, + modelOverride, sessionKey, runId, messageChannel, diff --git a/src/gateway/openresponses-http.test.ts b/src/gateway/openresponses-http.test.ts index b7549339dc7..79341e2b00c 100644 --- a/src/gateway/openresponses-http.test.ts +++ b/src/gateway/openresponses-http.test.ts @@ -17,12 +17,12 @@ let openResponsesTesting: { responseId: string, sessionKey: string, now: number, - scope?: { agentId: string; user?: string; requestedSessionKey?: string }, + scope?: { authSubject: string; agentId: string; requestedSessionKey?: string }, ): void; lookupResponseSessionAt( responseId: string | undefined, now: number, - scope?: { agentId: string; user?: string; requestedSessionKey?: string }, + scope?: { authSubject: string; agentId: string; requestedSessionKey?: string }, ): string | undefined; getResponseSessionIds(): string[]; }; @@ -233,6 +233,17 @@ describe("OpenResponses HTTP API (e2e)", () => { ); await ensureResponseConsumed(resMissingModel); + agentCommand.mockClear(); + const resInvalidModel = await postResponses(port, { model: "openai/", input: "hi" }); + expect(resInvalidModel.status).toBe(400); + const invalidModelJson = (await resInvalidModel.json()) as { + error?: { type?: string; message?: string }; + }; + expect(invalidModelJson.error?.type).toBe("invalid_request_error"); + expect(invalidModelJson.error?.message).toBe("Invalid `model`."); + expect(agentCommand).toHaveBeenCalledTimes(0); + await ensureResponseConsumed(resInvalidModel); + mockAgentOnce([{ text: "hello" }]); const resHeader = await postResponses( port, @@ -267,10 +278,22 @@ describe("OpenResponses HTTP API (e2e)", () => { expect(resChannelHeader.status).toBe(200); const optsChannelHeader = (agentCommand.mock.calls[0] as unknown[] | undefined)?.[0]; expect((optsChannelHeader as { messageChannel?: string } | undefined)?.messageChannel).toBe( - "webchat", + "custom-client-channel", ); await ensureResponseConsumed(resChannelHeader); + mockAgentOnce([{ text: "hello" }]); + const 
resModelOverride = await postResponses(port, { + model: "openai/text-embedding-3-small", + input: "hi", + }); + expect(resModelOverride.status).toBe(200); + const optsModelOverride = (agentCommand.mock.calls[0] as unknown[] | undefined)?.[0]; + expect((optsModelOverride as { model?: string } | undefined)?.model).toBe( + "openai/text-embedding-3-small", + ); + await ensureResponseConsumed(resModelOverride); + mockAgentOnce([{ text: "hello" }]); const resUser = await postResponses(port, { user: "alice", @@ -777,7 +800,7 @@ describe("OpenResponses HTTP API (e2e)", () => { await ensureResponseConsumed(secondResponse); }); - it("does not reuse prior sessions across different user scopes", async () => { + it("reuses prior sessions across different user values when auth scope matches", async () => { const port = enabledPort; agentCommand.mockClear(); agentCommand.mockResolvedValueOnce({ @@ -812,8 +835,7 @@ describe("OpenResponses HTTP API (e2e)", () => { const secondOpts = (agentCommand.mock.calls[1] as unknown[] | undefined)?.[0] as | { sessionKey?: string } | undefined; - expect(secondOpts?.sessionKey).not.toBe(firstOpts?.sessionKey); - expect(secondOpts?.sessionKey ?? 
"").toContain("openresponses-user:bob"); + expect(secondOpts?.sessionKey).toBe(firstOpts?.sessionKey); await ensureResponseConsumed(secondResponse); }); @@ -864,22 +886,22 @@ describe("OpenResponses HTTP API (e2e)", () => { expect(openResponsesTesting.lookupResponseSessionAt("resp_504", 505)).toBe("session_504"); }); - it("does not reuse cached sessions when the user scope changes", () => { + it("does not reuse cached sessions when the auth subject changes", () => { openResponsesTesting.storeResponseSessionAt("resp_1", "session_1", 100, { + authSubject: "subject:a", agentId: "main", - user: "alice", }); expect( openResponsesTesting.lookupResponseSessionAt("resp_1", 101, { + authSubject: "subject:a", agentId: "main", - user: "alice", }), ).toBe("session_1"); expect( openResponsesTesting.lookupResponseSessionAt("resp_1", 101, { + authSubject: "subject:b", agentId: "main", - user: "bob", }), ).toBeUndefined(); }); diff --git a/src/gateway/openresponses-http.ts b/src/gateway/openresponses-http.ts index c23388c3d68..80d2b791450 100644 --- a/src/gateway/openresponses-http.ts +++ b/src/gateway/openresponses-http.ts @@ -6,7 +6,7 @@ * @see https://www.open-responses.com/ */ -import { randomUUID } from "node:crypto"; +import { createHash, randomUUID } from "node:crypto"; import type { IncomingMessage, ServerResponse } from "node:http"; import type { ImageContent } from "../agents/command/types.js"; import type { ClientToolDefinition } from "../agents/pi-embedded-runner/run/params.js"; @@ -35,7 +35,13 @@ import type { AuthRateLimiter } from "./auth-rate-limit.js"; import type { ResolvedGatewayAuth } from "./auth.js"; import { sendJson, setSseHeaders, writeDone } from "./http-common.js"; import { handleGatewayPostJsonEndpoint } from "./http-endpoint-helpers.js"; -import { getHeader, resolveGatewayRequestContext } from "./http-utils.js"; +import { + getBearerToken, + getHeader, + resolveAgentIdForRequest, + resolveGatewayRequestContext, + resolveOpenAiCompatModelOverride, +} 
from "./http-utils.js"; import { normalizeInputHostnameAllowlist } from "./input-allowlist.js"; import { CreateResponseBodySchema, @@ -64,8 +70,8 @@ const DEFAULT_MAX_URL_PARTS = 8; const RESPONSE_SESSION_TTL_MS = 30 * 60 * 1000; const MAX_RESPONSE_SESSION_ENTRIES = 500; type ResponseSessionScope = { + authSubject: string; agentId: string; - user?: string; requestedSessionKey?: string; }; @@ -77,23 +83,40 @@ type ResponseSessionEntry = ResponseSessionScope & { const responseSessionMap = new Map(); function normalizeResponseSessionScope(scope: ResponseSessionScope): ResponseSessionScope { - const user = scope.user?.trim(); + const authSubject = scope.authSubject.trim(); const requestedSessionKey = scope.requestedSessionKey?.trim(); return { + authSubject, agentId: scope.agentId, - user: user || undefined, requestedSessionKey: requestedSessionKey || undefined, }; } +function resolveResponseSessionAuthSubject(params: { + req: IncomingMessage; + auth: ResolvedGatewayAuth; +}): string { + const bearer = getBearerToken(params.req); + if (bearer) { + return `bearer:${createHash("sha256").update(bearer).digest("hex")}`; + } + if (params.auth.mode === "trusted-proxy" && params.auth.trustedProxy?.userHeader) { + const user = getHeader(params.req, params.auth.trustedProxy.userHeader)?.trim(); + if (user) { + return `trusted-proxy:${user}`; + } + } + return `gateway-auth:${params.auth.mode}`; +} + function createResponseSessionScope(params: { req: IncomingMessage; + auth: ResolvedGatewayAuth; agentId: string; - user?: string; }): ResponseSessionScope { return normalizeResponseSessionScope({ + authSubject: resolveResponseSessionAuthSubject({ req: params.req, auth: params.auth }), agentId: params.agentId, - user: params.user, requestedSessionKey: getHeader(params.req, "x-openclaw-session-key"), }); } @@ -103,8 +126,8 @@ function matchesResponseSessionScope( scope: ResponseSessionScope, ): boolean { return ( + entry.authSubject === scope.authSubject && entry.agentId === 
scope.agentId && - entry.user === scope.user && entry.requestedSessionKey === scope.requestedSessionKey ); } @@ -176,14 +199,14 @@ export const __testing = { responseId: string, sessionKey: string, now: number, - scope: ResponseSessionScope = { agentId: "main" }, + scope: ResponseSessionScope = { authSubject: "test", agentId: "main" }, ) { storeResponseSession(responseId, sessionKey, normalizeResponseSessionScope(scope), now); }, lookupResponseSessionAt( responseId: string | undefined, now: number, - scope: ResponseSessionScope = { agentId: "main" }, + scope: ResponseSessionScope = { authSubject: "test", agentId: "main" }, ) { return lookupResponseSession(responseId, normalizeResponseSessionScope(scope), now); }, @@ -387,6 +410,7 @@ async function runResponsesAgentCommand(params: { images: ImageContent[]; clientTools: ClientToolDefinition[]; extraSystemPrompt: string; + modelOverride?: string; streamParams: { maxTokens: number } | undefined; sessionKey: string; runId: string; @@ -399,6 +423,7 @@ async function runResponsesAgentCommand(params: { images: params.images.length > 0 ? params.images : undefined, clientTools: params.clientTools.length > 0 ? params.clientTools : undefined, extraSystemPrompt: params.extraSystemPrompt || undefined, + model: params.modelOverride, streamParams: params.streamParams ?? 
undefined, sessionKey: params.sessionKey, runId: params.runId, @@ -455,6 +480,17 @@ export async function handleOpenResponsesHttpRequest( const stream = Boolean(payload.stream); const model = payload.model; const user = payload.user; + const agentId = resolveAgentIdForRequest({ req, model }); + const { modelOverride, errorMessage: modelError } = await resolveOpenAiCompatModelOverride({ + agentId, + model, + }); + if (modelError) { + sendJson(res, 400, { + error: { message: modelError, type: "invalid_request_error" }, + }); + return true; + } // Extract images + files from input (Phase 2) let images: ImageContent[] = []; @@ -593,15 +629,15 @@ export async function handleOpenResponsesHttpRequest( user, sessionPrefix: "openresponses", defaultMessageChannel: "webchat", - useMessageChannelHeader: false, + useMessageChannelHeader: true, }); const responseSessionScope = createResponseSessionScope({ req, + auth: opts.auth, agentId: resolved.agentId, - user, }); // Resolve session key: reuse previous_response_id only when it matches the - // same agent/user/requested-session scope as the current request. + // same auth-subject/agent/requested-session scope as the current request. 
const previousSessionKey = lookupResponseSession( payload.previous_response_id, responseSessionScope, @@ -652,6 +688,7 @@ export async function handleOpenResponsesHttpRequest( images, clientTools: resolvedClientTools, extraSystemPrompt, + modelOverride, streamParams, sessionKey, runId: responseId, @@ -903,6 +940,7 @@ export async function handleOpenResponsesHttpRequest( images, clientTools: resolvedClientTools, extraSystemPrompt, + modelOverride, streamParams, sessionKey, runId: responseId, diff --git a/src/gateway/server-http.ts b/src/gateway/server-http.ts index 4f3f6aa40ff..d426e1223c4 100644 --- a/src/gateway/server-http.ts +++ b/src/gateway/server-http.ts @@ -34,6 +34,7 @@ import { handleControlUiHttpRequest, type ControlUiRootState, } from "./control-ui.js"; +import { handleOpenAiEmbeddingsHttpRequest } from "./embeddings-http.js"; import { applyHookMappings } from "./hooks-mapping.js"; import { extractHookToken, @@ -55,6 +56,7 @@ import { } from "./hooks.js"; import { sendGatewayAuthFailure, setDefaultSecurityHeaders } from "./http-common.js"; import { getBearerToken } from "./http-utils.js"; +import { handleOpenAiModelsHttpRequest } from "./models-http.js"; import { resolveRequestClientIp } from "./net.js"; import { handleOpenAiHttpRequest } from "./openai-http.js"; import { handleOpenResponsesHttpRequest } from "./openresponses-http.js"; @@ -772,6 +774,7 @@ export function createGatewayHttpServer(opts: { rateLimiter, getReadiness, } = opts; + const openAiCompatEnabled = openAiChatCompletionsEnabled || openResponsesEnabled; const httpServer: HttpServer = opts.tlsOptions ? createHttpsServer(opts.tlsOptions, (req, res) => { void handleRequest(req, res); @@ -812,6 +815,30 @@ export function createGatewayHttpServer(opts: { name: "hooks", run: () => handleHooksRequest(req, res), }, + { + name: "models", + run: () => + openAiCompatEnabled + ? 
handleOpenAiModelsHttpRequest(req, res, { + auth: resolvedAuth, + trustedProxies, + allowRealIpFallback, + rateLimiter, + }) + : false, + }, + { + name: "embeddings", + run: () => + openAiCompatEnabled + ? handleOpenAiEmbeddingsHttpRequest(req, res, { + auth: resolvedAuth, + trustedProxies, + allowRealIpFallback, + rateLimiter, + }) + : false, + }, { name: "tools-invoke", run: () =>