diff --git a/packages/types/src/message/common/metadata.ts b/packages/types/src/message/common/metadata.ts index a5a1f9aa77..d46bc13f49 100644 --- a/packages/types/src/message/common/metadata.ts +++ b/packages/types/src/message/common/metadata.ts @@ -105,6 +105,7 @@ export const MessageMetadataSchema = ModelUsageSchema.merge(ModelPerformanceSche reactions: z.array(EmojiReactionSchema).optional(), scope: z.string().optional(), subAgentId: z.string().optional(), + toolExecutionTimeMs: z.number().optional(), }); export interface ModelUsage extends ModelTokensUsage { @@ -193,5 +194,9 @@ export interface MessageMetadata extends ModelUsage, ModelPerformance { taskTitle?: string; // message content is multimodal, display content in the streaming, won't save to db tempDisplayContent?: string; + /** + * Tool execution time for tool messages (ms) + */ + toolExecutionTimeMs?: number; usage?: ModelUsage; } diff --git a/src/app/(backend)/api/workflows/agent-eval-run/run-agent-trajectory/route.ts b/src/app/(backend)/api/workflows/agent-eval-run/run-agent-trajectory/route.ts index a06f0d0151..dca583dc50 100644 --- a/src/app/(backend)/api/workflows/agent-eval-run/run-agent-trajectory/route.ts +++ b/src/app/(backend)/api/workflows/agent-eval-run/run-agent-trajectory/route.ts @@ -112,7 +112,7 @@ export const { POST } = serve( flowControl: { key: 'agent-eval-run.run-agent-trajectory', parallelism: 500, - ratePerSecond: 10, + ratePerSecond: 20, }, qstashClient, }, diff --git a/src/app/(backend)/api/workflows/agent-eval-run/run-thread-trajectory/route.ts b/src/app/(backend)/api/workflows/agent-eval-run/run-thread-trajectory/route.ts index dd2cacfbf1..430ec49579 100644 --- a/src/app/(backend)/api/workflows/agent-eval-run/run-thread-trajectory/route.ts +++ b/src/app/(backend)/api/workflows/agent-eval-run/run-thread-trajectory/route.ts @@ -98,7 +98,7 @@ export const { POST } = serve( flowControl: { key: 'agent-eval-run.run-thread-trajectory', parallelism: 500, - ratePerSecond: 10, + ratePerSecond: 20, }, qstashClient, }, diff --git a/src/server/modules/AgentRuntime/RuntimeExecutors.ts b/src/server/modules/AgentRuntime/RuntimeExecutors.ts index 18ac87358f..3d56b2a6e9 100644 --- a/src/server/modules/AgentRuntime/RuntimeExecutors.ts +++ b/src/server/modules/AgentRuntime/RuntimeExecutors.ts @@ -666,6 +666,7 @@ export const createRuntimeExecutors = ( const toolMessage = await ctx.messageModel.create({ agentId: state.metadata!.agentId!, content: executionResult.content, + metadata: { toolExecutionTimeMs: executionTime }, parentId: payload.parentMessageId, plugin: chatToolPayload as any, pluginError: executionResult.error, @@ -882,6 +883,7 @@ export const createRuntimeExecutors = ( const toolMessage = await ctx.messageModel.create({ agentId: state.metadata!.agentId!, content: executionResult.content, + metadata: { toolExecutionTimeMs: executionTime }, parentId: parentMessageId, plugin: chatToolPayload as any, pluginError: executionResult.error, diff --git a/src/server/modules/AgentRuntime/__tests__/RuntimeExecutors.test.ts b/src/server/modules/AgentRuntime/__tests__/RuntimeExecutors.test.ts index cb3c79b94f..2b28cdeb5d 100644 --- a/src/server/modules/AgentRuntime/__tests__/RuntimeExecutors.test.ts +++ b/src/server/modules/AgentRuntime/__tests__/RuntimeExecutors.test.ts @@ -836,6 +836,35 @@ describe('RuntimeExecutors', () => { ); }); + it('should persist tool execution time in metadata when creating tool message', async () => { + const executors = createRuntimeExecutors(ctx); + const state = createMockState(); + + const instruction = { + payload: { + parentMessageId: 'assistant-msg-456', + toolCalling: { + apiName: 'crawl', + arguments: '{"url": "https://example.com"}', + id: 'tool-call-2', + identifier: 'web-browsing', + type: 'default' as const, + }, + }, + type: 'call_tool' as const, + }; + + await executors.call_tool!(instruction, state); + + expect(mockMessageModel.create).toHaveBeenCalledWith( + expect.objectContaining({ + metadata: { + toolExecutionTimeMs: 100, + }, + }), + ); + }); + it('should return tool message ID as parentMessageId in nextContext for parentId chain', async () => { // Setup: mock messageModel.create to return a specific tool message ID const toolMessageId = 'tool-msg-789'; @@ -1553,6 +1582,69 @@ describe('RuntimeExecutors', () => { expect(state.usage.tools.totalCalls).toBe(0); }); + it('should persist execution time metadata for each tool message in batch execution', async () => { + mockToolExecutionService.executeTool + .mockResolvedValueOnce({ + content: 'Search result', + error: null, + executionTime: 150, + state: {}, + success: true, + }) + .mockResolvedValueOnce({ + content: 'Crawl result', + error: null, + executionTime: 250, + state: {}, + success: true, + }); + + const executors = createRuntimeExecutors(ctx); + const state = createMockState(); + + const instruction = { + payload: { + parentMessageId: 'assistant-msg-123', + toolsCalling: [ + { + apiName: 'search', + arguments: '{"query": "test"}', + id: 'tool-call-1', + identifier: 'web-search', + type: 'default' as const, + }, + { + apiName: 'crawl', + arguments: '{"url": "https://example.com"}', + id: 'tool-call-2', + identifier: 'web-browsing', + type: 'default' as const, + }, + ], + }, + type: 'call_tools_batch' as const, + }; + + await executors.call_tools_batch!(instruction, state); + + expect(mockMessageModel.create).toHaveBeenNthCalledWith( + 1, + expect.objectContaining({ + metadata: { + toolExecutionTimeMs: 150, + }, + }), + ); + expect(mockMessageModel.create).toHaveBeenNthCalledWith( + 2, + expect.objectContaining({ + metadata: { + toolExecutionTimeMs: 250, + }, + }), + ); + }); + it('should pass toolResultMaxLength from agentConfig to executeTool', async () => { const executors = createRuntimeExecutors(ctx); const state = createMockState({ diff --git a/src/server/routers/lambda/agentEval.ts b/src/server/routers/lambda/agentEval.ts index aa2199253c..dbbc2ad13c 100644 --- a/src/server/routers/lambda/agentEval.ts +++ b/src/server/routers/lambda/agentEval.ts @@ -40,9 +40,13 @@ const evalConfigSchema = z.object({ judgePrompt: z.string().optional() }).passth const evalRunInputConfigSchema = z.object({ k: z.number().min(1).max(10).optional(), - maxConcurrency: z.number().min(1).max(10).optional(), + maxConcurrency: z.number().min(1).max(20).optional(), maxSteps: z.number().min(1).max(1000).optional(), - timeout: z.number().min(60_000).max(3_600_000).optional(), + timeout: z + .number() + .min(60_000) + .max(6 * 3_600_000) + .optional(), }); const agentEvalProcedure = authedProcedure.use(serverDatabase).use(async (opts) => { diff --git a/src/server/services/search/impls/exa/index.test.ts b/src/server/services/search/impls/exa/index.test.ts index 8be636eeb9..67b679c7ec 100644 --- a/src/server/services/search/impls/exa/index.test.ts +++ b/src/server/services/search/impls/exa/index.test.ts @@ -178,7 +178,7 @@ describe('ExaImpl', () => { const body = JSON.parse((vi.mocked(fetch).mock.calls[0][1] as RequestInit).body as string); expect(body.query).toBe('my search query'); - expect(body.numResults).toBe(15); + expect(body.numResults).toBe(10); expect(body.type).toBe('auto'); }); diff --git a/src/server/services/search/impls/exa/index.ts b/src/server/services/search/impls/exa/index.ts index 537c3e369b..397af0ce4c 100644 --- a/src/server/services/search/impls/exa/index.ts +++ b/src/server/services/search/impls/exa/index.ts @@ -31,7 +31,7 @@ export class ExaImpl implements SearchServiceImpl { const endpoint = urlJoin(this.baseUrl, '/search'); const defaultQueryParams: ExaSearchParameters = { - numResults: 15, + numResults: 10, query, type: 'auto', };