fix benchmark table issue: add user_id to agent_eval_benchmarks and scope benchmark queries to the owning user

This commit is contained in:
arvinxx
2026-02-28 01:01:29 +08:00
parent 687b36c81c
commit 0eccc86b90
7 changed files with 12445 additions and 23 deletions

View File

@@ -110,14 +110,16 @@ table agent_eval_benchmarks {
rubrics jsonb [not null]
reference_url text
metadata jsonb
user_id text
is_system boolean [not null, default: true]
accessed_at "timestamp with time zone" [not null, default: `now()`]
created_at "timestamp with time zone" [not null, default: `now()`]
updated_at "timestamp with time zone" [not null, default: `now()`]
indexes {
identifier [name: 'agent_eval_benchmarks_identifier_unique', unique]
(identifier, user_id) [name: 'agent_eval_benchmarks_identifier_user_id_unique', unique]
is_system [name: 'agent_eval_benchmarks_is_system_idx']
user_id [name: 'agent_eval_benchmarks_user_id_idx']
}
}

View File

@@ -0,0 +1,5 @@
-- Adds an optional owning user ("user_id") to agent_eval_benchmarks and replaces
-- the identifier-only unique index with one on (identifier, user_id).
-- Every statement is guarded so that re-running the migration against a
-- partially migrated database does not abort on "already exists" / "does not exist".
DROP INDEX IF EXISTS "agent_eval_benchmarks_identifier_unique";--> statement-breakpoint
ALTER TABLE "agent_eval_benchmarks" ADD COLUMN IF NOT EXISTS "user_id" text;--> statement-breakpoint
-- ADD CONSTRAINT has no IF NOT EXISTS form, so only the duplicate_object error is swallowed.
DO $$ BEGIN
 ALTER TABLE "agent_eval_benchmarks" ADD CONSTRAINT "agent_eval_benchmarks_user_id_users_id_fk" FOREIGN KEY ("user_id") REFERENCES "public"."users"("id") ON DELETE cascade ON UPDATE no action;
EXCEPTION
 WHEN duplicate_object THEN null;
END $$;--> statement-breakpoint
-- NOTE(review): "user_id" is nullable and PostgreSQL treats NULLs as distinct in a
-- unique index by default, so this index does not reject duplicate identifiers among
-- rows whose user_id is NULL — confirm whether that is intended.
CREATE UNIQUE INDEX IF NOT EXISTS "agent_eval_benchmarks_identifier_user_id_unique" ON "agent_eval_benchmarks" USING btree ("identifier","user_id");--> statement-breakpoint
CREATE INDEX IF NOT EXISTS "agent_eval_benchmarks_user_id_idx" ON "agent_eval_benchmarks" USING btree ("user_id");

File diff suppressed because it is too large Load Diff

View File

@@ -616,6 +616,13 @@
"when": 1771386090928,
"tag": "0087_add_eval_benchmark",
"breakpoints": true
},
{
"idx": 88,
"version": "7",
"when": 1772186403999,
"tag": "0088_smiling_newton_destine",
"breakpoints": true
}
],
"version": "6"

View File

@@ -16,6 +16,7 @@ const serverDB = await getTestDB();
const userId = 'benchmark-test-user';
const userId2 = 'benchmark-test-user-2';
const benchmarkModel = new AgentEvalBenchmarkModel(serverDB, userId);
const benchmarkModel2 = new AgentEvalBenchmarkModel(serverDB, userId2);
beforeEach(async () => {
await serverDB.delete(agentEvalRuns);
@@ -38,7 +39,7 @@ afterEach(async () => {
describe('AgentEvalBenchmarkModel', () => {
describe('create', () => {
it('should create a new benchmark', async () => {
it('should create a new benchmark with userId', async () => {
const params = {
identifier: 'test-benchmark',
name: 'Test Benchmark',
@@ -68,6 +69,7 @@ describe('AgentEvalBenchmarkModel', () => {
expect(result.referenceUrl).toBe('https://example.com');
expect(result.metadata).toEqual({ version: 1 });
expect(result.isSystem).toBe(false);
expect(result.userId).toBe(userId);
expect(result.createdAt).toBeDefined();
expect(result.updatedAt).toBeDefined();
});
@@ -84,6 +86,7 @@ describe('AgentEvalBenchmarkModel', () => {
expect(result.isSystem).toBe(true);
expect(result.identifier).toBe('system-benchmark');
expect(result.userId).toBe(userId);
});
});
@@ -95,7 +98,7 @@ describe('AgentEvalBenchmarkModel', () => {
identifier: 'delete-test',
name: 'Delete Test',
rubrics: [],
userId,
isSystem: false,
})
.returning();
@@ -115,7 +118,7 @@ describe('AgentEvalBenchmarkModel', () => {
identifier: 'system-benchmark',
name: 'System Benchmark',
rubrics: [],
userId,
isSystem: true,
})
.returning();
@@ -128,6 +131,26 @@ describe('AgentEvalBenchmarkModel', () => {
expect(stillExists).toBeDefined();
});
// A row inserted with userId2 as owner must still exist after a delete issued
// through the model that was constructed for userId.
it("should not delete another user's benchmark", async () => {
  const foreignValues = {
    identifier: 'other-user-benchmark',
    name: 'Other User Benchmark',
    rubrics: [],
    userId: userId2,
    isSystem: false,
  };
  const [foreignRow] = await serverDB
    .insert(agentEvalBenchmarks)
    .values(foreignValues)
    .returning();

  await benchmarkModel.delete(foreignRow.id);

  const remaining = await serverDB.query.agentEvalBenchmarks.findFirst({
    where: eq(agentEvalBenchmarks.id, foreignRow.id),
  });
  expect(remaining).toBeDefined();
});
it('should return 0 rowCount when benchmark not found', async () => {
await benchmarkModel.delete('non-existent-id');
// No rowCount in PGlite, just verify no error
@@ -141,33 +164,41 @@ describe('AgentEvalBenchmarkModel', () => {
identifier: 'system-1',
name: 'System 1',
rubrics: [],
userId: null,
isSystem: true,
},
{
identifier: 'user-1',
name: 'User 1',
rubrics: [],
userId,
isSystem: false,
},
{
identifier: 'system-2',
name: 'System 2',
rubrics: [],
userId: null,
isSystem: true,
},
{
identifier: 'other-user-1',
name: 'Other User 1',
rubrics: [],
userId: userId2,
isSystem: false,
},
]);
});
it('should query all benchmarks including system', async () => {
it('should query own + system benchmarks including system', async () => {
const results = await benchmarkModel.query(true);
expect(results).toHaveLength(3);
expect(results.map((r) => r.identifier)).toContain('system-1');
expect(results.map((r) => r.identifier)).toContain('user-1');
expect(results.map((r) => r.identifier)).toContain('system-2');
expect(results.map((r) => r.identifier)).not.toContain('other-user-1');
});
it('should query only user-created benchmarks', async () => {
@@ -184,6 +215,13 @@ describe('AgentEvalBenchmarkModel', () => {
expect(results).toHaveLength(3);
});
// The identifier 'other-user-1' must be absent from what query(true) returns.
it("should not return other user's benchmarks", async () => {
  const rows = await benchmarkModel.query(true);

  expect(rows.map(({ identifier }) => identifier)).not.toContain('other-user-1');
});
it('should order by createdAt descending', async () => {
const results = await benchmarkModel.query(true);
@@ -347,14 +385,14 @@ describe('AgentEvalBenchmarkModel', () => {
});
describe('findById', () => {
it('should find a benchmark by id', async () => {
it('should find own benchmark by id', async () => {
const [benchmark] = await serverDB
.insert(agentEvalBenchmarks)
.values({
identifier: 'find-test',
name: 'Find Test',
rubrics: [],
userId,
isSystem: false,
})
.returning();
@@ -366,6 +404,41 @@ describe('AgentEvalBenchmarkModel', () => {
expect(result?.identifier).toBe('find-test');
});
// A row stored with userId null and isSystem true must be returned by findById.
it('should find system benchmark (null userId) by id', async () => {
  const systemValues = {
    identifier: 'system-find-test',
    name: 'System Find Test',
    rubrics: [],
    userId: null,
    isSystem: true,
  };
  const [systemRow] = await serverDB
    .insert(agentEvalBenchmarks)
    .values(systemValues)
    .returning();

  const found = await benchmarkModel.findById(systemRow.id);

  expect(found).toBeDefined();
  expect(found?.id).toBe(systemRow.id);
});
// findById on the userId-bound model must yield undefined for a row owned by userId2.
it("should not find another user's benchmark by id", async () => {
  const foreignValues = {
    identifier: 'other-find-test',
    name: 'Other Find Test',
    rubrics: [],
    userId: userId2,
    isSystem: false,
  };
  const [foreignRow] = await serverDB
    .insert(agentEvalBenchmarks)
    .values(foreignValues)
    .returning();

  const found = await benchmarkModel.findById(foreignRow.id);

  expect(found).toBeUndefined();
});
it('should return undefined when benchmark not found', async () => {
const result = await benchmarkModel.findById('non-existent-id');
expect(result).toBeUndefined();
@@ -373,11 +446,12 @@ describe('AgentEvalBenchmarkModel', () => {
});
describe('findByIdentifier', () => {
it('should find a benchmark by identifier', async () => {
it('should find own benchmark by identifier', async () => {
await serverDB.insert(agentEvalBenchmarks).values({
identifier: 'unique-identifier',
name: 'Unique Test',
rubrics: [],
userId,
isSystem: false,
});
@@ -388,6 +462,35 @@ describe('AgentEvalBenchmarkModel', () => {
expect(result?.name).toBe('Unique Test');
});
// A row stored with userId null and isSystem true must be returned by findByIdentifier.
it('should find system benchmark (null userId) by identifier', async () => {
  const systemValues = {
    identifier: 'system-identifier',
    name: 'System Test',
    rubrics: [],
    userId: null,
    isSystem: true,
  };
  await serverDB.insert(agentEvalBenchmarks).values(systemValues);

  const found = await benchmarkModel.findByIdentifier('system-identifier');

  expect(found).toBeDefined();
  expect(found?.identifier).toBe('system-identifier');
});
// findByIdentifier on the userId-bound model must yield undefined for a row owned by userId2.
it("should not find another user's benchmark by identifier", async () => {
  const foreignValues = {
    identifier: 'other-identifier',
    name: 'Other Test',
    rubrics: [],
    userId: userId2,
    isSystem: false,
  };
  await serverDB.insert(agentEvalBenchmarks).values(foreignValues);

  const found = await benchmarkModel.findByIdentifier('other-identifier');

  expect(found).toBeUndefined();
});
it('should return undefined when identifier not found', async () => {
const result = await benchmarkModel.findByIdentifier('non-existent');
expect(result).toBeUndefined();
@@ -402,7 +505,7 @@ describe('AgentEvalBenchmarkModel', () => {
identifier: 'update-test',
name: 'Original Name',
rubrics: [],
userId,
isSystem: false,
})
.returning();
@@ -426,7 +529,7 @@ describe('AgentEvalBenchmarkModel', () => {
identifier: 'system-benchmark',
name: 'System Benchmark',
rubrics: [],
userId,
isSystem: true,
})
.returning();
@@ -437,10 +540,36 @@ describe('AgentEvalBenchmarkModel', () => {
expect(result).toBeUndefined();
const unchanged = await benchmarkModel.findById(systemBenchmark.id);
const unchanged = await serverDB.query.agentEvalBenchmarks.findFirst({
where: eq(agentEvalBenchmarks.id, systemBenchmark.id),
});
expect(unchanged?.name).toBe('System Benchmark');
});
// update() through the userId-bound model must return undefined for a row owned
// by userId2, and the stored name must be unchanged afterwards.
it("should not update another user's benchmark", async () => {
  const foreignValues = {
    identifier: 'other-update-test',
    name: 'Other User Benchmark',
    rubrics: [],
    userId: userId2,
    isSystem: false,
  };
  const [foreignRow] = await serverDB
    .insert(agentEvalBenchmarks)
    .values(foreignValues)
    .returning();

  const patch = { name: 'Attempted Update' };
  const updated = await benchmarkModel.update(foreignRow.id, patch);
  expect(updated).toBeUndefined();

  // Read the row straight from the table rather than through the model.
  const persisted = await serverDB.query.agentEvalBenchmarks.findFirst({
    where: eq(agentEvalBenchmarks.id, foreignRow.id),
  });
  expect(persisted?.name).toBe('Other User Benchmark');
});
it('should return undefined when benchmark not found', async () => {
const result = await benchmarkModel.update('non-existent-id', {
name: 'New Name',
@@ -457,7 +586,7 @@ describe('AgentEvalBenchmarkModel', () => {
name: 'Original',
description: 'Original Desc',
rubrics: [],
userId,
isSystem: false,
})
.returning();

View File

@@ -1,4 +1,4 @@
import { and, count, desc, eq, getTableColumns, sql } from 'drizzle-orm';
import { and, count, desc, eq, getTableColumns, isNull, or, sql } from 'drizzle-orm';
import {
agentEvalBenchmarks,
@@ -22,7 +22,10 @@ export class AgentEvalBenchmarkModel {
* Create a new benchmark
*/
create = async (params: NewAgentEvalBenchmark) => {
const [result] = await this.db.insert(agentEvalBenchmarks).values(params).returning();
const [result] = await this.db
.insert(agentEvalBenchmarks)
.values({ ...params, userId: this.userId })
.returning();
return result;
};
@@ -32,7 +35,13 @@ export class AgentEvalBenchmarkModel {
delete = async (id: string) => {
return this.db
.delete(agentEvalBenchmarks)
.where(and(eq(agentEvalBenchmarks.id, id), eq(agentEvalBenchmarks.isSystem, false)));
.where(
and(
eq(agentEvalBenchmarks.id, id),
eq(agentEvalBenchmarks.isSystem, false),
eq(agentEvalBenchmarks.userId, this.userId),
),
);
};
/**
@@ -40,7 +49,13 @@ export class AgentEvalBenchmarkModel {
* @param includeSystem - Whether to include system benchmarks (default: true)
*/
query = async (includeSystem = true) => {
const conditions = includeSystem ? undefined : eq(agentEvalBenchmarks.isSystem, false);
const userCondition = or(
eq(agentEvalBenchmarks.userId, this.userId),
isNull(agentEvalBenchmarks.userId),
);
const conditions = includeSystem
? userCondition
: and(eq(agentEvalBenchmarks.isSystem, false), userCondition);
const datasetCountSq = this.db
.select({
@@ -129,7 +144,12 @@ export class AgentEvalBenchmarkModel {
const [result] = await this.db
.select()
.from(agentEvalBenchmarks)
.where(eq(agentEvalBenchmarks.id, id))
.where(
and(
eq(agentEvalBenchmarks.id, id),
or(eq(agentEvalBenchmarks.userId, this.userId), isNull(agentEvalBenchmarks.userId)),
),
)
.limit(1);
return result;
};
@@ -141,7 +161,12 @@ export class AgentEvalBenchmarkModel {
const [result] = await this.db
.select()
.from(agentEvalBenchmarks)
.where(eq(agentEvalBenchmarks.identifier, identifier))
.where(
and(
eq(agentEvalBenchmarks.identifier, identifier),
or(eq(agentEvalBenchmarks.userId, this.userId), isNull(agentEvalBenchmarks.userId)),
),
)
.limit(1);
return result;
};
@@ -153,7 +178,13 @@ export class AgentEvalBenchmarkModel {
const [result] = await this.db
.update(agentEvalBenchmarks)
.set({ ...value, updatedAt: new Date() })
.where(and(eq(agentEvalBenchmarks.id, id), eq(agentEvalBenchmarks.isSystem, false)))
.where(
and(
eq(agentEvalBenchmarks.id, id),
eq(agentEvalBenchmarks.isSystem, false),
eq(agentEvalBenchmarks.userId, this.userId),
),
)
.returning();
return result;
};

View File

@@ -65,13 +65,16 @@ export const agentEvalBenchmarks = pgTable(
metadata: jsonb('metadata').$type<Record<string, unknown>>(),
userId: text('user_id').references(() => users.id, { onDelete: 'cascade' }),
isSystem: boolean('is_system').default(true).notNull(),
...timestamps,
},
(t) => [
uniqueIndex('agent_eval_benchmarks_identifier_unique').on(t.identifier),
uniqueIndex('agent_eval_benchmarks_identifier_user_id_unique').on(t.identifier, t.userId),
index('agent_eval_benchmarks_is_system_idx').on(t.isSystem),
index('agent_eval_benchmarks_user_id_idx').on(t.userId),
],
);