fix benchmark table issue: add user_id to agent_eval_benchmarks and scope benchmark queries per user

2026-03-27 13:29:15 +07:00 · 2026-02-28 01:01:29 +08:00
parent 687b36c81c
commit 0eccc86b90
7 changed files with 12445 additions and 23 deletions
--- a/docs/development/database-schema.dbml
+++ b/docs/development/database-schema.dbml
@@ -110,14 +110,16 @@ table agent_eval_benchmarks {
  rubrics jsonb [not null]
  reference_url text
  metadata jsonb
+  user_id text
  is_system boolean [not null, default: true]
  accessed_at "timestamp with time zone" [not null, default: `now()`]
  created_at "timestamp with time zone" [not null, default: `now()`]
  updated_at "timestamp with time zone" [not null, default: `now()`]

  indexes {
-    identifier [name: 'agent_eval_benchmarks_identifier_unique', unique]
+    (identifier, user_id) [name: 'agent_eval_benchmarks_identifier_user_id_unique', unique]
    is_system [name: 'agent_eval_benchmarks_is_system_idx']
+    user_id [name: 'agent_eval_benchmarks_user_id_idx']
  }
 }

--- a/packages/database/migrations/0088_smiling_newton_destine.sql
+++ b/packages/database/migrations/0088_smiling_newton_destine.sql
@@ -0,0 +1,5 @@
+DROP INDEX "agent_eval_benchmarks_identifier_unique";--> statement-breakpoint
+ALTER TABLE "agent_eval_benchmarks" ADD COLUMN "user_id" text;--> statement-breakpoint
+ALTER TABLE "agent_eval_benchmarks" ADD CONSTRAINT "agent_eval_benchmarks_user_id_users_id_fk" FOREIGN KEY ("user_id") REFERENCES "public"."users"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
+CREATE UNIQUE INDEX "agent_eval_benchmarks_identifier_user_id_unique" ON "agent_eval_benchmarks" USING btree ("identifier","user_id");--> statement-breakpoint
+CREATE INDEX "agent_eval_benchmarks_user_id_idx" ON "agent_eval_benchmarks" USING btree ("user_id");
--- a/packages/database/migrations/meta/0088_snapshot.json
+++ b/packages/database/migrations/meta/0088_snapshot.json
--- a/packages/database/migrations/meta/_journal.json
+++ b/packages/database/migrations/meta/_journal.json
@@ -616,6 +616,13 @@
      "when": 1771386090928,
      "tag": "0087_add_eval_benchmark",
      "breakpoints": true
+    },
+    {
+      "idx": 88,
+      "version": "7",
+      "when": 1772186403999,
+      "tag": "0088_smiling_newton_destine",
+      "breakpoints": true
    }
  ],
  "version": "6"
--- a/packages/database/src/models/agentEval/tests/benchmark.test.ts
+++ b/packages/database/src/models/agentEval/tests/benchmark.test.ts
@@ -16,6 +16,7 @@ const serverDB = await getTestDB();
 const userId = 'benchmark-test-user';
 const userId2 = 'benchmark-test-user-2';
 const benchmarkModel = new AgentEvalBenchmarkModel(serverDB, userId);
+const benchmarkModel2 = new AgentEvalBenchmarkModel(serverDB, userId2);

 beforeEach(async () => {
  await serverDB.delete(agentEvalRuns);
@@ -38,7 +39,7 @@ afterEach(async () => {

 describe('AgentEvalBenchmarkModel', () => {
  describe('create', () => {
-    it('should create a new benchmark', async () => {
+    it('should create a new benchmark with userId', async () => {
      const params = {
        identifier: 'test-benchmark',
        name: 'Test Benchmark',
@@ -68,6 +69,7 @@ describe('AgentEvalBenchmarkModel', () => {
      expect(result.referenceUrl).toBe('https://example.com');
      expect(result.metadata).toEqual({ version: 1 });
      expect(result.isSystem).toBe(false);
+      expect(result.userId).toBe(userId);
      expect(result.createdAt).toBeDefined();
      expect(result.updatedAt).toBeDefined();
    });
@@ -84,6 +86,7 @@ describe('AgentEvalBenchmarkModel', () => {

      expect(result.isSystem).toBe(true);
      expect(result.identifier).toBe('system-benchmark');
+      expect(result.userId).toBe(userId);
    });
  });

@@ -95,7 +98,7 @@ describe('AgentEvalBenchmarkModel', () => {
          identifier: 'delete-test',
          name: 'Delete Test',
          rubrics: [],
-
+          userId,
          isSystem: false,
        })
        .returning();
@@ -115,7 +118,7 @@ describe('AgentEvalBenchmarkModel', () => {
          identifier: 'system-benchmark',
          name: 'System Benchmark',
          rubrics: [],
-
+          userId,
          isSystem: true,
        })
        .returning();
@@ -128,6 +131,26 @@ describe('AgentEvalBenchmarkModel', () => {
      expect(stillExists).toBeDefined();
    });

+    it("should not delete another user's benchmark", async () => {
+      const [benchmark] = await serverDB
+        .insert(agentEvalBenchmarks)
+        .values({
+          identifier: 'other-user-benchmark',
+          name: 'Other User Benchmark',
+          rubrics: [],
+          userId: userId2,
+          isSystem: false,
+        })
+        .returning();
+
+      await benchmarkModel.delete(benchmark.id);
+
+      const stillExists = await serverDB.query.agentEvalBenchmarks.findFirst({
+        where: eq(agentEvalBenchmarks.id, benchmark.id),
+      });
+      expect(stillExists).toBeDefined();
+    });
+
    it('should return 0 rowCount when benchmark not found', async () => {
      await benchmarkModel.delete('non-existent-id');
      // No rowCount in PGlite, just verify no error
@@ -141,33 +164,41 @@ describe('AgentEvalBenchmarkModel', () => {
          identifier: 'system-1',
          name: 'System 1',
          rubrics: [],
-
+          userId: null,
          isSystem: true,
        },
        {
          identifier: 'user-1',
          name: 'User 1',
          rubrics: [],
-
+          userId,
          isSystem: false,
        },
        {
          identifier: 'system-2',
          name: 'System 2',
          rubrics: [],
-
+          userId: null,
          isSystem: true,
        },
+        {
+          identifier: 'other-user-1',
+          name: 'Other User 1',
+          rubrics: [],
+          userId: userId2,
+          isSystem: false,
+        },
      ]);
    });

-    it('should query all benchmarks including system', async () => {
+    it('should query own + system benchmarks including system', async () => {
      const results = await benchmarkModel.query(true);

      expect(results).toHaveLength(3);
      expect(results.map((r) => r.identifier)).toContain('system-1');
      expect(results.map((r) => r.identifier)).toContain('user-1');
      expect(results.map((r) => r.identifier)).toContain('system-2');
+      expect(results.map((r) => r.identifier)).not.toContain('other-user-1');
    });

    it('should query only user-created benchmarks', async () => {
@@ -184,6 +215,13 @@ describe('AgentEvalBenchmarkModel', () => {
      expect(results).toHaveLength(3);
    });

+    it("should not return other user's benchmarks", async () => {
+      const results = await benchmarkModel.query(true);
+
+      const identifiers = results.map((r) => r.identifier);
+      expect(identifiers).not.toContain('other-user-1');
+    });
+
    it('should order by createdAt descending', async () => {
      const results = await benchmarkModel.query(true);

@@ -347,14 +385,14 @@ describe('AgentEvalBenchmarkModel', () => {
  });

  describe('findById', () => {
-    it('should find a benchmark by id', async () => {
+    it('should find own benchmark by id', async () => {
      const [benchmark] = await serverDB
        .insert(agentEvalBenchmarks)
        .values({
          identifier: 'find-test',
          name: 'Find Test',
          rubrics: [],
-
+          userId,
          isSystem: false,
        })
        .returning();
@@ -366,6 +404,41 @@ describe('AgentEvalBenchmarkModel', () => {
      expect(result?.identifier).toBe('find-test');
    });

+    it('should find system benchmark (null userId) by id', async () => {
+      const [benchmark] = await serverDB
+        .insert(agentEvalBenchmarks)
+        .values({
+          identifier: 'system-find-test',
+          name: 'System Find Test',
+          rubrics: [],
+          userId: null,
+          isSystem: true,
+        })
+        .returning();
+
+      const result = await benchmarkModel.findById(benchmark.id);
+
+      expect(result).toBeDefined();
+      expect(result?.id).toBe(benchmark.id);
+    });
+
+    it("should not find another user's benchmark by id", async () => {
+      const [benchmark] = await serverDB
+        .insert(agentEvalBenchmarks)
+        .values({
+          identifier: 'other-find-test',
+          name: 'Other Find Test',
+          rubrics: [],
+          userId: userId2,
+          isSystem: false,
+        })
+        .returning();
+
+      const result = await benchmarkModel.findById(benchmark.id);
+
+      expect(result).toBeUndefined();
+    });
+
    it('should return undefined when benchmark not found', async () => {
      const result = await benchmarkModel.findById('non-existent-id');
      expect(result).toBeUndefined();
@@ -373,11 +446,12 @@ describe('AgentEvalBenchmarkModel', () => {
  });

  describe('findByIdentifier', () => {
-    it('should find a benchmark by identifier', async () => {
+    it('should find own benchmark by identifier', async () => {
      await serverDB.insert(agentEvalBenchmarks).values({
        identifier: 'unique-identifier',
        name: 'Unique Test',
        rubrics: [],
+        userId,
        isSystem: false,
      });

@@ -388,6 +462,35 @@ describe('AgentEvalBenchmarkModel', () => {
      expect(result?.name).toBe('Unique Test');
    });

+    it('should find system benchmark (null userId) by identifier', async () => {
+      await serverDB.insert(agentEvalBenchmarks).values({
+        identifier: 'system-identifier',
+        name: 'System Test',
+        rubrics: [],
+        userId: null,
+        isSystem: true,
+      });
+
+      const result = await benchmarkModel.findByIdentifier('system-identifier');
+
+      expect(result).toBeDefined();
+      expect(result?.identifier).toBe('system-identifier');
+    });
+
+    it("should not find another user's benchmark by identifier", async () => {
+      await serverDB.insert(agentEvalBenchmarks).values({
+        identifier: 'other-identifier',
+        name: 'Other Test',
+        rubrics: [],
+        userId: userId2,
+        isSystem: false,
+      });
+
+      const result = await benchmarkModel.findByIdentifier('other-identifier');
+
+      expect(result).toBeUndefined();
+    });
+
    it('should return undefined when identifier not found', async () => {
      const result = await benchmarkModel.findByIdentifier('non-existent');
      expect(result).toBeUndefined();
@@ -402,7 +505,7 @@ describe('AgentEvalBenchmarkModel', () => {
          identifier: 'update-test',
          name: 'Original Name',
          rubrics: [],
-
+          userId,
          isSystem: false,
        })
        .returning();
@@ -426,7 +529,7 @@ describe('AgentEvalBenchmarkModel', () => {
          identifier: 'system-benchmark',
          name: 'System Benchmark',
          rubrics: [],
-
+          userId,
          isSystem: true,
        })
        .returning();
@@ -437,10 +540,36 @@ describe('AgentEvalBenchmarkModel', () => {

      expect(result).toBeUndefined();

-      const unchanged = await benchmarkModel.findById(systemBenchmark.id);
+      const unchanged = await serverDB.query.agentEvalBenchmarks.findFirst({
+        where: eq(agentEvalBenchmarks.id, systemBenchmark.id),
+      });
      expect(unchanged?.name).toBe('System Benchmark');
    });

+    it("should not update another user's benchmark", async () => {
+      const [benchmark] = await serverDB
+        .insert(agentEvalBenchmarks)
+        .values({
+          identifier: 'other-update-test',
+          name: 'Other User Benchmark',
+          rubrics: [],
+          userId: userId2,
+          isSystem: false,
+        })
+        .returning();
+
+      const result = await benchmarkModel.update(benchmark.id, {
+        name: 'Attempted Update',
+      });
+
+      expect(result).toBeUndefined();
+
+      const unchanged = await serverDB.query.agentEvalBenchmarks.findFirst({
+        where: eq(agentEvalBenchmarks.id, benchmark.id),
+      });
+      expect(unchanged?.name).toBe('Other User Benchmark');
+    });
+
    it('should return undefined when benchmark not found', async () => {
      const result = await benchmarkModel.update('non-existent-id', {
        name: 'New Name',
@@ -457,7 +586,7 @@ describe('AgentEvalBenchmarkModel', () => {
          name: 'Original',
          description: 'Original Desc',
          rubrics: [],
-
+          userId,
          isSystem: false,
        })
        .returning();
--- a/packages/database/src/models/agentEval/benchmark.ts
+++ b/packages/database/src/models/agentEval/benchmark.ts
@@ -1,4 +1,4 @@
-import { and, count, desc, eq, getTableColumns, sql } from 'drizzle-orm';
+import { and, count, desc, eq, getTableColumns, isNull, or, sql } from 'drizzle-orm';

 import {
  agentEvalBenchmarks,
@@ -22,7 +22,10 @@ export class AgentEvalBenchmarkModel {
   * Create a new benchmark
   */
  create = async (params: NewAgentEvalBenchmark) => {
-    const [result] = await this.db.insert(agentEvalBenchmarks).values(params).returning();
+    const [result] = await this.db
+      .insert(agentEvalBenchmarks)
+      .values({ ...params, userId: this.userId })
+      .returning();
    return result;
  };

@@ -32,7 +35,13 @@ export class AgentEvalBenchmarkModel {
  delete = async (id: string) => {
    return this.db
      .delete(agentEvalBenchmarks)
-      .where(and(eq(agentEvalBenchmarks.id, id), eq(agentEvalBenchmarks.isSystem, false)));
+      .where(
+        and(
+          eq(agentEvalBenchmarks.id, id),
+          eq(agentEvalBenchmarks.isSystem, false),
+          eq(agentEvalBenchmarks.userId, this.userId),
+        ),
+      );
  };

  /**
@@ -40,7 +49,13 @@ export class AgentEvalBenchmarkModel {
   * @param includeSystem - Whether to include system benchmarks (default: true)
   */
  query = async (includeSystem = true) => {
-    const conditions = includeSystem ? undefined : eq(agentEvalBenchmarks.isSystem, false);
+    const userCondition = or(
+      eq(agentEvalBenchmarks.userId, this.userId),
+      isNull(agentEvalBenchmarks.userId),
+    );
+    const conditions = includeSystem
+      ? userCondition
+      : and(eq(agentEvalBenchmarks.isSystem, false), userCondition);

    const datasetCountSq = this.db
      .select({
@@ -129,7 +144,12 @@ export class AgentEvalBenchmarkModel {
    const [result] = await this.db
      .select()
      .from(agentEvalBenchmarks)
-      .where(eq(agentEvalBenchmarks.id, id))
+      .where(
+        and(
+          eq(agentEvalBenchmarks.id, id),
+          or(eq(agentEvalBenchmarks.userId, this.userId), isNull(agentEvalBenchmarks.userId)),
+        ),
+      )
      .limit(1);
    return result;
  };
@@ -141,7 +161,12 @@ export class AgentEvalBenchmarkModel {
    const [result] = await this.db
      .select()
      .from(agentEvalBenchmarks)
-      .where(eq(agentEvalBenchmarks.identifier, identifier))
+      .where(
+        and(
+          eq(agentEvalBenchmarks.identifier, identifier),
+          or(eq(agentEvalBenchmarks.userId, this.userId), isNull(agentEvalBenchmarks.userId)),
+        ),
+      )
      .limit(1);
    return result;
  };
@@ -153,7 +178,13 @@ export class AgentEvalBenchmarkModel {
    const [result] = await this.db
      .update(agentEvalBenchmarks)
      .set({ ...value, updatedAt: new Date() })
-      .where(and(eq(agentEvalBenchmarks.id, id), eq(agentEvalBenchmarks.isSystem, false)))
+      .where(
+        and(
+          eq(agentEvalBenchmarks.id, id),
+          eq(agentEvalBenchmarks.isSystem, false),
+          eq(agentEvalBenchmarks.userId, this.userId),
+        ),
+      )
      .returning();
    return result;
  };
--- a/packages/database/src/schemas/agentEvals.ts
+++ b/packages/database/src/schemas/agentEvals.ts
@@ -65,13 +65,16 @@ export const agentEvalBenchmarks = pgTable(

    metadata: jsonb('metadata').$type<Record<string, unknown>>(),

+    userId: text('user_id').references(() => users.id, { onDelete: 'cascade' }),
+
    isSystem: boolean('is_system').default(true).notNull(),

    ...timestamps,
  },
  (t) => [
-    uniqueIndex('agent_eval_benchmarks_identifier_unique').on(t.identifier),
+    uniqueIndex('agent_eval_benchmarks_identifier_user_id_unique').on(t.identifier, t.userId),
    index('agent_eval_benchmarks_is_system_idx').on(t.isSystem),
+    index('agent_eval_benchmarks_user_id_idx').on(t.userId),
  ],
 );