✨ feat(cli): CLI Phase 5 - agent KB/file/pin, thread, eval and miscellaneous command enhancements (#12920)

* ✨ feat(cli): CLI Phase 5 - agent KB/file/pin, thread management, eval expansion - Add agent subcommands: pin/unpin, kb-files, add-file/remove-file/toggle-file, add-kb/remove-kb/toggle-kb - Create thread command with list/list-all/delete subcommands - Expand eval with internal benchmark/dataset/testcase/irun management - Move existing external eval commands under `eval ext` namespace - Add comprehensive unit tests for all new functionality Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * 💄 style(cli): rename eval `irun` to `run` since external moved to `ext` namespace Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * ♻️ refactor(cli): merge external eval commands into unified tree with --external flag Remove separate `eval ext` namespace; use `--external` flag on overlapping commands (dataset get, run get) and integrate external-only commands directly into the tree. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * ✨ feat(cli): CLI Phase 6 - miscellaneous command enhancements - file: add upload (hash check + create), edit (move to folder), kb-items - user: new command with info, settings, preferences, update-avatar, update-name - model: add batch-update, sort order - plugin: add create (without settings, distinct from install) - generation: add delete Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-26 13:19:34 +07:00 · 2026-03-12 09:47:16 +08:00
parent 165697ce47
commit f94f1ae08a
20 changed files with 2512 additions and 320 deletions
--- a/apps/cli/package.json
+++ b/apps/cli/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@lobehub/cli",
-  "version": "0.0.1-canary.11",
+  "version": "0.0.1-canary.12",
  "type": "module",
  "bin": {
    "lh": "./dist/index.js",
--- a/apps/cli/src/api/client.ts
+++ b/apps/cli/src/api/client.ts
@@ -16,6 +16,13 @@ let _client: TrpcClient | undefined;
 let _toolsClient: ToolsTrpcClient | undefined;

 async function getAuthAndServer() {
+  // LOBEHUB_JWT + LOBEHUB_SERVER env vars (used by server-side sandbox execution)
+  const envJwt = process.env.LOBEHUB_JWT;
+  if (envJwt) {
+    const serverUrl = process.env.LOBEHUB_SERVER || OFFICIAL_SERVER_URL;
+    return { accessToken: envJwt, serverUrl: serverUrl.replace(/\/$/, '') };
+  }
+
  const result = await getValidToken();
  if (!result) {
    log.error("No authentication found. Run 'lh login' first.");
--- a/apps/cli/src/auth/resolveToken.ts
+++ b/apps/cli/src/auth/resolveToken.ts
@@ -29,6 +29,18 @@ function parseJwtSub(token: string): string | undefined {
 * Exits the process if no token can be resolved.
 */
 export async function resolveToken(options: ResolveTokenOptions): Promise<ResolvedAuth> {
+  // LOBEHUB_JWT env var takes highest priority (used by server-side sandbox execution)
+  const envJwt = process.env.LOBEHUB_JWT;
+  if (envJwt) {
+    const userId = parseJwtSub(envJwt);
+    if (!userId) {
+      log.error('Could not extract userId from LOBEHUB_JWT.');
+      process.exit(1);
+    }
+    log.debug('Using LOBEHUB_JWT from environment');
+    return { token: envJwt, userId };
+  }
+
  // Explicit token comes next (LOBEHUB_JWT, when set, has already returned above)
  if (options.token) {
    const userId = parseJwtSub(options.token);
--- a/apps/cli/src/commands/agent.test.ts
+++ b/apps/cli/src/commands/agent.test.ts
@@ -8,12 +8,20 @@ const { mockTrpcClient } = vi.hoisted(() => ({
  mockTrpcClient: {
    agent: {
      createAgent: { mutate: vi.fn() },
+      createAgentFiles: { mutate: vi.fn() },
+      createAgentKnowledgeBase: { mutate: vi.fn() },
+      deleteAgentFile: { mutate: vi.fn() },
+      deleteAgentKnowledgeBase: { mutate: vi.fn() },
      duplicateAgent: { mutate: vi.fn() },
      getAgentConfigById: { query: vi.fn() },
      getBuiltinAgent: { query: vi.fn() },
+      getKnowledgeBasesAndFiles: { query: vi.fn() },
      queryAgents: { query: vi.fn() },
      removeAgent: { mutate: vi.fn() },
+      toggleFile: { mutate: vi.fn() },
+      toggleKnowledgeBase: { mutate: vi.fn() },
      updateAgentConfig: { mutate: vi.fn() },
+      updateAgentPinned: { mutate: vi.fn() },
    },
    aiAgent: {
      execAgent: { mutate: vi.fn() },
@@ -403,6 +411,158 @@ describe('agent command', () => {
    });
  });

+  describe('pin/unpin', () => {
+    // Both subcommands call the same mutation and differ only in the `pinned` flag.
+    it('should pin an agent', async () => {
+      const { mutate } = mockTrpcClient.agent.updateAgentPinned;
+      mutate.mockResolvedValue({});
+
+      const argv = ['node', 'test', 'agent', 'pin', 'a1'];
+      await createProgram().parseAsync(argv);
+
+      expect(mutate).toHaveBeenCalledWith({ id: 'a1', pinned: true });
+    });
+
+    it('should unpin an agent', async () => {
+      const { mutate } = mockTrpcClient.agent.updateAgentPinned;
+      mutate.mockResolvedValue({});
+
+      const argv = ['node', 'test', 'agent', 'unpin', 'a1'];
+      await createProgram().parseAsync(argv);
+
+      expect(mutate).toHaveBeenCalledWith({ id: 'a1', pinned: false });
+    });
+  });
+
+  describe('kb-files', () => {
+    it('should list kb and files', async () => {
+      const { query } = mockTrpcClient.agent.getKnowledgeBasesAndFiles;
+      const fileRow = { enabled: true, id: 'f1', name: 'file.txt', type: 'file' };
+      query.mockResolvedValue([fileRow]);
+
+      const argv = ['node', 'test', 'agent', 'kb-files', 'a1'];
+      await createProgram().parseAsync(argv);
+
+      // The positional agent id is forwarded as `agentId`.
+      expect(query).toHaveBeenCalledWith({ agentId: 'a1' });
+    });
+
+    it('should show empty message', async () => {
+      // An empty result must print the placeholder line.
+      mockTrpcClient.agent.getKnowledgeBasesAndFiles.query.mockResolvedValue([]);
+
+      const argv = ['node', 'test', 'agent', 'kb-files', 'a1'];
+      await createProgram().parseAsync(argv);
+
+      expect(consoleSpy).toHaveBeenCalledWith('No knowledge bases or files found.');
+    });
+  });
+
+  describe('add-file', () => {
+    it('should add files to agent', async () => {
+      const { mutate } = mockTrpcClient.agent.createAgentFiles;
+      mutate.mockResolvedValue({});
+
+      // The comma-separated --file-ids value is expected to arrive as an array.
+      const argv = ['node', 'test', 'agent', 'add-file', 'a1', '--file-ids', 'f1,f2'];
+      await createProgram().parseAsync(argv);
+
+      const expectedInput = expect.objectContaining({ agentId: 'a1', fileIds: ['f1', 'f2'] });
+      expect(mutate).toHaveBeenCalledWith(expectedInput);
+    });
+  });
+
+  describe('remove-file', () => {
+    it('should remove a file from agent', async () => {
+      const { mutate } = mockTrpcClient.agent.deleteAgentFile;
+      mutate.mockResolvedValue({});
+
+      const argv = ['node', 'test', 'agent', 'remove-file', 'a1', '--file-id', 'f1'];
+      await createProgram().parseAsync(argv);
+
+      // Exact match: only agentId and fileId are sent.
+      expect(mutate).toHaveBeenCalledWith({ agentId: 'a1', fileId: 'f1' });
+    });
+  });
+
+  describe('toggle-file', () => {
+    it('should toggle file with enable', async () => {
+      const { mutate } = mockTrpcClient.agent.toggleFile;
+      mutate.mockResolvedValue({});
+
+      const argv = ['node', 'test', 'agent', 'toggle-file', 'a1', '--file-id', 'f1', '--enable'];
+      await createProgram().parseAsync(argv);
+
+      // --enable is expected to map to `enabled: true`.
+      expect(mutate).toHaveBeenCalledWith({ agentId: 'a1', enabled: true, fileId: 'f1' });
+    });
+  });
+
+  describe('add-kb', () => {
+    it('should add kb to agent', async () => {
+      const { mutate } = mockTrpcClient.agent.createAgentKnowledgeBase;
+      mutate.mockResolvedValue({});
+
+      const argv = ['node', 'test', 'agent', 'add-kb', 'a1', '--kb-id', 'kb1'];
+      await createProgram().parseAsync(argv);
+
+      // --kb-id is expected to be sent as `knowledgeBaseId`.
+      const expectedInput = expect.objectContaining({ agentId: 'a1', knowledgeBaseId: 'kb1' });
+      expect(mutate).toHaveBeenCalledWith(expectedInput);
+    });
+  });
+
+  describe('remove-kb', () => {
+    it('should remove kb from agent', async () => {
+      const { mutate } = mockTrpcClient.agent.deleteAgentKnowledgeBase;
+      mutate.mockResolvedValue({});
+
+      const argv = ['node', 'test', 'agent', 'remove-kb', 'a1', '--kb-id', 'kb1'];
+      await createProgram().parseAsync(argv);
+
+      // Exact match: only agentId and knowledgeBaseId are sent.
+      expect(mutate).toHaveBeenCalledWith({ agentId: 'a1', knowledgeBaseId: 'kb1' });
+    });
+  });
+
+  describe('toggle-kb', () => {
+    it('should toggle kb with disable', async () => {
+      const { mutate } = mockTrpcClient.agent.toggleKnowledgeBase;
+      mutate.mockResolvedValue({});
+
+      const argv = ['node', 'test', 'agent', 'toggle-kb', 'a1', '--kb-id', 'kb1', '--disable'];
+      await createProgram().parseAsync(argv);
+
+      // --disable is expected to map to `enabled: false`.
+      expect(mutate).toHaveBeenCalledWith({
+        agentId: 'a1',
+        enabled: false,
+        knowledgeBaseId: 'kb1',
+      });
+    });
+  });
+
  describe('status', () => {
    it('should display operation status', async () => {
      mockTrpcClient.aiAgent.getOperationStatus.query.mockResolvedValue({
--- a/apps/cli/src/commands/agent.ts
+++ b/apps/cli/src/commands/agent.ts
@@ -316,6 +316,204 @@ export function registerAgentCommand(program: Command) {
      },
    );

+  // ── pin / unpin ─────────────────────────────────────
+
+  agent
+    .command('pin <agentId>')
+    .description('Pin an agent')
+    .action(async (agentId: string) => {
+      // Sends `pinned: true` for the given agent id, then confirms on stdout.
+      const trpc = await getTrpcClient();
+      await trpc.agent.updateAgentPinned.mutate({ id: agentId, pinned: true });
+
+      const checkMark = pc.green('✓');
+      console.log(`${checkMark} Pinned agent ${pc.bold(agentId)}`);
+    });
+
+  agent
+    .command('unpin <agentId>')
+    .description('Unpin an agent')
+    .action(async (agentId: string) => {
+      // Sends `pinned: false` for the given agent id, then confirms on stdout.
+      const trpc = await getTrpcClient();
+      await trpc.agent.updateAgentPinned.mutate({ id: agentId, pinned: false });
+
+      const checkMark = pc.green('✓');
+      console.log(`${checkMark} Unpinned agent ${pc.bold(agentId)}`);
+    });
+
+  // ── kb-files ───────────────────────────────────────
+
+  agent
+    .command('kb-files [agentId]')
+    .description('List knowledge bases and files associated with an agent')
+    .option('-s, --slug <slug>', 'Agent slug')
+    .option('--json [fields]', 'Output JSON, optionally specify fields (comma-separated)')
+    .action(
+      async (
+        agentIdArg: string | undefined,
+        options: { json?: string | boolean; slug?: string },
+      ) => {
+        const trpc = await getTrpcClient();
+        const agentId = await resolveAgentId(trpc, { agentId: agentIdArg, slug: options.slug });
+        const result = await trpc.agent.getKnowledgeBasesAndFiles.query({ agentId });
+
+        // --json may be a bare flag (boolean) or carry a field list (string).
+        const { json } = options;
+        if (json !== undefined) {
+          outputJson(result, typeof json === 'string' ? json : undefined);
+          return;
+        }
+
+        // A non-array response is treated the same as an empty list.
+        const entries: any[] = Array.isArray(result) ? result : [];
+        if (!entries.length) {
+          console.log('No knowledge bases or files found.');
+          return;
+        }
+
+        const header = ['ID', 'NAME', 'TYPE', 'STATUS'];
+        const body = entries.map((entry) => {
+          const status = entry.enabled ? 'enabled' : 'disabled';
+          return [entry.id || '', truncate(entry.name || '', 40), entry.type || '', status];
+        });
+
+        printTable(body, header);
+      },
+    );
+
+  // ── add-file ───────────────────────────────────────
+
+  agent
+    .command('add-file [agentId]')
+    .description('Associate files with an agent')
+    .option('-s, --slug <slug>', 'Agent slug')
+    .requiredOption('--file-ids <ids>', 'Comma-separated file IDs')
+    .option('--enabled', 'Enable files immediately')
+    .action(
+      async (
+        agentIdArg: string | undefined,
+        options: { enabled?: boolean; fileIds: string; slug?: string },
+      ) => {
+        // Drop blank entries so input such as "f1,,f2" or a trailing comma
+        // never sends an empty file ID to the API.
+        const fileIds = options.fileIds
+          .split(',')
+          .map((id) => id.trim())
+          .filter((id) => id.length > 0);
+
+        // Fail before any network call when nothing usable was supplied.
+        if (fileIds.length === 0) {
+          console.error('No file IDs provided. Pass --file-ids as a comma-separated list.');
+          process.exit(1);
+          // Keeps the handler from continuing when process.exit is stubbed (e.g. in tests).
+          return;
+        }
+
+        const client = await getTrpcClient();
+        const agentId = await resolveAgentId(client, { agentId: agentIdArg, slug: options.slug });
+
+        // `enabled` is only included in the payload when the flag was passed.
+        const input: Record<string, any> = { agentId, fileIds };
+        if (options.enabled !== undefined) input.enabled = options.enabled;
+
+        await client.agent.createAgentFiles.mutate(input as any);
+        console.log(
+          `${pc.green('✓')} Added ${fileIds.length} file(s) to agent ${pc.bold(agentId)}`,
+        );
+      },
+    );
+
+  // ── remove-file ────────────────────────────────────
+
+  agent
+    .command('remove-file [agentId]')
+    .description('Remove a file from an agent')
+    .option('-s, --slug <slug>', 'Agent slug')
+    .requiredOption('--file-id <id>', 'File ID to remove')
+    .action(async (agentIdArg: string | undefined, options: { fileId: string; slug?: string }) => {
+      const { fileId, slug } = options;
+
+      // The agent may be given positionally or via --slug; resolveAgentId picks one.
+      const trpc = await getTrpcClient();
+      const agentId = await resolveAgentId(trpc, { agentId: agentIdArg, slug });
+      await trpc.agent.deleteAgentFile.mutate({ agentId, fileId });
+
+      const checkMark = pc.green('✓');
+      console.log(`${checkMark} Removed file ${pc.bold(fileId)} from agent ${pc.bold(agentId)}`);
+    });
+
+  // ── toggle-file ────────────────────────────────────
+
+  agent
+    .command('toggle-file [agentId]')
+    .description('Toggle a file on/off for an agent')
+    .option('-s, --slug <slug>', 'Agent slug')
+    .requiredOption('--file-id <id>', 'File ID')
+    .option('--enable', 'Enable the file')
+    .option('--disable', 'Disable the file')
+    .action(
+      async (
+        agentIdArg: string | undefined,
+        options: { disable?: boolean; enable?: boolean; fileId: string; slug?: string },
+      ) => {
+        // Contradictory flags used to resolve silently to "enable"; reject them instead.
+        if (options.enable && options.disable) {
+          console.error('Options --enable and --disable cannot be used together.');
+          process.exit(1);
+          // Keeps the handler from continuing when process.exit is stubbed (e.g. in tests).
+          return;
+        }
+
+        // --enable -> true, --disable -> false, neither -> undefined (sent as-is).
+        const enabled = options.enable ? true : options.disable ? false : undefined;
+        const client = await getTrpcClient();
+        const agentId = await resolveAgentId(client, { agentId: agentIdArg, slug: options.slug });
+        await client.agent.toggleFile.mutate({ agentId, enabled, fileId: options.fileId });
+        console.log(
+          `${pc.green('✓')} Toggled file ${pc.bold(options.fileId)} for agent ${pc.bold(agentId)}`,
+        );
+      },
+    );
+
+  // ── add-kb ─────────────────────────────────────────
+
+  agent
+    .command('add-kb [agentId]')
+    .description('Associate a knowledge base with an agent')
+    .option('-s, --slug <slug>', 'Agent slug')
+    .requiredOption('--kb-id <id>', 'Knowledge base ID')
+    .option('--enabled', 'Enable immediately')
+    .action(
+      async (
+        agentIdArg: string | undefined,
+        options: { enabled?: boolean; kbId: string; slug?: string },
+      ) => {
+        const { enabled, kbId, slug } = options;
+
+        const trpc = await getTrpcClient();
+        const agentId = await resolveAgentId(trpc, { agentId: agentIdArg, slug });
+
+        // `enabled` is only added to the payload when the flag was passed.
+        const payload: Record<string, any> = { agentId, knowledgeBaseId: kbId };
+        if (enabled !== undefined) {
+          payload.enabled = enabled;
+        }
+
+        await trpc.agent.createAgentKnowledgeBase.mutate(payload as any);
+
+        const checkMark = pc.green('✓');
+        console.log(
+          `${checkMark} Added knowledge base ${pc.bold(kbId)} to agent ${pc.bold(agentId)}`,
+        );
+      },
+    );
+
+  // ── remove-kb ──────────────────────────────────────
+
+  agent
+    .command('remove-kb [agentId]')
+    .description('Remove a knowledge base from an agent')
+    .option('-s, --slug <slug>', 'Agent slug')
+    .requiredOption('--kb-id <id>', 'Knowledge base ID')
+    .action(async (agentIdArg: string | undefined, options: { kbId: string; slug?: string }) => {
+      const { kbId, slug } = options;
+
+      // The agent may be given positionally or via --slug; resolveAgentId picks one.
+      const trpc = await getTrpcClient();
+      const agentId = await resolveAgentId(trpc, { agentId: agentIdArg, slug });
+      await trpc.agent.deleteAgentKnowledgeBase.mutate({ agentId, knowledgeBaseId: kbId });
+
+      const checkMark = pc.green('✓');
+      console.log(
+        `${checkMark} Removed knowledge base ${pc.bold(kbId)} from agent ${pc.bold(agentId)}`,
+      );
+    });
+
+  // ── toggle-kb ──────────────────────────────────────
+
+  agent
+    .command('toggle-kb [agentId]')
+    .description('Toggle a knowledge base on/off for an agent')
+    .option('-s, --slug <slug>', 'Agent slug')
+    .requiredOption('--kb-id <id>', 'Knowledge base ID')
+    .option('--enable', 'Enable the knowledge base')
+    .option('--disable', 'Disable the knowledge base')
+    .action(
+      async (
+        agentIdArg: string | undefined,
+        options: { disable?: boolean; enable?: boolean; kbId: string; slug?: string },
+      ) => {
+        // Contradictory flags used to resolve silently to "enable"; reject them instead.
+        if (options.enable && options.disable) {
+          console.error('Options --enable and --disable cannot be used together.');
+          process.exit(1);
+          // Keeps the handler from continuing when process.exit is stubbed (e.g. in tests).
+          return;
+        }
+
+        // --enable -> true, --disable -> false, neither -> undefined (sent as-is).
+        const enabled = options.enable ? true : options.disable ? false : undefined;
+        const client = await getTrpcClient();
+        const agentId = await resolveAgentId(client, { agentId: agentIdArg, slug: options.slug });
+        await client.agent.toggleKnowledgeBase.mutate({
+          agentId,
+          enabled,
+          knowledgeBaseId: options.kbId,
+        });
+        console.log(
+          `${pc.green('✓')} Toggled knowledge base ${pc.bold(options.kbId)} for agent ${pc.bold(agentId)}`,
+        );
+      },
+    );
+
  // ── status ──────────────────────────────────────────

  agent
--- a/apps/cli/src/commands/eval.test.ts
+++ b/apps/cli/src/commands/eval.test.ts
@@ -3,6 +3,32 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';

 const { mockTrpcClient } = vi.hoisted(() => ({
  mockTrpcClient: {
+    agentEval: {
+      abortRun: { mutate: vi.fn() },
+      createBenchmark: { mutate: vi.fn() },
+      createDataset: { mutate: vi.fn() },
+      createRun: { mutate: vi.fn() },
+      createTestCase: { mutate: vi.fn() },
+      deleteBenchmark: { mutate: vi.fn() },
+      deleteDataset: { mutate: vi.fn() },
+      deleteRun: { mutate: vi.fn() },
+      deleteTestCase: { mutate: vi.fn() },
+      getBenchmark: { query: vi.fn() },
+      getDataset: { query: vi.fn() },
+      getRunDetails: { query: vi.fn() },
+      getRunProgress: { query: vi.fn() },
+      getRunResults: { query: vi.fn() },
+      getTestCase: { query: vi.fn() },
+      listBenchmarks: { query: vi.fn() },
+      listDatasets: { query: vi.fn() },
+      listRuns: { query: vi.fn() },
+      listTestCases: { query: vi.fn() },
+      retryRunErrors: { mutate: vi.fn() },
+      startRun: { mutate: vi.fn() },
+      updateBenchmark: { mutate: vi.fn() },
+      updateDataset: { mutate: vi.fn() },
+      updateTestCase: { mutate: vi.fn() },
+    },
    agentEvalExternal: {
      datasetGet: { query: vi.fn() },
      messagesList: { query: vi.fn() },
@@ -48,9 +74,11 @@ describe('eval command', () => {
    exitSpy = vi.spyOn(process, 'exit').mockImplementation((() => {}) as any);
    logSpy = vi.spyOn(console, 'log').mockImplementation(() => {});

-    for (const method of Object.values(mockTrpcClient.agentEvalExternal)) {
-      for (const fn of Object.values(method)) {
-        (fn as ReturnType<typeof vi.fn>).mockReset();
+    for (const ns of Object.values(mockTrpcClient)) {
+      for (const method of Object.values(ns as Record<string, any>)) {
+        for (const fn of Object.values(method as Record<string, any>)) {
+          (fn as ReturnType<typeof vi.fn>).mockReset();
+        }
      }
    }
  });
@@ -68,218 +96,505 @@ describe('eval command', () => {
    return program;
  };

-  it('should call runGet and output json envelope', async () => {
-    mockTrpcClient.agentEvalExternal.runGet.query.mockResolvedValue({
-      config: { k: 1 },
-      datasetId: 'dataset-1',
-      id: 'run-1',
+  // ============================================
+  // Benchmark tests
+  // ============================================
+  describe('benchmark', () => {
+    it('should list benchmarks', async () => {
+      mockTrpcClient.agentEval.listBenchmarks.query.mockResolvedValue([
+        { id: 'b1', name: 'Bench 1' },
+      ]);
+
+      const program = createProgram();
+      await program.parseAsync(['node', 'test', 'eval', 'benchmark', 'list', '--json']);
+
+      expect(mockTrpcClient.agentEval.listBenchmarks.query).toHaveBeenCalled();
    });

-    const program = createProgram();
-    await program.parseAsync(['node', 'test', 'eval', 'run', 'get', '--run-id', 'run-1', '--json']);
+    it('should create a benchmark', async () => {
+      mockTrpcClient.agentEval.createBenchmark.mutate.mockResolvedValue({ id: 'b1' });

-    expect(mockTrpcClient.agentEvalExternal.runGet.query).toHaveBeenCalledWith({ runId: 'run-1' });
+      const program = createProgram();
+      await program.parseAsync([
+        'node',
+        'test',
+        'eval',
+        'benchmark',
+        'create',
+        '--identifier',
+        'test-bench',
+        '-n',
+        'Test Bench',
+        '--json',
+      ]);

-    const payload = JSON.parse(logSpy.mock.calls[0][0]);
-    expect(payload).toEqual({
-      data: {
+      expect(mockTrpcClient.agentEval.createBenchmark.mutate).toHaveBeenCalledWith(
+        expect.objectContaining({ identifier: 'test-bench', name: 'Test Bench' }),
+      );
+    });
+
+    it('should delete a benchmark', async () => {
+      mockTrpcClient.agentEval.deleteBenchmark.mutate.mockResolvedValue({ success: true });
+
+      const program = createProgram();
+      await program.parseAsync(['node', 'test', 'eval', 'benchmark', 'delete', '--id', 'b1']);
+
+      expect(mockTrpcClient.agentEval.deleteBenchmark.mutate).toHaveBeenCalledWith({ id: 'b1' });
+    });
+  });
+
+  // ============================================
+  // Dataset tests
+  // ============================================
+  describe('dataset', () => {
+    it('should list datasets', async () => {
+      mockTrpcClient.agentEval.listDatasets.query.mockResolvedValue([{ id: 'd1', name: 'DS 1' }]);
+
+      const program = createProgram();
+      await program.parseAsync(['node', 'test', 'eval', 'dataset', 'list', '--json']);
+
+      expect(mockTrpcClient.agentEval.listDatasets.query).toHaveBeenCalled();
+    });
+
+    it('should get dataset via internal API', async () => {
+      mockTrpcClient.agentEval.getDataset.query.mockResolvedValue({ id: 'd1' });
+
+      const program = createProgram();
+      await program.parseAsync(['node', 'test', 'eval', 'dataset', 'get', '--id', 'd1', '--json']);
+
+      expect(mockTrpcClient.agentEval.getDataset.query).toHaveBeenCalledWith({ id: 'd1' });
+    });
+
+    it('should get dataset via external API with --external', async () => {
+      mockTrpcClient.agentEvalExternal.datasetGet.query.mockResolvedValue({
+        id: 'dataset-1',
+        metadata: { preset: 'deepsearchqa' },
+      });
+
+      const program = createProgram();
+      await program.parseAsync([
+        'node',
+        'test',
+        'eval',
+        'dataset',
+        'get',
+        '--id',
+        'dataset-1',
+        '--external',
+        '--json',
+      ]);
+
+      expect(mockTrpcClient.agentEvalExternal.datasetGet.query).toHaveBeenCalledWith({
+        datasetId: 'dataset-1',
+      });
+    });
+
+    it('should create a dataset', async () => {
+      mockTrpcClient.agentEval.createDataset.mutate.mockResolvedValue({ id: 'd1' });
+
+      const program = createProgram();
+      await program.parseAsync([
+        'node',
+        'test',
+        'eval',
+        'dataset',
+        'create',
+        '--benchmark-id',
+        'b1',
+        '--identifier',
+        'ds1',
+        '-n',
+        'Dataset 1',
+        '--json',
+      ]);
+
+      expect(mockTrpcClient.agentEval.createDataset.mutate).toHaveBeenCalledWith(
+        expect.objectContaining({ benchmarkId: 'b1', identifier: 'ds1', name: 'Dataset 1' }),
+      );
+    });
+  });
+
+  // ============================================
+  // TestCase tests
+  // ============================================
+  describe('testcase', () => {
+    it('should list test cases', async () => {
+      mockTrpcClient.agentEval.listTestCases.query.mockResolvedValue({ data: [], total: 0 });
+
+      const program = createProgram();
+      await program.parseAsync([
+        'node',
+        'test',
+        'eval',
+        'testcase',
+        'list',
+        '--dataset-id',
+        'd1',
+        '--json',
+      ]);
+
+      expect(mockTrpcClient.agentEval.listTestCases.query).toHaveBeenCalledWith(
+        expect.objectContaining({ datasetId: 'd1' }),
+      );
+    });
+
+    it('should create a test case', async () => {
+      mockTrpcClient.agentEval.createTestCase.mutate.mockResolvedValue({ id: 'tc1' });
+
+      const program = createProgram();
+      await program.parseAsync([
+        'node',
+        'test',
+        'eval',
+        'testcase',
+        'create',
+        '--dataset-id',
+        'd1',
+        '--input',
+        'What is 2+2?',
+        '--expected',
+        '4',
+      ]);
+
+      expect(mockTrpcClient.agentEval.createTestCase.mutate).toHaveBeenCalledWith(
+        expect.objectContaining({
+          content: expect.objectContaining({ expected: '4', input: 'What is 2+2?' }),
+          datasetId: 'd1',
+        }),
+      );
+    });
+
+    it('should delete a test case', async () => {
+      mockTrpcClient.agentEval.deleteTestCase.mutate.mockResolvedValue({ success: true });
+
+      const program = createProgram();
+      await program.parseAsync(['node', 'test', 'eval', 'testcase', 'delete', '--id', 'tc1']);
+
+      expect(mockTrpcClient.agentEval.deleteTestCase.mutate).toHaveBeenCalledWith({ id: 'tc1' });
+    });
+
+    it('should count test cases via external API', async () => {
+      mockTrpcClient.agentEvalExternal.testCasesCount.query.mockResolvedValue({ count: 12 });
+
+      const program = createProgram();
+      await program.parseAsync([
+        'node',
+        'test',
+        'eval',
+        'testcase',
+        'count',
+        '--dataset-id',
+        'dataset-1',
+        '--json',
+      ]);
+
+      expect(mockTrpcClient.agentEvalExternal.testCasesCount.query).toHaveBeenCalledWith({
+        datasetId: 'dataset-1',
+      });
+    });
+  });
+
+  // ============================================
+  // Run tests
+  // ============================================
+  describe('run', () => {
+    it('should list runs', async () => {
+      mockTrpcClient.agentEval.listRuns.query.mockResolvedValue({ data: [], total: 0 });
+
+      const program = createProgram();
+      await program.parseAsync(['node', 'test', 'eval', 'run', 'list', '--json']);
+
+      expect(mockTrpcClient.agentEval.listRuns.query).toHaveBeenCalled();
+    });
+
+    it('should get run via internal API', async () => {
+      mockTrpcClient.agentEval.getRunDetails.query.mockResolvedValue({ id: 'r1' });
+
+      const program = createProgram();
+      await program.parseAsync(['node', 'test', 'eval', 'run', 'get', '--id', 'r1', '--json']);
+
+      expect(mockTrpcClient.agentEval.getRunDetails.query).toHaveBeenCalledWith({ id: 'r1' });
+    });
+
+    it('should get run via external API with --external', async () => {
+      mockTrpcClient.agentEvalExternal.runGet.query.mockResolvedValue({
        config: { k: 1 },
        datasetId: 'dataset-1',
        id: 'run-1',
-      },
-      error: null,
-      ok: true,
-      version: 'v1',
+      });
+
+      const program = createProgram();
+      await program.parseAsync([
+        'node',
+        'test',
+        'eval',
+        'run',
+        'get',
+        '--id',
+        'run-1',
+        '--external',
+        '--json',
+      ]);
+
+      expect(mockTrpcClient.agentEvalExternal.runGet.query).toHaveBeenCalledWith({
+        runId: 'run-1',
+      });
+
+      const payload = JSON.parse(logSpy.mock.calls[0][0]);
+      expect(payload).toEqual({
+        data: { config: { k: 1 }, datasetId: 'dataset-1', id: 'run-1' },
+        error: null,
+        ok: true,
+        version: 'v1',
+      });
+    });
+
+    it('should create a run', async () => {
+      mockTrpcClient.agentEval.createRun.mutate.mockResolvedValue({ id: 'r1' });
+
+      const program = createProgram();
+      await program.parseAsync([
+        'node',
+        'test',
+        'eval',
+        'run',
+        'create',
+        '--dataset-id',
+        'd1',
+        '-n',
+        'Run 1',
+        '--json',
+      ]);
+
+      expect(mockTrpcClient.agentEval.createRun.mutate).toHaveBeenCalledWith(
+        expect.objectContaining({ datasetId: 'd1', name: 'Run 1' }),
+      );
+    });
+
+    it('should start a run', async () => {
+      mockTrpcClient.agentEval.startRun.mutate.mockResolvedValue({ success: true, runId: 'r1' });
+
+      const program = createProgram();
+      await program.parseAsync(['node', 'test', 'eval', 'run', 'start', '--id', 'r1']);
+
+      expect(mockTrpcClient.agentEval.startRun.mutate).toHaveBeenCalledWith(
+        expect.objectContaining({ id: 'r1' }),
+      );
+    });
+
+    it('should abort a run', async () => {
+      mockTrpcClient.agentEval.abortRun.mutate.mockResolvedValue({ success: true });
+
+      const program = createProgram();
+      await program.parseAsync(['node', 'test', 'eval', 'run', 'abort', '--id', 'r1']);
+
+      expect(mockTrpcClient.agentEval.abortRun.mutate).toHaveBeenCalledWith({ id: 'r1' });
+    });
+
+    it('should get run progress', async () => {
+      mockTrpcClient.agentEval.getRunProgress.query.mockResolvedValue({ status: 'running' });
+
+      const program = createProgram();
+      await program.parseAsync(['node', 'test', 'eval', 'run', 'progress', '--id', 'r1', '--json']);
+
+      expect(mockTrpcClient.agentEval.getRunProgress.query).toHaveBeenCalledWith({ id: 'r1' });
+    });
+
+    it('should get run results', async () => {
+      mockTrpcClient.agentEval.getRunResults.query.mockResolvedValue({
+        results: [],
+        runId: 'r1',
+        total: 0,
+      });
+
+      const program = createProgram();
+      await program.parseAsync(['node', 'test', 'eval', 'run', 'results', '--id', 'r1', '--json']);
+
+      expect(mockTrpcClient.agentEval.getRunResults.query).toHaveBeenCalledWith({ id: 'r1' });
+    });
+
+    it('should delete a run', async () => {
+      mockTrpcClient.agentEval.deleteRun.mutate.mockResolvedValue({ success: true });
+
+      const program = createProgram();
+      await program.parseAsync(['node', 'test', 'eval', 'run', 'delete', '--id', 'r1']);
+
+      expect(mockTrpcClient.agentEval.deleteRun.mutate).toHaveBeenCalledWith({ id: 'r1' });
+    });
+
+    it('should set run status via external API', async () => {
+      mockTrpcClient.agentEvalExternal.runSetStatus.mutate.mockResolvedValue({
+        runId: 'run-1',
+        status: 'completed',
+        success: true,
+      });
+
+      const program = createProgram();
+      await program.parseAsync([
+        'node',
+        'test',
+        'eval',
+        'run',
+        'set-status',
+        '--id',
+        'run-1',
+        '--status',
+        'completed',
+      ]);
+
+      expect(mockTrpcClient.agentEvalExternal.runSetStatus.mutate).toHaveBeenCalledWith({
+        runId: 'run-1',
+        status: 'completed',
+      });
+      expect(logSpy).toHaveBeenCalledWith(expect.stringContaining('status updated to'));
    });
  });

-  it('should call datasetGet and output json envelope', async () => {
-    mockTrpcClient.agentEvalExternal.datasetGet.query.mockResolvedValue({
-      id: 'dataset-1',
-      metadata: { preset: 'deepsearchqa' },
+  // ============================================
+  // Run-Topic tests: `eval run-topic list` and `eval run-topic report-result` (agentEvalExternal router)
+  // ============================================
+  describe('run-topic', () => {
+    it('should list run topics', async () => {
+      mockTrpcClient.agentEvalExternal.runTopicsList.query.mockResolvedValue([]);
+      // --only-external is passed as a bare flag (no value follows it in argv).
+      const program = createProgram();
+      await program.parseAsync([
+        'node',
+        'test',
+        'eval',
+        'run-topic',
+        'list',
+        '--run-id',
+        'run-1',
+        '--only-external',
+        '--json',
+      ]);
+      // The flag must reach the query as `onlyExternal: true` alongside the run id.
+      expect(mockTrpcClient.agentEvalExternal.runTopicsList.query).toHaveBeenCalledWith({
+        onlyExternal: true,
+        runId: 'run-1',
+      });
    });

-    const program = createProgram();
-    await program.parseAsync([
-      'node',
-      'test',
-      'eval',
-      'dataset',
-      'get',
-      '--dataset-id',
-      'dataset-1',
-      '--json',
-    ]);
+    it('should report run-topic result', async () => {
+      mockTrpcClient.agentEvalExternal.runTopicReportResult.mutate.mockResolvedValue({
+        success: true,
+      });

-    expect(mockTrpcClient.agentEvalExternal.datasetGet.query).toHaveBeenCalledWith({
-      datasetId: 'dataset-1',
+      const program = createProgram();
+      await program.parseAsync([
+        'node',
+        'test',
+        'eval',
+        'run-topic',
+        'report-result',
+        '--run-id',
+        'run-1',
+        '--topic-id',
+        'topic-1',
+        '--thread-id',
+        'thread-1',
+        '--score',
+        '0.91',
+        '--correct',
+        'true',
+        '--result-json',
+        '{"grade":"A"}',
+        '--json',
+      ]);
+      // String argv values must arrive typed: score as a number, correct as a boolean, --result-json parsed into `result`.
+      expect(mockTrpcClient.agentEvalExternal.runTopicReportResult.mutate).toHaveBeenCalledWith({
+        correct: true,
+        result: { grade: 'A' },
+        runId: 'run-1',
+        score: 0.91,
+        threadId: 'thread-1',
+        topicId: 'topic-1',
+      });
    });
  });

-  it('should pass onlyExternal to runTopicsList', async () => {
-    mockTrpcClient.agentEvalExternal.runTopicsList.query.mockResolvedValue([]);
+  // ============================================
+  // Eval thread/message tests: `eval thread list` and `eval message list` (agentEvalExternal router)
+  // ============================================
+  describe('eval thread', () => {
+    it('should list threads by topic', async () => { // --topic-id must be forwarded as `topicId`
+      mockTrpcClient.agentEvalExternal.threadsList.query.mockResolvedValue([]);

-    const program = createProgram();
-    await program.parseAsync([
-      'node',
-      'test',
-      'eval',
-      'run-topics',
-      'list',
-      '--run-id',
-      'run-1',
-      '--only-external',
-      '--json',
-    ]);
+      const program = createProgram();
+      await program.parseAsync([
+        'node',
+        'test',
+        'eval',
+        'thread',
+        'list',
+        '--topic-id',
+        'topic-1',
+        '--json',
+      ]);

-    expect(mockTrpcClient.agentEvalExternal.runTopicsList.query).toHaveBeenCalledWith({
-      onlyExternal: true,
-      runId: 'run-1',
+      expect(mockTrpcClient.agentEvalExternal.threadsList.query).toHaveBeenCalledWith({
+        topicId: 'topic-1',
+      });
    });
  });

-  it('should pass topicId and threadId to messagesList', async () => {
-    mockTrpcClient.agentEvalExternal.messagesList.query.mockResolvedValue([]);
+  describe('eval message', () => { // covers `eval message list`
+    it('should list messages by topic and thread', async () => { // both ids must reach messagesList
+      mockTrpcClient.agentEvalExternal.messagesList.query.mockResolvedValue([]);

-    const program = createProgram();
-    await program.parseAsync([
-      'node',
-      'test',
-      'eval',
-      'messages',
-      'list',
-      '--topic-id',
-      'topic-1',
-      '--thread-id',
-      'thread-1',
-      '--json',
-    ]);
+      const program = createProgram();
+      await program.parseAsync([
+        'node',
+        'test',
+        'eval',
+        'message',
+        'list',
+        '--topic-id',
+        'topic-1',
+        '--thread-id',
+        'thread-1',
+        '--json',
+      ]);

-    expect(mockTrpcClient.agentEvalExternal.messagesList.query).toHaveBeenCalledWith({
-      threadId: 'thread-1',
-      topicId: 'topic-1',
+      expect(mockTrpcClient.agentEvalExternal.messagesList.query).toHaveBeenCalledWith({
+        threadId: 'thread-1',
+        topicId: 'topic-1',
+      });
    });
  });

-  it('should parse and report run-topic result', async () => {
-    mockTrpcClient.agentEvalExternal.runTopicReportResult.mutate.mockResolvedValue({
-      success: true,
+  // ============================================
+  // Error handling: a rejected tRPC call must surface as a JSON envelope (--json) or a plain log line, and exit with 1
+  // ============================================
+  describe('error handling', () => {
+    it('should output json error envelope when command fails', async () => {
+      const error = Object.assign(new Error('Run not found'), {
+        data: { code: 'NOT_FOUND' },
+      });
+      mockTrpcClient.agentEval.getRunDetails.query.mockRejectedValue(error);
+      // No --external flag, so `run get` goes through agentEval.getRunDetails (the mock rejected above).
+      const program = createProgram();
+      await program.parseAsync(['node', 'test', 'eval', 'run', 'get', '--id', 'run-404', '--json']);
+      // The first logSpy call must be the serialized envelope; `code` comes from error.data.code.
+      const payload = JSON.parse(logSpy.mock.calls[0][0]);
+      expect(payload).toEqual({
+        data: null,
+        error: { code: 'NOT_FOUND', message: 'Run not found' },
+        ok: false,
+        version: 'v1',
+      });
+      expect(exitSpy).toHaveBeenCalledWith(1);
    });

-    const program = createProgram();
-    await program.parseAsync([
-      'node',
-      'test',
-      'eval',
-      'run-topic',
-      'report-result',
-      '--run-id',
-      'run-1',
-      '--topic-id',
-      'topic-1',
-      '--thread-id',
-      'thread-1',
-      '--score',
-      '0.91',
-      '--correct',
-      'true',
-      '--result-json',
-      '{"grade":"A"}',
-      '--json',
-    ]);
+    it('should log plain error without --json', async () => {
+      mockTrpcClient.agentEvalExternal.threadsList.query.mockRejectedValue(new Error('boom'));

-    expect(mockTrpcClient.agentEvalExternal.runTopicReportResult.mutate).toHaveBeenCalledWith({
-      correct: true,
-      result: { grade: 'A' },
-      runId: 'run-1',
-      score: 0.91,
-      threadId: 'thread-1',
-      topicId: 'topic-1',
+      const program = createProgram();
+      await program.parseAsync(['node', 'test', 'eval', 'thread', 'list', '--topic-id', 'topic-1']);
+      // Without --json only the bare error message is logged, and the process still exits with 1.
+      expect(log.error).toHaveBeenCalledWith('boom');
+      expect(exitSpy).toHaveBeenCalledWith(1);
    });
  });
-
-  it('should update run status', async () => {
-    mockTrpcClient.agentEvalExternal.runSetStatus.mutate.mockResolvedValue({
-      runId: 'run-1',
-      status: 'completed',
-      success: true,
-    });
-
-    const program = createProgram();
-    await program.parseAsync([
-      'node',
-      'test',
-      'eval',
-      'run',
-      'set-status',
-      '--run-id',
-      'run-1',
-      '--status',
-      'completed',
-    ]);
-
-    expect(mockTrpcClient.agentEvalExternal.runSetStatus.mutate).toHaveBeenCalledWith({
-      runId: 'run-1',
-      status: 'completed',
-    });
-    expect(logSpy).toHaveBeenCalledWith(expect.stringContaining('status updated to'));
-  });
-
-  it('should output json error envelope when command fails', async () => {
-    const error = Object.assign(new Error('Run not found'), {
-      data: { code: 'NOT_FOUND' },
-    });
-    mockTrpcClient.agentEvalExternal.runGet.query.mockRejectedValue(error);
-
-    const program = createProgram();
-    await program.parseAsync([
-      'node',
-      'test',
-      'eval',
-      'run',
-      'get',
-      '--run-id',
-      'run-404',
-      '--json',
-    ]);
-
-    const payload = JSON.parse(logSpy.mock.calls[0][0]);
-    expect(payload).toEqual({
-      data: null,
-      error: { code: 'NOT_FOUND', message: 'Run not found' },
-      ok: false,
-      version: 'v1',
-    });
-    expect(exitSpy).toHaveBeenCalledWith(1);
-  });
-
-  it('should query test case count', async () => {
-    mockTrpcClient.agentEvalExternal.testCasesCount.query.mockResolvedValue({ count: 12 });
-
-    const program = createProgram();
-    await program.parseAsync([
-      'node',
-      'test',
-      'eval',
-      'test-cases',
-      'count',
-      '--dataset-id',
-      'dataset-1',
-      '--json',
-    ]);
-
-    expect(mockTrpcClient.agentEvalExternal.testCasesCount.query).toHaveBeenCalledWith({
-      datasetId: 'dataset-1',
-    });
-  });
-
-  it('should log plain error without --json', async () => {
-    mockTrpcClient.agentEvalExternal.threadsList.query.mockRejectedValue(new Error('boom'));
-
-    const program = createProgram();
-    await program.parseAsync(['node', 'test', 'eval', 'threads', 'list', '--topic-id', 'topic-1']);
-
-    expect(log.error).toHaveBeenCalledWith('boom');
-    expect(exitSpy).toHaveBeenCalledWith(1);
-  });
 });
--- a/apps/cli/src/commands/eval.ts
+++ b/apps/cli/src/commands/eval.ts
@@ -23,46 +23,6 @@ interface JsonOption {
  json?: boolean;
 }

-interface RunGetOptions extends JsonOption {
-  runId: string;
-}
-
-interface RunSetStatusOptions extends JsonOption {
-  runId: string;
-  status: 'completed' | 'external';
-}
-
-interface DatasetGetOptions extends JsonOption {
-  datasetId: string;
-}
-
-interface RunTopicsListOptions extends JsonOption {
-  onlyExternal?: boolean;
-  runId: string;
-}
-
-interface ThreadsListOptions extends JsonOption {
-  topicId: string;
-}
-
-interface MessagesListOptions extends JsonOption {
-  threadId?: string;
-  topicId: string;
-}
-
-interface TestCasesCountOptions extends JsonOption {
-  datasetId: string;
-}
-
-interface RunTopicReportResultOptions extends JsonOption {
-  correct: boolean;
-  resultJson: Record<string, unknown>;
-  runId: string;
-  score: number;
-  threadId?: string;
-  topicId: string;
-}
-
 const printJson = (data: unknown) => {
  console.log(JSON.stringify(data, null, 2));
 };
@@ -180,65 +140,587 @@ const executeCommand = async (
 };

 export function registerEvalCommand(program: Command) {
-  const evalCmd = program.command('eval').description('Manage external evaluation workflows');
+  const evalCmd = program.command('eval').description('Manage evaluation workflows'); // root `eval` command; each subcommand group below is attached to it

+  // ============================================
+  // Benchmark Operations: `eval benchmark <list|get|create|update|delete>` (agentEval router)
+  // ============================================
+  const benchmarkCmd = evalCmd.command('benchmark').description('Manage evaluation benchmarks');
+  // list: query agentEval.listBenchmarks and let executeCommand handle output and errors.
+  benchmarkCmd
+    .command('list')
+    .description('List benchmarks')
+    .option('--include-system', 'Include system benchmarks')
+    .option('--json', 'Output JSON envelope')
+    .action(async (options: JsonOption & { includeSystem?: boolean }) =>
+      executeCommand(options, async () => {
+        const client = await getTrpcClient();
+        return client.agentEval.listBenchmarks.query({
+          includeSystem: options.includeSystem ?? true, // NOTE(review): the bare flag is true or undefined, so this always sends true — confirm intent
+        });
+      }),
+    );
+
+  benchmarkCmd // get: fetch a single benchmark by its required --id
+    .command('get')
+    .description('Get benchmark details')
+    .requiredOption('--id <id>', 'Benchmark ID')
+    .option('--json', 'Output JSON envelope')
+    .action(async (options: JsonOption & { id: string }) =>
+      executeCommand(options, async () => {
+        const client = await getTrpcClient();
+        return client.agentEval.getBenchmark.query({ id: options.id });
+      }),
+    );
+
+  benchmarkCmd // create: --identifier and --name are required; the other fields are optional
+    .command('create')
+    .description('Create a benchmark')
+    .requiredOption('--identifier <identifier>', 'Unique identifier')
+    .requiredOption('-n, --name <name>', 'Benchmark name')
+    .option('-d, --description <desc>', 'Description')
+    .option('--reference-url <url>', 'Reference URL')
+    .option('--json', 'Output JSON envelope')
+    .action(
+      async (
+        options: JsonOption & {
+          description?: string;
+          identifier: string;
+          name: string;
+          referenceUrl?: string;
+        },
+      ) =>
+        executeCommand(
+          options,
+          async () => {
+            const client = await getTrpcClient();
+            const input: Record<string, any> = {
+              identifier: options.identifier,
+              name: options.name,
+            };
+            if (options.description) input.description = options.description; // optional keys are added only when non-empty
+            if (options.referenceUrl) input.referenceUrl = options.referenceUrl;
+            return client.agentEval.createBenchmark.mutate(input as any); // `as any`: the loosely typed record bypasses the mutation's input type
+          },
+          `Created benchmark ${pc.bold(options.name)}`, // third executeCommand argument; presumably the success message shown without --json
+        ),
+    );
+
+  benchmarkCmd // update: patch a benchmark by --id; only the fields the caller supplied are sent
+    .command('update')
+    .description('Update a benchmark')
+    .requiredOption('--id <id>', 'Benchmark ID')
+    .option('-n, --name <name>', 'New name')
+    .option('-d, --description <desc>', 'New description')
+    .option('--reference-url <url>', 'New reference URL')
+    .option('--json', 'Output JSON envelope')
+    .action(
+      async (
+        options: JsonOption & {
+          description?: string;
+          id: string;
+          name?: string;
+          referenceUrl?: string;
+        },
+      ) =>
+        executeCommand(
+          options,
+          async () => {
+            const client = await getTrpcClient();
+            const input: Record<string, any> = { id: options.id };
+            if (options.name) input.name = options.name;
+            if (options.description !== undefined) input.description = options.description; // forward '' too, so `--description ""` clears the text instead of being dropped
+            if (options.referenceUrl) input.referenceUrl = options.referenceUrl;
+            return client.agentEval.updateBenchmark.mutate(input as any); // `as any`: the loosely typed record bypasses the mutation's input type
+          },
+          `Updated benchmark ${pc.bold(options.id)}`, // third executeCommand argument; presumably the success message shown without --json
+        ),
+    );
+
+  benchmarkCmd // delete: remove a benchmark by its required --id
+    .command('delete')
+    .description('Delete a benchmark')
+    .requiredOption('--id <id>', 'Benchmark ID')
+    .option('--json', 'Output JSON envelope')
+    .action(async (options: JsonOption & { id: string }) =>
+      executeCommand(
+        options,
+        async () => {
+          const client = await getTrpcClient();
+          return client.agentEval.deleteBenchmark.mutate({ id: options.id });
+        },
+        `Deleted benchmark ${pc.bold(options.id)}`, // third executeCommand argument; presumably the success message shown without --json
+      ),
+    );
+
+  // ============================================
+  // Dataset Operations: `eval dataset <list|get|create|update|delete>`; `get` can also use the external eval API
+  // ============================================
+  const datasetCmd = evalCmd.command('dataset').description('Manage evaluation datasets');
+  // list: optionally filtered by --benchmark-id.
+  datasetCmd
+    .command('list')
+    .description('List datasets')
+    .option('--benchmark-id <id>', 'Filter by benchmark ID')
+    .option('--json', 'Output JSON envelope')
+    .action(async (options: JsonOption & { benchmarkId?: string }) =>
+      executeCommand(options, async () => {
+        const client = await getTrpcClient();
+        return client.agentEval.listDatasets.query(
+          options.benchmarkId ? { benchmarkId: options.benchmarkId } : undefined, // no filter: the query is called with undefined, not {}
+        );
+      }),
+    );
+
+  datasetCmd // get: one --id flag serves both APIs; --external selects which router is queried
+    .command('get')
+    .description('Get dataset details (use --external for external eval API)')
+    .requiredOption('--id <id>', 'Dataset ID')
+    .option('--external', 'Use external evaluation API')
+    .option('--json', 'Output JSON envelope')
+    .action(async (options: JsonOption & { external?: boolean; id: string }) =>
+      executeCommand(options, async () => {
+        const client = await getTrpcClient();
+        if (options.external) {
+          return client.agentEvalExternal.datasetGet.query({ datasetId: options.id }); // the external router names the key `datasetId`
+        }
+        return client.agentEval.getDataset.query({ id: options.id }); // the internal router names the key `id`
+      }),
+    );
+
+  datasetCmd // create: --benchmark-id, --identifier and --name are required; the other fields are optional
+    .command('create')
+    .description('Create a dataset')
+    .requiredOption('--benchmark-id <id>', 'Benchmark ID')
+    .requiredOption('--identifier <identifier>', 'Unique identifier')
+    .requiredOption('-n, --name <name>', 'Dataset name')
+    .option('-d, --description <desc>', 'Description')
+    .option('--eval-mode <mode>', 'Evaluation mode')
+    .option('--json', 'Output JSON envelope')
+    .action(
+      async (
+        options: JsonOption & {
+          benchmarkId: string;
+          description?: string;
+          evalMode?: string;
+          identifier: string;
+          name: string;
+        },
+      ) =>
+        executeCommand(
+          options,
+          async () => {
+            const client = await getTrpcClient();
+            const input: Record<string, any> = {
+              benchmarkId: options.benchmarkId,
+              identifier: options.identifier,
+              name: options.name,
+            };
+            if (options.description) input.description = options.description; // optional keys are added only when non-empty
+            if (options.evalMode) input.evalMode = options.evalMode; // forwarded as-is; no validation of the mode happens in this command
+            return client.agentEval.createDataset.mutate(input as any); // `as any`: the loosely typed record bypasses the mutation's input type
+          },
+          `Created dataset ${pc.bold(options.name)}`, // third executeCommand argument; presumably the success message shown without --json
+        ),
+    );
+
+  datasetCmd // update: patch a dataset by --id; only the fields the caller supplied are sent
+    .command('update')
+    .description('Update a dataset')
+    .requiredOption('--id <id>', 'Dataset ID')
+    .option('-n, --name <name>', 'New name')
+    .option('-d, --description <desc>', 'New description')
+    .option('--eval-mode <mode>', 'New evaluation mode')
+    .option('--json', 'Output JSON envelope')
+    .action(
+      async (
+        options: JsonOption & {
+          description?: string;
+          evalMode?: string;
+          id: string;
+          name?: string;
+        },
+      ) =>
+        executeCommand(
+          options,
+          async () => {
+            const client = await getTrpcClient();
+            const input: Record<string, any> = { id: options.id };
+            if (options.name) input.name = options.name;
+            if (options.description !== undefined) input.description = options.description; // forward '' too, so `--description ""` clears the text instead of being dropped
+            if (options.evalMode) input.evalMode = options.evalMode;
+            return client.agentEval.updateDataset.mutate(input as any); // `as any`: the loosely typed record bypasses the mutation's input type
+          },
+          `Updated dataset ${pc.bold(options.id)}`, // third executeCommand argument; presumably the success message shown without --json
+        ),
+    );
+
+  datasetCmd // delete: remove a dataset by its required --id
+    .command('delete')
+    .description('Delete a dataset')
+    .requiredOption('--id <id>', 'Dataset ID')
+    .option('--json', 'Output JSON envelope')
+    .action(async (options: JsonOption & { id: string }) =>
+      executeCommand(
+        options,
+        async () => {
+          const client = await getTrpcClient();
+          return client.agentEval.deleteDataset.mutate({ id: options.id });
+        },
+        `Deleted dataset ${pc.bold(options.id)}`, // third executeCommand argument; presumably the success message shown without --json
+      ),
+    );
+
+  // ============================================
+  // TestCase Operations: `eval testcase <list|get|create|update|delete|count>`; `count` uses the external eval API
+  // ============================================
+  const testcaseCmd = evalCmd.command('testcase').description('Manage evaluation test cases');
+  // list: paginated by --limit/--offset, which arrive as strings and are parsed as base-10 integers.
+  testcaseCmd
+    .command('list')
+    .description('List test cases')
+    .requiredOption('--dataset-id <id>', 'Dataset ID')
+    .option('-L, --limit <n>', 'Page size', '50')
+    .option('--offset <n>', 'Offset', '0')
+    .option('--json', 'Output JSON envelope')
+    .action(async (options: JsonOption & { datasetId: string; limit?: string; offset?: string }) =>
+      executeCommand(options, async () => {
+        const client = await getTrpcClient();
+        return client.agentEval.listTestCases.query({
+          datasetId: options.datasetId,
+          limit: Number.parseInt(options.limit || '50', 10), // NOTE(review): a non-numeric value parses to NaN and is forwarded as-is
+          offset: Number.parseInt(options.offset || '0', 10), // the `||` fallback repeats the option default declared above
+        });
+      }),
+    );
+
+  testcaseCmd // get: fetch a single test case by its required --id
+    .command('get')
+    .description('Get test case details')
+    .requiredOption('--id <id>', 'Test case ID')
+    .option('--json', 'Output JSON envelope')
+    .action(async (options: JsonOption & { id: string }) =>
+      executeCommand(options, async () => {
+        const client = await getTrpcClient();
+        return client.agentEval.getTestCase.query({ id: options.id });
+      }),
+    );
+
+  testcaseCmd // create: --dataset-id and --input are required; input/expected/category are nested under `content`
+    .command('create')
+    .description('Create a test case')
+    .requiredOption('--dataset-id <id>', 'Dataset ID')
+    .requiredOption('--input <text>', 'Input text')
+    .option('--expected <text>', 'Expected output')
+    .option('--category <cat>', 'Category')
+    .option('--sort-order <n>', 'Sort order')
+    .option('--json', 'Output JSON envelope')
+    .action(
+      async (
+        options: JsonOption & {
+          category?: string;
+          datasetId: string;
+          expected?: string;
+          input: string;
+          sortOrder?: string;
+        },
+      ) =>
+        executeCommand(
+          options,
+          async () => {
+            const client = await getTrpcClient();
+            const content: Record<string, any> = { input: options.input };
+            if (options.expected) content.expected = options.expected;
+            if (options.category) content.category = options.category;
+            // `content` carries the test-case text fields; datasetId and sortOrder stay at the top level of the payload.
+            const input: Record<string, any> = { content, datasetId: options.datasetId };
+            if (options.sortOrder) input.sortOrder = Number.parseInt(options.sortOrder, 10); // the string '0' is truthy, so an explicit 0 is still sent
+            return client.agentEval.createTestCase.mutate(input as any); // `as any`: the loosely typed record bypasses the mutation's input type
+          },
+          'Created test case',
+        ),
+    );
+
+  testcaseCmd // update: patch a test case by --id; `content` is sent only when at least one content field was supplied
+    .command('update')
+    .description('Update a test case')
+    .requiredOption('--id <id>', 'Test case ID')
+    .option('--input <text>', 'New input text')
+    .option('--expected <text>', 'New expected output')
+    .option('--category <cat>', 'New category')
+    .option('--sort-order <n>', 'New sort order')
+    .option('--json', 'Output JSON envelope')
+    .action(
+      async (
+        options: JsonOption & {
+          category?: string;
+          expected?: string;
+          id: string;
+          input?: string;
+          sortOrder?: string;
+        },
+      ) =>
+        executeCommand(
+          options,
+          async () => {
+            const client = await getTrpcClient();
+            const input: Record<string, any> = { id: options.id };
+            const content: Record<string, any> = {};
+            if (options.input) content.input = options.input; // an empty input is still skipped (create requires --input)
+            if (options.expected !== undefined) content.expected = options.expected; // forward '' so `--expected ""` clears the value instead of being dropped
+            if (options.category !== undefined) content.category = options.category; // same for `--category ""`
+            if (Object.keys(content).length > 0) input.content = content;
+            if (options.sortOrder) input.sortOrder = Number.parseInt(options.sortOrder, 10); // the string '0' is truthy, so an explicit 0 is still sent
+            return client.agentEval.updateTestCase.mutate(input as any); // `as any`: the loosely typed record bypasses the mutation's input type
+          },
+          `Updated test case ${pc.bold(options.id)}`, // third executeCommand argument; presumably the success message shown without --json
+        ),
+    );
+
+  testcaseCmd // delete: remove a test case by its required --id
+    .command('delete')
+    .description('Delete a test case')
+    .requiredOption('--id <id>', 'Test case ID')
+    .option('--json', 'Output JSON envelope')
+    .action(async (options: JsonOption & { id: string }) =>
+      executeCommand(
+        options,
+        async () => {
+          const client = await getTrpcClient();
+          return client.agentEval.deleteTestCase.mutate({ id: options.id });
+        },
+        `Deleted test case ${pc.bold(options.id)}`, // third executeCommand argument; presumably the success message shown without --json
+      ),
+    );
+
+  testcaseCmd // count: the only testcase subcommand that calls the agentEvalExternal router
+    .command('count')
+    .description('Count test cases by dataset (external eval API)')
+    .requiredOption('--dataset-id <id>', 'Dataset ID')
+    .option('--json', 'Output JSON envelope')
+    .action(async (options: JsonOption & { datasetId: string }) =>
+      executeCommand(options, async () => {
+        const client = await getTrpcClient();
+        return client.agentEvalExternal.testCasesCount.query({ datasetId: options.datasetId });
+      }),
+    );
+
+  // ============================================
+  // Run Operations: `eval run <list|get|create|delete|start|abort|retry-errors|progress|results|set-status>`
+  // ============================================
  const runCmd = evalCmd.command('run').description('Manage evaluation runs');

  runCmd
-    .command('get')
-    .description('Get run information')
-    .requiredOption('--run-id <id>', 'Run ID')
+    .command('list')
+    .description('List evaluation runs')
+    .option('--benchmark-id <id>', 'Filter by benchmark ID')
+    .option('--dataset-id <id>', 'Filter by dataset ID')
+    .option('--status <status>', 'Filter by status')
+    .option('-L, --limit <n>', 'Page size', '50')
+    .option('--offset <n>', 'Offset', '0')
    .option('--json', 'Output JSON envelope')
-    .action(async (options: RunGetOptions) =>
+    .action(
+      async (
+        options: JsonOption & {
+          benchmarkId?: string;
+          datasetId?: string;
+          limit?: string;
+          offset?: string;
+          status?: string;
+        },
+      ) =>
+        executeCommand(options, async () => {
+          const client = await getTrpcClient();
+          const input: Record<string, any> = {}; // filters are added only when given; limit/offset are always set
+          if (options.benchmarkId) input.benchmarkId = options.benchmarkId;
+          if (options.datasetId) input.datasetId = options.datasetId;
+          if (options.status) input.status = options.status; // forwarded as-is; no validation of the status happens in this command
+          input.limit = Number.parseInt(options.limit || '50', 10); // NOTE(review): a non-numeric value parses to NaN and is forwarded as-is
+          input.offset = Number.parseInt(options.offset || '0', 10);
+          return client.agentEval.listRuns.query(input as any); // `as any`: the loosely typed record bypasses the query's input type
+        }),
+    );
+
+  runCmd // get: one --id flag serves both APIs; --external selects which router is queried
+    .command('get')
+    .description('Get run details (use --external for external eval API)')
+    .requiredOption('--id <id>', 'Run ID')
+    .option('--external', 'Use external evaluation API')
+    .option('--json', 'Output JSON envelope')
+    .action(async (options: JsonOption & { external?: boolean; id: string }) =>
      executeCommand(options, async () => {
        const client = await getTrpcClient();
-        return client.agentEvalExternal.runGet.query({ runId: options.runId });
+        if (options.external) {
+          return client.agentEvalExternal.runGet.query({ runId: options.id }); // the external router names the key `runId`
+        }
+        return client.agentEval.getRunDetails.query({ id: options.id }); // the internal router names the key `id`
+      }),
+    );
+
+  runCmd // create: only --dataset-id is required; numeric tuning flags are grouped under `config`
+    .command('create')
+    .description('Create an evaluation run')
+    .requiredOption('--dataset-id <id>', 'Dataset ID')
+    .option('--agent-id <id>', 'Target agent ID')
+    .option('-n, --name <name>', 'Run name')
+    .option('--k <n>', 'Number of runs per test case (1-10)')
+    .option('--max-concurrency <n>', 'Max concurrency (1-10)')
+    .option('--max-steps <n>', 'Max steps (1-1000)')
+    .option('--timeout <ms>', 'Timeout in ms (60000-3600000)')
+    .option('--json', 'Output JSON envelope')
+    .action(
+      async (
+        options: JsonOption & {
+          agentId?: string;
+          datasetId: string;
+          k?: string;
+          maxConcurrency?: string;
+          maxSteps?: string;
+          name?: string;
+          timeout?: string;
+        },
+      ) =>
+        executeCommand(
+          options,
+          async () => {
+            const client = await getTrpcClient();
+            const input: Record<string, any> = { datasetId: options.datasetId };
+            if (options.agentId) input.targetAgentId = options.agentId; // the CLI's --agent-id maps to the payload key `targetAgentId`
+            if (options.name) input.name = options.name;
+            const config: Record<string, any> = {}; // the ranges in the help text are not checked here; values are only parsed
+            if (options.k) config.k = Number.parseInt(options.k, 10);
+            if (options.maxConcurrency)
+              config.maxConcurrency = Number.parseInt(options.maxConcurrency, 10);
+            if (options.maxSteps) config.maxSteps = Number.parseInt(options.maxSteps, 10);
+            if (options.timeout) config.timeout = Number.parseInt(options.timeout, 10);
+            if (Object.keys(config).length > 0) input.config = config; // `config` is omitted entirely when no tuning flag was passed
+            return client.agentEval.createRun.mutate(input as any); // `as any`: the loosely typed record bypasses the mutation's input type
+          },
+          'Created evaluation run',
+        ),
+    );
+
+  runCmd // delete: remove a run by its required --id
+    .command('delete')
+    .description('Delete an evaluation run')
+    .requiredOption('--id <id>', 'Run ID')
+    .option('--json', 'Output JSON envelope')
+    .action(async (options: JsonOption & { id: string }) =>
+      executeCommand(
+        options,
+        async () => {
+          const client = await getTrpcClient();
+          return client.agentEval.deleteRun.mutate({ id: options.id });
+        },
+        `Deleted run ${pc.bold(options.id)}`, // third executeCommand argument; presumably the success message shown without --json
+      ),
+    );
+
+  runCmd // start: start a run by --id; --force is passed through to the mutation
+    .command('start')
+    .description('Start an evaluation run')
+    .requiredOption('--id <id>', 'Run ID')
+    .option('--force', 'Force restart even if already running')
+    .option('--json', 'Output JSON envelope')
+    .action(async (options: JsonOption & { force?: boolean; id: string }) =>
+      executeCommand(
+        options,
+        async () => {
+          const client = await getTrpcClient();
+          return client.agentEval.startRun.mutate({ id: options.id, force: options.force }); // `force` is undefined when the flag is absent
+        },
+        `Started run ${pc.bold(options.id)}`, // third executeCommand argument; presumably the success message shown without --json
+      ),
+    );
+
+  runCmd // abort: call agentEval.abortRun for the run given by --id
+    .command('abort')
+    .description('Abort a running evaluation')
+    .requiredOption('--id <id>', 'Run ID')
+    .option('--json', 'Output JSON envelope')
+    .action(async (options: JsonOption & { id: string }) =>
+      executeCommand(
+        options,
+        async () => {
+          const client = await getTrpcClient();
+          return client.agentEval.abortRun.mutate({ id: options.id });
+        },
+        `Aborted run ${pc.bold(options.id)}`, // third executeCommand argument; presumably the success message shown without --json
+      ),
+    );
+
+  runCmd // retry-errors: call agentEval.retryRunErrors for the run given by --id
+    .command('retry-errors')
+    .description('Retry failed test cases in a run')
+    .requiredOption('--id <id>', 'Run ID')
+    .option('--json', 'Output JSON envelope')
+    .action(async (options: JsonOption & { id: string }) =>
+      executeCommand(
+        options,
+        async () => {
+          const client = await getTrpcClient();
+          return client.agentEval.retryRunErrors.mutate({ id: options.id });
+        },
+        `Retrying errors for run ${pc.bold(options.id)}`, // third executeCommand argument; presumably the success message shown without --json
+      ),
+    );
+
+  runCmd // progress: read-only query of agentEval.getRunProgress for the run given by --id
+    .command('progress')
+    .description('Get run progress')
+    .requiredOption('--id <id>', 'Run ID')
+    .option('--json', 'Output JSON envelope')
+    .action(async (options: JsonOption & { id: string }) =>
+      executeCommand(options, async () => {
+        const client = await getTrpcClient();
+        return client.agentEval.getRunProgress.query({ id: options.id });
+      }),
+    );
+
+  runCmd // results: read-only query of agentEval.getRunResults for the run given by --id
+    .command('results')
+    .description('Get run results')
+    .requiredOption('--id <id>', 'Run ID')
+    .option('--json', 'Output JSON envelope')
+    .action(async (options: JsonOption & { id: string }) =>
+      executeCommand(options, async () => {
+        const client = await getTrpcClient();
+        return client.agentEval.getRunResults.query({ id: options.id });
      }),
    );

  runCmd
    .command('set-status')
-    .description('Set run status (external API supports completed or external)')
-    .requiredOption('--run-id <id>', 'Run ID')
+    .description('Set run status (external eval API, supports completed or external)')
+    .requiredOption('--id <id>', 'Run ID') // renamed from --run-id so every `eval run` subcommand takes --id
    .requiredOption('--status <status>', 'Status (completed | external)', parseRunStatus)
    .option('--json', 'Output JSON envelope')
-    .action(async (options: RunSetStatusOptions) =>
+    .action(async (options: JsonOption & { id: string; status: 'completed' | 'external' }) =>
      executeCommand(
        options,
        async () => {
          const client = await getTrpcClient();
          return client.agentEvalExternal.runSetStatus.mutate({
-            runId: options.runId,
+            runId: options.id, // the external mutation still names the key `runId`
            status: options.status,
          });
        },
-        `Run ${pc.bold(options.runId)} status updated to ${pc.bold(options.status)}`,
+        `Run ${pc.bold(options.id)} status updated to ${pc.bold(options.status)}`, // the set-status test expects this text in the log output
      ),
    );

-  evalCmd
-    .command('dataset')
-    .description('Manage evaluation datasets')
-    .command('get')
-    .description('Get dataset information')
-    .requiredOption('--dataset-id <id>', 'Dataset ID')
-    .option('--json', 'Output JSON envelope')
-    .action(async (options: DatasetGetOptions) =>
-      executeCommand(options, async () => {
-        const client = await getTrpcClient();
-        return client.agentEvalExternal.datasetGet.query({ datasetId: options.datasetId });
-      }),
-    );
+  // ============================================
+  // Run-Topic Operations (external eval API): one shared `eval run-topic` parent command for its subcommands
+  // ============================================
+  const runTopicCmd = evalCmd.command('run-topic').description('Manage evaluation run topics');

-  evalCmd
-    .command('run-topics')
-    .description('Manage run topics')
+  runTopicCmd
    .command('list')
    .description('List topics in a run')
    .requiredOption('--run-id <id>', 'Run ID')
    .option('--only-external', 'Only return topics pending external evaluation')
    .option('--json', 'Output JSON envelope')
-    .action(async (options: RunTopicsListOptions) =>
+    .action(async (options: JsonOption & { onlyExternal?: boolean; runId: string }) =>
      executeCommand(options, async () => {
        const client = await getTrpcClient();
        return client.agentEvalExternal.runTopicsList.query({
@@ -248,55 +730,7 @@ export function registerEvalCommand(program: Command) {
      }),
    );

-  evalCmd
-    .command('threads')
-    .description('Manage evaluation threads')
-    .command('list')
-    .description('List threads by topic')
-    .requiredOption('--topic-id <id>', 'Topic ID')
-    .option('--json', 'Output JSON envelope')
-    .action(async (options: ThreadsListOptions) =>
-      executeCommand(options, async () => {
-        const client = await getTrpcClient();
-        return client.agentEvalExternal.threadsList.query({ topicId: options.topicId });
-      }),
-    );
-
-  evalCmd
-    .command('messages')
-    .description('Manage evaluation messages')
-    .command('list')
-    .description('List messages by topic and optional thread')
-    .requiredOption('--topic-id <id>', 'Topic ID')
-    .option('--thread-id <id>', 'Thread ID')
-    .option('--json', 'Output JSON envelope')
-    .action(async (options: MessagesListOptions) =>
-      executeCommand(options, async () => {
-        const client = await getTrpcClient();
-        return client.agentEvalExternal.messagesList.query({
-          threadId: options.threadId,
-          topicId: options.topicId,
-        });
-      }),
-    );
-
-  evalCmd
-    .command('test-cases')
-    .description('Manage evaluation test cases')
-    .command('count')
-    .description('Count test cases by dataset')
-    .requiredOption('--dataset-id <id>', 'Dataset ID')
-    .option('--json', 'Output JSON envelope')
-    .action(async (options: TestCasesCountOptions) =>
-      executeCommand(options, async () => {
-        const client = await getTrpcClient();
-        return client.agentEvalExternal.testCasesCount.query({ datasetId: options.datasetId });
-      }),
-    );
-
-  evalCmd
-    .command('run-topic')
-    .description('Manage evaluation run-topic reporting')
+  runTopicCmd
    .command('report-result')
    .description('Report one evaluation result for a run topic')
    .requiredOption('--run-id <id>', 'Run ID')
@@ -306,21 +740,69 @@ export function registerEvalCommand(program: Command) {
    .requiredOption('--correct <boolean>', 'Whether the result is correct', parseBoolean)
    .requiredOption('--result-json <json>', 'Raw evaluation result JSON object', parseResultJson)
    .option('--json', 'Output JSON envelope')
-    .action(async (options: RunTopicReportResultOptions) =>
-      executeCommand(
-        options,
-        async () => {
-          const client = await getTrpcClient();
-          return client.agentEvalExternal.runTopicReportResult.mutate({
-            correct: options.correct,
-            result: options.resultJson,
-            runId: options.runId,
-            score: options.score,
-            threadId: options.threadId,
-            topicId: options.topicId,
-          });
+    .action(
+      async (
+        options: JsonOption & {
+          correct: boolean;
+          resultJson: Record<string, unknown>;
+          runId: string;
+          score: number;
+          threadId?: string;
+          topicId: string;
        },
-        `Reported result for topic ${pc.bold(options.topicId)}`,
-      ),
+      ) =>
+        executeCommand(
+          options,
+          async () => {
+            const client = await getTrpcClient();
+            return client.agentEvalExternal.runTopicReportResult.mutate({
+              correct: options.correct,
+              result: options.resultJson,
+              runId: options.runId,
+              score: options.score,
+              threadId: options.threadId,
+              topicId: options.topicId,
+            });
+          },
+          `Reported result for topic ${pc.bold(options.topicId)}`,
+        ),
+    );
+
+  // ============================================
+  // Eval Thread Operations (external eval API)
+  // ============================================
+  evalCmd
+    .command('thread')
+    .description('Manage evaluation threads')
+    .command('list')
+    .description('List threads by topic')
+    .requiredOption('--topic-id <id>', 'Topic ID')
+    .option('--json', 'Output JSON envelope')
+    // Forwards --topic-id to the external eval API's threadsList query via executeCommand.
+    .action(async (options: JsonOption & { topicId: string }) => {
+      const { topicId } = options;
+
+      return executeCommand(options, async () => {
+        const trpc = await getTrpcClient();
+        return trpc.agentEvalExternal.threadsList.query({ topicId });
+      });
+    });
+
+  // ============================================
+  // Eval Message Operations (external eval API)
+  // ============================================
+  evalCmd
+    .command('message')
+    .description('Manage evaluation messages')
+    .command('list')
+    .description('List messages by topic and optional thread')
+    .requiredOption('--topic-id <id>', 'Topic ID')
+    .option('--thread-id <id>', 'Thread ID')
+    .option('--json', 'Output JSON envelope')
+    // --thread-id is optional; when omitted, `threadId` is forwarded as undefined.
+    .action(async (options: JsonOption & { threadId?: string; topicId: string }) =>
+      executeCommand(options, async () => {
+        const { threadId, topicId } = options;
+        const trpc = await getTrpcClient();
+        return trpc.agentEvalExternal.messagesList.query({ threadId, topicId });
+      }),
    );
 }
--- a/apps/cli/src/commands/file.test.ts
+++ b/apps/cli/src/commands/file.test.ts
@@ -7,11 +7,15 @@ import { registerFileCommand } from './file';
 const { mockTrpcClient } = vi.hoisted(() => ({
  mockTrpcClient: {
    file: {
+      checkFileHash: { mutate: vi.fn() },
+      createFile: { mutate: vi.fn() },
      getFileItemById: { query: vi.fn() },
      getFiles: { query: vi.fn() },
+      getKnowledgeItems: { query: vi.fn() },
      recentFiles: { query: vi.fn() },
      removeFile: { mutate: vi.fn() },
      removeFiles: { mutate: vi.fn() },
+      updateFile: { mutate: vi.fn() },
    },
  },
 }));
@@ -152,6 +156,105 @@ describe('file command', () => {
    });
  });

+  // `file upload <url>`: the hash check runs first and decides whether createFile is called.
+  describe('upload', () => {
+    it('should upload file by URL', async () => {
+      // Hash is unknown to the server, so the command should proceed to createFile.
+      mockTrpcClient.file.checkFileHash.mutate.mockResolvedValue({ isExist: false });
+      mockTrpcClient.file.createFile.mutate.mockResolvedValue({
+        id: 'f-new',
+        url: 'https://cdn.example.com/f-new',
+      });
+
+      const program = createProgram();
+      await program.parseAsync([
+        'node',
+        'test',
+        'file',
+        'upload',
+        'https://example.com/doc.pdf',
+        '--hash',
+        'abc123',
+        '--name',
+        'doc.pdf',
+      ]);
+
+      expect(mockTrpcClient.file.checkFileHash.mutate).toHaveBeenCalledWith({ hash: 'abc123' });
+      // objectContaining: only the fields supplied on the command line are pinned here.
+      expect(mockTrpcClient.file.createFile.mutate).toHaveBeenCalledWith(
+        expect.objectContaining({
+          url: 'https://example.com/doc.pdf',
+          name: 'doc.pdf',
+          hash: 'abc123',
+        }),
+      );
+      expect(consoleSpy).toHaveBeenCalledWith(expect.stringContaining('File created'));
+    });
+
+    it('should skip upload when hash exists', async () => {
+      // Server reports the hash as already stored: no createFile call, a notice is printed.
+      mockTrpcClient.file.checkFileHash.mutate.mockResolvedValue({ isExist: true });
+
+      const program = createProgram();
+      await program.parseAsync([
+        'node',
+        'test',
+        'file',
+        'upload',
+        'https://example.com/doc.pdf',
+        '--hash',
+        'abc123',
+      ]);
+
+      expect(mockTrpcClient.file.createFile.mutate).not.toHaveBeenCalled();
+      expect(consoleSpy).toHaveBeenCalledWith(expect.stringContaining('already exists'));
+    });
+  });
+
+  // `file edit <id>`: moves a file via --parent-id and rejects invocations with no changes.
+  describe('edit', () => {
+    it('should update file parent', async () => {
+      mockTrpcClient.file.updateFile.mutate.mockResolvedValue({ success: true });
+
+      const program = createProgram();
+      await program.parseAsync(['node', 'test', 'file', 'edit', 'f1', '--parent-id', 'folder1']);
+
+      // Exact match: the mutation payload is just the id and the new parent.
+      expect(mockTrpcClient.file.updateFile.mutate).toHaveBeenCalledWith({
+        id: 'f1',
+        parentId: 'folder1',
+      });
+      expect(consoleSpy).toHaveBeenCalledWith(expect.stringContaining('Updated file'));
+    });
+
+    it('should error when no changes specified', async () => {
+      const program = createProgram();
+      await program.parseAsync(['node', 'test', 'file', 'edit', 'f1']);
+
+      // exitSpy is expected to record the call with exit code 1.
+      expect(log.error).toHaveBeenCalledWith(expect.stringContaining('No changes'));
+      expect(exitSpy).toHaveBeenCalledWith(1);
+    });
+  });
+
+  // `file kb-items <id>`: lists knowledge items returned by file.getKnowledgeItems.
+  describe('kb-items', () => {
+    it('should list knowledge items for a file', async () => {
+      // The mocked response wraps the rows in an `items` property.
+      mockTrpcClient.file.getKnowledgeItems.query.mockResolvedValue({
+        items: [{ id: 'ki1', name: 'Item 1', type: 'chunk' }],
+      });
+
+      const program = createProgram();
+      await program.parseAsync(['node', 'test', 'file', 'kb-items', 'f1']);
+
+      expect(mockTrpcClient.file.getKnowledgeItems.query).toHaveBeenCalledWith(
+        expect.objectContaining({ fileId: 'f1' }),
+      );
+      // Presumably one header line plus one row from printTable — confirm against its implementation.
+      expect(consoleSpy).toHaveBeenCalledTimes(2);
+    });
+
+    it('should show empty message', async () => {
+      mockTrpcClient.file.getKnowledgeItems.query.mockResolvedValue({ items: [] });
+
+      const program = createProgram();
+      await program.parseAsync(['node', 'test', 'file', 'kb-items', 'f1']);
+
+      expect(consoleSpy).toHaveBeenCalledWith('No knowledge items found.');
+    });
+  });
+
  describe('recent', () => {
    it('should list recent files', async () => {
      mockTrpcClient.file.recentFiles.query.mockResolvedValue([
--- a/apps/cli/src/commands/file.ts
+++ b/apps/cli/src/commands/file.ts
@@ -110,6 +110,117 @@ export function registerFileCommand(program: Command) {
      console.log(`${pc.green('✓')} Deleted ${ids.length} file(s)`);
    });

+  // ── upload ───────────────────────────────────────────
+
+  /**
+   * `file upload <url>`: creates a file record for `<url>` via `file.createFile`.
+   *
+   * When `--hash` is given, `file.checkFileHash` is consulted first; if it reports
+   * `isExist`, nothing is created. With `--json`, stdout carries JSON only (no
+   * human-readable notice), so the output stays machine-parseable.
+   * An unparseable or negative `--size` is rejected before any network call.
+   */
+  file
+    .command('upload <url>')
+    .description('Upload a file by URL (checks hash first)')
+    .option('--hash <hash>', 'File hash for deduplication check')
+    .option('--name <name>', 'File name')
+    .option('--type <type>', 'File MIME type')
+    .option('--size <size>', 'File size in bytes')
+    .option('--parent-id <id>', 'Parent folder ID')
+    .option('--json [fields]', 'Output JSON, optionally specify fields (comma-separated)')
+    .action(
+      async (
+        url: string,
+        options: {
+          hash?: string;
+          json?: string | boolean;
+          name?: string;
+          parentId?: string;
+          size?: string;
+          type?: string;
+        },
+      ) => {
+        // Validate --size up front so NaN is never sent to the API.
+        let size: number | undefined;
+        if (options.size) {
+          size = Number.parseInt(options.size, 10);
+          if (!Number.isFinite(size) || size < 0) {
+            log.error('Invalid --size. Provide a non-negative integer number of bytes.');
+            process.exit(1);
+            return;
+          }
+        }
+
+        const wantsJson = options.json !== undefined;
+        const client = await getTrpcClient();
+
+        // Check hash first if provided
+        if (options.hash) {
+          const check = await client.file.checkFileHash.mutate({ hash: options.hash });
+          if ((check as any)?.isExist) {
+            // Emit either JSON or the notice, never both, to keep --json output clean.
+            if (wantsJson) {
+              outputJson(check);
+            } else {
+              console.log(`${pc.yellow('!')} File with this hash already exists.`);
+            }
+            return;
+          }
+        }
+
+        // Only forward the fields the user actually supplied.
+        const input: Record<string, any> = { url };
+        if (options.name) input.name = options.name;
+        if (options.type) input.fileType = options.type;
+        if (size !== undefined) input.size = size;
+        if (options.hash) input.hash = options.hash;
+        if (options.parentId) input.parentId = options.parentId;
+
+        const result = await client.file.createFile.mutate(input as any);
+
+        if (wantsJson) {
+          const fields = typeof options.json === 'string' ? options.json : undefined;
+          outputJson(result, fields);
+          return;
+        }
+
+        const r = result as any;
+        console.log(`${pc.green('✓')} File created: ${pc.bold(r.id || '')}`);
+        if (r.url) console.log(`  URL: ${pc.dim(r.url)}`);
+      },
+    );
+
+  // ── edit ─────────────────────────────────────────────
+
+  /**
+   * `file edit <id>`: updates a file record via `file.updateFile`.
+   *
+   * `--parent-id` is the only supported change; the literal string "null" is
+   * translated to `parentId: null`. Without `--parent-id` the command logs an
+   * error and exits with code 1 without contacting the API.
+   */
+  file
+    .command('edit <id>')
+    .description('Update file info (e.g. move to folder)')
+    .option('--parent-id <id>', 'Move file to a folder (use "null" to unset)')
+    .action(async (id: string, options: { parentId?: string }) => {
+      if (!options.parentId) {
+        log.error('No changes specified. Use --parent-id.');
+        process.exit(1);
+        // process.exit can be stubbed (as the tests do); return so no update is sent.
+        return;
+      }
+
+      const client = await getTrpcClient();
+      const parentId = options.parentId === 'null' ? null : options.parentId;
+      await client.file.updateFile.mutate({ id, parentId } as any);
+      console.log(`${pc.green('✓')} Updated file ${pc.bold(id)}`);
+    });
+
+  // ── kb-items ────────────────────────────────────────
+
+  /**
+   * `file kb-items <id>`: queries `file.getKnowledgeItems` for the given file id
+   * and prints the result as a table (or JSON with `--json`).
+   *
+   * `--limit` must parse as a positive integer; otherwise the command logs an
+   * error and exits with code 1 instead of sending NaN to the API.
+   */
+  file
+    .command('kb-items <id>')
+    .description('View knowledge base items associated with a file')
+    .option('-L, --limit <n>', 'Maximum number of items', '30')
+    .option('--json [fields]', 'Output JSON, optionally specify fields (comma-separated)')
+    .action(async (id: string, options: { json?: string | boolean; limit?: string }) => {
+      let limit: number | undefined;
+      if (options.limit) {
+        limit = Number.parseInt(options.limit, 10);
+        if (!Number.isInteger(limit) || limit <= 0) {
+          log.error('Invalid --limit. Provide a positive integer.');
+          process.exit(1);
+          return;
+        }
+      }
+
+      const client = await getTrpcClient();
+      const input: any = { fileId: id };
+      if (limit !== undefined) input.limit = limit;
+
+      const result = await client.file.getKnowledgeItems.query(input);
+      // Accept either a bare array or an `{ items }` wrapper; a nullish response means no items.
+      const items = Array.isArray(result) ? result : ((result as any)?.items ?? []);
+
+      if (options.json !== undefined) {
+        const fields = typeof options.json === 'string' ? options.json : undefined;
+        outputJson(items, fields);
+        return;
+      }
+
+      if (items.length === 0) {
+        console.log('No knowledge items found.');
+        return;
+      }
+
+      // CONTENT falls back from `name` to `text` and is truncated to 60 characters.
+      const rows = items.map((item: any) => [
+        item.id || '',
+        truncate(item.name || item.text || '', 60),
+        item.type || '',
+      ]);
+
+      printTable(rows, ['ID', 'CONTENT', 'TYPE']);
+    });
+
  // ── recent ────────────────────────────────────────────

  file
--- a/apps/cli/src/commands/generate.test.ts
+++ b/apps/cli/src/commands/generate.test.ts
@@ -7,6 +7,7 @@ import { registerGenerateCommand } from './generate';
 const { mockTrpcClient } = vi.hoisted(() => ({
  mockTrpcClient: {
    generation: {
+      deleteGeneration: { mutate: vi.fn() },
      getGenerationStatus: { query: vi.fn() },
    },
    generationTopic: {
@@ -329,6 +330,20 @@ describe('generate command', () => {
    });
  });

+  // `generate delete <generationId>`: --yes is passed so the confirmation prompt is skipped.
+  describe('delete', () => {
+    it('should delete a generation with --yes', async () => {
+      mockTrpcClient.generation.deleteGeneration.mutate.mockResolvedValue({});
+
+      const program = createProgram();
+      await program.parseAsync(['node', 'test', 'generate', 'delete', 'gen-1', '--yes']);
+
+      // The positional argument is forwarded as `generationId`.
+      expect(mockTrpcClient.generation.deleteGeneration.mutate).toHaveBeenCalledWith({
+        generationId: 'gen-1',
+      });
+      expect(consoleSpy).toHaveBeenCalledWith(expect.stringContaining('Deleted generation'));
+    });
+  });
+
  describe('status', () => {
    it('should show generation status', async () => {
      mockTrpcClient.generation.getGenerationStatus.query.mockResolvedValue({
--- a/apps/cli/src/commands/generate/index.ts
+++ b/apps/cli/src/commands/generate/index.ts
@@ -2,7 +2,7 @@ import type { Command } from 'commander';
 import pc from 'picocolors';

 import { getTrpcClient } from '../../api/client';
-import { outputJson, printTable, timeAgo, truncate } from '../../utils/format';
+import { confirm, outputJson, printTable, timeAgo, truncate } from '../../utils/format';
 import { registerAsrCommand } from './asr';
 import { registerImageCommand } from './image';
 import { registerTextCommand } from './text';
@@ -137,6 +137,25 @@ export function registerGenerateCommand(program: Command) {
      },
    );

+  // ── delete ─────────────────────────────────────────
+  generate
+    .command('delete <generationId>')
+    .description('Delete a generation record')
+    .option('--yes', 'Skip confirmation prompt')
+    .action(async (generationId: string, options: { yes?: boolean }) => {
+      // --yes short-circuits the prompt; otherwise the user has to confirm explicitly.
+      const proceed =
+        options.yes || (await confirm('Are you sure you want to delete this generation?'));
+      if (!proceed) {
+        console.log('Cancelled.');
+        return;
+      }
+
+      const trpc = await getTrpcClient();
+      await trpc.generation.deleteGeneration.mutate({ generationId });
+
+      const label = pc.bold(generationId);
+      console.log(`${pc.green('✓')} Deleted generation ${label}`);
+    });
+
  // ── list ────────────────────────────────────────────
  generate
    .command('list')
--- a/apps/cli/src/commands/model.test.ts
+++ b/apps/cli/src/commands/model.test.ts
@@ -8,6 +8,7 @@ const { mockTrpcClient } = vi.hoisted(() => ({
  mockTrpcClient: {
    aiModel: {
      batchToggleAiModels: { mutate: vi.fn() },
+      batchUpdateAiModels: { mutate: vi.fn() },
      clearModelsByProvider: { mutate: vi.fn() },
      clearRemoteModels: { mutate: vi.fn() },
      createAiModel: { mutate: vi.fn() },
@@ -16,6 +17,7 @@ const { mockTrpcClient } = vi.hoisted(() => ({
      removeAiModel: { mutate: vi.fn() },
      toggleModelEnabled: { mutate: vi.fn() },
      updateAiModel: { mutate: vi.fn() },
+      updateAiModelOrder: { mutate: vi.fn() },
    },
  },
 }));
@@ -279,6 +281,83 @@ describe('model command', () => {
    });
  });

+  // `model batch-update <providerId> --models <json>`: JSON is parsed client-side before the call.
+  describe('batch-update', () => {
+    it('should batch update models', async () => {
+      mockTrpcClient.aiModel.batchUpdateAiModels.mutate.mockResolvedValue({});
+
+      const program = createProgram();
+      await program.parseAsync([
+        'node',
+        'test',
+        'model',
+        'batch-update',
+        'openai',
+        '--models',
+        '[{"id":"gpt-4","displayName":"GPT-4 Updated"}]',
+      ]);
+
+      // The provider id is sent under the `id` key, alongside the parsed array.
+      expect(mockTrpcClient.aiModel.batchUpdateAiModels.mutate).toHaveBeenCalledWith(
+        expect.objectContaining({
+          id: 'openai',
+          models: [{ id: 'gpt-4', displayName: 'GPT-4 Updated' }],
+        }),
+      );
+      expect(consoleSpy).toHaveBeenCalledWith(expect.stringContaining('Batch updated'));
+    });
+
+    it('should reject invalid JSON', async () => {
+      const program = createProgram();
+      await program.parseAsync([
+        'node',
+        'test',
+        'model',
+        'batch-update',
+        'openai',
+        '--models',
+        'not-json',
+      ]);
+
+      // Unparseable input is reported through log.error and exit code 1.
+      expect(log.error).toHaveBeenCalledWith(expect.stringContaining('Invalid models JSON'));
+      expect(exitSpy).toHaveBeenCalledWith(1);
+    });
+  });
+
+  // `model sort <providerId> --sort-map <json>`: forwards the parsed array to updateAiModelOrder.
+  describe('sort', () => {
+    it('should update model sort order', async () => {
+      mockTrpcClient.aiModel.updateAiModelOrder.mutate.mockResolvedValue({});
+
+      const program = createProgram();
+      await program.parseAsync([
+        'node',
+        'test',
+        'model',
+        'sort',
+        'openai',
+        '--sort-map',
+        '[{"id":"gpt-4","sort":0},{"id":"gpt-3.5","sort":1}]',
+      ]);
+
+      // Unlike batch-update, the provider id is sent under the `providerId` key here.
+      expect(mockTrpcClient.aiModel.updateAiModelOrder.mutate).toHaveBeenCalledWith(
+        expect.objectContaining({
+          providerId: 'openai',
+          sortMap: [
+            { id: 'gpt-4', sort: 0 },
+            { id: 'gpt-3.5', sort: 1 },
+          ],
+        }),
+      );
+      expect(consoleSpy).toHaveBeenCalledWith(expect.stringContaining('Updated sort order'));
+    });
+
+    it('should reject invalid JSON', async () => {
+      const program = createProgram();
+      await program.parseAsync(['node', 'test', 'model', 'sort', 'openai', '--sort-map', '{bad}']);
+
+      // Unparseable input is reported through log.error and exit code 1.
+      expect(log.error).toHaveBeenCalledWith(expect.stringContaining('Invalid sort-map JSON'));
+      expect(exitSpy).toHaveBeenCalledWith(1);
+    });
+  });
+
  describe('clear', () => {
    it('should clear all models for provider', async () => {
      mockTrpcClient.aiModel.clearModelsByProvider.mutate.mockResolvedValue({});
--- a/apps/cli/src/commands/model.ts
+++ b/apps/cli/src/commands/model.ts
@@ -228,6 +228,64 @@ export function registerModelCommand(program: Command) {
      },
    );

+  // ── batch-update ──────────────────────────────────────
+
+  model
+    .command('batch-update <providerId>')
+    .description('Batch update models for a provider')
+    .requiredOption('--models <json>', 'JSON array of model objects')
+    .action(async (providerId: string, options: { models: string }) => {
+      // Parse first, then shape-check: either failure exits with code 1 before any API call.
+      let parsed: unknown;
+      try {
+        parsed = JSON.parse(options.models);
+      } catch {
+        log.error('Invalid models JSON. Provide a JSON array.');
+        process.exit(1);
+        return;
+      }
+
+      const isList = Array.isArray(parsed);
+      if (!isList) {
+        log.error('--models must be a JSON array.');
+        process.exit(1);
+        return;
+      }
+
+      const models = parsed as any[];
+      const trpc = await getTrpcClient();
+      await trpc.aiModel.batchUpdateAiModels.mutate({ id: providerId, models } as any);
+
+      const summary = `Batch updated ${models.length} model(s) for provider ${pc.bold(providerId)}`;
+      console.log(`${pc.green('✓')} ${summary}`);
+    });
+
+  // ── sort ──────────────────────────────────────────────
+
+  model
+    .command('sort <providerId>')
+    .description('Update model sort order')
+    .requiredOption('--sort-map <json>', 'JSON array of {id, sort, type?} objects')
+    .action(async (providerId: string, options: { sortMap: string }) => {
+      // Parse first, then shape-check: either failure exits with code 1 before any API call.
+      let parsed: unknown;
+      try {
+        parsed = JSON.parse(options.sortMap);
+      } catch {
+        log.error('Invalid sort-map JSON. Provide a JSON array.');
+        process.exit(1);
+        return;
+      }
+
+      const isList = Array.isArray(parsed);
+      if (!isList) {
+        log.error('--sort-map must be a JSON array.');
+        process.exit(1);
+        return;
+      }
+
+      const sortMap = parsed as any[];
+      const trpc = await getTrpcClient();
+      await trpc.aiModel.updateAiModelOrder.mutate({ providerId, sortMap } as any);
+
+      const summary = `Updated sort order for ${sortMap.length} model(s) in provider ${pc.bold(providerId)}`;
+      console.log(`${pc.green('✓')} ${summary}`);
+    });
+
  // ── clear ───────────────────────────────────────────

  model
--- a/apps/cli/src/commands/plugin.test.ts
+++ b/apps/cli/src/commands/plugin.test.ts
@@ -8,6 +8,7 @@ const { mockTrpcClient } = vi.hoisted(() => ({
  mockTrpcClient: {
    plugin: {
      createOrInstallPlugin: { mutate: vi.fn() },
+      createPlugin: { mutate: vi.fn() },
      getPlugins: { query: vi.fn() },
      removePlugin: { mutate: vi.fn() },
      updatePlugin: { mutate: vi.fn() },
@@ -75,6 +76,50 @@ describe('plugin command', () => {
    });
  });

+  // `plugin create`: parses --manifest as JSON and calls plugin.createPlugin.
+  describe('create', () => {
+    it('should create a plugin', async () => {
+      mockTrpcClient.plugin.createPlugin.mutate.mockResolvedValue({ identifier: 'my-plugin' });
+
+      const program = createProgram();
+      await program.parseAsync([
+        'node',
+        'test',
+        'plugin',
+        'create',
+        '-i',
+        'my-plugin',
+        '--manifest',
+        '{"name":"test"}',
+      ]);
+
+      // --type is not passed, so the expected `type` is the option's default value.
+      expect(mockTrpcClient.plugin.createPlugin.mutate).toHaveBeenCalledWith(
+        expect.objectContaining({
+          identifier: 'my-plugin',
+          manifest: { name: 'test' },
+          type: 'plugin',
+        }),
+      );
+      expect(consoleSpy).toHaveBeenCalledWith(expect.stringContaining('Created plugin'));
+    });
+
+    it('should reject invalid manifest JSON', async () => {
+      const program = createProgram();
+      await program.parseAsync([
+        'node',
+        'test',
+        'plugin',
+        'create',
+        '-i',
+        'my-plugin',
+        '--manifest',
+        'not-json',
+      ]);
+
+      // The error message is matched exactly, not by substring.
+      expect(log.error).toHaveBeenCalledWith('Invalid manifest JSON.');
+      expect(exitSpy).toHaveBeenCalledWith(1);
+    });
+  });
+
  describe('install', () => {
    it('should install a plugin', async () => {
      mockTrpcClient.plugin.createOrInstallPlugin.mutate.mockResolvedValue({});
--- a/apps/cli/src/commands/plugin.ts
+++ b/apps/cli/src/commands/plugin.ts
@@ -40,6 +40,56 @@ export function registerPluginCommand(program: Command) {
      printTable(rows, ['ID', 'IDENTIFIER', 'TYPE', 'TITLE']);
    });

+  // ── create ──────────────────────────────────────────
+
+  /**
+   * `plugin create`: calls `plugin.createPlugin` with an identifier, a parsed
+   * manifest, a plugin type and optional custom parameters.
+   *
+   * `--type` must be "plugin" or "customPlugin"; `--manifest` and
+   * `--custom-params` must be valid JSON. Any validation failure logs an error
+   * and exits with code 1 before the API is contacted.
+   */
+  plugin
+    .command('create')
+    .description('Create a new plugin (without settings)')
+    .requiredOption('-i, --identifier <id>', 'Plugin identifier')
+    .requiredOption('--manifest <json>', 'Plugin manifest JSON')
+    .option('--type <type>', 'Plugin type: plugin or customPlugin', 'plugin')
+    .option('--custom-params <json>', 'Custom parameters JSON')
+    .action(
+      async (options: {
+        customParams?: string;
+        identifier: string;
+        manifest: string;
+        type: string;
+      }) => {
+        // Check --type at runtime instead of trusting an unchecked cast.
+        const pluginType = options.type;
+        if (pluginType !== 'plugin' && pluginType !== 'customPlugin') {
+          log.error('Invalid --type. Use "plugin" or "customPlugin".');
+          process.exit(1);
+          return;
+        }
+
+        let manifest: any;
+        let customParams: any = {};
+        try {
+          manifest = JSON.parse(options.manifest);
+        } catch {
+          log.error('Invalid manifest JSON.');
+          process.exit(1);
+          return;
+        }
+        if (options.customParams) {
+          try {
+            customParams = JSON.parse(options.customParams);
+          } catch {
+            log.error('Invalid custom-params JSON.');
+            process.exit(1);
+            return;
+          }
+        }
+
+        const client = await getTrpcClient();
+        const result = await client.plugin.createPlugin.mutate({
+          customParams,
+          identifier: options.identifier,
+          manifest,
+          type: pluginType,
+        });
+
+        // Prefer the identifier echoed by the API; fall back to the one supplied on the CLI.
+        const r = result as any;
+        console.log(
+          `${pc.green('✓')} Created plugin ${pc.bold(r.identifier || options.identifier)}`,
+        );
+      },
+    );
+
  // ── install ───────────────────────────────────────────

  plugin
--- a/apps/cli/src/commands/thread.test.ts
+++ b/apps/cli/src/commands/thread.test.ts
@@ -0,0 +1,121 @@
+import { Command } from 'commander';
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+
+import { registerThreadCommand } from './thread';
+
+// Created inside vi.hoisted so the mocks exist before the vi.mock factories below run.
+const { mockTrpcClient } = vi.hoisted(() => ({
+  mockTrpcClient: {
+    thread: {
+      getThread: { query: vi.fn() },
+      getThreads: { query: vi.fn() },
+      removeThread: { mutate: vi.fn() },
+    },
+  },
+}));
+
+const { getTrpcClient: mockGetTrpcClient } = vi.hoisted(() => ({
+  getTrpcClient: vi.fn(),
+}));
+
+// Replace the real API client and logger modules with the stubs above.
+vi.mock('../api/client', () => ({ getTrpcClient: mockGetTrpcClient }));
+vi.mock('../utils/logger', () => ({
+  log: { debug: vi.fn(), error: vi.fn(), info: vi.fn(), warn: vi.fn() },
+  setVerbose: vi.fn(),
+}));
+
+// Exercises the `thread` CLI command (list, list-all, delete) against the mocked tRPC client.
+describe('thread command', () => {
+  let exitSpy: ReturnType<typeof vi.spyOn>;
+  let consoleSpy: ReturnType<typeof vi.spyOn>;
+
+  beforeEach(() => {
+    // Stub process.exit and console.log so commands neither terminate the run nor print.
+    exitSpy = vi.spyOn(process, 'exit').mockImplementation((() => {}) as any);
+    consoleSpy = vi.spyOn(console, 'log').mockImplementation(() => {});
+    mockGetTrpcClient.mockResolvedValue(mockTrpcClient);
+    // Reset every query/mutate stub so resolved values do not leak between tests.
+    for (const method of Object.values(mockTrpcClient.thread)) {
+      for (const fn of Object.values(method)) {
+        (fn as ReturnType<typeof vi.fn>).mockReset();
+      }
+    }
+  });
+
+  afterEach(() => {
+    exitSpy.mockRestore();
+    consoleSpy.mockRestore();
+  });
+
+  // Builds a fresh commander program with only the thread command registered.
+  function createProgram() {
+    const program = new Command();
+    program.exitOverride();
+    registerThreadCommand(program);
+    return program;
+  }
+
+  describe('list', () => {
+    it('should list threads by topic', async () => {
+      mockTrpcClient.thread.getThreads.query.mockResolvedValue([
+        { id: 't1', title: 'Thread 1', type: 'standalone' },
+      ]);
+
+      const program = createProgram();
+      await program.parseAsync(['node', 'test', 'thread', 'list', '--topic-id', 'topic1']);
+
+      expect(mockTrpcClient.thread.getThreads.query).toHaveBeenCalledWith({ topicId: 'topic1' });
+    });
+
+    it('should show empty message when no threads', async () => {
+      mockTrpcClient.thread.getThreads.query.mockResolvedValue([]);
+
+      const program = createProgram();
+      await program.parseAsync(['node', 'test', 'thread', 'list', '--topic-id', 'topic1']);
+
+      expect(consoleSpy).toHaveBeenCalledWith('No threads found.');
+    });
+  });
+
+  describe('list-all', () => {
+    it('should list all threads', async () => {
+      // list-all is expected to hit `getThread` (singular), not `getThreads`.
+      mockTrpcClient.thread.getThread.query.mockResolvedValue([
+        { id: 't1', title: 'Thread 1', type: 'standalone' },
+      ]);
+
+      const program = createProgram();
+      await program.parseAsync(['node', 'test', 'thread', 'list-all']);
+
+      expect(mockTrpcClient.thread.getThread.query).toHaveBeenCalled();
+    });
+  });
+
+  describe('delete', () => {
+    it('should delete a thread', async () => {
+      mockTrpcClient.thread.removeThread.mutate.mockResolvedValue({});
+
+      const program = createProgram();
+      await program.parseAsync(['node', 'test', 'thread', 'delete', 't1', '--yes']);
+
+      // Without --remove-children the flag is forwarded as undefined rather than false.
+      expect(mockTrpcClient.thread.removeThread.mutate).toHaveBeenCalledWith({
+        id: 't1',
+        removeChildren: undefined,
+      });
+    });
+
+    it('should delete with remove-children flag', async () => {
+      mockTrpcClient.thread.removeThread.mutate.mockResolvedValue({});
+
+      const program = createProgram();
+      await program.parseAsync([
+        'node',
+        'test',
+        'thread',
+        'delete',
+        't1',
+        '--remove-children',
+        '--yes',
+      ]);
+
+      expect(mockTrpcClient.thread.removeThread.mutate).toHaveBeenCalledWith({
+        id: 't1',
+        removeChildren: true,
+      });
+    });
+  });
+});
--- a/apps/cli/src/commands/thread.ts
+++ b/apps/cli/src/commands/thread.ts
@@ -0,0 +1,99 @@
+import type { Command } from 'commander';
+import pc from 'picocolors';
+
+import { getTrpcClient } from '../api/client';
+import { confirm, outputJson, printTable, timeAgo, truncate } from '../utils/format';
+
+/**
+ * Registers the `thread` command group on the given commander program.
+ *
+ * Subcommands:
+ * - `list --topic-id <id>`: threads returned by `thread.getThreads` for one topic.
+ * - `list-all`: threads returned by `thread.getThread` (called without input).
+ * - `delete <id>`: removes a thread, optionally with `--remove-children`.
+ */
+export function registerThreadCommand(program: Command) {
+  const thread = program.command('thread').description('Manage message threads');
+
+  // Anything that is not an array is treated as "no threads".
+  const toList = (value: unknown): any[] => (Array.isArray(value) ? value : []);
+
+  // Only a string value of --json carries a field list; a bare flag selects all fields.
+  const fieldsOf = (json: string | boolean | undefined) =>
+    typeof json === 'string' ? json : undefined;
+
+  // ── list ──────────────────────────────────────────────
+
+  thread
+    .command('list')
+    .description('List threads by topic')
+    .requiredOption('--topic-id <id>', 'Topic ID')
+    .option('--json [fields]', 'Output JSON, optionally specify fields (comma-separated)')
+    .action(async (options: { json?: string | boolean; topicId: string }) => {
+      const trpc = await getTrpcClient();
+      const threads = toList(await trpc.thread.getThreads.query({ topicId: options.topicId }));
+
+      if (options.json !== undefined) {
+        outputJson(threads, fieldsOf(options.json));
+        return;
+      }
+
+      if (threads.length === 0) {
+        console.log('No threads found.');
+        return;
+      }
+
+      printTable(
+        threads.map((entry: any) => [
+          entry.id || '',
+          truncate(entry.title || 'Untitled', 50),
+          entry.type || '',
+          entry.updatedAt ? timeAgo(entry.updatedAt) : '',
+        ]),
+        ['ID', 'TITLE', 'TYPE', 'UPDATED'],
+      );
+    });
+
+  // ── list-all ──────────────────────────────────────────
+
+  thread
+    .command('list-all')
+    .description('List all threads for the current user')
+    .option('--json [fields]', 'Output JSON, optionally specify fields (comma-separated)')
+    .action(async (options: { json?: string | boolean }) => {
+      const trpc = await getTrpcClient();
+      const threads = toList(await trpc.thread.getThread.query());
+
+      if (options.json !== undefined) {
+        outputJson(threads, fieldsOf(options.json));
+        return;
+      }
+
+      if (threads.length === 0) {
+        console.log('No threads found.');
+        return;
+      }
+
+      // Same columns as `list`, plus the owning topic id.
+      printTable(
+        threads.map((entry: any) => [
+          entry.id || '',
+          truncate(entry.title || 'Untitled', 50),
+          entry.type || '',
+          entry.topicId || '',
+          entry.updatedAt ? timeAgo(entry.updatedAt) : '',
+        ]),
+        ['ID', 'TITLE', 'TYPE', 'TOPIC', 'UPDATED'],
+      );
+    });
+
+  // ── delete ────────────────────────────────────────────
+
+  thread
+    .command('delete <id>')
+    .description('Delete a thread')
+    .option('--remove-children', 'Also remove child messages')
+    .option('--yes', 'Skip confirmation prompt')
+    .action(async (id: string, options: { removeChildren?: boolean; yes?: boolean }) => {
+      // --yes short-circuits the prompt; otherwise the user has to confirm explicitly.
+      const proceed =
+        options.yes || (await confirm('Are you sure you want to delete this thread?'));
+      if (!proceed) {
+        console.log('Cancelled.');
+        return;
+      }
+
+      const trpc = await getTrpcClient();
+      const { removeChildren } = options;
+      await trpc.thread.removeThread.mutate({ id, removeChildren });
+      console.log(`${pc.green('✓')} Deleted thread ${pc.bold(id)}`);
+    });
+}
--- a/apps/cli/src/commands/user.test.ts
+++ b/apps/cli/src/commands/user.test.ts
@@ -0,0 +1,191 @@
+import { Command } from 'commander';
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+
+import { log } from '../utils/logger';
+import { registerUserCommand } from './user';
+
+// Spies are created in `vi.hoisted` so the `vi.mock` factories below can reference them.
+const { mockGetTrpcClient, mockTrpcClient } = vi.hoisted(() => ({
+  mockGetTrpcClient: vi.fn(),
+  mockTrpcClient: {
+    user: {
+      getUserRegistrationDuration: { query: vi.fn() },
+      updateAvatar: { mutate: vi.fn() },
+      updateFullName: { mutate: vi.fn() },
+      updatePreference: { mutate: vi.fn() },
+      updateSettings: { mutate: vi.fn() },
+      updateUsername: { mutate: vi.fn() },
+    },
+  },
+}));
+
+// The client module is replaced so commands obtain whatever `mockGetTrpcClient` resolves.
+vi.mock('../api/client', () => ({ getTrpcClient: mockGetTrpcClient }));
+// Logger replacement: every method is a spy so error paths can be asserted.
+vi.mock('../utils/logger', () => {
+  const log = { debug: vi.fn(), error: vi.fn(), info: vi.fn(), warn: vi.fn() };
+  return { log, setVerbose: vi.fn() };
+});
+
+/**
+ * Tests for the `user` CLI command group.
+ *
+ * Each case registers the command on a fresh commander program, parses an argv, and then
+ * inspects the mocked tRPC client, `console.log`, the mocked logger and `process.exit`.
+ */
+describe('user command', () => {
+  let exitSpy: ReturnType<typeof vi.spyOn>;
+  let consoleSpy: ReturnType<typeof vi.spyOn>;
+
+  beforeEach(() => {
+    // `process.exit` becomes a no-op so failure paths do not terminate the test runner.
+    exitSpy = vi.spyOn(process, 'exit').mockImplementation((() => {}) as any);
+    consoleSpy = vi.spyOn(console, 'log').mockImplementation(() => {});
+    mockGetTrpcClient.mockResolvedValue(mockTrpcClient);
+    for (const method of Object.values(mockTrpcClient.user)) {
+      for (const fn of Object.values(method)) {
+        (fn as ReturnType<typeof vi.fn>).mockReset();
+      }
+    }
+    // The logger mock lives at module level: clear its history so a `log.error` assertion
+    // cannot be satisfied by a call recorded during an earlier test.
+    vi.mocked(log.error).mockClear();
+  });
+
+  afterEach(() => {
+    exitSpy.mockRestore();
+    consoleSpy.mockRestore();
+  });
+
+  function createProgram() {
+    const program = new Command();
+    program.exitOverride();
+    registerUserCommand(program);
+    return program;
+  }
+
+  /** Parses `user <args...>` with a freshly registered program. */
+  function runUser(...args: string[]) {
+    return createProgram().parseAsync(['node', 'test', 'user', ...args]);
+  }
+
+  describe('info', () => {
+    it('should display registration duration', async () => {
+      const durationMs = 30 * 24 * 60 * 60 * 1000; // 30 days
+      mockTrpcClient.user.getUserRegistrationDuration.query.mockResolvedValue(durationMs);
+
+      await runUser('info');
+
+      expect(consoleSpy).toHaveBeenCalledWith(expect.stringContaining('30'));
+    });
+
+    it('should output JSON', async () => {
+      mockTrpcClient.user.getUserRegistrationDuration.query.mockResolvedValue(86400000);
+
+      await runUser('info', '--json');
+
+      expect(consoleSpy).toHaveBeenCalledWith(JSON.stringify(86400000, null, 2));
+    });
+  });
+
+  describe('settings', () => {
+    it('should update settings', async () => {
+      mockTrpcClient.user.updateSettings.mutate.mockResolvedValue({});
+
+      await runUser('settings', '--data', '{"language":"en"}');
+
+      expect(mockTrpcClient.user.updateSettings.mutate).toHaveBeenCalledWith({ language: 'en' });
+      expect(consoleSpy).toHaveBeenCalledWith(expect.stringContaining('Settings updated'));
+    });
+
+    it('should reject invalid JSON', async () => {
+      await runUser('settings', '--data', 'not-json');
+
+      expect(log.error).toHaveBeenCalledWith('Invalid settings JSON.');
+      expect(exitSpy).toHaveBeenCalledWith(1);
+      // Nothing may be sent to the server once the payload has been rejected.
+      expect(mockTrpcClient.user.updateSettings.mutate).not.toHaveBeenCalled();
+    });
+  });
+
+  describe('preferences', () => {
+    it('should update preferences', async () => {
+      mockTrpcClient.user.updatePreference.mutate.mockResolvedValue({});
+
+      await runUser('preferences', '--data', '{"theme":"dark"}');
+
+      expect(mockTrpcClient.user.updatePreference.mutate).toHaveBeenCalledWith({ theme: 'dark' });
+      expect(consoleSpy).toHaveBeenCalledWith(expect.stringContaining('Preferences updated'));
+    });
+
+    it('should reject invalid JSON', async () => {
+      await runUser('preferences', '--data', '{bad}');
+
+      expect(log.error).toHaveBeenCalledWith('Invalid preferences JSON.');
+      expect(exitSpy).toHaveBeenCalledWith(1);
+      // The rejected payload must never reach the server.
+      expect(mockTrpcClient.user.updatePreference.mutate).not.toHaveBeenCalled();
+    });
+  });
+
+  describe('update-avatar', () => {
+    const avatarUrl = 'https://example.com/avatar.png';
+
+    it('should update avatar', async () => {
+      mockTrpcClient.user.updateAvatar.mutate.mockResolvedValue({ avatar: 'new-url' });
+
+      await runUser('update-avatar', avatarUrl);
+
+      expect(mockTrpcClient.user.updateAvatar.mutate).toHaveBeenCalledWith(avatarUrl);
+      expect(consoleSpy).toHaveBeenCalledWith(expect.stringContaining('Avatar updated'));
+    });
+
+    it('should output JSON', async () => {
+      const result = { avatar: 'new-url' };
+      mockTrpcClient.user.updateAvatar.mutate.mockResolvedValue(result);
+
+      await runUser('update-avatar', avatarUrl, '--json');
+
+      expect(consoleSpy).toHaveBeenCalledWith(JSON.stringify(result, null, 2));
+    });
+  });
+
+  describe('update-name', () => {
+    it('should update full name', async () => {
+      mockTrpcClient.user.updateFullName.mutate.mockResolvedValue({});
+
+      await runUser('update-name', '--full-name', 'John Doe');
+
+      expect(mockTrpcClient.user.updateFullName.mutate).toHaveBeenCalledWith('John Doe');
+      expect(consoleSpy).toHaveBeenCalledWith(expect.stringContaining('Full name updated'));
+    });
+
+    it('should update username', async () => {
+      mockTrpcClient.user.updateUsername.mutate.mockResolvedValue({});
+
+      await runUser('update-name', '--username', 'johndoe');
+
+      expect(mockTrpcClient.user.updateUsername.mutate).toHaveBeenCalledWith('johndoe');
+      expect(consoleSpy).toHaveBeenCalledWith(expect.stringContaining('Username updated'));
+    });
+
+    it('should update both names when both flags are given', async () => {
+      mockTrpcClient.user.updateFullName.mutate.mockResolvedValue({});
+      mockTrpcClient.user.updateUsername.mutate.mockResolvedValue({});
+
+      await runUser('update-name', '--full-name', 'John Doe', '--username', 'johndoe');
+
+      expect(mockTrpcClient.user.updateFullName.mutate).toHaveBeenCalledWith('John Doe');
+      expect(mockTrpcClient.user.updateUsername.mutate).toHaveBeenCalledWith('johndoe');
+    });
+
+    it('should error when no changes specified', async () => {
+      await runUser('update-name');
+
+      expect(log.error).toHaveBeenCalledWith(expect.stringContaining('No changes'));
+      expect(exitSpy).toHaveBeenCalledWith(1);
+      expect(mockTrpcClient.user.updateFullName.mutate).not.toHaveBeenCalled();
+      expect(mockTrpcClient.user.updateUsername.mutate).not.toHaveBeenCalled();
+    });
+  });
+});
--- a/apps/cli/src/commands/user.ts
+++ b/apps/cli/src/commands/user.ts
@@ -0,0 +1,123 @@
+import type { Command } from 'commander';
+import pc from 'picocolors';
+
+import { getTrpcClient } from '../api/client';
+import { outputJson } from '../utils/format';
+import { log } from '../utils/logger';
+
+/**
+ * Parses a `--data` payload that must be a JSON object; anything else is reported through
+ * `log.error` and `process.exit(1)`, and `undefined` is returned only if `exit` returns.
+ */
+function parseJsonObjectOrExit(raw: string, label: string): any {
+  try {
+    const parsed: unknown = JSON.parse(raw);
+    if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) return parsed;
+  } catch {
+    // Malformed input falls through to the same error report as a non-object value.
+  }
+  log.error(`Invalid ${label} JSON.`);
+  process.exit(1);
+  return undefined;
+}
+
+/** Registers the `user` command group and its subcommands on `program`. */
+export function registerUserCommand(program: Command) {
+  const user = program.command('user').description('Manage user account and settings');
+
+  // ── info ──────────────────────────────────────────────
+
+  user
+    .command('info')
+    .description('View user registration info')
+    .option('--json [fields]', 'Output JSON, optionally specify fields (comma-separated)')
+    .action(async (options: { json?: string | boolean }) => {
+      const client = await getTrpcClient();
+      const result = await client.user.getUserRegistrationDuration.query();
+      if (options.json !== undefined) {
+        outputJson(result, typeof options.json === 'string' ? options.json : undefined);
+        return;
+      }
+
+      // A bare number is read as a duration in milliseconds; other shapes are printed as-is.
+      const r = result as any;
+      if (typeof r === 'number') {
+        const days = Math.floor(r / (1000 * 60 * 60 * 24));
+        console.log(`Registered for ${pc.bold(String(days))} day(s).`);
+      } else {
+        console.log(JSON.stringify(result, null, 2));
+      }
+    });
+
+  // ── settings ──────────────────────────────────────────
+
+  user
+    .command('settings')
+    .description('Update user settings')
+    .requiredOption('--data <json>', 'Settings JSON')
+    .action(async (options: { data: string }) => {
+      const data = parseJsonObjectOrExit(options.data, 'settings');
+      if (data === undefined) return;
+
+      const client = await getTrpcClient();
+      await client.user.updateSettings.mutate(data);
+      console.log(`${pc.green('✓')} Settings updated.`);
+    });
+
+  // ── preferences ───────────────────────────────────────
+
+  user
+    .command('preferences')
+    .description('Update user preferences')
+    .requiredOption('--data <json>', 'Preferences JSON')
+    .action(async (options: { data: string }) => {
+      const data = parseJsonObjectOrExit(options.data, 'preferences');
+      if (data === undefined) return;
+
+      const client = await getTrpcClient();
+      await client.user.updatePreference.mutate(data);
+      console.log(`${pc.green('✓')} Preferences updated.`);
+    });
+
+  // ── update-avatar ─────────────────────────────────────
+
+  user
+    .command('update-avatar <url>')
+    .description('Update user avatar (URL or Base64)')
+    .option('--json [fields]', 'Output JSON, optionally specify fields (comma-separated)')
+    .action(async (url: string, options: { json?: string | boolean }) => {
+      const client = await getTrpcClient();
+      const result = await client.user.updateAvatar.mutate(url);
+      if (options.json !== undefined) {
+        outputJson(result, typeof options.json === 'string' ? options.json : undefined);
+        return;
+      }
+
+      console.log(`${pc.green('✓')} Avatar updated.`);
+    });
+
+  // ── update-name ───────────────────────────────────────
+
+  user
+    .command('update-name')
+    .description('Update user full name or username')
+    .option('--full-name <name>', 'Update full name (max 64 chars)')
+    .option('--username <name>', 'Update username (alphanumeric + underscore)')
+    .action(async (options: { fullName?: string; username?: string }) => {
+      if (!options.fullName && !options.username) {
+        log.error('No changes specified. Use --full-name or --username.');
+        process.exit(1);
+        return;
+      }
+
+      const client = await getTrpcClient();
+      if (options.fullName) {
+        await client.user.updateFullName.mutate(options.fullName);
+        console.log(`${pc.green('✓')} Full name updated to ${pc.bold(options.fullName)}`);
+      }
+      if (options.username) {
+        await client.user.updateUsername.mutate(options.username);
+        console.log(`${pc.green('✓')} Username updated to ${pc.bold(options.username)}`);
+      }
+    });
+}
--- a/apps/cli/src/index.ts
+++ b/apps/cli/src/index.ts
@@ -25,7 +25,9 @@ import { registerSearchCommand } from './commands/search';
 import { registerSessionGroupCommand } from './commands/session-group';
 import { registerSkillCommand } from './commands/skill';
 import { registerStatusCommand } from './commands/status';
+import { registerThreadCommand } from './commands/thread';
 import { registerTopicCommand } from './commands/topic';
+import { registerUserCommand } from './commands/user';

 const require = createRequire(import.meta.url);
 const { version } = require('../package.json');
@@ -54,11 +56,13 @@ registerGenerateCommand(program);
 registerFileCommand(program);
 registerSkillCommand(program);
 registerSessionGroupCommand(program);
+registerThreadCommand(program);
 registerTopicCommand(program);
 registerMessageCommand(program);
 registerModelCommand(program);
 registerProviderCommand(program);
 registerPluginCommand(program);
+registerUserCommand(program);
 registerConfigCommand(program);
 registerEvalCommand(program);