👷 build: Update schema for incoming folder (#10217)

* feat: Update schema

* fix: Circular deps

* feat: Add more validation

* fix: Vercel build error

* fix: Duplicated import

* fix: Circular deps

* feat: Set varchar length from 30 to 255

* feat: Regenerate migration file

* feat: Regenerate migration

* feat: Regenerate migration
This commit is contained in:
René Wang
2025-11-18 16:42:13 +08:00
committed by GitHub
parent 5889e8e85c
commit 10e44dfb6b
16 changed files with 8101 additions and 167 deletions

View File

@@ -170,20 +170,8 @@ table chat_groups_agents {
}
}
table document_chunks {
document_id varchar(30) [not null]
chunk_id uuid [not null]
page_index integer
user_id text [not null]
created_at "timestamp with time zone" [not null, default: `now()`]
indexes {
(document_id, chunk_id) [pk]
}
}
table documents {
id varchar(30) [pk, not null]
id varchar(255) [pk, not null]
title text
content text
file_type varchar(255) [not null]
@@ -195,6 +183,7 @@ table documents {
source_type text [not null]
source text [not null]
file_id text
parent_id varchar(255)
user_id text [not null]
client_id text
editor_data jsonb
@@ -206,6 +195,7 @@ table documents {
source [name: 'documents_source_idx']
file_type [name: 'documents_file_type_idx']
file_id [name: 'documents_file_id_idx']
parent_id [name: 'documents_parent_id_idx']
(client_id, user_id) [name: 'documents_client_id_user_id_unique', unique]
}
}
@@ -219,6 +209,7 @@ table files {
size integer [not null]
url text [not null]
source text
parent_id varchar(255)
client_id text
metadata jsonb
chunk_task_id uuid
@@ -229,6 +220,7 @@ table files {
indexes {
file_hash [name: 'file_hash_idx']
parent_id [name: 'files_parent_id_idx']
(client_id, user_id) [name: 'files_client_id_user_id_unique', unique]
}
}
@@ -660,6 +652,18 @@ table chunks {
}
}
table document_chunks {
document_id varchar(30) [not null]
chunk_id uuid [not null]
page_index integer
user_id text [not null]
created_at "timestamp with time zone" [not null, default: `now()`]
indexes {
(document_id, chunk_id) [pk]
}
}
table embeddings {
id uuid [pk, not null, default: `gen_random_uuid()`]
chunk_id uuid [unique]

View File

@@ -0,0 +1,15 @@
ALTER TABLE "documents" ALTER COLUMN "id" SET DATA TYPE varchar(255);--> statement-breakpoint
ALTER TABLE "documents" ADD COLUMN IF NOT EXISTS "parent_id" varchar(255);--> statement-breakpoint
ALTER TABLE "files" ADD COLUMN IF NOT EXISTS "parent_id" varchar(255);--> statement-breakpoint
DO $$ BEGIN
ALTER TABLE "documents" ADD CONSTRAINT "documents_parent_id_documents_id_fk" FOREIGN KEY ("parent_id") REFERENCES "public"."documents"("id") ON DELETE set null ON UPDATE no action;
EXCEPTION
WHEN duplicate_object THEN null;
END $$;--> statement-breakpoint
DO $$ BEGIN
ALTER TABLE "files" ADD CONSTRAINT "files_parent_id_documents_id_fk" FOREIGN KEY ("parent_id") REFERENCES "public"."documents"("id") ON DELETE set null ON UPDATE no action;
EXCEPTION
WHEN duplicate_object THEN null;
END $$;--> statement-breakpoint
CREATE INDEX IF NOT EXISTS "documents_parent_id_idx" ON "documents" USING btree ("parent_id");--> statement-breakpoint
CREATE INDEX IF NOT EXISTS "files_parent_id_idx" ON "files" USING btree ("parent_id");

File diff suppressed because it is too large Load Diff

View File

@@ -322,6 +322,13 @@
"when": 1762911968658,
"tag": "0045_add_tool_intervention",
"breakpoints": true
},
{
"idx": 46,
"version": "7",
"when": 1763453175961,
"tag": "0046_add_parent_id",
"breakpoints": true
}
],
"version": "6"

View File

@@ -223,10 +223,7 @@
"hash": "9646161fa041354714f823d726af27247bcd6e60fa3be5698c0d69f337a5700b"
},
{
"sql": [
"DROP TABLE \"user_budgets\";",
"\nDROP TABLE \"user_subscriptions\";"
],
"sql": ["DROP TABLE \"user_budgets\";", "\nDROP TABLE \"user_subscriptions\";"],
"bps": true,
"folderMillis": 1729699958471,
"hash": "7dad43a2a25d1aec82124a4e53f8d82f8505c3073f23606c1dc5d2a4598eacf9"
@@ -298,9 +295,7 @@
"hash": "845a692ceabbfc3caf252a97d3e19a213bc0c433df2689900135f9cfded2cf49"
},
{
"sql": [
"ALTER TABLE \"messages\" ADD COLUMN \"reasoning\" jsonb;"
],
"sql": ["ALTER TABLE \"messages\" ADD COLUMN \"reasoning\" jsonb;"],
"bps": true,
"folderMillis": 1737609172353,
"hash": "2cb36ae4fcdd7b7064767e04bfbb36ae34518ff4bb1b39006f2dd394d1893868"
@@ -515,9 +510,7 @@
"hash": "a7ccf007fd185ff922823148d1eae6fafe652fc98d2fd2793f84a84f29e93cd1"
},
{
"sql": [
"ALTER TABLE \"ai_providers\" ADD COLUMN \"config\" jsonb;"
],
"sql": ["ALTER TABLE \"ai_providers\" ADD COLUMN \"config\" jsonb;"],
"bps": true,
"folderMillis": 1749309388370,
"hash": "39cea379f08ee4cb944875c0b67f7791387b508c2d47958bb4cd501ed1ef33eb"
@@ -635,9 +628,7 @@
"hash": "1ba9b1f74ea13348da98d6fcdad7867ab4316ed565bf75d84d160c526cdac14b"
},
{
"sql": [
"ALTER TABLE \"agents\" ADD COLUMN IF NOT EXISTS \"virtual\" boolean DEFAULT false;"
],
"sql": ["ALTER TABLE \"agents\" ADD COLUMN IF NOT EXISTS \"virtual\" boolean DEFAULT false;"],
"bps": true,
"folderMillis": 1759116400580,
"hash": "433ddae88e785f2db734e49a4c115eee93e60afe389f7919d66e5ba9aa159a37"
@@ -687,17 +678,13 @@
"hash": "4bdc6505797d7a33b622498c138cfd47f637239f6905e1c484cd01d9d5f21d6b"
},
{
"sql": [
"ALTER TABLE \"user_settings\" ADD COLUMN IF NOT EXISTS \"image\" jsonb;"
],
"sql": ["ALTER TABLE \"user_settings\" ADD COLUMN IF NOT EXISTS \"image\" jsonb;"],
"bps": true,
"folderMillis": 1760108430562,
"hash": "ce09b301abb80f6563abc2f526bdd20b4f69bae430f09ba2179b9e3bfec43067"
},
{
"sql": [
"ALTER TABLE \"documents\" ADD COLUMN IF NOT EXISTS \"editor_data\" jsonb;"
],
"sql": ["ALTER TABLE \"documents\" ADD COLUMN IF NOT EXISTS \"editor_data\" jsonb;"],
"bps": true,
"folderMillis": 1761554153406,
"hash": "bf2f21293e90e11cf60a784cf3ec219eafa95f7545d7d2f9d1449c0b0949599a"
@@ -777,19 +764,29 @@
"hash": "923ccbdf46c32be9a981dabd348e6923b4a365444241e9b8cc174bf5b914cbc5"
},
{
"sql": [
"ALTER TABLE \"agents\" ADD COLUMN IF NOT EXISTS \"market_identifier\" text;\n"
],
"sql": ["ALTER TABLE \"agents\" ADD COLUMN IF NOT EXISTS \"market_identifier\" text;\n"],
"bps": true,
"folderMillis": 1762870034882,
"hash": "4178aacb4b8892b7fd15d29209bbf9b1d1f9d7c406ba796f27542c0bcd919680"
},
{
"sql": [
"ALTER TABLE \"message_plugins\" ADD COLUMN IF NOT EXISTS \"intervention\" jsonb;\n"
],
"sql": ["ALTER TABLE \"message_plugins\" ADD COLUMN IF NOT EXISTS \"intervention\" jsonb;\n"],
"bps": true,
"folderMillis": 1762911968658,
"hash": "552a032cc0e595277232e70b5f9338658585bafe9481ae8346a5f322b673a68b"
},
{
"sql": [
"ALTER TABLE \"documents\" ALTER COLUMN \"id\" SET DATA TYPE varchar(255);",
"\nALTER TABLE \"documents\" ADD COLUMN \"parent_id\" varchar(255);",
"\nALTER TABLE \"files\" ADD COLUMN \"parent_id\" varchar(255);",
"\nALTER TABLE \"documents\" ADD CONSTRAINT \"documents_parent_id_documents_id_fk\" FOREIGN KEY (\"parent_id\") REFERENCES \"public\".\"documents\"(\"id\") ON DELETE set null ON UPDATE no action;",
"\nALTER TABLE \"files\" ADD CONSTRAINT \"files_parent_id_documents_id_fk\" FOREIGN KEY (\"parent_id\") REFERENCES \"public\".\"documents\"(\"id\") ON DELETE set null ON UPDATE no action;",
"\nCREATE INDEX \"documents_parent_id_idx\" ON \"documents\" USING btree (\"parent_id\");",
"\nCREATE INDEX \"files_parent_id_idx\" ON \"files\" USING btree (\"parent_id\");"
],
"bps": true,
"folderMillis": 1763453175961,
"hash": "6cfc00744de6a8f4d60b793673911bb740f9a50661663e28b843e5adae08f94a"
}
]
]

View File

@@ -1,8 +1,7 @@
import { and, desc, eq } from 'drizzle-orm';
import { LobeChatDatabase } from '../type';
import { DocumentItem, NewDocument, documents } from '../schemas';
import { LobeChatDatabase } from '../type';
export class DocumentModel {
private userId: string;
@@ -13,13 +12,13 @@ export class DocumentModel {
this.db = db;
}
create = async (params: Omit<NewDocument, 'userId'>) => {
const [result] = await this.db
create = async (params: Omit<NewDocument, 'userId'>): Promise<DocumentItem> => {
const result = (await this.db
.insert(documents)
.values({ ...params, userId: this.userId })
.returning();
.returning()) as DocumentItem[];
return result;
return result[0]!;
};
delete = async (id: string) => {

View File

@@ -43,8 +43,8 @@ export class FileModel {
params: Omit<NewFile, 'id' | 'userId'> & { id?: string; knowledgeBaseId?: string },
insertToGlobalFiles?: boolean,
trx?: Transaction,
) => {
const executeInTransaction = async (tx: Transaction) => {
): Promise<{ id: string }> => {
const executeInTransaction = async (tx: Transaction): Promise<FileItem> => {
if (insertToGlobalFiles) {
await tx.insert(globalFiles).values({
creator: this.userId,
@@ -56,12 +56,12 @@ export class FileModel {
});
}
const result = await tx
const result = (await tx
.insert(files)
.values({ ...params, userId: this.userId })
.returning();
.returning()) as FileItem[];
const item = result[0];
const item = result[0]!;
if (params.knowledgeBaseId) {
await tx.insert(knowledgeBaseFiles).values({

View File

@@ -3,7 +3,7 @@ import { FilesTabs } from '@lobechat/types';
import { beforeEach, describe, expect, it } from 'vitest';
import { getTestDB } from '../../models/__tests__/_util';
import { NewDocument, documents } from '../../schemas/document';
import { NewDocument, documents } from '../../schemas/file';
import { NewFile, files } from '../../schemas/file';
import { users } from '../../schemas/user';
import { LobeChatDatabase } from '../../type';

View File

@@ -1,105 +0,0 @@
/* eslint-disable sort-keys-fix/sort-keys-fix */
import type { LobeDocumentPage } from '@lobechat/types';
import {
index,
integer,
jsonb,
pgTable,
primaryKey,
text,
uniqueIndex,
uuid,
varchar,
} from 'drizzle-orm/pg-core';
import { createInsertSchema } from 'drizzle-zod';
import { idGenerator } from '../utils/idGenerator';
import { createdAt, timestamps } from './_helpers';
import { files } from './file';
import { chunks } from './rag';
import { users } from './user';
/**
* 文档表 - 存储文件内容或网页搜索结果
*/
export const documents = pgTable(
'documents',
{
id: varchar('id', { length: 30 })
.$defaultFn(() => idGenerator('documents', 16))
.primaryKey(),
// 基本信息
title: text('title'),
content: text('content'),
fileType: varchar('file_type', { length: 255 }).notNull(),
filename: text('filename'),
// 统计信息
totalCharCount: integer('total_char_count').notNull(),
totalLineCount: integer('total_line_count').notNull(),
// 元数据
metadata: jsonb('metadata').$type<Record<string, any>>(),
// 页面/块数据
pages: jsonb('pages').$type<LobeDocumentPage[]>(),
// 来源类型
sourceType: text('source_type', { enum: ['file', 'web', 'api'] }).notNull(),
source: text('source').notNull(), // 文件路径或网页URL
// 关联文件(可选)
fileId: text('file_id').references(() => files.id, { onDelete: 'set null' }),
// 用户关联
userId: text('user_id')
.references(() => users.id, { onDelete: 'cascade' })
.notNull(),
clientId: text('client_id'),
editorData: jsonb('editor_data').$type<Record<string, any>>(),
// 时间戳
...timestamps,
},
(table) => [
index('documents_source_idx').on(table.source),
index('documents_file_type_idx').on(table.fileType),
index('documents_file_id_idx').on(table.fileId),
uniqueIndex('documents_client_id_user_id_unique').on(table.clientId, table.userId),
],
);
export type NewDocument = typeof documents.$inferInsert;
export type DocumentItem = typeof documents.$inferSelect;
export const insertDocumentSchema = createInsertSchema(documents);
/**
* 文档块表 - 将文档内容分割成块并关联到 chunks 表,用于向量检索
* 注意:此表可选,如果已经使用 pages 字段存储了文档块,可以不需要此表
*/
export const documentChunks = pgTable(
'document_chunks',
{
documentId: varchar('document_id', { length: 30 })
.references(() => documents.id, { onDelete: 'cascade' })
.notNull(),
chunkId: uuid('chunk_id')
.references(() => chunks.id, { onDelete: 'cascade' })
.notNull(),
pageIndex: integer('page_index'),
userId: text('user_id')
.references(() => users.id, { onDelete: 'cascade' })
.notNull(),
createdAt: createdAt(),
},
(t) => [primaryKey({ columns: [t.documentId, t.chunkId] })],
);
export type NewDocumentChunk = typeof documentChunks.$inferInsert;
export type DocumentChunkItem = typeof documentChunks.$inferSelect;

View File

@@ -1,5 +1,4 @@
/* eslint-disable sort-keys-fix/sort-keys-fix */
import { FileSource } from '@lobechat/types';
import {
boolean,
index,
@@ -14,6 +13,9 @@ import {
} from 'drizzle-orm/pg-core';
import { createInsertSchema } from 'drizzle-zod';
import { LobeDocumentPage } from '@/types/document';
import { FileSource } from '@/types/files';
import { idGenerator } from '../utils/idGenerator';
import { accessedAt, createdAt, timestamps } from './_helpers';
import { asyncTasks } from './asyncTask';
@@ -35,6 +37,77 @@ export const globalFiles = pgTable('global_files', {
export type NewGlobalFile = typeof globalFiles.$inferInsert;
export type GlobalFileItem = typeof globalFiles.$inferSelect;
/**
* 文档表 - 存储文件内容或网页搜索结果
*/
// @ts-ignore
export const documents = pgTable(
'documents',
{
id: varchar('id', { length: 255 })
.$defaultFn(() => idGenerator('documents', 16))
.primaryKey(),
// 基本信息
title: text('title'),
content: text('content'),
// Special type: custom/folder
fileType: varchar('file_type', { length: 255 }).notNull(),
filename: text('filename'),
// 统计信息
totalCharCount: integer('total_char_count').notNull(),
totalLineCount: integer('total_line_count').notNull(),
// 元数据
metadata: jsonb('metadata').$type<Record<string, any>>(),
// 页面/块数据
pages: jsonb('pages').$type<LobeDocumentPage[]>(),
// 来源类型
sourceType: text('source_type', { enum: ['file', 'web', 'api'] }).notNull(),
source: text('source').notNull(), // 文件路径或网页URL
// 关联文件(可选)
// Forward reference to files table defined below
// eslint-disable-next-line @typescript-eslint/no-use-before-define
// @ts-expect-error - files is defined later in this file, forward reference is valid at runtime
// eslint-disable-next-line @typescript-eslint/no-use-before-define
fileId: text('file_id').references(() => files.id, { onDelete: 'set null' }),
// 父文档(用于文件夹层级结构)
// @ts-ignore
parentId: varchar('parent_id', { length: 255 }).references(() => documents.id, {
onDelete: 'set null',
}),
// 用户关联
userId: text('user_id')
.references(() => users.id, { onDelete: 'cascade' })
.notNull(),
clientId: text('client_id'),
editorData: jsonb('editor_data').$type<Record<string, any>>(),
// 时间戳
...timestamps,
},
(table) => [
index('documents_source_idx').on(table.source),
index('documents_file_type_idx').on(table.fileType),
index('documents_file_id_idx').on(table.fileId),
index('documents_parent_id_idx').on(table.parentId),
uniqueIndex('documents_client_id_user_id_unique').on(table.clientId, table.userId),
],
);
export type NewDocument = typeof documents.$inferInsert;
export type DocumentItem = typeof documents.$inferSelect;
export const insertDocumentSchema = createInsertSchema(documents);
// @ts-ignore
export const files = pgTable(
'files',
{
@@ -60,6 +133,12 @@ export const files = pgTable(
url: text('url').notNull(),
source: text('source').$type<FileSource>(),
// 父文档(用于文件夹层级结构)
// @ts-ignore
parentId: varchar('parent_id', { length: 255 }).references(() => documents.id, {
onDelete: 'set null',
}),
clientId: text('client_id'),
metadata: jsonb('metadata'),
chunkTaskId: uuid('chunk_task_id').references(() => asyncTasks.id, { onDelete: 'set null' }),
@@ -72,6 +151,7 @@ export const files = pgTable(
(table) => {
return {
fileHashIdx: index('file_hash_idx').on(table.fileHash),
parentIdIdx: index('files_parent_id_idx').on(table.parentId),
clientIdUnique: uniqueIndex('files_client_id_user_id_unique').on(
table.clientId,
table.userId,

View File

@@ -3,7 +3,6 @@ export * from './aiInfra';
export * from './apiKey';
export * from './asyncTask';
export * from './chatGroup';
export * from './document';
export * from './file';
export * from './generation';
export * from './message';

View File

@@ -4,6 +4,7 @@ import {
integer,
jsonb,
pgTable,
primaryKey,
text,
uniqueIndex,
uuid,
@@ -11,8 +12,8 @@ import {
vector,
} from 'drizzle-orm/pg-core';
import { timestamps } from './_helpers';
import { files } from './file';
import { createdAt, timestamps } from './_helpers';
import { documents, files } from './file';
import { users } from './user';
export const chunks = pgTable(
@@ -86,3 +87,32 @@ export const embeddings = pgTable(
export type NewEmbeddingsItem = typeof embeddings.$inferInsert;
export type EmbeddingsSelectItem = typeof embeddings.$inferSelect;
/**
* 文档块表 - 将文档内容分割成块并关联到 chunks 表,用于向量检索
* 注意:此表可选,如果已经使用 pages 字段存储了文档块,可以不需要此表
*/
export const documentChunks = pgTable(
'document_chunks',
{
documentId: varchar('document_id', { length: 30 })
.references(() => documents.id, { onDelete: 'cascade' })
.notNull(),
chunkId: uuid('chunk_id')
.references(() => chunks.id, { onDelete: 'cascade' })
.notNull(),
pageIndex: integer('page_index'),
userId: text('user_id')
.references(() => users.id, { onDelete: 'cascade' })
.notNull(),
createdAt: createdAt(),
},
(t) => [primaryKey({ columns: [t.documentId, t.chunkId] })],
);
export type NewDocumentChunk = typeof documentChunks.$inferInsert;
export type DocumentChunkItem = typeof documentChunks.$inferSelect;

View File

@@ -6,11 +6,10 @@ import { createdAt } from './_helpers';
import { agents, agentsFiles, agentsKnowledgeBases } from './agent';
import { asyncTasks } from './asyncTask';
import { chatGroups, chatGroupsAgents } from './chatGroup';
import { documentChunks, documents } from './document';
import { files, knowledgeBases } from './file';
import { documents, files, knowledgeBases } from './file';
import { generationBatches, generationTopics, generations } from './generation';
import { messageGroups, messages, messagesFiles } from './message';
import { chunks, unstructuredChunks } from './rag';
import { chunks, documentChunks, unstructuredChunks } from './rag';
import { sessionGroups, sessions } from './session';
import { threads, topicDocuments, topics } from './topic';
import { users } from './user';

View File

@@ -6,7 +6,7 @@ import { createInsertSchema } from 'drizzle-zod';
import { idGenerator } from '../utils/idGenerator';
import { createdAt, timestamps, timestamptz } from './_helpers';
import { chatGroups } from './chatGroup';
import { documents } from './document';
import { documents } from './file';
import { sessions } from './session';
import { users } from './user';

View File

@@ -66,7 +66,23 @@ export const fileRouter = router({
const item = await ctx.fileModel.findById(input.id);
if (!item) throw new TRPCError({ code: 'BAD_REQUEST', message: 'File not found' });
return { ...item, url: await ctx.fileService.getFullFileUrl(item?.url) };
return {
chunkTaskId: item.chunkTaskId,
clientId: item.clientId,
createdAt: item.createdAt,
embeddingTaskId: item.embeddingTaskId,
fileHash: item.fileHash,
fileType: item.fileType,
id: item.id,
metadata: item.metadata,
name: item.name,
parentId: item.parentId,
size: item.size,
source: item.source,
updatedAt: item.updatedAt,
url: await ctx.fileService.getFullFileUrl(item.url),
userId: item.userId,
};
}),
getFileItemById: fileProcedure
@@ -92,15 +108,20 @@ export const fileRouter = router({
const chunkCount = await ctx.chunkModel.countByFileId(input.id);
return {
...item,
chunkCount,
chunkingError: chunkingTask?.error,
chunkingStatus: chunkingTask?.status as AsyncTaskStatus,
createdAt: item.createdAt,
embeddingError: embeddingTask?.error,
embeddingStatus: embeddingTask?.status as AsyncTaskStatus,
fileType: item.fileType,
finishEmbedding: embeddingTask?.status === AsyncTaskStatus.Success,
id: item.id,
metadata: item.metadata as Record<string, any> | null | undefined,
name: item.name,
size: item.size,
sourceType: 'file' as const,
updatedAt: item.updatedAt,
url: await ctx.fileService.getFullFileUrl(item.url!),
};
}),

View File

@@ -27,7 +27,16 @@ export class FileService {
throw new Error('file not found');
}
return { ...item, type: item.fileType };
return {
createdAt: item.createdAt,
id: item.id,
name: item.name,
size: item.size,
source: item.source,
type: item.fileType,
updatedAt: item.updatedAt,
url: item.url,
};
};
removeFile = async (id: string): Promise<void> => {