mirror of
https://github.com/lobehub/lobehub.git
synced 2026-03-27 13:29:15 +07:00
🐛 fix: Separate agent file injection from knowledge base RAG search (#10398)
* only search kb * support inject files * support files * fix search * fix kb search * clean console.log * add tests
This commit is contained in:
@@ -10,7 +10,7 @@ export type { ContextEngineConfig } from './pipeline';
|
||||
export { ContextEngine } from './pipeline';
|
||||
|
||||
// Context Providers
|
||||
export { HistorySummaryProvider, SystemRoleInjector, ToolSystemRoleProvider } from './providers';
|
||||
export * from './providers';
|
||||
|
||||
// Processors
|
||||
export {
|
||||
|
||||
78
packages/context-engine/src/providers/KnowledgeInjector.ts
Normal file
78
packages/context-engine/src/providers/KnowledgeInjector.ts
Normal file
@@ -0,0 +1,78 @@
|
||||
import { promptAgentKnowledge } from '@lobechat/prompts';
|
||||
import type { FileContent, KnowledgeBaseInfo } from '@lobechat/prompts';
|
||||
import debug from 'debug';
|
||||
|
||||
import { BaseProvider } from '../base/BaseProvider';
|
||||
import type { PipelineContext, ProcessorOptions } from '../types';
|
||||
|
||||
const log = debug('context-engine:provider:KnowledgeInjector');
|
||||
|
||||
/**
 * Construction options for the knowledge injector.
 *
 * Both lists are optional; the injector falls back to an empty list for
 * whichever one is omitted.
 */
export interface KnowledgeInjectorConfig {
  /**
   * File contents to inject.
   * Forwarded as `fileContents` to `promptAgentKnowledge`.
   */
  fileContents?: FileContent[];
  /**
   * Knowledge bases to inject.
   * Forwarded as `knowledgeBases` to `promptAgentKnowledge`.
   */
  knowledgeBases?: KnowledgeBaseInfo[];
}
|
||||
|
||||
/**
 * Knowledge Injector
 *
 * Context provider that injects the agent's knowledge (files and knowledge
 * bases) into the pipeline context as one extra `user` message placed
 * immediately before the first existing user message.
 */
export class KnowledgeInjector extends BaseProvider {
  readonly name = 'KnowledgeInjector';

  /**
   * @param config - Files and knowledge bases to render into the knowledge prompt.
   * @param options - Processor options handed to the base provider unchanged.
   */
  constructor(
    private config: KnowledgeInjectorConfig,
    options: ProcessorOptions = {},
  ) {
    super(options);
  }

  /**
   * Builds the knowledge prompt and splices it into a clone of the context.
   *
   * Nothing is injected when `promptAgentKnowledge` returns an empty string or
   * when the conversation contains no `user` message; in both cases the cloned
   * context is returned through `markAsExecuted` without further changes.
   *
   * @param context - Incoming pipeline context; all edits are applied to the
   *   value returned by `cloneContext`.
   * @returns The cloned context after `markAsExecuted`.
   */
  protected async doProcess(context: PipelineContext): Promise<PipelineContext> {
    const clonedContext = this.cloneContext(context);

    const fileContents = this.config.fileContents ?? [];
    const knowledgeBases = this.config.knowledgeBases ?? [];

    // Generate unified knowledge prompt
    const formattedContent = promptAgentKnowledge({ fileContents, knowledgeBases });

    // Skip injection if no knowledge at all
    if (!formattedContent) {
      log('No knowledge to inject');
      return this.markAsExecuted(clonedContext);
    }

    // Find the first user message index
    const firstUserIndex = clonedContext.messages.findIndex((msg) => msg.role === 'user');

    if (firstUserIndex === -1) {
      log('No user messages found, skipping injection');
      return this.markAsExecuted(clonedContext);
    }

    // Read the clock once so the id, createdAt and updatedAt of the injected
    // message always agree, even across a millisecond boundary.
    const now = Date.now();

    // Insert a new user message with knowledge before the first user message.
    // `meta` marks it as an application-level system injection.
    const knowledgeMessage = {
      content: formattedContent,
      createdAt: now,
      id: `knowledge-${now}`,
      meta: { injectType: 'knowledge', systemInjection: true },
      role: 'user' as const,
      updatedAt: now,
    };

    clonedContext.messages.splice(firstUserIndex, 0, knowledgeMessage);

    // Update metadata
    clonedContext.metadata.knowledgeInjected = true;
    clonedContext.metadata.filesCount = fileContents.length;
    clonedContext.metadata.knowledgeBasesCount = knowledgeBases.length;

    log(
      `Agent knowledge injected as new user message: ${fileContents.length} file(s), ${knowledgeBases.length} knowledge base(s)`,
    );

    return this.markAsExecuted(clonedContext);
  }
}
|
||||
@@ -1,9 +1,11 @@
|
||||
// Context Provider exports
|
||||
export { HistorySummaryProvider } from './HistorySummary';
|
||||
export { KnowledgeInjector } from './KnowledgeInjector';
|
||||
export { SystemRoleInjector } from './SystemRoleInjector';
|
||||
export { ToolSystemRoleProvider } from './ToolSystemRole';
|
||||
|
||||
// Re-export types
|
||||
export type { HistorySummaryConfig } from './HistorySummary';
|
||||
export type { KnowledgeInjectorConfig } from './KnowledgeInjector';
|
||||
export type { SystemRoleInjectorConfig } from './SystemRoleInjector';
|
||||
export type { ToolSystemRoleConfig } from './ToolSystemRole';
|
||||
|
||||
@@ -2,17 +2,18 @@
|
||||
import { eq } from 'drizzle-orm';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
|
||||
import { LobeChatDatabase } from '../../type';
|
||||
import {
|
||||
agents,
|
||||
agentsFiles,
|
||||
agentsKnowledgeBases,
|
||||
agentsToSessions,
|
||||
documents,
|
||||
files,
|
||||
knowledgeBases,
|
||||
sessions,
|
||||
users,
|
||||
} from '../../schemas';
|
||||
import { LobeChatDatabase } from '../../type';
|
||||
import { AgentModel } from '../agent';
|
||||
import { getTestDB } from './_util';
|
||||
|
||||
@@ -69,6 +70,76 @@ describe('AgentModel', () => {
|
||||
expect(result.knowledgeBases).toHaveLength(1);
|
||||
expect(result.files).toHaveLength(1);
|
||||
});
|
||||
|
||||
it('should fetch and include document content for enabled files', async () => {
|
||||
const agentId = 'test-agent-with-docs';
|
||||
await serverDB.insert(agents).values({ id: agentId, userId });
|
||||
await serverDB.insert(agentsFiles).values({ agentId, fileId: '1', userId, enabled: true });
|
||||
await serverDB.insert(documents).values({
|
||||
id: 'doc1',
|
||||
fileId: '1',
|
||||
userId,
|
||||
content: 'This is document content',
|
||||
fileType: 'application/pdf',
|
||||
totalCharCount: 100,
|
||||
totalLineCount: 10,
|
||||
sourceType: 'file',
|
||||
source: 'document.pdf',
|
||||
});
|
||||
|
||||
const result = await agentModel.getAgentConfigById(agentId);
|
||||
|
||||
expect(result).toBeDefined();
|
||||
expect(result.files).toHaveLength(1);
|
||||
expect(result.files[0].content).toBe('This is document content');
|
||||
expect(result.files[0].enabled).toBe(true);
|
||||
});
|
||||
|
||||
it('should not include content for disabled files', async () => {
|
||||
const agentId = 'test-agent-disabled-file';
|
||||
await serverDB.insert(agents).values({ id: agentId, userId });
|
||||
await serverDB.insert(agentsFiles).values({ agentId, fileId: '1', userId, enabled: false });
|
||||
await serverDB.insert(documents).values({
|
||||
id: 'doc2',
|
||||
fileId: '1',
|
||||
userId,
|
||||
content: 'This should not be included',
|
||||
fileType: 'application/pdf',
|
||||
totalCharCount: 100,
|
||||
totalLineCount: 10,
|
||||
sourceType: 'file',
|
||||
source: 'document.pdf',
|
||||
});
|
||||
|
||||
const result = await agentModel.getAgentConfigById(agentId);
|
||||
|
||||
expect(result).toBeDefined();
|
||||
expect(result.files).toHaveLength(1);
|
||||
expect(result.files[0].content).toBeUndefined();
|
||||
expect(result.files[0].enabled).toBe(false);
|
||||
});
|
||||
|
||||
it('should handle files without documents', async () => {
|
||||
const agentId = 'test-agent-no-docs';
|
||||
await serverDB.insert(agents).values({ id: agentId, userId });
|
||||
await serverDB.insert(agentsFiles).values({ agentId, fileId: '2', userId, enabled: true });
|
||||
|
||||
const result = await agentModel.getAgentConfigById(agentId);
|
||||
|
||||
expect(result).toBeDefined();
|
||||
expect(result.files).toHaveLength(1);
|
||||
expect(result.files[0].content).toBeUndefined();
|
||||
});
|
||||
|
||||
it('should handle agent with no files', async () => {
|
||||
const agentId = 'test-agent-no-files';
|
||||
await serverDB.insert(agents).values({ id: agentId, userId });
|
||||
|
||||
const result = await agentModel.getAgentConfigById(agentId);
|
||||
|
||||
expect(result).toBeDefined();
|
||||
expect(result.files).toHaveLength(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe('findBySessionId', () => {
|
||||
@@ -84,10 +155,16 @@ describe('AgentModel', () => {
|
||||
expect(result).toBeDefined();
|
||||
expect(result?.id).toBe(agentId);
|
||||
});
|
||||
|
||||
it('should return undefined when session is not found', async () => {
|
||||
const result = await agentModel.findBySessionId('non-existent-session');
|
||||
|
||||
expect(result).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
describe('createAgentKnowledgeBase', () => {
|
||||
it('should create a new agent knowledge base association', async () => {
|
||||
it('should create a new agent knowledge base association with enabled=true by default', async () => {
|
||||
const agent = await serverDB
|
||||
.insert(agents)
|
||||
.values({ userId })
|
||||
@@ -107,6 +184,27 @@ describe('AgentModel', () => {
|
||||
enabled: true,
|
||||
});
|
||||
});
|
||||
|
||||
it('should create a new agent knowledge base association with enabled=false', async () => {
|
||||
const agent = await serverDB
|
||||
.insert(agents)
|
||||
.values({ userId })
|
||||
.returning()
|
||||
.then((res) => res[0]);
|
||||
|
||||
await agentModel.createAgentKnowledgeBase(agent.id, knowledgeBase.id, false);
|
||||
|
||||
const result = await serverDB.query.agentsKnowledgeBases.findFirst({
|
||||
where: eq(agentsKnowledgeBases.agentId, agent.id),
|
||||
});
|
||||
|
||||
expect(result).toMatchObject({
|
||||
agentId: agent.id,
|
||||
knowledgeBaseId: knowledgeBase.id,
|
||||
userId,
|
||||
enabled: false,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('deleteAgentKnowledgeBase', () => {
|
||||
@@ -153,7 +251,7 @@ describe('AgentModel', () => {
|
||||
});
|
||||
|
||||
describe('createAgentFiles', () => {
|
||||
it('should create new agent file associations', async () => {
|
||||
it('should create new agent file associations with enabled=true by default', async () => {
|
||||
const agent = await serverDB
|
||||
.insert(agents)
|
||||
.values({ userId })
|
||||
@@ -174,6 +272,77 @@ describe('AgentModel', () => {
|
||||
]),
|
||||
);
|
||||
});
|
||||
|
||||
it('should create new agent file associations with enabled=false', async () => {
|
||||
const agent = await serverDB
|
||||
.insert(agents)
|
||||
.values({ userId })
|
||||
.returning()
|
||||
.then((res) => res[0]);
|
||||
|
||||
await agentModel.createAgentFiles(agent.id, ['1'], false);
|
||||
|
||||
const results = await serverDB.query.agentsFiles.findMany({
|
||||
where: eq(agentsFiles.agentId, agent.id),
|
||||
});
|
||||
|
||||
expect(results).toHaveLength(1);
|
||||
expect(results[0]).toMatchObject({
|
||||
agentId: agent.id,
|
||||
fileId: '1',
|
||||
userId,
|
||||
enabled: false,
|
||||
});
|
||||
});
|
||||
|
||||
it('should skip files that already exist', async () => {
|
||||
const agent = await serverDB
|
||||
.insert(agents)
|
||||
.values({ userId })
|
||||
.returning()
|
||||
.then((res) => res[0]);
|
||||
|
||||
// First insert
|
||||
await serverDB.insert(agentsFiles).values({ agentId: agent.id, fileId: '1', userId });
|
||||
|
||||
// Try to insert the same file again
|
||||
await agentModel.createAgentFiles(agent.id, ['1', '2']);
|
||||
|
||||
const results = await serverDB.query.agentsFiles.findMany({
|
||||
where: eq(agentsFiles.agentId, agent.id),
|
||||
});
|
||||
|
||||
// Should only have 2 files (1 existing + 1 new), not 3
|
||||
expect(results).toHaveLength(2);
|
||||
expect(results.map((r) => r.fileId).sort()).toEqual(['1', '2']);
|
||||
});
|
||||
|
||||
it('should return early when all files already exist', async () => {
|
||||
const agent = await serverDB
|
||||
.insert(agents)
|
||||
.values({ userId })
|
||||
.returning()
|
||||
.then((res) => res[0]);
|
||||
|
||||
// First insert
|
||||
await serverDB.insert(agentsFiles).values([
|
||||
{ agentId: agent.id, fileId: '1', userId },
|
||||
{ agentId: agent.id, fileId: '2', userId },
|
||||
]);
|
||||
|
||||
// Try to insert the same files again
|
||||
const result = await agentModel.createAgentFiles(agent.id, ['1', '2']);
|
||||
|
||||
// Should return undefined (early return)
|
||||
expect(result).toBeUndefined();
|
||||
|
||||
const results = await serverDB.query.agentsFiles.findMany({
|
||||
where: eq(agentsFiles.agentId, agent.id),
|
||||
});
|
||||
|
||||
// Should still only have 2 files
|
||||
expect(results).toHaveLength(2);
|
||||
});
|
||||
});
|
||||
|
||||
describe('deleteAgentFile', () => {
|
||||
|
||||
@@ -5,6 +5,7 @@ import {
|
||||
agentsFiles,
|
||||
agentsKnowledgeBases,
|
||||
agentsToSessions,
|
||||
documents,
|
||||
files,
|
||||
knowledgeBases,
|
||||
} from '../schemas';
|
||||
@@ -24,6 +25,22 @@ export class AgentModel {
|
||||
|
||||
const knowledge = await this.getAgentAssignedKnowledge(id);
|
||||
|
||||
// Fetch document content for enabled files
|
||||
const enabledFileIds = knowledge.files.filter((f) => f.enabled).map((f) => f.id);
|
||||
|
||||
if (enabledFileIds.length > 0) {
|
||||
const documentsData = await this.db.query.documents.findMany({
|
||||
where: and(eq(documents.userId, this.userId), inArray(documents.fileId, enabledFileIds)),
|
||||
});
|
||||
|
||||
const documentMap = new Map(documentsData.map((doc) => [doc.fileId, doc.content]));
|
||||
|
||||
knowledge.files = knowledge.files.map((file) => ({
|
||||
...file,
|
||||
content: file.enabled && file.id ? documentMap.get(file.id) : undefined,
|
||||
}));
|
||||
}
|
||||
|
||||
return { ...agent, ...knowledge };
|
||||
};
|
||||
|
||||
|
||||
@@ -0,0 +1,103 @@
|
||||
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
|
||||
|
||||
exports[`promptAgentKnowledge > should format both files and knowledge bases 1`] = `
|
||||
"<agent_knowledge>
|
||||
<instruction>The following files and knowledge bases are available. For files, refer to their content directly. For knowledge bases, use the searchKnowledgeBase tool to find relevant information.</instruction>
|
||||
<files totalCount="1">
|
||||
<file id="file1" name="readme.md">
|
||||
File content here
|
||||
</file>
|
||||
</files>
|
||||
<knowledge_bases totalCount="1">
|
||||
<knowledge_base id="kb1" name="Internal Docs" description="Company knowledge base" />
|
||||
</knowledge_bases>
|
||||
</agent_knowledge>"
|
||||
`;
|
||||
|
||||
exports[`promptAgentKnowledge > should format only files when no knowledge bases 1`] = `
|
||||
"<agent_knowledge>
|
||||
<instruction>The following files are available. Refer to their content directly to answer questions. No knowledge bases are associated.</instruction>
|
||||
<files totalCount="2">
|
||||
<file id="file1" name="doc1.txt">
|
||||
This is the content of document 1
|
||||
</file>
|
||||
<file id="file2" name="doc2.md">
|
||||
This is the content of document 2
|
||||
</file>
|
||||
</files>
|
||||
</agent_knowledge>"
|
||||
`;
|
||||
|
||||
exports[`promptAgentKnowledge > should format only knowledge bases when no files 1`] = `
|
||||
"<agent_knowledge>
|
||||
<instruction>The following knowledge bases are available for semantic search. Use the searchKnowledgeBase tool to find relevant information.</instruction>
|
||||
<knowledge_bases totalCount="2">
|
||||
<knowledge_base id="kb1" name="Documentation" description="API documentation" />
|
||||
<knowledge_base id="kb2" name="FAQs" />
|
||||
</knowledge_bases>
|
||||
</agent_knowledge>"
|
||||
`;
|
||||
|
||||
exports[`promptAgentKnowledge > should handle file with error 1`] = `
|
||||
"<agent_knowledge>
|
||||
<instruction>The following files are available. Refer to their content directly to answer questions. No knowledge bases are associated.</instruction>
|
||||
<files totalCount="1">
|
||||
<file id="file1" name="missing.txt" error="File not found" />
|
||||
</files>
|
||||
</agent_knowledge>"
|
||||
`;
|
||||
|
||||
exports[`promptAgentKnowledge > should handle file with multiline content 1`] = `
|
||||
"<agent_knowledge>
|
||||
<instruction>The following files are available. Refer to their content directly to answer questions. No knowledge bases are associated.</instruction>
|
||||
<files totalCount="1">
|
||||
<file id="file1" name="multiline.txt">
|
||||
Line 1
|
||||
Line 2
|
||||
Line 3
|
||||
|
||||
Line 5 with gap
|
||||
</file>
|
||||
</files>
|
||||
</agent_knowledge>"
|
||||
`;
|
||||
|
||||
exports[`promptAgentKnowledge > should handle file with special characters in filename 1`] = `
|
||||
"<agent_knowledge>
|
||||
<instruction>The following files are available. Refer to their content directly to answer questions. No knowledge bases are associated.</instruction>
|
||||
<files totalCount="1">
|
||||
<file id="file1" name="file with spaces & special-chars.txt">
|
||||
Special content
|
||||
</file>
|
||||
</files>
|
||||
</agent_knowledge>"
|
||||
`;
|
||||
|
||||
exports[`promptAgentKnowledge > should handle knowledge base without description 1`] = `
|
||||
"<agent_knowledge>
|
||||
<instruction>The following knowledge bases are available for semantic search. Use the searchKnowledgeBase tool to find relevant information.</instruction>
|
||||
<knowledge_bases totalCount="1">
|
||||
<knowledge_base id="kb1" name="Simple KB" />
|
||||
</knowledge_bases>
|
||||
</agent_knowledge>"
|
||||
`;
|
||||
|
||||
exports[`promptAgentKnowledge > should handle multiple files and multiple knowledge bases 1`] = `
|
||||
"<agent_knowledge>
|
||||
<instruction>The following files and knowledge bases are available. For files, refer to their content directly. For knowledge bases, use the searchKnowledgeBase tool to find relevant information.</instruction>
|
||||
<files totalCount="3">
|
||||
<file id="file1" name="first.txt">
|
||||
Content of first file
|
||||
</file>
|
||||
<file id="file2" name="second.md">
|
||||
Content of second file
|
||||
</file>
|
||||
<file id="file3" name="broken.pdf" error="Parse error" />
|
||||
</files>
|
||||
<knowledge_bases totalCount="3">
|
||||
<knowledge_base id="kb1" name="Tech Docs" description="Technical documentation" />
|
||||
<knowledge_base id="kb2" name="User Guides" />
|
||||
<knowledge_base id="kb3" name="FAQ Database" description="Frequently asked questions" />
|
||||
</knowledge_bases>
|
||||
</agent_knowledge>"
|
||||
`;
|
||||
@@ -4,6 +4,9 @@ import { filePrompts } from './file';
|
||||
import { imagesPrompts } from './image';
|
||||
import { videosPrompts } from './video';
|
||||
|
||||
export type { KnowledgeBaseInfo, PromptKnowledgeOptions } from './knowledgeBase';
|
||||
export { promptAgentKnowledge } from './knowledgeBase';
|
||||
|
||||
export const filesPrompts = ({
|
||||
imageList,
|
||||
fileList,
|
||||
|
||||
167
packages/prompts/src/prompts/files/knowledgeBase.test.ts
Normal file
167
packages/prompts/src/prompts/files/knowledgeBase.test.ts
Normal file
@@ -0,0 +1,167 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
|
||||
import type { FileContent } from '../knowledgeBaseQA';
|
||||
import { promptAgentKnowledge } from './knowledgeBase';
|
||||
import type { KnowledgeBaseInfo } from './knowledgeBase';
|
||||
|
||||
describe('promptAgentKnowledge', () => {
  // Fixture builders: keep each case focused on what differs between inputs.
  const readableFile = (fileId: string, filename: string, content: string): FileContent => ({
    content,
    fileId,
    filename,
  });

  const failedFile = (fileId: string, filename: string, error: string): FileContent => ({
    content: '',
    error,
    fileId,
    filename,
  });

  const describedKb = (
    id: string,
    name: string,
    description: string | null,
  ): KnowledgeBaseInfo => ({ description, id, name });

  it('should return empty string when no files and no knowledge bases', () => {
    expect(promptAgentKnowledge({})).toBe('');
  });

  it('should format only files when no knowledge bases', () => {
    const fileContents = [
      readableFile('file1', 'doc1.txt', 'This is the content of document 1'),
      readableFile('file2', 'doc2.md', 'This is the content of document 2'),
    ];

    expect(promptAgentKnowledge({ fileContents })).toMatchSnapshot();
  });

  it('should format only knowledge bases when no files', () => {
    const knowledgeBases = [
      describedKb('kb1', 'Documentation', 'API documentation'),
      describedKb('kb2', 'FAQs', null),
    ];

    expect(promptAgentKnowledge({ knowledgeBases })).toMatchSnapshot();
  });

  it('should format both files and knowledge bases', () => {
    const fileContents = [readableFile('file1', 'readme.md', 'File content here')];
    const knowledgeBases = [describedKb('kb1', 'Internal Docs', 'Company knowledge base')];

    expect(promptAgentKnowledge({ fileContents, knowledgeBases })).toMatchSnapshot();
  });

  it('should handle file with error', () => {
    const fileContents = [failedFile('file1', 'missing.txt', 'File not found')];

    expect(promptAgentKnowledge({ fileContents })).toMatchSnapshot();
  });

  it('should handle multiple files and multiple knowledge bases', () => {
    const fileContents = [
      readableFile('file1', 'first.txt', 'Content of first file'),
      readableFile('file2', 'second.md', 'Content of second file'),
      failedFile('file3', 'broken.pdf', 'Parse error'),
    ];

    const knowledgeBases = [
      describedKb('kb1', 'Tech Docs', 'Technical documentation'),
      describedKb('kb2', 'User Guides', null),
      describedKb('kb3', 'FAQ Database', 'Frequently asked questions'),
    ];

    expect(promptAgentKnowledge({ fileContents, knowledgeBases })).toMatchSnapshot();
  });

  it('should handle knowledge base without description', () => {
    // The description key is left out entirely here (not set to null).
    const knowledgeBases: KnowledgeBaseInfo[] = [{ id: 'kb1', name: 'Simple KB' }];

    expect(promptAgentKnowledge({ knowledgeBases })).toMatchSnapshot();
  });

  it('should handle file with special characters in filename', () => {
    const fileContents = [
      readableFile('file1', 'file with spaces & special-chars.txt', 'Special content'),
    ];

    expect(promptAgentKnowledge({ fileContents })).toMatchSnapshot();
  });

  it('should handle file with multiline content', () => {
    // Includes an empty line between "Line 3" and "Line 5 with gap".
    const multilineContent = `Line 1
Line 2
Line 3

Line 5 with gap`;
    const fileContents = [readableFile('file1', 'multiline.txt', multilineContent)];

    expect(promptAgentKnowledge({ fileContents })).toMatchSnapshot();
  });
});
|
||||
85
packages/prompts/src/prompts/files/knowledgeBase.ts
Normal file
85
packages/prompts/src/prompts/files/knowledgeBase.ts
Normal file
@@ -0,0 +1,85 @@
|
||||
import type { FileContent } from '../knowledgeBaseQA';
|
||||
|
||||
/**
 * Metadata describing one knowledge base listed in the agent knowledge prompt.
 */
export interface KnowledgeBaseInfo {
  /**
   * Optional summary of the knowledge base. When it is missing, `null`, or
   * empty, no `description` attribute is rendered for the entry.
   */
  description?: string | null;
  /** Rendered as the `id` attribute of the `<knowledge_base>` element. */
  id: string;
  /** Rendered as the `name` attribute of the `<knowledge_base>` element. */
  name: string;
}
|
||||
|
||||
/**
 * Input accepted by `promptAgentKnowledge`.
 *
 * Each list is optional and is treated as empty when left out.
 */
export interface PromptKnowledgeOptions {
  /** File contents to inject; rendered inside the `<files>` section. */
  fileContents?: FileContent[];
  /** Knowledge bases to include; rendered inside the `<knowledge_bases>` section. */
  knowledgeBases?: KnowledgeBaseInfo[];
}
|
||||
|
||||
/**
 * Escapes a value for interpolation inside a double-quoted attribute.
 *
 * Only `"` is rewritten (to `&quot;`), so a quote inside a file name, error
 * message, or knowledge base field can no longer terminate the attribute
 * early. Every other character, including `&`, is emitted unchanged.
 */
const escapeAttribute = (value: string): string => String(value).replace(/"/g, '&quot;');

/**
 * Formats a single file content with XML tags.
 *
 * A file carrying an `error` is rendered as a self-closing `<file>` element
 * with the error as an attribute and no body. Otherwise the content is placed
 * between the opening and closing tags, unmodified.
 */
const formatFileContent = (file: FileContent): string => {
  const id = escapeAttribute(file.fileId);
  const name = escapeAttribute(file.filename);

  if (file.error) {
    return `<file id="${id}" name="${name}" error="${escapeAttribute(file.error)}" />`;
  }

  return `<file id="${id}" name="${name}">
${file.content}
</file>`;
};

/**
 * Format agent knowledge (files + knowledge bases) as unified XML prompt.
 *
 * The result is an `<agent_knowledge>` element containing, in order: one
 * `<instruction>` whose wording depends on which kinds of knowledge are
 * present, a `<files>` section (only when there are files), and a
 * `<knowledge_bases>` section (only when there are knowledge bases).
 *
 * @param options - Files and knowledge bases to render; each list defaults to
 *   empty when omitted.
 * @returns The prompt text, or an empty string when both lists are empty.
 */
export const promptAgentKnowledge = ({
  fileContents = [],
  knowledgeBases = [],
}: PromptKnowledgeOptions): string => {
  const hasFiles = fileContents.length > 0;
  const hasKnowledgeBases = knowledgeBases.length > 0;

  // If no knowledge at all, return empty
  if (!hasFiles && !hasKnowledgeBases) {
    return '';
  }

  const contentParts: string[] = [];

  // Add instruction based on what's available
  if (hasFiles && hasKnowledgeBases) {
    contentParts.push(
      '<instruction>The following files and knowledge bases are available. For files, refer to their content directly. For knowledge bases, use the searchKnowledgeBase tool to find relevant information.</instruction>',
    );
  } else if (hasFiles) {
    contentParts.push(
      '<instruction>The following files are available. Refer to their content directly to answer questions. No knowledge bases are associated.</instruction>',
    );
  } else {
    contentParts.push(
      '<instruction>The following knowledge bases are available for semantic search. Use the searchKnowledgeBase tool to find relevant information.</instruction>',
    );
  }

  // Add files section
  if (hasFiles) {
    const filesXml = fileContents.map((file) => formatFileContent(file)).join('\n');
    contentParts.push(`<files totalCount="${fileContents.length}">
${filesXml}
</files>`);
  }

  // Add knowledge bases section
  if (hasKnowledgeBases) {
    const kbItems = knowledgeBases
      .map((kb) => {
        // A missing, null, or empty description produces no attribute at all.
        const description = kb.description
          ? ` description="${escapeAttribute(kb.description)}"`
          : '';

        return `<knowledge_base id="${escapeAttribute(kb.id)}" name="${escapeAttribute(kb.name)}"${description} />`;
      })
      .join('\n');
    contentParts.push(`<knowledge_bases totalCount="${knowledgeBases.length}">
${kbItems}
</knowledge_bases>`);
  }

  return `<agent_knowledge>
${contentParts.join('\n')}
</agent_knowledge>`;
};
|
||||
@@ -14,6 +14,7 @@ export enum FileSource {
|
||||
}
|
||||
|
||||
export interface FileItem {
|
||||
content?: string;
|
||||
createdAt: Date;
|
||||
enabled?: boolean;
|
||||
id: string;
|
||||
|
||||
@@ -38,6 +38,7 @@ export enum KnowledgeType {
|
||||
|
||||
export interface KnowledgeItem {
|
||||
avatar?: string | null;
|
||||
content?: string;
|
||||
description?: string | null;
|
||||
enabled?: boolean;
|
||||
fileType?: string;
|
||||
|
||||
@@ -6,16 +6,16 @@ import type { PluginEnableChecker } from '@lobechat/context-engine';
|
||||
import { ChatCompletionTool, WorkingModel } from '@lobechat/types';
|
||||
import { LobeChatPluginManifest } from '@lobehub/chat-plugin-sdk';
|
||||
|
||||
import { getAgentStoreState } from '@/store/agent';
|
||||
import { agentSelectors } from '@/store/agent/selectors';
|
||||
import { getToolStoreState } from '@/store/tool';
|
||||
import { pluginSelectors } from '@/store/tool/selectors';
|
||||
import { KnowledgeBaseManifest } from '@/tools/knowledge-base';
|
||||
import { WebBrowsingManifest } from '@/tools/web-browsing';
|
||||
|
||||
import { getSearchConfig } from '../getSearchConfig';
|
||||
import { isCanUseFC } from '../isCanUseFC';
|
||||
import { shouldEnableTool } from '../toolFilters';
|
||||
import { KnowledgeBaseManifest } from '@/tools/knowledge-base';
|
||||
import { getAgentStoreState } from '@/store/agent';
|
||||
import { agentSelectors } from '@/store/agent/slices/chat';
|
||||
|
||||
/**
|
||||
* Tools engine configuration options
|
||||
@@ -59,11 +59,7 @@ export const createToolsEngine = (config: ToolsEngineConfig = {}): ToolsEngine =
|
||||
export const createAgentToolsEngine = (workingModel: WorkingModel) =>
|
||||
createToolsEngine({
|
||||
// Add default tools based on configuration
|
||||
defaultToolIds: [
|
||||
WebBrowsingManifest.identifier,
|
||||
// Only add KnowledgeBase tool if knowledge is enabled
|
||||
KnowledgeBaseManifest.identifier,
|
||||
],
|
||||
defaultToolIds: [WebBrowsingManifest.identifier, KnowledgeBaseManifest.identifier],
|
||||
// Create search-aware enableChecker for this request
|
||||
enableChecker: ({ pluginId }) => {
|
||||
// Check platform-specific constraints (e.g., LocalSystem desktop-only)
|
||||
@@ -80,7 +76,7 @@ export const createAgentToolsEngine = (workingModel: WorkingModel) =>
|
||||
if (pluginId === KnowledgeBaseManifest.identifier) {
|
||||
const agentState = getAgentStoreState();
|
||||
|
||||
return agentSelectors.hasEnabledKnowledge(agentState);
|
||||
return agentSelectors.hasEnabledKnowledgeBases(agentState);
|
||||
}
|
||||
|
||||
// For all other plugins, enable by default
|
||||
|
||||
@@ -85,28 +85,30 @@ export function useQueryParam<T>(
|
||||
|
||||
const updateParams = () => {
|
||||
// 使用函数式更新,确保基于最新的 searchParams
|
||||
setSearchParams((prevParams) => {
|
||||
const newSearchParams = new URLSearchParams(prevParams);
|
||||
console.log('updateParams', newSearchParams.toString());
|
||||
const serialized = currentParser.serialize(actualValue);
|
||||
setSearchParams(
|
||||
(prevParams) => {
|
||||
const newSearchParams = new URLSearchParams(prevParams);
|
||||
const serialized = currentParser.serialize(actualValue);
|
||||
|
||||
// 处理 clearOnDefault 选项
|
||||
if (
|
||||
currentClearOnDefault &&
|
||||
currentDefaultValue !== undefined &&
|
||||
serialized === currentParser.serialize(currentDefaultValue as T)
|
||||
) {
|
||||
newSearchParams.delete(key);
|
||||
} else if (serialized === null || serialized === undefined) {
|
||||
newSearchParams.delete(key);
|
||||
} else {
|
||||
newSearchParams.set(key, serialized);
|
||||
}
|
||||
// 处理 clearOnDefault 选项
|
||||
if (
|
||||
currentClearOnDefault &&
|
||||
currentDefaultValue !== undefined &&
|
||||
serialized === currentParser.serialize(currentDefaultValue as T)
|
||||
) {
|
||||
newSearchParams.delete(key);
|
||||
} else if (serialized === null || serialized === undefined) {
|
||||
newSearchParams.delete(key);
|
||||
} else {
|
||||
newSearchParams.set(key, serialized);
|
||||
}
|
||||
|
||||
console.log('updateParams', newSearchParams.toString());
|
||||
console.log('updateParams', newSearchParams.toString());
|
||||
|
||||
return newSearchParams;
|
||||
}, { replace: currentHistory === 'replace' });
|
||||
return newSearchParams;
|
||||
},
|
||||
{ replace: currentHistory === 'replace' },
|
||||
);
|
||||
};
|
||||
|
||||
// 处理节流
|
||||
|
||||
@@ -90,13 +90,8 @@ export const fileRouter = router({
|
||||
try {
|
||||
await pMap(
|
||||
requestArray,
|
||||
async (chunks, index) => {
|
||||
const agentRuntime = await initModelRuntimeWithUserPayload(
|
||||
provider,
|
||||
ctx.jwtPayload,
|
||||
);
|
||||
|
||||
console.log(`run embedding task ${index + 1}`);
|
||||
async (chunks) => {
|
||||
const agentRuntime = initModelRuntimeWithUserPayload(provider, ctx.jwtPayload);
|
||||
|
||||
const embeddings = await agentRuntime.embeddings({
|
||||
dimensions: 1024,
|
||||
|
||||
@@ -5,6 +5,7 @@ import {
|
||||
HistorySummaryProvider,
|
||||
HistoryTruncateProcessor,
|
||||
InputTemplateProcessor,
|
||||
KnowledgeInjector,
|
||||
MessageCleanupProcessor,
|
||||
MessageContentProcessor,
|
||||
PlaceholderVariablesProcessor,
|
||||
@@ -19,6 +20,8 @@ import { OpenAIChatMessage, UIChatMessage } from '@lobechat/types';
|
||||
import { VARIABLE_GENERATORS } from '@lobechat/utils/client';
|
||||
|
||||
import { isCanUseFC } from '@/helpers/isCanUseFC';
|
||||
import { getAgentStoreState } from '@/store/agent';
|
||||
import { agentSelectors } from '@/store/agent/selectors';
|
||||
import { getToolStoreState } from '@/store/tool';
|
||||
import { toolSelectors } from '@/store/tool/selectors';
|
||||
|
||||
@@ -50,6 +53,19 @@ export const contextEngineering = async ({
|
||||
}: ContextEngineeringContext): Promise<OpenAIChatMessage[]> => {
|
||||
const toolNameResolver = new ToolNameResolver();
|
||||
|
||||
// Get enabled agent files with content and knowledge bases from agent store
|
||||
const agentStoreState = getAgentStoreState();
|
||||
const agentFiles = agentSelectors.currentAgentFiles(agentStoreState);
|
||||
const agentKnowledgeBases = agentSelectors.currentAgentKnowledgeBases(agentStoreState);
|
||||
|
||||
const fileContents = agentFiles
|
||||
.filter((file) => file.enabled && file.content)
|
||||
.map((file) => ({ content: file.content!, fileId: file.id, filename: file.name }));
|
||||
|
||||
const knowledgeBases = agentKnowledgeBases
|
||||
.filter((kb) => kb.enabled)
|
||||
.map((kb) => ({ description: kb.description, id: kb.id, name: kb.name }));
|
||||
|
||||
const pipeline = new ContextEngine({
|
||||
pipeline: [
|
||||
// 1. History truncation (MUST be first, before any message injection)
|
||||
@@ -60,6 +76,9 @@ export const contextEngineering = async ({
|
||||
// 2. System role injection (agent's system role)
|
||||
new SystemRoleInjector({ systemRole }),
|
||||
|
||||
// 3. Knowledge injection (full content for agent files + metadata for knowledge bases)
|
||||
new KnowledgeInjector({ fileContents, knowledgeBases }),
|
||||
|
||||
// 4. Tool system role injection
|
||||
new ToolSystemRoleProvider({
|
||||
getToolSystemRoles: (tools) => toolSelectors.enabledSystemRoles(tools)(getToolStoreState()),
|
||||
|
||||
@@ -152,6 +152,9 @@ const hasFiles = (s: AgentStoreState) => {
|
||||
|
||||
const hasKnowledge = (s: AgentStoreState) => hasKnowledgeBases(s) || hasFiles(s);
|
||||
const hasEnabledKnowledge = (s: AgentStoreState) => currentEnabledKnowledge(s).length > 0;
|
||||
const hasEnabledKnowledgeBases = (s: AgentStoreState) =>
|
||||
currentAgentKnowledgeBases(s).some((s) => s.enabled);
|
||||
|
||||
const currentKnowledgeIds = (s: AgentStoreState) => {
|
||||
return {
|
||||
fileIds: currentAgentFiles(s)
|
||||
@@ -185,6 +188,7 @@ export const agentSelectors = {
|
||||
getAgentConfigByAgentId,
|
||||
getAgentConfigById,
|
||||
hasEnabledKnowledge,
|
||||
hasEnabledKnowledgeBases,
|
||||
hasKnowledge,
|
||||
hasSystemRole,
|
||||
inboxAgentConfig,
|
||||
|
||||
@@ -54,26 +54,17 @@ export const knowledgeBaseSlice: StateCreator<
|
||||
},
|
||||
|
||||
searchKnowledgeBase: async (id, params) => {
|
||||
// Get knowledge base IDs and file IDs from agent store
|
||||
// Get knowledge base IDs from agent store
|
||||
const agentState = getAgentStoreState();
|
||||
const knowledgeIds = agentSelectors.currentKnowledgeIds(agentState);
|
||||
|
||||
// Get user-selected files from messages
|
||||
const userFiles = dbMessageSelectors
|
||||
.dbUserFiles(get())
|
||||
.map((f) => f?.id)
|
||||
.filter(Boolean) as string[];
|
||||
|
||||
// Merge knowledge base files and user-selected files
|
||||
const options = {
|
||||
fileIds: [...knowledgeIds.fileIds, ...userFiles],
|
||||
knowledgeBaseIds: knowledgeIds.knowledgeBaseIds,
|
||||
};
|
||||
// Only search in knowledge bases, not agent files
|
||||
// Agent files will be injected as full content in context-engine
|
||||
const knowledgeBaseIds = knowledgeIds.knowledgeBaseIds;
|
||||
|
||||
return get().internal_triggerKnowledgeBaseToolCalling(id, async () => {
|
||||
return await runtime.searchKnowledgeBase(params, {
|
||||
fileIds: options.fileIds,
|
||||
knowledgeBaseIds: options.knowledgeBaseIds,
|
||||
knowledgeBaseIds,
|
||||
messageId: id,
|
||||
});
|
||||
});
|
||||
|
||||
@@ -16,7 +16,6 @@ export class KnowledgeBaseExecutionRuntime {
|
||||
async searchKnowledgeBase(
|
||||
args: SearchKnowledgeBaseArgs,
|
||||
options?: {
|
||||
fileIds?: string[];
|
||||
knowledgeBaseIds?: string[];
|
||||
messageId?: string;
|
||||
signal?: AbortSignal;
|
||||
@@ -25,9 +24,10 @@ export class KnowledgeBaseExecutionRuntime {
|
||||
try {
|
||||
const { query, topK = 20 } = args;
|
||||
|
||||
// Call the existing RAG service
|
||||
// Only search in knowledge bases, not agent files
|
||||
// Agent files will be injected as full content in context-engine
|
||||
const { chunks, fileResults } = await ragService.semanticSearchForChat(
|
||||
{ fileIds: options?.fileIds, knowledgeIds: options?.knowledgeBaseIds, query, topK },
|
||||
{ knowledgeIds: options?.knowledgeBaseIds, query, topK },
|
||||
options?.signal,
|
||||
);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user