{ "openapi": "3.0.1", "info": { "title": "Knowledge API", "description": "API for managing knowledge bases, documents, chunks, metadata, and tags, including creation, retrieval, and configuration. **Note:** A single Knowledge Base API key has permission to operate on all visible knowledge bases under the same account. Please pay attention to data security.", "version": "1.0.0" }, "servers": [ { "url": "{apiBaseUrl}", "description": "The base URL for the Knowledge API.", "variables": { "apiBaseUrl": { "default": "https://api.dify.ai/v1", "description": "Actual base URL of the API" } } } ], "security": [ { "ApiKeyAuth": [] } ], "tags": [ { "name": "Knowledge Bases", "description": "Operations for managing knowledge bases, including creation, configuration, and retrieval." }, { "name": "Documents", "description": "Operations for creating, updating, and managing documents within a knowledge base." }, { "name": "Chunks", "description": "Operations for managing document chunks and child chunks." }, { "name": "Metadata", "description": "Operations for managing knowledge base metadata fields and document metadata values." }, { "name": "Tags", "description": "Operations for managing knowledge base tags and tag bindings." }, { "name": "Models", "description": "Operations for retrieving available models." }, { "name": "Knowledge Pipeline", "description": "Operations for managing and running knowledge pipelines, including datasource plugins and pipeline execution." } ], "paths": { "/datasets": { "post": { "tags": [ "Knowledge Bases" ], "summary": "Create an Empty Knowledge Base", "description": "Create a new empty knowledge base. 
After creation, use [Create Document by Text](/api-reference/documents/create-document-by-text) or [Create Document by File](/api-reference/documents/create-document-by-file) to add documents.", "operationId": "createDataset", "requestBody": { "required": true, "content": { "application/json": { "schema": { "type": "object", "required": [ "name" ], "properties": { "name": { "type": "string", "minLength": 1, "maxLength": 40, "description": "Name of the knowledge base." }, "description": { "type": "string", "maxLength": 400, "default": "", "description": "Description of the knowledge base." }, "indexing_technique": { "type": "string", "enum": [ "high_quality", "economy" ], "nullable": true, "description": "`high_quality` uses embedding models for precise search; `economy` uses keyword-based indexing." }, "permission": { "type": "string", "enum": [ "only_me", "all_team_members", "partial_members" ], "default": "only_me", "description": "Controls who can access this knowledge base. `only_me` restricts to the creator, `all_team_members` grants access to the entire workspace, `partial_members` grants access to specified members." }, "provider": { "type": "string", "enum": [ "vendor", "external" ], "default": "vendor", "description": "`vendor` for internal knowledge base, `external` for external knowledge base." }, "embedding_model": { "type": "string", "description": "Embedding model name. Use the `model` field from [Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`." }, "embedding_model_provider": { "type": "string", "description": "Embedding model provider. Use the `provider` field from [Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`." }, "retrieval_model": { "$ref": "#/components/schemas/RetrievalModel", "description": "Retrieval model configuration. Controls how chunks are searched and ranked when querying this knowledge base." 
}, "external_knowledge_api_id": { "type": "string", "description": "ID of the external knowledge API connection." }, "external_knowledge_id": { "type": "string", "description": "ID of the external knowledge base." }, "summary_index_setting": { "type": "object", "nullable": true, "description": "Summary index configuration.", "properties": { "enable": { "type": "boolean", "description": "Whether to enable summary indexing." }, "model_name": { "type": "string", "description": "Name of the model used for generating summaries." }, "model_provider_name": { "type": "string", "description": "Provider of the summary generation model." }, "summary_prompt": { "type": "string", "description": "Custom prompt template for summary generation." } } } } } } } }, "responses": { "200": { "description": "Knowledge base created successfully.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/Dataset" }, "examples": { "success": { "summary": "Response Example", "value": { "id": "c42e2a6e-40b3-4330-96f8-f1e4d768e8c9", "name": "Product Documentation", "description": "Technical documentation for the product API", "provider": "vendor", "permission": "only_me", "data_source_type": null, "indexing_technique": "high_quality", "app_count": 0, "document_count": 0, "word_count": 0, "created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4", "author_name": "admin", "created_at": 1741267200, "updated_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4", "updated_at": 1741267200, "embedding_model": "text-embedding-3-small", "embedding_model_provider": "openai", "embedding_available": true, "retrieval_model_dict": { "search_method": "semantic_search", "reranking_enable": false, "reranking_mode": null, "reranking_model": { "reranking_provider_name": "", "reranking_model_name": "" }, "weights": null, "top_k": 3, "score_threshold_enabled": false, "score_threshold": null }, "tags": [], "doc_form": "text_model", "external_knowledge_info": null, "external_retrieval_model": null, 
"doc_metadata": [], "built_in_field_enabled": true, "pipeline_id": null, "runtime_mode": null, "chunk_structure": null, "icon_info": null, "summary_index_setting": null, "is_published": false, "total_documents": 0, "total_available_documents": 0, "enable_api": true, "is_multimodal": false } } } } } }, "409": { "description": "`dataset_name_duplicate` : The dataset name already exists. Please modify your dataset name.", "content": { "application/json": { "examples": { "dataset_name_duplicate": { "summary": "dataset_name_duplicate", "value": { "status": 409, "code": "dataset_name_duplicate", "message": "The dataset name already exists. Please modify your dataset name." } } } } } } } }, "get": { "tags": [ "Knowledge Bases" ], "summary": "List Knowledge Bases", "description": "Returns a paginated list of knowledge bases. Supports filtering by keyword and tags.", "operationId": "listDatasets", "parameters": [ { "name": "page", "in": "query", "schema": { "type": "integer", "default": 1 }, "description": "Page number to retrieve." }, { "name": "limit", "in": "query", "schema": { "type": "integer", "default": 20 }, "description": "Number of items per page." }, { "name": "keyword", "in": "query", "schema": { "type": "string" }, "description": "Search keyword to filter by name." }, { "name": "include_all", "in": "query", "schema": { "type": "boolean", "default": false }, "description": "Whether to include all knowledge bases regardless of permissions." }, { "name": "tag_ids", "in": "query", "schema": { "type": "array", "items": { "type": "string" } }, "style": "form", "explode": true, "description": "Tag IDs to filter by." 
} ], "responses": { "200": { "description": "List of knowledge bases.", "content": { "application/json": { "schema": { "type": "object", "properties": { "data": { "type": "array", "description": "Array of knowledge base objects.", "items": { "$ref": "#/components/schemas/Dataset" } }, "has_more": { "type": "boolean", "description": "Whether more items exist on the next page." }, "limit": { "type": "integer", "description": "Number of items per page." }, "total": { "type": "integer", "description": "Total number of matching items." }, "page": { "type": "integer", "description": "Current page number." } } }, "examples": { "success": { "summary": "Response Example", "value": { "data": [ { "id": "c42e2a6e-40b3-4330-96f8-f1e4d768e8c9", "name": "Product Documentation", "description": "Technical documentation for the product API", "provider": "vendor", "permission": "only_me", "data_source_type": null, "indexing_technique": "high_quality", "app_count": 0, "document_count": 0, "word_count": 0, "created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4", "author_name": "admin", "created_at": 1741267200, "updated_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4", "updated_at": 1741267200, "embedding_model": "text-embedding-3-small", "embedding_model_provider": "openai", "embedding_available": true, "retrieval_model_dict": { "search_method": "semantic_search", "reranking_enable": false, "reranking_mode": null, "reranking_model": { "reranking_provider_name": "", "reranking_model_name": "" }, "weights": null, "top_k": 3, "score_threshold_enabled": false, "score_threshold": null }, "tags": [], "doc_form": "text_model", "external_knowledge_info": null, "external_retrieval_model": null, "doc_metadata": [], "built_in_field_enabled": true, "pipeline_id": null, "runtime_mode": null, "chunk_structure": null, "icon_info": null, "summary_index_setting": null, "is_published": false, "total_documents": 0, "total_available_documents": 0, "enable_api": true, "is_multimodal": false } ], "has_more": 
false, "limit": 20, "total": 1, "page": 1 } } } } } } } } }, "/datasets/{dataset_id}": { "get": { "tags": [ "Knowledge Bases" ], "summary": "Get Knowledge Base", "description": "Retrieve detailed information about a specific knowledge base, including its embedding model, retrieval configuration, and document statistics.", "operationId": "getDatasetDetail", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." } ], "responses": { "200": { "description": "Knowledge base details.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/Dataset" }, "examples": { "success": { "summary": "Response Example", "value": { "id": "c42e2a6e-40b3-4330-96f8-f1e4d768e8c9", "name": "Product Documentation", "description": "Technical documentation for the product API", "provider": "vendor", "permission": "only_me", "data_source_type": null, "indexing_technique": "high_quality", "app_count": 0, "document_count": 0, "word_count": 0, "created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4", "author_name": "admin", "created_at": 1741267200, "updated_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4", "updated_at": 1741267200, "embedding_model": "text-embedding-3-small", "embedding_model_provider": "openai", "embedding_available": true, "retrieval_model_dict": { "search_method": "semantic_search", "reranking_enable": false, "reranking_mode": null, "reranking_model": { "reranking_provider_name": "", "reranking_model_name": "" }, "weights": null, "top_k": 3, "score_threshold_enabled": false, "score_threshold": null }, "tags": [], "doc_form": "text_model", "external_knowledge_info": null, "external_retrieval_model": null, "doc_metadata": [], "built_in_field_enabled": true, "pipeline_id": null, "runtime_mode": null, "chunk_structure": null, "icon_info": null, "summary_index_setting": null, "is_published": false, "total_documents": 0, "total_available_documents": 0, "enable_api": 
true, "is_multimodal": false } } } } } }, "403": { "description": "`forbidden` : Insufficient permissions to access this knowledge base.", "content": { "application/json": { "examples": { "forbidden": { "summary": "forbidden", "value": { "status": 403, "code": "forbidden", "message": "Insufficient permissions to access this knowledge base." } } } } } }, "404": { "description": "`not_found` : Dataset not found.", "content": { "application/json": { "examples": { "not_found": { "summary": "not_found", "value": { "status": 404, "code": "not_found", "message": "Dataset not found." } } } } } } } }, "patch": { "tags": [ "Knowledge Bases" ], "summary": "Update Knowledge Base", "description": "Update the name, description, permissions, or retrieval settings of an existing knowledge base. Only the fields provided in the request body are updated.", "operationId": "updateDataset", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." } ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "type": "object", "properties": { "name": { "type": "string", "minLength": 1, "maxLength": 40, "description": "Name of the knowledge base." }, "description": { "type": "string", "maxLength": 400, "description": "Description of the knowledge base." }, "indexing_technique": { "type": "string", "enum": [ "high_quality", "economy" ], "nullable": true, "description": "`high_quality` uses embedding models for precise search; `economy` uses keyword-based indexing." }, "permission": { "type": "string", "enum": [ "only_me", "all_team_members", "partial_members" ], "description": "Controls who can access this knowledge base. `only_me` restricts to the creator, `all_team_members` grants access to the entire workspace, `partial_members` grants access to specified members." }, "embedding_model": { "type": "string", "description": "Embedding model name. 
Use the `model` field from [Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`." }, "embedding_model_provider": { "type": "string", "description": "Embedding model provider. Use the `provider` field from [Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`." }, "retrieval_model": { "$ref": "#/components/schemas/RetrievalModel", "description": "Retrieval model configuration. Controls how chunks are searched and ranked when querying this knowledge base." }, "partial_member_list": { "type": "array", "description": "List of team members with access when `permission` is `partial_members`.", "items": { "type": "object", "properties": { "user_id": { "type": "string", "description": "ID of the team member to grant access." } } } }, "external_retrieval_model": { "type": "object", "description": "Retrieval settings for external knowledge bases.", "properties": { "top_k": { "type": "integer", "description": "Maximum number of results to return." }, "score_threshold": { "type": "number", "description": "Minimum relevance score threshold for filtering results." }, "score_threshold_enabled": { "type": "boolean", "description": "Whether score threshold filtering is enabled." } } }, "external_knowledge_id": { "type": "string", "description": "ID of the external knowledge base." }, "external_knowledge_api_id": { "type": "string", "description": "ID of the external knowledge API connection." 
} } } } } }, "responses": { "200": { "description": "Knowledge base updated successfully.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/Dataset" }, "examples": { "success": { "summary": "Response Example", "value": { "id": "c42e2a6e-40b3-4330-96f8-f1e4d768e8c9", "name": "Product Documentation", "description": "Technical documentation for the product API", "provider": "vendor", "permission": "only_me", "data_source_type": null, "indexing_technique": "high_quality", "app_count": 0, "document_count": 0, "word_count": 0, "created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4", "author_name": "admin", "created_at": 1741267200, "updated_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4", "updated_at": 1741267200, "embedding_model": "text-embedding-3-small", "embedding_model_provider": "openai", "embedding_available": true, "retrieval_model_dict": { "search_method": "semantic_search", "reranking_enable": false, "reranking_mode": null, "reranking_model": { "reranking_provider_name": "", "reranking_model_name": "" }, "weights": null, "top_k": 3, "score_threshold_enabled": false, "score_threshold": null }, "tags": [], "doc_form": "text_model", "external_knowledge_info": null, "external_retrieval_model": null, "doc_metadata": [], "built_in_field_enabled": true, "pipeline_id": null, "runtime_mode": null, "chunk_structure": null, "icon_info": null, "summary_index_setting": null, "is_published": false, "total_documents": 0, "total_available_documents": 0, "enable_api": true, "is_multimodal": false } } } } } }, "403": { "description": "`forbidden` : Insufficient permissions to access this knowledge base.", "content": { "application/json": { "examples": { "forbidden": { "summary": "forbidden", "value": { "status": 403, "code": "forbidden", "message": "Insufficient permissions to access this knowledge base." 
} } } } } }, "404": { "description": "`not_found` : Dataset not found.", "content": { "application/json": { "examples": { "not_found": { "summary": "not_found", "value": { "status": 404, "code": "not_found", "message": "Dataset not found." } } } } } } } }, "delete": { "tags": [ "Knowledge Bases" ], "summary": "Delete Knowledge Base", "description": "Permanently delete a knowledge base and all its documents. The knowledge base must not be in use by any application.", "operationId": "deleteDataset", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." } ], "responses": { "204": { "description": "Success." }, "404": { "description": "`not_found` : Dataset not found.", "content": { "application/json": { "examples": { "not_found": { "summary": "not_found", "value": { "status": 404, "code": "not_found", "message": "Dataset not found." } } } } } }, "409": { "description": "`dataset_in_use` : The knowledge base is being used by some apps. Please remove it from the apps before deleting.", "content": { "application/json": { "examples": { "dataset_in_use": { "summary": "dataset_in_use", "value": { "status": 409, "code": "dataset_in_use", "message": "The dataset is being used by some apps. Please remove the dataset from the apps before deleting it." } } } } } } } } }, "/datasets/{dataset_id}/document/create-by-text": { "post": { "tags": [ "Documents" ], "summary": "Create Document by Text", "description": "Create a document from raw text content. The document is processed asynchronously — use the returned `batch` ID with [Get Document Indexing Status](/api-reference/documents/get-document-indexing-status) to track progress.", "operationId": "createDocumentFromText", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." 
} ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "type": "object", "required": [ "name", "text" ], "properties": { "name": { "type": "string", "description": "Document name." }, "text": { "type": "string", "description": "Document text content." }, "indexing_technique": { "type": "string", "enum": [ "high_quality", "economy" ], "description": "Required when adding the first document to a knowledge base. Subsequent documents inherit the knowledge base's indexing technique if omitted. `high_quality` uses embedding models for precise search; `economy` uses keyword-based indexing." }, "doc_form": { "type": "string", "enum": [ "text_model", "hierarchical_model", "qa_model" ], "default": "text_model", "description": "`text_model` for standard text chunking, `hierarchical_model` for parent-child chunk structure, `qa_model` for question-answer pair extraction." }, "doc_language": { "type": "string", "default": "English", "description": "Language of the document for processing optimization." }, "process_rule": { "type": "object", "description": "Processing rules for chunking.", "required": [ "mode" ], "properties": { "mode": { "type": "string", "enum": [ "automatic", "custom", "hierarchical" ], "description": "Processing mode. `automatic` uses built-in rules, `custom` allows manual configuration, `hierarchical` enables parent-child chunk structure (use with `doc_form: hierarchical_model`)." }, "rules": { "type": "object", "properties": { "pre_processing_rules": { "type": "array", "items": { "type": "object", "properties": { "id": { "type": "string", "enum": [ "remove_stopwords", "remove_extra_spaces", "remove_urls_emails" ], "description": "Rule identifier." }, "enabled": { "type": "boolean", "description": "Whether this preprocessing rule is enabled." } } } }, "segmentation": { "type": "object", "properties": { "separator": { "type": "string", "default": "\n", "description": "Custom separator for splitting text." 
}, "max_tokens": { "type": "integer", "description": "Maximum token count per chunk." }, "chunk_overlap": { "type": "integer", "default": 0, "description": "Token overlap between chunks." } } } } } } }, "retrieval_model": { "$ref": "#/components/schemas/RetrievalModel", "description": "Retrieval model configuration. Controls how chunks are searched and ranked when querying this knowledge base." }, "embedding_model": { "type": "string", "description": "Embedding model name. Use the `model` field from [Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`." }, "embedding_model_provider": { "type": "string", "description": "Embedding model provider. Use the `provider` field from [Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`." }, "original_document_id": { "type": "string", "description": "Original document ID for versioning." } } } } } }, "responses": { "200": { "description": "Document created successfully.", "content": { "application/json": { "schema": { "type": "object", "properties": { "document": { "$ref": "#/components/schemas/Document" }, "batch": { "type": "string", "description": "Batch ID for tracking indexing progress." 
} } }, "examples": { "success": { "summary": "Response Example", "value": { "document": { "id": "a8e0e5b5-78c6-4130-a5ce-25feb0e0b4ac", "position": 1, "data_source_type": "upload_file", "data_source_info": { "upload_file_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890" }, "data_source_detail_dict": { "upload_file": { "id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", "name": "guide.txt", "size": 2048, "extension": "txt", "mime_type": "text/plain", "created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4", "created_at": 1741267200 } }, "dataset_process_rule_id": "e1f2a3b4-c5d6-7890-ef12-345678901234", "name": "guide.txt", "created_from": "api", "created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4", "created_at": 1741267200, "tokens": 0, "indexing_status": "indexing", "error": null, "enabled": true, "disabled_at": null, "disabled_by": null, "archived": false, "display_status": "indexing", "word_count": 0, "hit_count": 0, "doc_form": "text_model", "doc_metadata": [], "summary_index_status": null, "need_summary": false }, "batch": "20250306150245647595" } } } } } }, "400": { "description": "- `provider_not_initialize` : No valid model provider credentials found. Please go to Settings -> Model Provider to complete your provider credentials.\n- `invalid_param` : Knowledge base does not exist. / indexing_technique is required.", "content": { "application/json": { "examples": { "provider_not_initialize": { "summary": "provider_not_initialize", "value": { "status": 400, "code": "provider_not_initialize", "message": "No valid model provider credentials found. Please go to Settings -> Model Provider to complete your provider credentials." } }, "invalid_param_dataset": { "summary": "invalid_param", "value": { "status": 400, "code": "invalid_param", "message": "Dataset does not exist." } }, "invalid_param_indexing": { "summary": "invalid_param", "value": { "status": 400, "code": "invalid_param", "message": "indexing_technique is required." 
} } } } } } } } }, "/datasets/{dataset_id}/document/create-by-file": { "post": { "tags": [ "Documents" ], "summary": "Create Document by File", "description": "Create a document by uploading a file. Supports common document formats (PDF, TXT, DOCX, etc.). Processing is asynchronous — use the returned `batch` ID with [Get Document Indexing Status](/api-reference/documents/get-document-indexing-status) to track progress.", "operationId": "createDocumentFromFile", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." } ], "requestBody": { "required": true, "content": { "multipart/form-data": { "schema": { "type": "object", "required": [ "file" ], "properties": { "file": { "type": "string", "format": "binary", "description": "File to upload." }, "data": { "type": "string", "description": "JSON string containing configuration. Accepts the same fields as [Create Document by Text](/api-reference/documents/create-document-by-text) (`indexing_technique`, `doc_form`, `doc_language`, `process_rule`, `retrieval_model`, `embedding_model`, `embedding_model_provider`) except `name` and `text`.", "example": "{\"indexing_technique\":\"high_quality\",\"doc_form\":\"text_model\",\"doc_language\":\"English\",\"process_rule\":{\"mode\":\"automatic\"}}" } } } } } }, "responses": { "200": { "description": "Document created successfully.", "content": { "application/json": { "schema": { "type": "object", "properties": { "document": { "$ref": "#/components/schemas/Document" }, "batch": { "type": "string", "description": "Batch ID for tracking indexing progress." 
} } }, "examples": { "success": { "summary": "Response Example", "value": { "document": { "id": "a8e0e5b5-78c6-4130-a5ce-25feb0e0b4ac", "position": 1, "data_source_type": "upload_file", "data_source_info": { "upload_file_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890" }, "data_source_detail_dict": { "upload_file": { "id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", "name": "guide.txt", "size": 2048, "extension": "txt", "mime_type": "text/plain", "created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4", "created_at": 1741267200 } }, "dataset_process_rule_id": "e1f2a3b4-c5d6-7890-ef12-345678901234", "name": "guide.txt", "created_from": "api", "created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4", "created_at": 1741267200, "tokens": 0, "indexing_status": "indexing", "error": null, "enabled": true, "disabled_at": null, "disabled_by": null, "archived": false, "display_status": "indexing", "word_count": 0, "hit_count": 0, "doc_form": "text_model", "doc_metadata": [], "summary_index_status": null, "need_summary": false }, "batch": "20250306150245647595" } } } } } }, "400": { "description": "- `no_file_uploaded` : Please upload your file.\n- `too_many_files` : Only one file is allowed.\n- `filename_not_exists_error` : The specified filename does not exist.\n- `provider_not_initialize` : No valid model provider credentials found. Please go to Settings -> Model Provider to complete your provider credentials.\n- `invalid_param` : Knowledge base does not exist, external datasets not supported, file too large, unsupported file type, or missing required fields.", "content": { "application/json": { "examples": { "no_file_uploaded": { "summary": "no_file_uploaded", "value": { "status": 400, "code": "no_file_uploaded", "message": "Please upload your file." } }, "too_many_files": { "summary": "too_many_files", "value": { "status": 400, "code": "too_many_files", "message": "Only one file is allowed." 
} }, "filename_not_exists_error": { "summary": "filename_not_exists_error", "value": { "status": 400, "code": "filename_not_exists_error", "message": "The specified filename does not exist." } }, "provider_not_initialize": { "summary": "provider_not_initialize", "value": { "status": 400, "code": "provider_not_initialize", "message": "No valid model provider credentials found. Please go to Settings -> Model Provider to complete your provider credentials." } }, "invalid_param_dataset": { "summary": "invalid_param (dataset)", "value": { "status": 400, "code": "invalid_param", "message": "Dataset does not exist." } }, "invalid_param_external": { "summary": "invalid_param (external)", "value": { "status": 400, "code": "invalid_param", "message": "External datasets are not supported." } }, "invalid_param_file_too_large": { "summary": "invalid_param (file_too_large)", "value": { "status": 400, "code": "invalid_param", "message": "File size exceeded." } }, "invalid_param_unsupported_file_type": { "summary": "invalid_param (unsupported_file_type)", "value": { "status": 400, "code": "invalid_param", "message": "File type not allowed." } }, "invalid_param_indexing": { "summary": "invalid_param (indexing_technique)", "value": { "status": 400, "code": "invalid_param", "message": "indexing_technique is required." } }, "invalid_param_process_rule": { "summary": "invalid_param (process_rule)", "value": { "status": 400, "code": "invalid_param", "message": "process_rule is required." } } } } } } } } }, "/datasets/{dataset_id}/documents": { "get": { "tags": [ "Documents" ], "summary": "List Documents", "description": "Returns a paginated list of documents in the knowledge base. Supports filtering by keyword and indexing status.", "operationId": "listDocuments", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." 
}, { "name": "page", "in": "query", "schema": { "type": "integer", "default": 1 }, "description": "Page number to retrieve." }, { "name": "limit", "in": "query", "schema": { "type": "integer", "default": 20 }, "description": "Number of items per page. Server caps at `100`." }, { "name": "keyword", "in": "query", "schema": { "type": "string" }, "description": "Search keyword to filter by document name." }, { "name": "status", "in": "query", "schema": { "type": "string", "enum": [ "queuing", "indexing", "paused", "error", "available", "disabled", "archived" ] }, "description": "Filter by display status." } ], "responses": { "200": { "description": "List of documents.", "content": { "application/json": { "schema": { "type": "object", "properties": { "data": { "type": "array", "description": "Array of document objects.", "items": { "$ref": "#/components/schemas/Document" } }, "has_more": { "type": "boolean", "description": "Whether more items exist on the next page." }, "limit": { "type": "integer", "description": "Number of items per page." }, "total": { "type": "integer", "description": "Total number of matching items." }, "page": { "type": "integer", "description": "Current page number." 
} } }, "examples": { "success": { "summary": "Response Example", "value": { "data": [ { "id": "a8e0e5b5-78c6-4130-a5ce-25feb0e0b4ac", "position": 1, "data_source_type": "upload_file", "data_source_info": { "upload_file_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890" }, "data_source_detail_dict": { "upload_file": { "id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", "name": "guide.txt", "size": 2048, "extension": "txt", "mime_type": "text/plain", "created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4", "created_at": 1741267200 } }, "dataset_process_rule_id": "e1f2a3b4-c5d6-7890-ef12-345678901234", "name": "guide.txt", "created_from": "api", "created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4", "created_at": 1741267200, "tokens": 512, "indexing_status": "completed", "error": null, "enabled": true, "disabled_at": null, "disabled_by": null, "archived": false, "display_status": "available", "word_count": 350, "hit_count": 0, "doc_form": "text_model", "doc_metadata": [], "summary_index_status": null, "need_summary": false } ], "has_more": false, "limit": 20, "total": 1, "page": 1 } } } } } }, "404": { "description": "`not_found` : Knowledge base not found.", "content": { "application/json": { "examples": { "not_found": { "summary": "not_found", "value": { "status": 404, "code": "not_found", "message": "Dataset not found." } } } } } } } } }, "/datasets/{dataset_id}/documents/{document_id}": { "get": { "tags": [ "Documents" ], "summary": "Get Document", "description": "Retrieve detailed information about a specific document, including its indexing status, metadata, and processing statistics.", "operationId": "getDocumentDetail", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." }, { "name": "document_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Document ID." 
}, { "name": "metadata", "in": "query", "schema": { "type": "string", "enum": [ "all", "only", "without" ], "default": "all" }, "description": "`all` returns all fields including metadata. `only` returns only `id`, `doc_type`, and `doc_metadata`. `without` returns all fields except `doc_type` and `doc_metadata`." } ], "responses": { "200": { "description": "Document details. The response shape varies based on the `metadata` query parameter. When `metadata` is `only`, only `id`, `doc_type`, and `doc_metadata` are returned. When `metadata` is `without`, `doc_type` and `doc_metadata` are omitted.", "content": { "application/json": { "schema": { "type": "object", "properties": { "id": { "type": "string", "description": "Document identifier." }, "position": { "type": "integer", "description": "Position index within the knowledge base." }, "data_source_type": { "type": "string", "description": "How the document was uploaded. `upload_file` for file uploads, `notion_import` for Notion imports." }, "data_source_info": { "type": "object", "description": "Raw data source information." }, "dataset_process_rule_id": { "type": "string", "description": "ID of the processing rule applied to this document." }, "dataset_process_rule": { "type": "object", "description": "Knowledge-base-level processing rule configuration." }, "document_process_rule": { "type": "object", "description": "Document-level processing rule configuration." }, "name": { "type": "string", "description": "Document name." }, "created_from": { "type": "string", "description": "Origin of the document. `api` for API creation, `web` for UI creation." }, "created_by": { "type": "string", "description": "ID of the user who created the document." }, "created_at": { "type": "number", "description": "Unix timestamp of document creation." }, "tokens": { "type": "integer", "description": "Number of tokens in the document." }, "indexing_status": { "type": "string", "description": "Current indexing status, e.g. 
`waiting`, `parsing`, `cleaning`, `splitting`, `indexing`, `completed`, `error`, `paused`." }, "error": { "type": "string", "nullable": true, "description": "Error message if indexing failed, `null` otherwise." }, "enabled": { "type": "boolean", "description": "Whether the document is enabled for retrieval." }, "disabled_at": { "type": "number", "nullable": true, "description": "Unix timestamp when the document was disabled, `null` if enabled." }, "disabled_by": { "type": "string", "nullable": true, "description": "ID of the user who disabled the document, `null` if enabled." }, "archived": { "type": "boolean", "description": "Whether the document is archived." }, "display_status": { "type": "string", "description": "Display-friendly indexing status for the UI." }, "word_count": { "type": "integer", "description": "Total word count of the document." }, "hit_count": { "type": "integer", "description": "Number of times this document has been retrieved." }, "doc_form": { "type": "string", "description": "Document chunking mode. `text_model` for standard text, `hierarchical_model` for parent-child, `qa_model` for QA pairs." }, "doc_language": { "type": "string", "description": "Language of the document content." }, "doc_type": { "type": "string", "nullable": true, "description": "Document type classification, `null` if not set." }, "doc_metadata": { "type": "array", "description": "Custom metadata key-value pairs for this document.", "items": { "type": "object", "properties": { "id": { "type": "string", "description": "Metadata field identifier." }, "name": { "type": "string", "description": "Metadata field name." }, "type": { "type": "string", "description": "Metadata field type." }, "value": { "type": "string", "description": "Metadata field value for this document." } } } }, "completed_at": { "type": "number", "nullable": true, "description": "Unix timestamp when processing completed, `null` if not yet completed." 
}, "updated_at": { "type": "number", "nullable": true, "description": "Unix timestamp of last update, `null` if never updated." }, "indexing_latency": { "type": "number", "nullable": true, "description": "Time taken for indexing in seconds, `null` if not completed." }, "segment_count": { "type": "integer", "description": "Number of chunks in the document." }, "average_segment_length": { "type": "number", "description": "Average character length of chunks." }, "summary_index_status": { "type": "string", "nullable": true, "description": "Status of summary indexing, `null` if summary index is not enabled." }, "need_summary": { "type": "boolean", "description": "Whether the document needs summary generation." } } }, "examples": { "success": { "summary": "Response Example", "value": { "id": "a8e0e5b5-78c6-4130-a5ce-25feb0e0b4ac", "position": 1, "data_source_type": "upload_file", "data_source_info": { "upload_file_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890" }, "dataset_process_rule_id": "e1f2a3b4-c5d6-7890-ef12-345678901234", "dataset_process_rule": { "id": "e1f2a3b4-c5d6-7890-ef12-345678901234", "mode": "custom" }, "document_process_rule": { "mode": "custom", "rules": { "pre_processing_rules": [], "segmentation": { "separator": "###", "max_tokens": 500, "chunk_overlap": 50 } } }, "name": "guide.txt", "created_from": "api", "created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4", "created_at": 1741267200, "tokens": 512, "indexing_status": "completed", "error": null, "enabled": true, "disabled_at": null, "disabled_by": null, "archived": false, "display_status": "available", "word_count": 350, "hit_count": 0, "doc_form": "text_model", "doc_language": "English", "doc_type": null, "doc_metadata": [], "completed_at": 1741267260, "updated_at": 1741267260, "indexing_latency": 60.0, "segment_count": 5, "average_segment_length": 70.0, "summary_index_status": null, "need_summary": false } } } } } }, "400": { "description": "`invalid_metadata` : Invalid metadata value for the 
specified key.", "content": { "application/json": { "examples": { "invalid_metadata": { "summary": "invalid_metadata", "value": { "status": 400, "code": "invalid_metadata", "message": "Invalid metadata value: {metadata_key}" } } } } } }, "403": { "description": "`forbidden` : No permission.", "content": { "application/json": { "examples": { "forbidden": { "summary": "forbidden", "value": { "status": 403, "code": "forbidden", "message": "No permission." } } } } } }, "404": { "description": "`not_found` : Document not found.", "content": { "application/json": { "examples": { "not_found": { "summary": "not_found", "value": { "status": 404, "code": "not_found", "message": "Document not found." } } } } } } } }, "delete": { "tags": [ "Documents" ], "summary": "Delete Document", "description": "Permanently delete a document and all its chunks from the knowledge base.", "operationId": "deleteDocument", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." }, { "name": "document_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Document ID." } ], "responses": { "204": { "description": "Success." }, "400": { "description": "`document_indexing` : Cannot delete document during indexing.", "content": { "application/json": { "examples": { "document_indexing": { "summary": "document_indexing", "value": { "status": 400, "code": "document_indexing", "message": "Cannot delete document during indexing." } } } } } }, "403": { "description": "`archived_document_immutable` : The archived document is not editable.", "content": { "application/json": { "examples": { "archived_document_immutable": { "summary": "archived_document_immutable", "value": { "status": 403, "code": "archived_document_immutable", "message": "The archived document is not editable." 
} } } } } }, "404": { "description": "`not_found` : Document Not Exists.", "content": { "application/json": { "examples": { "not_found": { "summary": "not_found", "value": { "status": 404, "code": "not_found", "message": "Document Not Exists." } } } } } } } } }, "/datasets/{dataset_id}/documents/{document_id}/download": { "get": { "tags": [ "Documents" ], "summary": "Download Document", "description": "Get a signed download URL for a document's original uploaded file.", "operationId": "downloadDocument", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." }, { "name": "document_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Document ID." } ], "responses": { "200": { "description": "Download URL generated successfully.", "content": { "application/json": { "schema": { "type": "object", "properties": { "url": { "type": "string", "description": "Signed URL to download the original uploaded file." } } }, "examples": { "success": { "summary": "Response Example", "value": { "url": "https://storage.example.com/datasets/documents/abc123/original-file.pdf?token=xyz789&expires=1741353600" } } } } } }, "403": { "description": "`forbidden` : No permission to access this document.", "content": { "application/json": { "examples": { "forbidden": { "summary": "forbidden", "value": { "status": 403, "code": "forbidden", "message": "No permission." } } } } } }, "404": { "description": "`not_found` : Document not found.", "content": { "application/json": { "examples": { "not_found": { "summary": "not_found", "value": { "status": 404, "code": "not_found", "message": "Document not found." } } } } } } } } }, "/datasets/{dataset_id}/documents/{batch}/indexing-status": { "get": { "tags": [ "Documents" ], "summary": "Get Document Indexing Status", "description": "Check the indexing progress of documents in a batch. 
Returns the current processing stage and chunk completion counts for each document. Poll this endpoint until `indexing_status` reaches `completed` or `error`. The status progresses through: `waiting` → `parsing` → `cleaning` → `splitting` → `indexing` → `completed`.", "operationId": "getDocumentIndexingStatus", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." }, { "name": "batch", "in": "path", "required": true, "schema": { "type": "string" }, "description": "Batch ID returned from document creation." } ], "responses": { "200": { "description": "Indexing status for documents in the batch.", "content": { "application/json": { "schema": { "type": "object", "properties": { "data": { "type": "array", "description": "List of indexing status entries.", "items": { "type": "object", "properties": { "id": { "type": "string", "description": "Document identifier." }, "indexing_status": { "type": "string", "description": "Current indexing status: `waiting`, `parsing`, `cleaning`, `splitting`, `indexing`, `completed`, or `error`." }, "processing_started_at": { "type": "number", "description": "Unix timestamp when processing started." }, "parsing_completed_at": { "type": "number", "description": "Unix timestamp when parsing completed." }, "cleaning_completed_at": { "type": "number", "description": "Unix timestamp when cleaning completed." }, "splitting_completed_at": { "type": "number", "description": "Unix timestamp when splitting completed." }, "completed_at": { "type": "number", "description": "Unix timestamp when indexing completed." }, "paused_at": { "type": "number", "nullable": true, "description": "Unix timestamp when indexing was paused. `null` if not paused." }, "error": { "type": "string", "nullable": true, "description": "Error message if indexing failed. `null` if no error." 
}, "stopped_at": { "type": "number", "nullable": true, "description": "Unix timestamp when indexing was stopped. `null` if not stopped." }, "completed_segments": { "type": "integer", "description": "Number of chunks that have been indexed." }, "total_segments": { "type": "integer", "description": "Total number of chunks to be indexed." } } } } } }, "examples": { "success": { "summary": "Response Example", "value": { "data": [ { "id": "a8e0e5b5-78c6-4130-a5ce-25feb0e0b4ac", "indexing_status": "completed", "processing_started_at": 1741267200, "parsing_completed_at": 1741267200, "cleaning_completed_at": 1741267200, "splitting_completed_at": 1741267200, "completed_at": 1741267200, "paused_at": null, "error": null, "stopped_at": null, "completed_segments": 5, "total_segments": 5 } ] } } } } } }, "404": { "description": "`not_found` : Knowledge base not found. / Documents not found.", "content": { "application/json": { "examples": { "dataset_not_found": { "summary": "not_found", "value": { "status": 404, "code": "not_found", "message": "Dataset not found." } }, "documents_not_found": { "summary": "not_found", "value": { "status": 404, "code": "not_found", "message": "Documents not found." } } } } } } } } }, "/datasets/{dataset_id}/documents/{document_id}/update-by-text": { "post": { "tags": [ "Documents" ], "summary": "Update Document by Text", "description": "Update an existing document's text content, name, or processing configuration. Re-triggers indexing if content changes — use the returned `batch` ID with [Get Document Indexing Status](/api-reference/documents/get-document-indexing-status) to track progress.", "operationId": "updateDocumentByText", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." }, { "name": "document_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Document ID." 
} ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "type": "object", "properties": { "name": { "type": "string", "description": "Document name. Required when `text` is provided." }, "text": { "type": "string", "description": "Document text content." }, "process_rule": { "type": "object", "description": "Processing rules for chunking.", "required": [ "mode" ], "properties": { "mode": { "type": "string", "enum": [ "automatic", "custom", "hierarchical" ], "description": "Processing mode. `automatic` uses built-in rules, `custom` allows manual configuration, `hierarchical` enables parent-child chunk structure (use with `doc_form: hierarchical_model`)." }, "rules": { "type": "object", "properties": { "pre_processing_rules": { "type": "array", "items": { "type": "object", "properties": { "id": { "type": "string", "enum": [ "remove_stopwords", "remove_extra_spaces", "remove_urls_emails" ], "description": "Rule identifier." }, "enabled": { "type": "boolean", "description": "Whether this preprocessing rule is enabled." } } } }, "segmentation": { "type": "object", "properties": { "separator": { "type": "string", "default": "\n", "description": "Custom separator for splitting text." }, "max_tokens": { "type": "integer", "description": "Maximum token count per chunk." }, "chunk_overlap": { "type": "integer", "default": 0, "description": "Token overlap between chunks." } } } } } } }, "doc_form": { "type": "string", "enum": [ "text_model", "hierarchical_model", "qa_model" ], "default": "text_model", "description": "`text_model` for standard text chunking, `hierarchical_model` for parent-child chunk structure, `qa_model` for question-answer pair extraction." }, "doc_language": { "type": "string", "default": "English", "description": "Language of the document for processing optimization." }, "retrieval_model": { "$ref": "#/components/schemas/RetrievalModel", "description": "Retrieval model configuration. 
Controls how chunks are searched and ranked when querying this knowledge base." } } } } } }, "responses": { "200": { "description": "Document updated successfully.", "content": { "application/json": { "schema": { "type": "object", "properties": { "document": { "$ref": "#/components/schemas/Document" }, "batch": { "type": "string", "description": "Batch ID for tracking indexing progress." } } }, "examples": { "success": { "summary": "Response Example", "value": { "document": { "id": "a8e0e5b5-78c6-4130-a5ce-25feb0e0b4ac", "position": 1, "data_source_type": "upload_file", "data_source_info": { "upload_file_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890" }, "data_source_detail_dict": { "upload_file": { "id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", "name": "guide.txt", "size": 2048, "extension": "txt", "mime_type": "text/plain", "created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4", "created_at": 1741267200 } }, "dataset_process_rule_id": "e1f2a3b4-c5d6-7890-ef12-345678901234", "name": "guide.txt", "created_from": "api", "created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4", "created_at": 1741267200, "tokens": 512, "indexing_status": "completed", "error": null, "enabled": true, "disabled_at": null, "disabled_by": null, "archived": false, "display_status": "available", "word_count": 350, "hit_count": 0, "doc_form": "text_model", "doc_metadata": [], "summary_index_status": null, "need_summary": false }, "batch": "20250306150245647595" } } } } } }, "400": { "description": "- `provider_not_initialize` : No valid model provider credentials found. Please go to Settings -> Model Provider to complete your provider credentials.\n- `invalid_param` : Knowledge base does not exist, or name is required when text is provided.", "content": { "application/json": { "examples": { "provider_not_initialize": { "summary": "provider_not_initialize", "value": { "status": 400, "code": "provider_not_initialize", "message": "No valid model provider credentials found. 
Please go to Settings -> Model Provider to complete your provider credentials." } }, "invalid_param_dataset": { "summary": "invalid_param", "value": { "status": 400, "code": "invalid_param", "message": "Dataset does not exist." } }, "invalid_param_name_required": { "summary": "invalid_param (name required)", "value": { "status": 400, "code": "invalid_param", "message": "name is required when text is provided." } } } } } } } } }, "/datasets/{dataset_id}/documents/{document_id}/update-by-file": { "post": { "tags": [ "Documents" ], "summary": "Update Document by File", "description": "Update an existing document by uploading a new file. Re-triggers indexing — use the returned `batch` ID with [Get Document Indexing Status](/api-reference/documents/get-document-indexing-status) to track progress.", "operationId": "updateDocumentByFile", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." }, { "name": "document_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Document ID." } ], "requestBody": { "required": true, "content": { "multipart/form-data": { "schema": { "type": "object", "properties": { "file": { "type": "string", "format": "binary", "description": "File to upload." }, "data": { "type": "string", "description": "JSON string containing configuration. 
Accepts the same fields as [Create Document by Text](/api-reference/documents/create-document-by-text) (`indexing_technique`, `doc_form`, `doc_language`, `process_rule`, `retrieval_model`, `embedding_model`, `embedding_model_provider`) except `name` and `text`.", "example": "{\"indexing_technique\":\"high_quality\",\"doc_form\":\"text_model\",\"doc_language\":\"English\",\"process_rule\":{\"mode\":\"automatic\"}}" } } } } } }, "responses": { "200": { "description": "Document updated successfully.", "content": { "application/json": { "schema": { "type": "object", "properties": { "document": { "$ref": "#/components/schemas/Document" }, "batch": { "type": "string", "description": "Batch ID for tracking indexing progress." } } }, "examples": { "success": { "summary": "Response Example", "value": { "document": { "id": "a8e0e5b5-78c6-4130-a5ce-25feb0e0b4ac", "position": 1, "data_source_type": "upload_file", "data_source_info": { "upload_file_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890" }, "data_source_detail_dict": { "upload_file": { "id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", "name": "guide.txt", "size": 2048, "extension": "txt", "mime_type": "text/plain", "created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4", "created_at": 1741267200 } }, "dataset_process_rule_id": "e1f2a3b4-c5d6-7890-ef12-345678901234", "name": "guide.txt", "created_from": "api", "created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4", "created_at": 1741267200, "tokens": 512, "indexing_status": "completed", "error": null, "enabled": true, "disabled_at": null, "disabled_by": null, "archived": false, "display_status": "available", "word_count": 350, "hit_count": 0, "doc_form": "text_model", "doc_metadata": [], "summary_index_status": null, "need_summary": false }, "batch": "20250306150245647595" } } } } } }, "400": { "description": "- `too_many_files` : Only one file is allowed.\n- `filename_not_exists_error` : The specified filename does not exist.\n- `provider_not_initialize` : No valid model provider 
credentials found. Please go to Settings -> Model Provider to complete your provider credentials.\n- `invalid_param` : Knowledge base does not exist, external datasets not supported, file too large, or unsupported file type.", "content": { "application/json": { "examples": { "too_many_files": { "summary": "too_many_files", "value": { "status": 400, "code": "too_many_files", "message": "Only one file is allowed." } }, "filename_not_exists_error": { "summary": "filename_not_exists_error", "value": { "status": 400, "code": "filename_not_exists_error", "message": "The specified filename does not exist." } }, "provider_not_initialize": { "summary": "provider_not_initialize", "value": { "status": 400, "code": "provider_not_initialize", "message": "No valid model provider credentials found. Please go to Settings -> Model Provider to complete your provider credentials." } }, "invalid_param_dataset": { "summary": "invalid_param (dataset)", "value": { "status": 400, "code": "invalid_param", "message": "Dataset does not exist." } }, "invalid_param_external": { "summary": "invalid_param (external)", "value": { "status": 400, "code": "invalid_param", "message": "External datasets are not supported." } }, "invalid_param_file_too_large": { "summary": "invalid_param (file_too_large)", "value": { "status": 400, "code": "invalid_param", "message": "File size exceeded." } }, "invalid_param_unsupported_file_type": { "summary": "invalid_param (unsupported_file_type)", "value": { "status": 400, "code": "invalid_param", "message": "File type not allowed." } } } } } } } } }, "/datasets/{dataset_id}/documents/download-zip": { "post": { "tags": [ "Documents" ], "summary": "Download Documents as ZIP", "description": "Download multiple uploaded-file documents as a single ZIP archive. 
Accepts up to `100` document IDs.", "operationId": "downloadDocumentsZip", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." } ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "type": "object", "required": [ "document_ids" ], "properties": { "document_ids": { "type": "array", "minItems": 1, "maxItems": 100, "items": { "type": "string", "format": "uuid" }, "description": "Array of document IDs to include in the ZIP archive." } } } } } }, "responses": { "200": { "description": "ZIP archive containing the requested documents.", "content": { "application/zip": { "schema": { "type": "string", "format": "binary", "description": "ZIP archive binary stream." } } } }, "403": { "description": "`forbidden` : Insufficient permissions.", "content": { "application/json": { "examples": { "forbidden": { "summary": "forbidden", "value": { "status": 403, "code": "forbidden", "message": "Insufficient permissions." } } } } } }, "404": { "description": "`not_found` : Document or dataset not found.", "content": { "application/json": { "examples": { "not_found": { "summary": "not_found", "value": { "status": 404, "code": "not_found", "message": "Document not found." } } } } } } } } }, "/datasets/{dataset_id}/documents/status/{action}": { "patch": { "tags": [ "Documents" ], "summary": "Update Document Status in Batch", "description": "Enable, disable, archive, or unarchive multiple documents at once.", "operationId": "batchUpdateDocumentStatus", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." 
}, { "name": "action", "in": "path", "required": true, "schema": { "type": "string", "enum": [ "enable", "disable", "archive", "un_archive" ] }, "description": "`enable` to activate, `disable` to deactivate, `archive` to archive, `un_archive` to restore from archive." } ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "type": "object", "required": [ "document_ids" ], "properties": { "document_ids": { "type": "array", "items": { "type": "string" }, "description": "List of document IDs to update." } } } } } }, "responses": { "200": { "description": "Documents updated successfully.", "content": { "application/json": { "schema": { "type": "object", "properties": { "result": { "type": "string", "description": "Operation result." } } }, "examples": { "success": { "summary": "Response Example", "value": { "result": "success" } } } } } }, "400": { "description": "`invalid_action` : Invalid action.", "content": { "application/json": { "examples": { "invalid_action": { "summary": "invalid_action", "value": { "status": 400, "code": "invalid_action", "message": "Invalid action." } } } } } }, "403": { "description": "`forbidden` : Insufficient permissions.", "content": { "application/json": { "examples": { "forbidden": { "summary": "forbidden", "value": { "status": 403, "code": "forbidden", "message": "Insufficient permissions." } } } } } }, "404": { "description": "`not_found` : Knowledge base not found.", "content": { "application/json": { "examples": { "not_found": { "summary": "not_found", "value": { "status": 404, "code": "not_found", "message": "Dataset not found." } } } } } } } } }, "/datasets/{dataset_id}/documents/{document_id}/segments": { "post": { "tags": [ "Chunks" ], "summary": "Create Chunks", "description": "Create one or more chunks within a document. 
Each chunk can include optional keywords and an answer field (for QA-mode documents).", "operationId": "createSegments", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." }, { "name": "document_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Document ID." } ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "type": "object", "required": [ "segments" ], "properties": { "segments": { "type": "array", "items": { "type": "object", "properties": { "content": { "type": "string", "description": "Chunk text content." }, "answer": { "type": "string", "description": "Answer content for QA mode." }, "keywords": { "type": "array", "items": { "type": "string" }, "description": "Keywords for the chunk." }, "attachment_ids": { "type": "array", "items": { "type": "string" }, "description": "Attachment file IDs." } } }, "description": "Array of chunk objects to create." } } } } } }, "responses": { "200": { "description": "Chunks created successfully.", "content": { "application/json": { "schema": { "type": "object", "properties": { "data": { "type": "array", "description": "List of created chunks.", "items": { "$ref": "#/components/schemas/Segment" } }, "doc_form": { "type": "string", "description": "Document chunking mode used by this document." 
} } }, "examples": { "success": { "summary": "Response Example", "value": { "data": [ { "id": "f3d1c7be-9f3a-40d8-8eb8-3a1ef9c3f2c1", "position": 1, "document_id": "a8e0e5b5-78c6-4130-a5ce-25feb0e0b4ac", "content": "Dify is an open-source LLM app development platform.", "sign_content": "", "answer": "", "word_count": 9, "tokens": 12, "keywords": [ "dify", "platform", "llm" ], "index_node_id": "a1b2c3d4-e5f6-7890-abcd-000000000001", "index_node_hash": "abc123def456", "hit_count": 0, "enabled": true, "disabled_at": null, "disabled_by": null, "status": "completed", "created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4", "created_at": 1741267200, "updated_at": 1741267200, "updated_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4", "indexing_at": 1741267200, "completed_at": 1741267200, "error": null, "stopped_at": null, "child_chunks": [], "attachments": [], "summary": null } ], "doc_form": "text_model" } } } } } }, "404": { "description": "`not_found` : Document is not completed or is disabled.", "content": { "application/json": { "examples": { "not_found_not_completed": { "summary": "not_found (not completed)", "value": { "status": 404, "code": "not_found", "message": "Document is not completed." } }, "not_found_disabled": { "summary": "not_found (disabled)", "value": { "status": 404, "code": "not_found", "message": "Document is disabled." } } } } } } } }, "get": { "tags": [ "Chunks" ], "summary": "List Chunks", "description": "Returns a paginated list of chunks within a document. Supports filtering by keyword and status.", "operationId": "listSegments", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." }, { "name": "document_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Document ID." }, { "name": "page", "in": "query", "schema": { "type": "integer", "default": 1 }, "description": "Page number to retrieve." 
}, { "name": "limit", "in": "query", "schema": { "type": "integer", "default": 20 }, "description": "Number of items per page. Server caps at `100`." }, { "name": "status", "in": "query", "schema": { "type": "array", "items": { "type": "string" } }, "style": "form", "explode": true, "description": "Filter chunks by indexing status, e.g. `completed`, `indexing`, `error`." }, { "name": "keyword", "in": "query", "schema": { "type": "string" }, "description": "Search keyword." } ], "responses": { "200": { "description": "List of chunks.", "content": { "application/json": { "schema": { "type": "object", "properties": { "data": { "type": "array", "description": "List of chunks.", "items": { "$ref": "#/components/schemas/Segment" } }, "doc_form": { "type": "string", "description": "Document chunking mode used by this document." }, "total": { "type": "integer", "description": "Total number of matching chunks." }, "has_more": { "type": "boolean", "description": "Whether more items exist on the next page." }, "limit": { "type": "integer", "description": "Number of items per page." }, "page": { "type": "integer", "description": "Current page number." 
} } }, "examples": { "success": { "summary": "Response Example", "value": { "data": [ { "id": "f3d1c7be-9f3a-40d8-8eb8-3a1ef9c3f2c1", "position": 1, "document_id": "a8e0e5b5-78c6-4130-a5ce-25feb0e0b4ac", "content": "Dify is an open-source LLM app development platform.", "sign_content": "", "answer": "", "word_count": 9, "tokens": 12, "keywords": [ "dify", "platform", "llm" ], "index_node_id": "a1b2c3d4-e5f6-7890-abcd-000000000001", "index_node_hash": "abc123def456", "hit_count": 0, "enabled": true, "disabled_at": null, "disabled_by": null, "status": "completed", "created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4", "created_at": 1741267200, "updated_at": 1741267200, "updated_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4", "indexing_at": 1741267200, "completed_at": 1741267200, "error": null, "stopped_at": null, "child_chunks": [], "attachments": [], "summary": null } ], "doc_form": "text_model", "total": 1, "has_more": false, "limit": 20, "page": 1 } } } } } } } } }, "/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}": { "get": { "tags": [ "Chunks" ], "summary": "Get Chunk", "description": "Retrieve detailed information about a specific chunk, including its content, keywords, and indexing status.", "operationId": "getSegmentDetail", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." }, { "name": "document_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Document ID." }, { "name": "segment_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Chunk ID." } ], "responses": { "200": { "description": "Chunk details.", "content": { "application/json": { "schema": { "type": "object", "properties": { "data": { "$ref": "#/components/schemas/Segment" }, "doc_form": { "type": "string", "description": "Document chunking mode used by this document." 
} } }, "examples": { "success": { "summary": "Response Example", "value": { "data": { "id": "f3d1c7be-9f3a-40d8-8eb8-3a1ef9c3f2c1", "position": 1, "document_id": "a8e0e5b5-78c6-4130-a5ce-25feb0e0b4ac", "content": "Dify is an open-source LLM app development platform.", "sign_content": "", "answer": "", "word_count": 9, "tokens": 12, "keywords": [ "dify", "platform", "llm" ], "index_node_id": "a1b2c3d4-e5f6-7890-abcd-000000000001", "index_node_hash": "abc123def456", "hit_count": 0, "enabled": true, "disabled_at": null, "disabled_by": null, "status": "completed", "created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4", "created_at": 1741267200, "updated_at": 1741267200, "updated_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4", "indexing_at": 1741267200, "completed_at": 1741267200, "error": null, "stopped_at": null, "child_chunks": [], "attachments": [], "summary": null }, "doc_form": "text_model" } } } } } } } }, "post": { "tags": [ "Chunks" ], "summary": "Update Chunk", "description": "Update a chunk's content, keywords, or answer. Re-triggers indexing for the modified chunk.", "operationId": "updateSegment", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." }, { "name": "document_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Document ID." }, { "name": "segment_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Chunk ID." } ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "type": "object", "required": [ "segment" ], "properties": { "segment": { "type": "object", "properties": { "content": { "type": "string", "description": "Chunk text content." }, "answer": { "type": "string", "description": "Answer content for QA mode." }, "keywords": { "type": "array", "items": { "type": "string" }, "description": "Keywords for the chunk." 
}, "enabled": { "type": "boolean", "description": "Whether the chunk is enabled." }, "regenerate_child_chunks": { "type": "boolean", "default": false, "description": "Whether to regenerate child chunks." }, "attachment_ids": { "type": "array", "items": { "type": "string" }, "description": "Attachment file IDs." }, "summary": { "type": "string", "nullable": true, "description": "Summary content for summary index." } }, "description": "Chunk data to update." } } } } } }, "responses": { "200": { "description": "Chunk updated successfully.", "content": { "application/json": { "schema": { "type": "object", "properties": { "data": { "$ref": "#/components/schemas/Segment" }, "doc_form": { "type": "string", "description": "Document chunking mode used by this document." } } }, "examples": { "success": { "summary": "Response Example", "value": { "data": { "id": "f3d1c7be-9f3a-40d8-8eb8-3a1ef9c3f2c1", "position": 1, "document_id": "a8e0e5b5-78c6-4130-a5ce-25feb0e0b4ac", "content": "Dify is an open-source LLM app development platform.", "sign_content": "", "answer": "", "word_count": 9, "tokens": 12, "keywords": [ "dify", "platform", "llm" ], "index_node_id": "a1b2c3d4-e5f6-7890-abcd-000000000001", "index_node_hash": "abc123def456", "hit_count": 0, "enabled": true, "disabled_at": null, "disabled_by": null, "status": "completed", "created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4", "created_at": 1741267200, "updated_at": 1741267200, "updated_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4", "indexing_at": 1741267200, "completed_at": 1741267200, "error": null, "stopped_at": null, "child_chunks": [], "attachments": [], "summary": null }, "doc_form": "text_model" } } } } } } } }, "delete": { "tags": [ "Chunks" ], "summary": "Delete Chunk", "description": "Permanently delete a chunk from the document.", "operationId": "deleteSegment", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base 
ID." }, { "name": "document_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Document ID." }, { "name": "segment_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Chunk ID." } ], "responses": { "204": { "description": "Success." } } } }, "/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks": { "post": { "tags": [ "Chunks" ], "summary": "Create Child Chunk", "description": "Create a child chunk under a parent chunk. Only available for documents using the `hierarchical_model` chunking mode.", "operationId": "createChildChunk", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." }, { "name": "document_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Document ID." }, { "name": "segment_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Chunk ID." } ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "type": "object", "required": [ "content" ], "properties": { "content": { "type": "string", "description": "Child chunk text content." 
} } } } } }, "responses": { "200": { "description": "Child chunk created successfully.", "content": { "application/json": { "schema": { "type": "object", "properties": { "data": { "$ref": "#/components/schemas/ChildChunk" } } }, "examples": { "success": { "summary": "Response Example", "value": { "data": { "id": "d7e8f9a0-1b2c-3d4e-5f6a-7b8c9d0e1f2a", "segment_id": "f3d1c7be-9f3a-40d8-8eb8-3a1ef9c3f2c1", "content": "Dify is an open-source platform.", "position": 1, "word_count": 6, "type": "automatic", "created_at": 1741267200, "updated_at": 1741267200 } } } } } } }, "400": { "description": "`invalid_param` : Create child chunk index failed.", "content": { "application/json": { "examples": { "invalid_param": { "summary": "invalid_param", "value": { "status": 400, "code": "invalid_param", "message": "Create child chunk index failed: {error details}" } } } } } } } }, "get": { "tags": [ "Chunks" ], "summary": "List Child Chunks", "description": "Returns a paginated list of child chunks under a specific parent chunk.", "operationId": "getChildChunks", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." }, { "name": "document_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Document ID." }, { "name": "segment_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Chunk ID." }, { "name": "page", "in": "query", "schema": { "type": "integer", "default": 1, "minimum": 1 }, "description": "Page number to retrieve." }, { "name": "limit", "in": "query", "schema": { "type": "integer", "default": 20, "minimum": 1 }, "description": "Number of items per page. Server caps at `100`." }, { "name": "keyword", "in": "query", "schema": { "type": "string" }, "description": "Search keyword." 
} ], "responses": { "200": { "description": "List of child chunks.", "content": { "application/json": { "schema": { "type": "object", "properties": { "data": { "type": "array", "description": "List of child chunks.", "items": { "$ref": "#/components/schemas/ChildChunk" } }, "total": { "type": "integer", "description": "Total number of child chunks." }, "total_pages": { "type": "integer", "description": "Total number of pages." }, "page": { "type": "integer", "description": "Current page number." }, "limit": { "type": "integer", "description": "Number of items per page." } } }, "examples": { "success": { "summary": "Response Example", "value": { "data": [ { "id": "d7e8f9a0-1b2c-3d4e-5f6a-7b8c9d0e1f2a", "segment_id": "f3d1c7be-9f3a-40d8-8eb8-3a1ef9c3f2c1", "content": "Dify is an open-source platform.", "position": 1, "word_count": 6, "type": "automatic", "created_at": 1741267200, "updated_at": 1741267200 } ], "total": 1, "total_pages": 1, "page": 1, "limit": 20 } } } } } } } } }, "/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}": { "patch": { "tags": [ "Chunks" ], "summary": "Update Child Chunk", "description": "Update the content of an existing child chunk.", "operationId": "updateChildChunk", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." }, { "name": "document_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Document ID." }, { "name": "segment_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Chunk ID." }, { "name": "child_chunk_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Child chunk ID." 
} ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "type": "object", "required": [ "content" ], "properties": { "content": { "type": "string", "description": "Child chunk text content." } } } } } }, "responses": { "200": { "description": "Child chunk updated successfully.", "content": { "application/json": { "schema": { "type": "object", "properties": { "data": { "$ref": "#/components/schemas/ChildChunk" } } }, "examples": { "success": { "summary": "Response Example", "value": { "data": { "id": "d7e8f9a0-1b2c-3d4e-5f6a-7b8c9d0e1f2a", "segment_id": "f3d1c7be-9f3a-40d8-8eb8-3a1ef9c3f2c1", "content": "Dify is an open-source platform.", "position": 1, "word_count": 6, "type": "automatic", "created_at": 1741267200, "updated_at": 1741267200 } } } } } } }, "400": { "description": "`invalid_param` : Update child chunk index failed.", "content": { "application/json": { "examples": { "invalid_param": { "summary": "invalid_param", "value": { "status": 400, "code": "invalid_param", "message": "Update child chunk index failed: {error details}" } } } } } } } }, "delete": { "tags": [ "Chunks" ], "summary": "Delete Child Chunk", "description": "Permanently delete a child chunk from its parent chunk.", "operationId": "deleteChildChunk", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." }, { "name": "document_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Document ID." }, { "name": "segment_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Chunk ID." }, { "name": "child_chunk_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Child chunk ID." } ], "responses": { "204": { "description": "Success." 
}, "400": { "description": "`invalid_param` : Delete child chunk index failed.", "content": { "application/json": { "examples": { "invalid_param": { "summary": "invalid_param", "value": { "status": 400, "code": "invalid_param", "message": "Delete child chunk index failed: {error details}" } } } } } } } } }, "/datasets/{dataset_id}/retrieve": { "post": { "tags": [ "Knowledge Bases" ], "summary": "Retrieve Chunks from a Knowledge Base / Test Retrieval", "description": "Performs a search query against a knowledge base to retrieve the most relevant chunks. This endpoint can be used for both production retrieval and test retrieval.", "operationId": "retrieveSegments", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." } ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "type": "object", "required": [ "query" ], "properties": { "query": { "type": "string", "maxLength": 250, "description": "Search query text." }, "retrieval_model": { "$ref": "#/components/schemas/RetrievalModel", "description": "Retrieval model configuration. Controls how chunks are searched and ranked when querying this knowledge base." }, "external_retrieval_model": { "type": "object", "description": "Retrieval settings for external knowledge bases.", "properties": { "top_k": { "type": "integer", "description": "Maximum number of results to return." }, "score_threshold": { "type": "number", "description": "Minimum relevance score threshold for filtering results." }, "score_threshold_enabled": { "type": "boolean", "description": "Whether score threshold filtering is enabled." } } }, "attachment_ids": { "type": "array", "items": { "type": "string" }, "nullable": true, "description": "List of attachment IDs to include in the retrieval context." 
} } } } } }, "responses": { "200": { "description": "Retrieval results.", "content": { "application/json": { "schema": { "type": "object", "properties": { "query": { "type": "object", "description": "The original query object.", "properties": { "content": { "type": "string", "description": "The query text." } } }, "records": { "type": "array", "description": "List of matched retrieval records.", "items": { "type": "object", "properties": { "segment": { "type": "object", "description": "Matched chunk from the knowledge base.", "properties": { "id": { "type": "string", "description": "Unique identifier of the chunk." }, "position": { "type": "integer", "description": "Position of the chunk within the document." }, "document_id": { "type": "string", "description": "ID of the document this chunk belongs to." }, "content": { "type": "string", "description": "Text content of the chunk." }, "sign_content": { "type": "string", "description": "Signed content hash for integrity verification." }, "answer": { "type": "string", "description": "Answer content, used in Q&A mode documents." }, "word_count": { "type": "integer", "description": "Word count of the chunk content." }, "tokens": { "type": "integer", "description": "Token count of the chunk content." }, "keywords": { "type": "array", "description": "Keywords associated with this chunk for keyword-based retrieval.", "items": { "type": "string" } }, "index_node_id": { "type": "string", "description": "ID of the index node in the vector store." }, "index_node_hash": { "type": "string", "description": "Hash of the indexed content, used to detect changes." }, "hit_count": { "type": "integer", "description": "Number of times this chunk has been matched in retrieval queries." }, "enabled": { "type": "boolean", "description": "Whether the chunk is enabled for retrieval." }, "disabled_at": { "type": "number", "nullable": true, "description": "Timestamp when the chunk was disabled. `null` if enabled." 
}, "disabled_by": { "type": "string", "nullable": true, "description": "ID of the user who disabled the chunk. `null` if enabled." }, "status": { "type": "string", "description": "Indexing status of the chunk." }, "created_by": { "type": "string", "description": "ID of the user who created the chunk." }, "created_at": { "type": "number", "description": "Creation timestamp (Unix epoch in seconds)." }, "indexing_at": { "type": "number", "nullable": true, "description": "Timestamp when indexing started. `null` if not yet started." }, "completed_at": { "type": "number", "nullable": true, "description": "Timestamp when indexing completed. `null` if not yet completed." }, "error": { "type": "string", "nullable": true, "description": "Error message if indexing failed. `null` when no error." }, "stopped_at": { "type": "number", "nullable": true, "description": "Timestamp when indexing was stopped. `null` if not stopped." }, "document": { "type": "object", "description": "Parent document information for the matched chunk.", "properties": { "id": { "type": "string", "description": "Unique identifier of the document." }, "data_source_type": { "type": "string", "description": "How the document was created." }, "name": { "type": "string", "description": "Document name." }, "doc_type": { "type": "string", "nullable": true, "description": "Document type classification. `null` if not set." }, "doc_metadata": { "type": "object", "nullable": true, "description": "Metadata values for the document. `null` if no metadata is configured." } } } } }, "child_chunks": { "type": "array", "description": "Matched child chunks within the chunk, if using hierarchical indexing.", "items": { "type": "object", "properties": { "id": { "type": "string", "description": "Unique identifier of the child chunk." }, "content": { "type": "string", "description": "Text content of the child chunk." }, "position": { "type": "integer", "description": "Position of the child chunk within the parent chunk." 
}, "score": { "type": "number", "description": "Relevance score of the child chunk." } } } }, "score": { "type": "number", "description": "Relevance score." }, "tsne_position": { "type": "object", "nullable": true, "description": "t-SNE visualization position." }, "files": { "type": "array", "description": "Files attached to this chunk.", "items": { "type": "object", "properties": { "id": { "type": "string", "description": "Attachment file identifier." }, "name": { "type": "string", "description": "Original file name." }, "size": { "type": "integer", "description": "File size in bytes." }, "extension": { "type": "string", "description": "File extension." }, "mime_type": { "type": "string", "description": "MIME type of the file." }, "source_url": { "type": "string", "description": "URL to access the attachment." } } } }, "summary": { "type": "string", "nullable": true, "description": "Summary content if retrieved via summary index." } } } } } }, "examples": { "success": { "summary": "Response Example", "value": { "query": { "content": "What is Dify?" 
}, "records": [ { "segment": { "id": "f3d1c7be-9f3a-40d8-8eb8-3a1ef9c3f2c1", "position": 1, "document_id": "a8e0e5b5-78c6-4130-a5ce-25feb0e0b4ac", "content": "Dify is an open-source LLM app development platform.", "sign_content": "", "answer": "", "word_count": 9, "tokens": 12, "keywords": [ "dify", "platform", "llm" ], "index_node_id": "a1b2c3d4-e5f6-7890-abcd-000000000001", "index_node_hash": "abc123def456", "hit_count": 1, "enabled": true, "disabled_at": null, "disabled_by": null, "status": "completed", "created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4", "created_at": 1741267200, "indexing_at": 1741267200, "completed_at": 1741267200, "error": null, "stopped_at": null, "document": { "id": "a8e0e5b5-78c6-4130-a5ce-25feb0e0b4ac", "data_source_type": "upload_file", "name": "guide.txt", "doc_type": null, "doc_metadata": null } }, "child_chunks": [], "score": 0.92, "tsne_position": null, "files": [], "summary": null } ] } } } } } }, "400": { "description": "- `dataset_not_initialized` : The dataset is still being initialized or indexing. Please wait a moment.\n- `provider_not_initialize` : No valid model provider credentials found. Please go to Settings -> Model Provider to complete your provider credentials.\n- `provider_quota_exceeded` : Your quota for Dify Hosted OpenAI has been exhausted. Please go to Settings -> Model Provider to complete your own provider credentials.\n- `model_currently_not_support` : Dify Hosted OpenAI trial currently not support the GPT-4 model.\n- `completion_request_error` : Completion request failed.\n- `invalid_param` : Invalid parameter value.", "content": { "application/json": { "examples": { "dataset_not_initialized": { "summary": "dataset_not_initialized", "value": { "status": 400, "code": "dataset_not_initialized", "message": "The dataset is still being initialized or indexing. Please wait a moment." 
} }, "provider_not_initialize": { "summary": "provider_not_initialize", "value": { "status": 400, "code": "provider_not_initialize", "message": "No valid model provider credentials found. Please go to Settings -> Model Provider to complete your provider credentials." } }, "provider_quota_exceeded": { "summary": "provider_quota_exceeded", "value": { "status": 400, "code": "provider_quota_exceeded", "message": "Your quota for Dify Hosted OpenAI has been exhausted. Please go to Settings -> Model Provider to complete your own provider credentials." } }, "model_currently_not_support": { "summary": "model_currently_not_support", "value": { "status": 400, "code": "model_currently_not_support", "message": "Dify Hosted OpenAI trial currently not support the GPT-4 model." } }, "completion_request_error": { "summary": "completion_request_error", "value": { "status": 400, "code": "completion_request_error", "message": "Completion request failed." } }, "invalid_param": { "summary": "invalid_param", "value": { "status": 400, "code": "invalid_param", "message": "Invalid parameter value." } } } } } }, "403": { "description": "`forbidden` : Insufficient permissions.", "content": { "application/json": { "examples": { "forbidden": { "summary": "forbidden", "value": { "status": 403, "code": "forbidden", "message": "Insufficient permissions." } } } } } }, "404": { "description": "`not_found` : Knowledge base not found.", "content": { "application/json": { "examples": { "not_found": { "summary": "not_found", "value": { "status": 404, "code": "not_found", "message": "Dataset not found." } } } } } }, "500": { "description": "`internal_server_error` : An internal error occurred during retrieval.", "content": { "application/json": { "examples": { "internal_server_error": { "summary": "internal_server_error", "value": { "status": 500, "code": "internal_server_error", "message": "An internal error occurred." 
} } } } } } } } }, "/datasets/tags": { "post": { "tags": [ "Tags" ], "summary": "Create Knowledge Tag", "description": "Create a new tag for organizing knowledge bases.", "operationId": "createKnowledgeTag", "requestBody": { "required": true, "content": { "application/json": { "schema": { "type": "object", "required": [ "name" ], "properties": { "name": { "type": "string", "minLength": 1, "maxLength": 50, "description": "Tag name." } } } } } }, "responses": { "200": { "description": "Tag created successfully.", "content": { "application/json": { "schema": { "type": "object", "properties": { "id": { "type": "string", "description": "Tag identifier." }, "name": { "type": "string", "description": "Tag display name." }, "type": { "type": "string", "description": "Tag type. Always `knowledge` for knowledge base tags." }, "binding_count": { "type": "string", "nullable": true, "description": "Number of knowledge bases bound to this tag." } } }, "examples": { "success": { "summary": "Response Example", "value": { "id": "f4b5c6d7-e8f9-0a1b-2c3d-4e5f6a7b8c9d", "name": "Product Docs", "type": "knowledge", "binding_count": "0" } } } } } } } }, "get": { "tags": [ "Tags" ], "summary": "List Knowledge Tags", "description": "Returns the list of all knowledge base tags in the workspace.", "operationId": "getKnowledgeTags", "responses": { "200": { "description": "List of tags.", "content": { "application/json": { "schema": { "type": "array", "items": { "type": "object", "properties": { "id": { "type": "string", "description": "Tag identifier." }, "name": { "type": "string", "description": "Tag display name." }, "type": { "type": "string", "description": "Tag type. Always `knowledge` for knowledge base tags." }, "binding_count": { "type": "string", "nullable": true, "description": "Number of knowledge bases bound to this tag." 
} } } }, "examples": { "success": { "summary": "Response Example", "value": [ { "id": "f4b5c6d7-e8f9-0a1b-2c3d-4e5f6a7b8c9d", "name": "Product Docs", "type": "knowledge", "binding_count": "0" } ] } } } } } } }, "patch": { "tags": [ "Tags" ], "summary": "Update Knowledge Tag", "description": "Rename an existing knowledge base tag.", "operationId": "updateKnowledgeTag", "requestBody": { "required": true, "content": { "application/json": { "schema": { "type": "object", "required": [ "tag_id", "name" ], "properties": { "tag_id": { "type": "string", "description": "Tag ID to update." }, "name": { "type": "string", "minLength": 1, "maxLength": 50, "description": "New tag name." } } } } } }, "responses": { "200": { "description": "Tag updated successfully.", "content": { "application/json": { "schema": { "type": "object", "properties": { "id": { "type": "string", "description": "Tag identifier." }, "name": { "type": "string", "description": "Tag display name." }, "type": { "type": "string", "description": "Tag type. Always `knowledge` for knowledge base tags." }, "binding_count": { "type": "string", "nullable": true, "description": "Number of knowledge bases bound to this tag." } } }, "examples": { "success": { "summary": "Response Example", "value": { "id": "f4b5c6d7-e8f9-0a1b-2c3d-4e5f6a7b8c9d", "name": "Product Docs", "type": "knowledge", "binding_count": "0" } } } } } } } }, "delete": { "tags": [ "Tags" ], "summary": "Delete Knowledge Tag", "description": "Permanently delete a knowledge base tag. Does not delete the knowledge bases that were tagged.", "operationId": "deleteKnowledgeTag", "requestBody": { "required": true, "content": { "application/json": { "schema": { "type": "object", "required": [ "tag_id" ], "properties": { "tag_id": { "type": "string", "description": "Tag ID to delete." } } } } } }, "responses": { "204": { "description": "Success." 
} } } }, "/datasets/tags/binding": { "post": { "tags": [ "Tags" ], "summary": "Create Tag Binding", "description": "Bind one or more tags to a knowledge base. A knowledge base can have multiple tags.", "operationId": "bindTagsToDataset", "requestBody": { "required": true, "content": { "application/json": { "schema": { "type": "object", "required": [ "tag_ids", "target_id" ], "properties": { "tag_ids": { "type": "array", "items": { "type": "string" }, "minItems": 1, "description": "Tag IDs to bind." }, "target_id": { "type": "string", "description": "Knowledge base ID." } } } } } }, "responses": { "204": { "description": "Success." } } } }, "/datasets/tags/unbinding": { "post": { "tags": [ "Tags" ], "summary": "Delete Tag Binding", "description": "Remove a tag binding from a knowledge base.", "operationId": "unbindTagFromDataset", "requestBody": { "required": true, "content": { "application/json": { "schema": { "type": "object", "required": [ "tag_id", "target_id" ], "properties": { "tag_id": { "type": "string", "description": "Tag ID to unbind." }, "target_id": { "type": "string", "description": "Knowledge base ID." } } } } } }, "responses": { "204": { "description": "Success." } } } }, "/datasets/{dataset_id}/tags": { "get": { "tags": [ "Tags" ], "summary": "Get Knowledge Base Tags", "description": "Returns the list of tags bound to a specific knowledge base.", "operationId": "queryDatasetTags", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." } ], "responses": { "200": { "description": "Tags bound to the knowledge base.", "content": { "application/json": { "schema": { "type": "object", "properties": { "data": { "type": "array", "description": "List of tags bound to this knowledge base.", "items": { "type": "object", "properties": { "id": { "type": "string", "description": "Tag identifier." }, "name": { "type": "string", "description": "Tag display name." 
} } } }, "total": { "type": "integer", "description": "Total number of tags bound to this knowledge base." } } }, "examples": { "success": { "summary": "Response Example", "value": { "data": [ { "id": "f4b5c6d7-e8f9-0a1b-2c3d-4e5f6a7b8c9d", "name": "Product Docs" } ], "total": 1 } } } } } } } } }, "/datasets/{dataset_id}/metadata": { "post": { "tags": [ "Metadata" ], "summary": "Create Metadata Field", "description": "Create a custom metadata field for the knowledge base. Metadata fields can be used to annotate documents with structured information.", "operationId": "createMetadataField", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." } ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "type": "object", "required": [ "type", "name" ], "properties": { "type": { "type": "string", "enum": [ "string", "number", "time" ], "description": "`string` for text values, `number` for numeric values, `time` for date/time values." }, "name": { "type": "string", "description": "Metadata field name." } } } } } }, "responses": { "201": { "description": "Metadata field created successfully.", "content": { "application/json": { "schema": { "type": "object", "properties": { "id": { "type": "string", "description": "Metadata field identifier." }, "name": { "type": "string", "description": "Metadata field name." }, "type": { "type": "string", "description": "Metadata field type." 
} } }, "examples": { "success": { "summary": "Response Example", "value": { "id": "b5c6d7e8-f9a0-1b2c-3d4e-5f6a7b8c9d0e", "name": "author", "type": "string" } } } } } } } }, "get": { "tags": [ "Metadata" ], "summary": "List Metadata Fields", "description": "Returns the list of all metadata fields (both custom and built-in) for the knowledge base, along with the count of documents using each field.", "operationId": "listMetadataFields", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." } ], "responses": { "200": { "description": "Metadata fields for the knowledge base.", "content": { "application/json": { "schema": { "type": "object", "properties": { "doc_metadata": { "type": "array", "description": "List of metadata field definitions.", "items": { "type": "object", "properties": { "id": { "type": "string", "description": "Metadata field identifier." }, "name": { "type": "string", "description": "Metadata field name." }, "type": { "type": "string", "description": "Metadata field type." }, "count": { "type": "integer", "description": "Number of documents using this metadata field." } } } }, "built_in_field_enabled": { "type": "boolean", "description": "Whether built-in metadata fields are enabled for this knowledge base." } } }, "examples": { "success": { "summary": "Response Example", "value": { "doc_metadata": [ { "id": "b5c6d7e8-f9a0-1b2c-3d4e-5f6a7b8c9d0e", "name": "author", "type": "string", "count": 3 } ], "built_in_field_enabled": true } } } } } } } } }, "/datasets/{dataset_id}/metadata/{metadata_id}": { "patch": { "tags": [ "Metadata" ], "summary": "Update Metadata Field", "description": "Rename a custom metadata field.", "operationId": "updateMetadataField", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." 
}, { "name": "metadata_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Metadata field ID." } ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "type": "object", "required": [ "name" ], "properties": { "name": { "type": "string", "description": "New metadata field name." } } } } } }, "responses": { "200": { "description": "Metadata field updated successfully.", "content": { "application/json": { "schema": { "type": "object", "properties": { "id": { "type": "string", "description": "Metadata field identifier." }, "name": { "type": "string", "description": "Metadata field name." }, "type": { "type": "string", "description": "Metadata field type." } } }, "examples": { "success": { "summary": "Response Example", "value": { "id": "b5c6d7e8-f9a0-1b2c-3d4e-5f6a7b8c9d0e", "name": "author", "type": "string" } } } } } } } }, "delete": { "tags": [ "Metadata" ], "summary": "Delete Metadata Field", "description": "Permanently delete a custom metadata field. Documents using this field will lose their metadata values for it.", "operationId": "deleteMetadataField", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." }, { "name": "metadata_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Metadata field ID." } ], "responses": { "204": { "description": "Success." } } } }, "/datasets/{dataset_id}/metadata/built-in": { "get": { "tags": [ "Metadata" ], "summary": "Get Built-in Metadata Fields", "description": "Returns the list of built-in metadata fields provided by the system (e.g., document name, uploader, upload date, source).", "operationId": "getBuiltInMetadataFields", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." 
} ], "responses": { "200": { "description": "Built-in metadata fields.", "content": { "application/json": { "schema": { "type": "object", "properties": { "fields": { "type": "array", "description": "List of system-provided metadata fields.", "items": { "type": "object", "properties": { "name": { "type": "string", "description": "Built-in field identifier. `document_name` for the document title, `uploader` for the creator, `upload_date` for creation time, `last_update_date` for last modification time, `source` for the document origin." }, "type": { "type": "string", "description": "Field data type. `string` for text values, `time` for date/time values." } } } } } }, "examples": { "success": { "summary": "Response Example", "value": { "fields": [ { "name": "document_name", "type": "string" }, { "name": "uploader", "type": "string" }, { "name": "upload_date", "type": "time" }, { "name": "last_update_date", "type": "time" }, { "name": "source", "type": "string" } ] } } } } } } } } }, "/datasets/{dataset_id}/metadata/built-in/{action}": { "post": { "tags": [ "Metadata" ], "summary": "Update Built-in Metadata Field", "description": "Enable or disable built-in metadata fields for the knowledge base.", "operationId": "toggleBuiltInMetadataField", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." }, { "name": "action", "in": "path", "required": true, "schema": { "type": "string", "enum": [ "enable", "disable" ] }, "description": "`enable` to activate built-in metadata fields, `disable` to deactivate them." } ], "responses": { "200": { "description": "Built-in metadata field toggled successfully.", "content": { "application/json": { "schema": { "type": "object", "properties": { "result": { "type": "string", "description": "Operation result." 
} } }, "examples": { "success": { "summary": "Response Example", "value": { "result": "success" } } } } } } } } }, "/datasets/{dataset_id}/documents/metadata": { "post": { "tags": [ "Metadata" ], "summary": "Update Document Metadata in Batch", "description": "Update metadata values for multiple documents at once. Each document in the request receives the specified metadata key-value pairs.", "operationId": "batchUpdateDocumentMetadata", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." } ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "type": "object", "required": [ "operation_data" ], "properties": { "operation_data": { "type": "array", "items": { "type": "object", "required": [ "document_id", "metadata_list" ], "properties": { "document_id": { "type": "string", "description": "ID of the document whose metadata to update." }, "metadata_list": { "type": "array", "items": { "type": "object", "required": [ "id", "name" ], "properties": { "id": { "type": "string", "description": "Metadata field ID." }, "name": { "type": "string", "description": "Metadata field name." }, "value": { "description": "Metadata value. Can be a string, number, or `null`." } } } }, "partial_update": { "type": "boolean", "default": false, "description": "Whether to partially update metadata, keeping existing values for unspecified fields." } } }, "description": "Array of document metadata update operations. Each entry maps a document ID to its metadata key-value pairs." } } } } } }, "responses": { "200": { "description": "Document metadata updated successfully.", "content": { "application/json": { "schema": { "type": "object", "properties": { "result": { "type": "string", "description": "Operation result." 
} } }, "examples": { "success": { "summary": "Response Example", "value": { "result": "success" } } } } } } } } }, "/workspaces/current/models/model-types/{model_type}": { "get": { "tags": [ "Models" ], "summary": "Get Available Models", "description": "Retrieve the list of available models by type. Primarily used to query `text-embedding` and `rerank` models for knowledge base configuration.", "operationId": "getAvailableModels", "parameters": [ { "name": "model_type", "in": "path", "required": true, "schema": { "type": "string", "enum": [ "text-embedding", "rerank", "llm", "tts", "speech2text", "moderation" ] }, "description": "Type of model to retrieve. For knowledge base configuration, use `text-embedding` for embedding models or `rerank` for reranking models." } ], "responses": { "200": { "description": "Available models for the specified type.", "content": { "application/json": { "schema": { "type": "object", "properties": { "data": { "type": "array", "description": "List of model providers with their available models.", "items": { "type": "object", "properties": { "provider": { "type": "string", "description": "Model provider identifier, e.g. `openai`, `cohere`." }, "label": { "type": "object", "description": "Localized display name of the provider.", "properties": { "en_US": { "type": "string", "description": "English display name." }, "zh_Hans": { "type": "string", "description": "Chinese display name." } } }, "icon_small": { "type": "object", "description": "Localized URLs of the provider's small icon, keyed by locale.", "properties": { "en_US": { "type": "string", "description": "Small icon URL." } } }, "icon_large": { "type": "object", "description": "Localized URLs of the provider's large icon, keyed by locale.", "properties": { "en_US": { "type": "string", "description": "Large icon URL." } } }, "status": { "type": "string", "description": "Provider status. `active` when credentials are configured and valid."
}, "models": { "type": "array", "description": "List of available models from this provider.", "items": { "type": "object", "properties": { "model": { "type": "string", "description": "Model identifier. Use this as the `embedding_model` value when creating or updating a knowledge base." }, "label": { "type": "object", "description": "Localized display name of the model.", "properties": { "en_US": { "type": "string", "description": "English model name." }, "zh_Hans": { "type": "string", "description": "Chinese model name." } } }, "model_type": { "type": "string", "description": "Type of the model, matching the `model_type` path parameter." }, "features": { "type": "array", "nullable": true, "description": "Supported features of the model, `null` if none.", "items": { "type": "string" } }, "fetch_from": { "type": "string", "description": "Where the model definition comes from. `predefined-model` for built-in models, `customizable-model` for user-configured models." }, "model_properties": { "type": "object", "description": "Model-specific properties such as `context_size`." }, "status": { "type": "string", "description": "Model availability status. `active` when ready to use." 
} } } } } } } } }, "examples": { "success": { "summary": "Response Example", "value": { "data": [ { "provider": "openai", "label": { "en_US": "OpenAI", "zh_Hans": "OpenAI" }, "icon_small": { "en_US": "https://example.com/openai-small.svg" }, "icon_large": { "en_US": "https://example.com/openai-large.svg" }, "status": "active", "models": [ { "model": "text-embedding-3-small", "label": { "en_US": "text-embedding-3-small", "zh_Hans": "text-embedding-3-small" }, "model_type": "text-embedding", "features": null, "fetch_from": "predefined-model", "model_properties": { "context_size": 8191 }, "status": "active" } ] } ] } } } } } } } } }, "/datasets/pipeline/file-upload": { "post": { "tags": [ "Knowledge Pipeline" ], "summary": "Upload Pipeline File", "description": "Upload a file for use in a knowledge pipeline. Accepts a single file via `multipart/form-data`.", "operationId": "uploadPipelineFile", "requestBody": { "required": true, "content": { "multipart/form-data": { "schema": { "type": "object", "required": [ "file" ], "properties": { "file": { "type": "string", "format": "binary", "description": "The file to upload." } } } } } }, "responses": { "201": { "description": "File uploaded successfully.", "content": { "application/json": { "schema": { "type": "object", "properties": { "id": { "type": "string", "description": "Unique identifier of the uploaded file." }, "name": { "type": "string", "description": "Original file name." }, "size": { "type": "integer", "description": "File size in bytes." }, "extension": { "type": "string", "description": "File extension." }, "mime_type": { "type": "string", "description": "MIME type of the file." }, "created_by": { "type": "string", "description": "ID of the user who uploaded the file." }, "created_at": { "type": "string", "description": "Upload timestamp in ISO 8601 format." 
} } }, "examples": { "success": { "summary": "Response Example", "value": { "id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", "name": "report.pdf", "size": 524288, "extension": "pdf", "mime_type": "application/pdf", "created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4", "created_at": "2025-03-06T12:00:00" } } } } } }, "400": { "description": "- `no_file_uploaded` : Please upload your file.\n- `filename_not_exists_error` : The specified filename does not exist.\n- `too_many_files` : Only one file is allowed.", "content": { "application/json": { "examples": { "no_file_uploaded": { "summary": "no_file_uploaded", "value": { "status": 400, "code": "no_file_uploaded", "message": "Please upload your file." } }, "filename_not_exists_error": { "summary": "filename_not_exists_error", "value": { "status": 400, "code": "filename_not_exists_error", "message": "The specified filename does not exist." } }, "too_many_files": { "summary": "too_many_files", "value": { "status": 400, "code": "too_many_files", "message": "Only one file is allowed." } } } } } }, "413": { "description": "`file_too_large` : File size exceeded.", "content": { "application/json": { "examples": { "file_too_large": { "summary": "file_too_large", "value": { "status": 413, "code": "file_too_large", "message": "File size exceeded." } } } } } }, "415": { "description": "`unsupported_file_type` : File type not allowed.", "content": { "application/json": { "examples": { "unsupported_file_type": { "summary": "unsupported_file_type", "value": { "status": 415, "code": "unsupported_file_type", "message": "File type not allowed." } } } } } } } } }, "/datasets/{dataset_id}/pipeline/datasource-plugins": { "get": { "tags": [ "Knowledge Pipeline" ], "summary": "List Datasource Plugins", "description": "List all datasource plugins available for a knowledge pipeline. 
Returns published or draft plugins depending on the `is_published` query parameter.", "operationId": "listDatasourcePlugins", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." }, { "name": "is_published", "in": "query", "schema": { "type": "boolean", "default": true }, "description": "Whether to retrieve published or draft datasource plugins. `true` returns published plugins, `false` returns draft plugins." } ], "responses": { "200": { "description": "List of datasource plugins.", "content": { "application/json": { "schema": { "type": "array", "items": { "type": "object", "properties": { "id": { "type": "string", "description": "Unique identifier of the datasource plugin." }, "name": { "type": "string", "description": "Display name of the datasource plugin." }, "type": { "type": "string", "description": "Type of the datasource plugin." } } } }, "examples": { "success": { "summary": "Response Example", "value": [ { "id": "b1c2d3e4-f5a6-7890-abcd-ef1234567890", "name": "Web Scraper", "type": "online_document" } ] } } } } }, "404": { "description": "`not_found` : Dataset not found.", "content": { "application/json": { "examples": { "not_found": { "summary": "not_found", "value": { "status": 404, "code": "not_found", "message": "Dataset not found." } } } } } } } } }, "/datasets/{dataset_id}/pipeline/datasource/nodes/{node_id}/run": { "post": { "tags": [ "Knowledge Pipeline" ], "summary": "Run Datasource Node", "description": "Execute a single datasource node within the knowledge pipeline. Returns a streaming response with the node execution results.", "operationId": "runDatasourceNode", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." 
}, { "name": "node_id", "in": "path", "required": true, "schema": { "type": "string" }, "description": "ID of the datasource node to execute." } ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "type": "object", "required": [ "inputs", "datasource_type", "is_published" ], "properties": { "inputs": { "type": "object", "additionalProperties": true, "description": "Input variables for the datasource node." }, "datasource_type": { "type": "string", "enum": [ "online_document", "local_file", "website_crawl", "online_drive" ], "description": "Type of the datasource." }, "credential_id": { "type": "string", "nullable": true, "description": "ID of the credential to use for authentication with the datasource." }, "is_published": { "type": "boolean", "description": "Whether to run the published or draft version of the node. `true` runs the published version, `false` runs the draft." } } } } } }, "responses": { "200": { "description": "Streaming response with node execution events.", "content": { "text/event-stream": { "schema": { "type": "string", "description": "Server-Sent Events stream containing node execution progress and results." } } } }, "404": { "description": "`not_found` : Dataset not found.", "content": { "application/json": { "examples": { "not_found": { "summary": "not_found", "value": { "status": 404, "code": "not_found", "message": "Dataset not found." } } } } } } } } }, "/datasets/{dataset_id}/pipeline/run": { "post": { "tags": [ "Knowledge Pipeline" ], "summary": "Run Pipeline", "description": "Execute the full knowledge pipeline for a knowledge base. Supports both streaming and blocking response modes.", "operationId": "runPipeline", "parameters": [ { "name": "dataset_id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" }, "description": "Knowledge base ID." 
} ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "type": "object", "required": [ "inputs", "datasource_type", "datasource_info_list", "start_node_id", "is_published", "response_mode" ], "properties": { "inputs": { "type": "object", "additionalProperties": true, "description": "Input variables for the pipeline." }, "datasource_type": { "type": "string", "enum": [ "online_document", "local_file", "website_crawl", "online_drive" ], "description": "Type of the datasource." }, "datasource_info_list": { "type": "array", "items": { "type": "object", "additionalProperties": true }, "description": "List of datasource information objects for the pipeline to process." }, "start_node_id": { "type": "string", "description": "ID of the node to start pipeline execution from." }, "is_published": { "type": "boolean", "description": "Whether to run the published or draft version of the pipeline. `true` runs the published version, `false` runs the draft." }, "response_mode": { "type": "string", "enum": [ "streaming", "blocking" ], "description": "Response mode for the pipeline execution. `streaming` returns a Server-Sent Events stream, `blocking` waits and returns the complete result." } } } } } }, "responses": { "200": { "description": "Pipeline execution result. Format depends on `response_mode`: streaming returns a `text/event-stream`, blocking returns a JSON object.", "content": { "text/event-stream": { "schema": { "type": "string", "description": "Server-Sent Events stream containing pipeline execution progress and results. Returned when `response_mode` is `streaming`." } }, "application/json": { "schema": { "type": "object", "description": "Complete pipeline execution result. 
Returned when `response_mode` is `blocking`.", "additionalProperties": true }, "examples": { "success": { "summary": "Blocking Response Example", "value": { "task_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", "workflow_run_id": "f1e2d3c4-b5a6-7890-abcd-ef0987654321", "data": { "id": "f1e2d3c4-b5a6-7890-abcd-ef0987654321", "status": "succeeded", "outputs": {}, "created_at": 1741267200, "finished_at": 1741267210 } } } } } } }, "403": { "description": "`forbidden` : Forbidden.", "content": { "application/json": { "examples": { "forbidden": { "summary": "forbidden", "value": { "status": 403, "code": "forbidden", "message": "Forbidden." } } } } } }, "404": { "description": "`not_found` : Dataset not found.", "content": { "application/json": { "examples": { "not_found": { "summary": "not_found", "value": { "status": 404, "code": "not_found", "message": "Dataset not found." } } } } } }, "500": { "description": "`pipeline_run_error` : Pipeline execution failed.", "content": { "application/json": { "examples": { "pipeline_run_error": { "summary": "pipeline_run_error", "value": { "status": 500, "code": "pipeline_run_error", "message": "Pipeline execution failed: connection timeout" } } } } } } } } } }, "components": { "schemas": { "Dataset": { "type": "object", "properties": { "id": { "type": "string", "description": "Unique identifier of the knowledge base." }, "name": { "type": "string", "description": "Display name of the knowledge base. Unique within the workspace." }, "description": { "type": "string", "description": "Optional text describing the purpose or contents of the knowledge base." }, "provider": { "type": "string", "description": "Provider type. `vendor` for internally managed, `external` for external knowledge base connections." }, "permission": { "type": "string", "description": "Controls who can access this knowledge base. Possible values: `only_me`, `all_team_members`, `partial_members`." 
}, "data_source_type": { "type": "string", "description": "Data source type of the documents, `null` if not yet configured." }, "indexing_technique": { "type": "string", "description": "`high_quality` uses embedding models for precise search; `economy` uses keyword-based indexing." }, "app_count": { "type": "integer", "description": "Number of applications currently using this knowledge base." }, "document_count": { "type": "integer", "description": "Total number of documents in the knowledge base." }, "word_count": { "type": "integer", "description": "Total word count across all documents." }, "created_by": { "type": "string", "description": "ID of the user who created the knowledge base." }, "author_name": { "type": "string", "description": "Display name of the creator." }, "created_at": { "type": "number", "description": "Creation timestamp (Unix epoch in seconds)." }, "updated_by": { "type": "string", "description": "ID of the user who last updated the knowledge base." }, "updated_at": { "type": "number", "description": "Last update timestamp (Unix epoch in seconds)." }, "embedding_model": { "type": "string", "description": "Name of the embedding model used for indexing." }, "embedding_model_provider": { "type": "string", "description": "Provider of the embedding model used for indexing. Corresponds to the `provider` field returned by [Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`." }, "embedding_available": { "type": "boolean", "description": "Whether the configured embedding model is currently available." }, "retrieval_model_dict": { "type": "object", "description": "Retrieval configuration for the knowledge base.", "properties": { "search_method": { "type": "string", "description": "Search method used for retrieval. `keyword_search` for keyword matching, `semantic_search` for embedding-based similarity, `full_text_search` for full-text indexing, `hybrid_search` for a combination of semantic and keyword approaches."
}, "reranking_enable": { "type": "boolean", "description": "Whether reranking is enabled." }, "reranking_mode": { "type": "string", "nullable": true, "description": "Reranking mode. `reranking_model` for model-based reranking, `weighted_score` for score-based weighting. `null` if reranking is disabled." }, "reranking_model": { "type": "object", "description": "Reranking model configuration.", "properties": { "reranking_provider_name": { "type": "string", "description": "Provider name of the reranking model." }, "reranking_model_name": { "type": "string", "description": "Name of the reranking model." } } }, "weights": { "type": "object", "nullable": true, "description": "Weight configuration for hybrid search.", "properties": { "weight_type": { "type": "string", "description": "Strategy for balancing semantic and keyword search weights." }, "vector_setting": { "type": "object", "description": "Semantic search weight settings.", "properties": { "vector_weight": { "type": "number", "description": "Weight assigned to semantic (vector) search results." }, "embedding_provider_name": { "type": "string", "description": "Provider of the embedding model used for vector search." }, "embedding_model_name": { "type": "string", "description": "Name of the embedding model used for vector search." } } }, "keyword_setting": { "type": "object", "description": "Keyword search weight settings.", "properties": { "keyword_weight": { "type": "number", "description": "Weight assigned to keyword search results." } } } } }, "top_k": { "type": "integer", "description": "Maximum number of results to return." }, "score_threshold_enabled": { "type": "boolean", "description": "Whether score threshold filtering is enabled." }, "score_threshold": { "type": "number", "description": "Minimum relevance score for results. Only effective when `score_threshold_enabled` is `true`." 
} } }, "summary_index_setting": { "type": "object", "nullable": true, "description": "Summary index configuration.", "properties": { "enable": { "type": "boolean", "description": "Whether summary indexing is enabled." }, "model_name": { "type": "string", "description": "Name of the model used for generating summaries." }, "model_provider_name": { "type": "string", "description": "Provider of the summary generation model." }, "summary_prompt": { "type": "string", "description": "Prompt template used for summary generation." } } }, "tags": { "type": "array", "description": "Tags associated with this knowledge base.", "items": { "type": "object", "properties": { "id": { "type": "string", "description": "Tag identifier." }, "name": { "type": "string", "description": "Tag name." }, "type": { "type": "string", "description": "Tag type. Always `knowledge` for knowledge base tags." } } } }, "doc_form": { "type": "string", "description": "Document chunking mode. `text_model` for standard text chunking, `hierarchical_model` for parent-child structure, `qa_model` for QA pair extraction." }, "external_knowledge_info": { "type": "object", "nullable": true, "description": "Connection details for external knowledge bases. Present when `provider` is `external`.", "properties": { "external_knowledge_id": { "type": "string", "description": "ID of the external knowledge base." }, "external_knowledge_api_id": { "type": "string", "description": "ID of the external knowledge API connection." }, "external_knowledge_api_name": { "type": "string", "description": "Display name of the external knowledge API." }, "external_knowledge_api_endpoint": { "type": "string", "description": "Endpoint URL of the external knowledge API." } } }, "external_retrieval_model": { "type": "object", "nullable": true, "description": "Retrieval settings for external knowledge bases. 
`null` for internal knowledge bases.", "properties": { "top_k": { "type": "integer", "description": "Maximum number of results to return from the external knowledge base." }, "score_threshold": { "type": "number", "description": "Minimum relevance score threshold." }, "score_threshold_enabled": { "type": "boolean", "description": "Whether score threshold filtering is enabled." } } }, "doc_metadata": { "type": "array", "description": "Metadata field definitions for the knowledge base.", "items": { "type": "object", "properties": { "id": { "type": "string", "description": "Metadata field identifier." }, "name": { "type": "string", "description": "Metadata field name." }, "type": { "type": "string", "description": "Metadata field value type." } } } }, "built_in_field_enabled": { "type": "boolean", "description": "Whether built-in metadata fields (e.g., `document_name`, `uploader`) are enabled." }, "pipeline_id": { "type": "string", "nullable": true, "description": "Pipeline ID, if a custom processing pipeline is configured." }, "runtime_mode": { "type": "string", "nullable": true, "description": "Runtime processing mode." }, "chunk_structure": { "type": "string", "nullable": true, "description": "Chunk structure configuration." }, "icon_info": { "type": "object", "nullable": true, "description": "Icon display configuration for the knowledge base.", "properties": { "icon_type": { "type": "string", "description": "Type of icon." }, "icon": { "type": "string", "description": "Icon identifier or emoji." }, "icon_background": { "type": "string", "description": "Background color for the icon." }, "icon_url": { "type": "string", "description": "URL of a custom icon image." } } }, "is_published": { "type": "boolean", "description": "Whether the knowledge base is published." }, "total_documents": { "type": "integer", "description": "Total number of documents." 
}, "total_available_documents": { "type": "integer", "description": "Number of documents that are enabled and available." }, "enable_api": { "type": "boolean", "description": "Whether API access is enabled for this knowledge base." }, "is_multimodal": { "type": "boolean", "description": "Whether multimodal content processing is enabled." } } }, "Document": { "type": "object", "properties": { "id": { "type": "string", "description": "Unique identifier of the document." }, "position": { "type": "integer", "description": "Display position of the document in the list." }, "data_source_type": { "type": "string", "description": "How the document was created. `upload_file` for file uploads, `notion_import` for Notion imports." }, "data_source_info": { "type": "object", "description": "Raw data source information, varies by `data_source_type`." }, "data_source_detail_dict": { "type": "object", "description": "Detailed data source information including file details." }, "dataset_process_rule_id": { "type": "string", "description": "ID of the processing rule applied to this document." }, "name": { "type": "string", "description": "Document name." }, "created_from": { "type": "string", "description": "Origin of the document. `api` for API creation, `web` for UI creation." }, "created_by": { "type": "string", "description": "ID of the user who created the document." }, "created_at": { "type": "number", "description": "Creation timestamp (Unix epoch in seconds)." }, "tokens": { "type": "integer", "description": "Total number of tokens in the document." }, "indexing_status": { "type": "string", "description": "Current indexing status. `waiting` for queued, `parsing` while extracting content, `cleaning` while removing noise, `splitting` while chunking, `indexing` while building vectors, `completed` when ready, `error` if failed, `paused` if manually paused." }, "error": { "type": "string", "nullable": true, "description": "Error message if indexing failed. `null` when no error." 
}, "enabled": { "type": "boolean", "description": "Whether the document is enabled for retrieval." }, "disabled_at": { "type": "number", "nullable": true, "description": "Timestamp when the document was disabled. `null` if enabled." }, "disabled_by": { "type": "string", "nullable": true, "description": "ID of the user who disabled the document. `null` if enabled." }, "archived": { "type": "boolean", "description": "Whether the document is archived." }, "display_status": { "type": "string", "description": "User-facing display status derived from `indexing_status` and `enabled` state." }, "word_count": { "type": "integer", "description": "Total word count of the document." }, "hit_count": { "type": "integer", "description": "Number of times the document has been matched in retrieval queries." }, "doc_form": { "type": "string", "description": "Document chunking mode. `text_model` for standard text chunking, `hierarchical_model` for parent-child structure, `qa_model` for QA pair extraction." }, "doc_metadata": { "type": "array", "description": "Metadata values assigned to this document.", "items": { "type": "object", "properties": { "id": { "type": "string", "description": "Metadata field identifier." }, "name": { "type": "string", "description": "Metadata field name." }, "type": { "type": "string", "description": "Metadata field value type." }, "value": { "type": "string", "description": "Metadata value for this document." } } } }, "summary_index_status": { "type": "string", "nullable": true, "description": "Status of the summary index for this document. `null` if summary indexing is not configured." }, "need_summary": { "type": "boolean", "description": "Whether a summary needs to be generated for this document." } } }, "Segment": { "type": "object", "properties": { "id": { "type": "string", "description": "Unique identifier of the chunk." }, "position": { "type": "integer", "description": "Position of the chunk within the document." 
}, "document_id": { "type": "string", "description": "ID of the document this chunk belongs to." }, "content": { "type": "string", "description": "Text content of the chunk." }, "sign_content": { "type": "string", "description": "Signed content hash for integrity verification." }, "answer": { "type": "string", "description": "Answer content, used in Q&A mode documents." }, "word_count": { "type": "integer", "description": "Word count of the chunk content." }, "tokens": { "type": "integer", "description": "Token count of the chunk content." }, "keywords": { "type": "array", "description": "Keywords associated with this chunk for keyword-based retrieval.", "items": { "type": "string" } }, "index_node_id": { "type": "string", "description": "ID of the index node in the vector store." }, "index_node_hash": { "type": "string", "description": "Hash of the indexed content, used to detect changes." }, "hit_count": { "type": "integer", "description": "Number of times this chunk has been matched in retrieval queries." }, "enabled": { "type": "boolean", "description": "Whether the chunk is enabled for retrieval." }, "disabled_at": { "type": "number", "nullable": true, "description": "Timestamp when the chunk was disabled. `null` if enabled." }, "disabled_by": { "type": "string", "nullable": true, "description": "ID of the user who disabled the chunk. `null` if enabled." }, "status": { "type": "string", "description": "Current indexing status of the chunk, e.g. `completed`, `indexing`, `error`." }, "created_by": { "type": "string", "description": "ID of the user who created the chunk." }, "created_at": { "type": "number", "description": "Creation timestamp (Unix epoch in seconds)." }, "updated_at": { "type": "number", "description": "Last update timestamp (Unix epoch in seconds)." }, "updated_by": { "type": "string", "description": "ID of the user who last updated the chunk." 
}, "indexing_at": { "type": "number", "nullable": true, "description": "Timestamp when indexing started. `null` if not yet started." }, "completed_at": { "type": "number", "nullable": true, "description": "Timestamp when indexing completed. `null` if not yet completed." }, "error": { "type": "string", "nullable": true, "description": "Error message if indexing failed. `null` when no error." }, "stopped_at": { "type": "number", "nullable": true, "description": "Timestamp when indexing was stopped. `null` if not stopped." }, "child_chunks": { "type": "array", "description": "Child chunks belonging to this chunk. Only present for hierarchical mode documents.", "items": { "$ref": "#/components/schemas/ChildChunk" } }, "attachments": { "type": "array", "description": "Files attached to this chunk.", "items": { "type": "object", "properties": { "id": { "type": "string", "description": "Attachment file identifier." }, "name": { "type": "string", "description": "Original file name." }, "size": { "type": "integer", "description": "File size in bytes." }, "extension": { "type": "string", "description": "File extension." }, "mime_type": { "type": "string", "description": "MIME type of the file." }, "source_url": { "type": "string", "description": "URL to access the attachment." } } } }, "summary": { "type": "string", "nullable": true, "description": "AI-generated summary of the chunk content. `null` if summary indexing is not enabled." } } }, "ChildChunk": { "type": "object", "properties": { "id": { "type": "string", "description": "Unique identifier of the child chunk." }, "segment_id": { "type": "string", "description": "ID of the parent chunk this child chunk belongs to." }, "content": { "type": "string", "description": "Text content of the child chunk." }, "position": { "type": "integer", "description": "Position of the child chunk within the parent chunk." }, "word_count": { "type": "integer", "description": "Word count of the child chunk content." 
}, "type": { "type": "string", "description": "How the child chunk was created. `automatic` for system-generated, `custom` for manually created." }, "created_at": { "type": "number", "description": "Creation timestamp (Unix epoch in seconds)." }, "updated_at": { "type": "number", "description": "Last update timestamp (Unix epoch in seconds)." } } }, "RetrievalModel": { "type": "object", "required": [ "search_method", "reranking_enable", "top_k", "score_threshold_enabled" ], "properties": { "search_method": { "type": "string", "description": "Search method used for retrieval.", "enum": [ "keyword_search", "semantic_search", "full_text_search", "hybrid_search" ] }, "reranking_enable": { "type": "boolean", "description": "Whether reranking is enabled." }, "reranking_model": { "type": "object", "description": "Reranking model configuration.", "properties": { "reranking_provider_name": { "type": "string", "description": "Provider name of the reranking model." }, "reranking_model_name": { "type": "string", "description": "Name of the reranking model." } } }, "reranking_mode": { "type": "string", "enum": [ "reranking_model", "weighted_score" ], "nullable": true, "description": "Reranking mode. Required when `reranking_enable` is `true`." }, "top_k": { "type": "integer", "description": "Maximum number of results to return." }, "score_threshold_enabled": { "type": "boolean", "description": "Whether score threshold filtering is enabled." }, "score_threshold": { "type": "number", "nullable": true, "description": "Minimum relevance score for results. Only effective when `score_threshold_enabled` is `true`." 
}, "weights": { "type": "object", "nullable": true, "description": "Weight configuration for hybrid search.", "properties": { "weight_type": { "type": "string", "description": "Strategy for balancing semantic and keyword search weights.", "enum": [ "semantic_first", "keyword_first", "customized" ] }, "vector_setting": { "type": "object", "description": "Semantic search weight settings.", "properties": { "vector_weight": { "type": "number", "description": "Weight assigned to semantic (vector) search results." }, "embedding_provider_name": { "type": "string", "description": "Provider of the embedding model used for vector search." }, "embedding_model_name": { "type": "string", "description": "Name of the embedding model used for vector search." } } }, "keyword_setting": { "type": "object", "description": "Keyword search weight settings.", "properties": { "keyword_weight": { "type": "number", "description": "Weight assigned to keyword search results." } } } } } } } }, "securitySchemes": { "ApiKeyAuth": { "type": "http", "scheme": "bearer", "bearerFormat": "API_KEY", "description": "API Key authentication. For all API requests, include your API Key in the `Authorization` HTTP header, prefixed with `Bearer `. Example: `Authorization: Bearer {API_KEY}`. **We strongly recommend storing your API Key on the server side and never sharing or storing it on the client side, to avoid API Key leakage that can lead to serious consequences.**" } } } }