Files
dify-docs/en/api-reference/openapi_knowledge.json
Riskey 21122c4282 Batch update apis (#715)
* Batch update service apis

* final check and add zh/ja docs

* add temporary redirects for changed api links embedded in the product

* Restore original operationIds to preserve SDK compatibility

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* complete the missing descriptions

* correct the human input terminology

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-23 16:38:10 +08:00

6657 lines
243 KiB
JSON

{
"openapi": "3.0.1",
"info": {
"title": "Knowledge API",
"description": "API for managing knowledge bases, documents, chunks, metadata, and tags, including creation, retrieval, and configuration. **Note:** A single Knowledge Base API key has permission to operate on all visible knowledge bases under the same account. Please pay attention to data security.",
"version": "1.0.0"
},
"servers": [
{
"url": "{apiBaseUrl}",
"description": "The base URL for the Knowledge API.",
"variables": {
"apiBaseUrl": {
"default": "https://api.dify.ai/v1",
"description": "Actual base URL of the API"
}
}
}
],
"security": [
{
"ApiKeyAuth": []
}
],
"tags": [
{
"name": "Knowledge Bases",
"description": "Operations for managing knowledge bases, including creation, configuration, and retrieval."
},
{
"name": "Documents",
"description": "Operations for creating, updating, and managing documents within a knowledge base."
},
{
"name": "Chunks",
"description": "Operations for managing document chunks and child chunks."
},
{
"name": "Metadata",
"description": "Operations for managing knowledge base metadata fields and document metadata values."
},
{
"name": "Tags",
"description": "Operations for managing knowledge base tags and tag bindings."
},
{
"name": "Models",
"description": "Operations for retrieving available models."
},
{
"name": "Knowledge Pipeline",
"description": "Operations for managing and running knowledge pipelines, including datasource plugins and pipeline execution."
}
],
"paths": {
"/datasets": {
"post": {
"tags": [
"Knowledge Bases"
],
"summary": "Create an Empty Knowledge Base",
"description": "Create a new empty knowledge base. After creation, use [Create Document by Text](/api-reference/documents/create-document-by-text) or [Create Document by File](/api-reference/documents/create-document-by-file) to add documents.",
"operationId": "createDataset",
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"type": "object",
"required": [
"name"
],
"properties": {
"name": {
"type": "string",
"minLength": 1,
"maxLength": 40,
"description": "Name of the knowledge base."
},
"description": {
"type": "string",
"maxLength": 400,
"default": "",
"description": "Description of the knowledge base."
},
"indexing_technique": {
"type": "string",
"enum": [
"high_quality",
"economy"
],
"nullable": true,
"description": "`high_quality` uses embedding models for precise search; `economy` uses keyword-based indexing."
},
"permission": {
"type": "string",
"enum": [
"only_me",
"all_team_members",
"partial_members"
],
"default": "only_me",
"description": "Controls who can access this knowledge base. `only_me` restricts to the creator, `all_team_members` grants access to the entire workspace, `partial_members` grants access to specified members."
},
"provider": {
"type": "string",
"enum": [
"vendor",
"external"
],
"default": "vendor",
"description": "`vendor` for internal knowledge base, `external` for external knowledge base."
},
"embedding_model": {
"type": "string",
"description": "Embedding model name. Use the `model` field from [Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`."
},
"embedding_model_provider": {
"type": "string",
"description": "Embedding model provider. Use the `provider` field from [Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`."
},
"retrieval_model": {
"$ref": "#/components/schemas/RetrievalModel",
"description": "Retrieval model configuration. Controls how chunks are searched and ranked when querying this knowledge base."
},
"external_knowledge_api_id": {
"type": "string",
"description": "ID of the external knowledge API connection."
},
"external_knowledge_id": {
"type": "string",
"description": "ID of the external knowledge base."
},
"summary_index_setting": {
"type": "object",
"nullable": true,
"description": "Summary index configuration.",
"properties": {
"enable": {
"type": "boolean",
"description": "Whether to enable summary indexing."
},
"model_name": {
"type": "string",
"description": "Name of the model used for generating summaries."
},
"model_provider_name": {
"type": "string",
"description": "Provider of the summary generation model."
},
"summary_prompt": {
"type": "string",
"description": "Custom prompt template for summary generation."
}
}
}
}
}
}
}
},
"responses": {
"200": {
"description": "Knowledge base created successfully.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Dataset"
},
"examples": {
"success": {
"summary": "Response Example",
"value": {
"id": "c42e2a6e-40b3-4330-96f8-f1e4d768e8c9",
"name": "Product Documentation",
"description": "Technical documentation for the product API",
"provider": "vendor",
"permission": "only_me",
"data_source_type": null,
"indexing_technique": "high_quality",
"app_count": 0,
"document_count": 0,
"word_count": 0,
"created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4",
"author_name": "admin",
"created_at": 1741267200,
"updated_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4",
"updated_at": 1741267200,
"embedding_model": "text-embedding-3-small",
"embedding_model_provider": "openai",
"embedding_available": true,
"retrieval_model_dict": {
"search_method": "semantic_search",
"reranking_enable": false,
"reranking_mode": null,
"reranking_model": {
"reranking_provider_name": "",
"reranking_model_name": ""
},
"weights": null,
"top_k": 3,
"score_threshold_enabled": false,
"score_threshold": null
},
"tags": [],
"doc_form": "text_model",
"external_knowledge_info": null,
"external_retrieval_model": null,
"doc_metadata": [],
"built_in_field_enabled": true,
"pipeline_id": null,
"runtime_mode": null,
"chunk_structure": null,
"icon_info": null,
"summary_index_setting": null,
"is_published": false,
"total_documents": 0,
"total_available_documents": 0,
"enable_api": true,
"is_multimodal": false
}
}
}
}
}
},
"409": {
"description": "`dataset_name_duplicate` : The dataset name already exists. Please modify your dataset name.",
"content": {
"application/json": {
"examples": {
"dataset_name_duplicate": {
"summary": "dataset_name_duplicate",
"value": {
"status": 409,
"code": "dataset_name_duplicate",
"message": "The dataset name already exists. Please modify your dataset name."
}
}
}
}
}
}
}
},
"get": {
"tags": [
"Knowledge Bases"
],
"summary": "List Knowledge Bases",
"description": "Returns a paginated list of knowledge bases. Supports filtering by keyword and tags.",
"operationId": "listDatasets",
"parameters": [
{
"name": "page",
"in": "query",
"schema": {
"type": "integer",
"default": 1
},
"description": "Page number to retrieve."
},
{
"name": "limit",
"in": "query",
"schema": {
"type": "integer",
"default": 20
},
"description": "Number of items per page."
},
{
"name": "keyword",
"in": "query",
"schema": {
"type": "string"
},
"description": "Search keyword to filter by name."
},
{
"name": "include_all",
"in": "query",
"schema": {
"type": "boolean",
"default": false
},
"description": "Whether to include all knowledge bases regardless of permissions."
},
{
"name": "tag_ids",
"in": "query",
"schema": {
"type": "array",
"items": {
"type": "string"
}
},
"style": "form",
"explode": true,
"description": "Tag IDs to filter by."
}
],
"responses": {
"200": {
"description": "List of knowledge bases.",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"data": {
"type": "array",
"description": "Array of knowledge base objects.",
"items": {
"$ref": "#/components/schemas/Dataset"
}
},
"has_more": {
"type": "boolean",
"description": "Whether more items exist on the next page."
},
"limit": {
"type": "integer",
"description": "Number of items per page."
},
"total": {
"type": "integer",
"description": "Total number of matching items."
},
"page": {
"type": "integer",
"description": "Current page number."
}
}
},
"examples": {
"success": {
"summary": "Response Example",
"value": {
"data": [
{
"id": "c42e2a6e-40b3-4330-96f8-f1e4d768e8c9",
"name": "Product Documentation",
"description": "Technical documentation for the product API",
"provider": "vendor",
"permission": "only_me",
"data_source_type": null,
"indexing_technique": "high_quality",
"app_count": 0,
"document_count": 0,
"word_count": 0,
"created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4",
"author_name": "admin",
"created_at": 1741267200,
"updated_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4",
"updated_at": 1741267200,
"embedding_model": "text-embedding-3-small",
"embedding_model_provider": "openai",
"embedding_available": true,
"retrieval_model_dict": {
"search_method": "semantic_search",
"reranking_enable": false,
"reranking_mode": null,
"reranking_model": {
"reranking_provider_name": "",
"reranking_model_name": ""
},
"weights": null,
"top_k": 3,
"score_threshold_enabled": false,
"score_threshold": null
},
"tags": [],
"doc_form": "text_model",
"external_knowledge_info": null,
"external_retrieval_model": null,
"doc_metadata": [],
"built_in_field_enabled": true,
"pipeline_id": null,
"runtime_mode": null,
"chunk_structure": null,
"icon_info": null,
"summary_index_setting": null,
"is_published": false,
"total_documents": 0,
"total_available_documents": 0,
"enable_api": true,
"is_multimodal": false
}
],
"has_more": false,
"limit": 20,
"total": 1,
"page": 1
}
}
}
}
}
}
}
}
},
"/datasets/{dataset_id}": {
"get": {
"tags": [
"Knowledge Bases"
],
"summary": "Get Knowledge Base",
"description": "Retrieve detailed information about a specific knowledge base, including its embedding model, retrieval configuration, and document statistics.",
"operationId": "getDatasetDetail",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
}
],
"responses": {
"200": {
"description": "Knowledge base details.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Dataset"
},
"examples": {
"success": {
"summary": "Response Example",
"value": {
"id": "c42e2a6e-40b3-4330-96f8-f1e4d768e8c9",
"name": "Product Documentation",
"description": "Technical documentation for the product API",
"provider": "vendor",
"permission": "only_me",
"data_source_type": null,
"indexing_technique": "high_quality",
"app_count": 0,
"document_count": 0,
"word_count": 0,
"created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4",
"author_name": "admin",
"created_at": 1741267200,
"updated_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4",
"updated_at": 1741267200,
"embedding_model": "text-embedding-3-small",
"embedding_model_provider": "openai",
"embedding_available": true,
"retrieval_model_dict": {
"search_method": "semantic_search",
"reranking_enable": false,
"reranking_mode": null,
"reranking_model": {
"reranking_provider_name": "",
"reranking_model_name": ""
},
"weights": null,
"top_k": 3,
"score_threshold_enabled": false,
"score_threshold": null
},
"tags": [],
"doc_form": "text_model",
"external_knowledge_info": null,
"external_retrieval_model": null,
"doc_metadata": [],
"built_in_field_enabled": true,
"pipeline_id": null,
"runtime_mode": null,
"chunk_structure": null,
"icon_info": null,
"summary_index_setting": null,
"is_published": false,
"total_documents": 0,
"total_available_documents": 0,
"enable_api": true,
"is_multimodal": false
}
}
}
}
}
},
"403": {
"description": "`forbidden` : Insufficient permissions to access this knowledge base.",
"content": {
"application/json": {
"examples": {
"forbidden": {
"summary": "forbidden",
"value": {
"status": 403,
"code": "forbidden",
"message": "Insufficient permissions to access this knowledge base."
}
}
}
}
}
},
"404": {
"description": "`not_found` : Dataset not found.",
"content": {
"application/json": {
"examples": {
"not_found": {
"summary": "not_found",
"value": {
"status": 404,
"code": "not_found",
"message": "Dataset not found."
}
}
}
}
}
}
}
},
"patch": {
"tags": [
"Knowledge Bases"
],
"summary": "Update Knowledge Base",
"description": "Update the name, description, permissions, or retrieval settings of an existing knowledge base. Only the fields provided in the request body are updated.",
"operationId": "updateDataset",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
}
],
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"name": {
"type": "string",
"minLength": 1,
"maxLength": 40,
"description": "Name of the knowledge base."
},
"description": {
"type": "string",
"maxLength": 400,
"description": "Description of the knowledge base."
},
"indexing_technique": {
"type": "string",
"enum": [
"high_quality",
"economy"
],
"nullable": true,
"description": "`high_quality` uses embedding models for precise search; `economy` uses keyword-based indexing."
},
"permission": {
"type": "string",
"enum": [
"only_me",
"all_team_members",
"partial_members"
],
"description": "Controls who can access this knowledge base. `only_me` restricts to the creator, `all_team_members` grants access to the entire workspace, `partial_members` grants access to specified members."
},
"embedding_model": {
"type": "string",
"description": "Embedding model name. Use the `model` field from [Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`."
},
"embedding_model_provider": {
"type": "string",
"description": "Embedding model provider. Use the `provider` field from [Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`."
},
"retrieval_model": {
"$ref": "#/components/schemas/RetrievalModel",
"description": "Retrieval model configuration. Controls how chunks are searched and ranked when querying this knowledge base."
},
"partial_member_list": {
"type": "array",
"description": "List of team members with access when `permission` is `partial_members`.",
"items": {
"type": "object",
"properties": {
"user_id": {
"type": "string",
"description": "ID of the team member to grant access."
}
}
}
},
"external_retrieval_model": {
"type": "object",
"description": "Retrieval settings for external knowledge bases.",
"properties": {
"top_k": {
"type": "integer",
"description": "Maximum number of results to return."
},
"score_threshold": {
"type": "number",
"description": "Minimum relevance score threshold for filtering results."
},
"score_threshold_enabled": {
"type": "boolean",
"description": "Whether score threshold filtering is enabled."
}
}
},
"external_knowledge_id": {
"type": "string",
"description": "ID of the external knowledge base."
},
"external_knowledge_api_id": {
"type": "string",
"description": "ID of the external knowledge API connection."
}
}
}
}
}
},
"responses": {
"200": {
"description": "Knowledge base updated successfully.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Dataset"
},
"examples": {
"success": {
"summary": "Response Example",
"value": {
"id": "c42e2a6e-40b3-4330-96f8-f1e4d768e8c9",
"name": "Product Documentation",
"description": "Technical documentation for the product API",
"provider": "vendor",
"permission": "only_me",
"data_source_type": null,
"indexing_technique": "high_quality",
"app_count": 0,
"document_count": 0,
"word_count": 0,
"created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4",
"author_name": "admin",
"created_at": 1741267200,
"updated_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4",
"updated_at": 1741267200,
"embedding_model": "text-embedding-3-small",
"embedding_model_provider": "openai",
"embedding_available": true,
"retrieval_model_dict": {
"search_method": "semantic_search",
"reranking_enable": false,
"reranking_mode": null,
"reranking_model": {
"reranking_provider_name": "",
"reranking_model_name": ""
},
"weights": null,
"top_k": 3,
"score_threshold_enabled": false,
"score_threshold": null
},
"tags": [],
"doc_form": "text_model",
"external_knowledge_info": null,
"external_retrieval_model": null,
"doc_metadata": [],
"built_in_field_enabled": true,
"pipeline_id": null,
"runtime_mode": null,
"chunk_structure": null,
"icon_info": null,
"summary_index_setting": null,
"is_published": false,
"total_documents": 0,
"total_available_documents": 0,
"enable_api": true,
"is_multimodal": false
}
}
}
}
}
},
"403": {
"description": "`forbidden` : Insufficient permissions to access this knowledge base.",
"content": {
"application/json": {
"examples": {
"forbidden": {
"summary": "forbidden",
"value": {
"status": 403,
"code": "forbidden",
"message": "Insufficient permissions to access this knowledge base."
}
}
}
}
}
},
"404": {
"description": "`not_found` : Dataset not found.",
"content": {
"application/json": {
"examples": {
"not_found": {
"summary": "not_found",
"value": {
"status": 404,
"code": "not_found",
"message": "Dataset not found."
}
}
}
}
}
}
}
},
"delete": {
"tags": [
"Knowledge Bases"
],
"summary": "Delete Knowledge Base",
"description": "Permanently delete a knowledge base and all its documents. The knowledge base must not be in use by any application.",
"operationId": "deleteDataset",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
}
],
"responses": {
"204": {
"description": "Success."
},
"404": {
"description": "`not_found` : Dataset not found.",
"content": {
"application/json": {
"examples": {
"not_found": {
"summary": "not_found",
"value": {
"status": 404,
"code": "not_found",
"message": "Dataset not found."
}
}
}
}
}
},
"409": {
"description": "`dataset_in_use` : The knowledge base is being used by some apps. Please remove it from the apps before deleting.",
"content": {
"application/json": {
"examples": {
"dataset_in_use": {
"summary": "dataset_in_use",
"value": {
"status": 409,
"code": "dataset_in_use",
"message": "The dataset is being used by some apps. Please remove the dataset from the apps before deleting it."
}
}
}
}
}
}
}
}
},
"/datasets/{dataset_id}/document/create-by-text": {
"post": {
"tags": [
"Documents"
],
"summary": "Create Document by Text",
"description": "Create a document from raw text content. The document is processed asynchronously — use the returned `batch` ID with [Get Document Indexing Status](/api-reference/documents/get-document-indexing-status) to track progress.",
"operationId": "createDocumentFromText",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
}
],
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"type": "object",
"required": [
"name",
"text"
],
"properties": {
"name": {
"type": "string",
"description": "Document name."
},
"text": {
"type": "string",
"description": "Document text content."
},
"indexing_technique": {
"type": "string",
"enum": [
"high_quality",
"economy"
],
"description": "Required when adding the first document to a knowledge base. Subsequent documents inherit the knowledge base's indexing technique if omitted. `high_quality` uses embedding models for precise search; `economy` uses keyword-based indexing."
},
"doc_form": {
"type": "string",
"enum": [
"text_model",
"hierarchical_model",
"qa_model"
],
"default": "text_model",
"description": "`text_model` for standard text chunking, `hierarchical_model` for parent-child chunk structure, `qa_model` for question-answer pair extraction."
},
"doc_language": {
"type": "string",
"default": "English",
"description": "Language of the document for processing optimization."
},
"process_rule": {
"type": "object",
"description": "Processing rules for chunking.",
"required": [
"mode"
],
"properties": {
"mode": {
"type": "string",
"enum": [
"automatic",
"custom",
"hierarchical"
],
"description": "Processing mode. `automatic` uses built-in rules, `custom` allows manual configuration, `hierarchical` enables parent-child chunk structure (use with `doc_form: hierarchical_model`)."
},
"rules": {
"type": "object",
"properties": {
"pre_processing_rules": {
"type": "array",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string",
"enum": [
"remove_stopwords",
"remove_extra_spaces",
"remove_urls_emails"
],
"description": "Rule identifier."
},
"enabled": {
"type": "boolean",
"description": "Whether this preprocessing rule is enabled."
}
}
}
},
"segmentation": {
"type": "object",
"properties": {
"separator": {
"type": "string",
"default": "\n",
"description": "Custom separator for splitting text."
},
"max_tokens": {
"type": "integer",
"description": "Maximum token count per chunk."
},
"chunk_overlap": {
"type": "integer",
"default": 0,
"description": "Token overlap between chunks."
}
}
}
}
}
}
},
"retrieval_model": {
"$ref": "#/components/schemas/RetrievalModel",
"description": "Retrieval model configuration. Controls how chunks are searched and ranked when querying this knowledge base."
},
"embedding_model": {
"type": "string",
"description": "Embedding model name. Use the `model` field from [Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`."
},
"embedding_model_provider": {
"type": "string",
"description": "Embedding model provider. Use the `provider` field from [Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`."
},
"original_document_id": {
"type": "string",
"description": "Original document ID for versioning."
}
}
}
}
}
},
"responses": {
"200": {
"description": "Document created successfully.",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"document": {
"$ref": "#/components/schemas/Document"
},
"batch": {
"type": "string",
"description": "Batch ID for tracking indexing progress."
}
}
},
"examples": {
"success": {
"summary": "Response Example",
"value": {
"document": {
"id": "a8e0e5b5-78c6-4130-a5ce-25feb0e0b4ac",
"position": 1,
"data_source_type": "upload_file",
"data_source_info": {
"upload_file_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
},
"data_source_detail_dict": {
"upload_file": {
"id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
"name": "guide.txt",
"size": 2048,
"extension": "txt",
"mime_type": "text/plain",
"created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4",
"created_at": 1741267200
}
},
"dataset_process_rule_id": "e1f2a3b4-c5d6-7890-ef12-345678901234",
"name": "guide.txt",
"created_from": "api",
"created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4",
"created_at": 1741267200,
"tokens": 0,
"indexing_status": "indexing",
"error": null,
"enabled": true,
"disabled_at": null,
"disabled_by": null,
"archived": false,
"display_status": "indexing",
"word_count": 0,
"hit_count": 0,
"doc_form": "text_model",
"doc_metadata": [],
"summary_index_status": null,
"need_summary": false
},
"batch": "20250306150245647595"
}
}
}
}
}
},
"400": {
"description": "- `provider_not_initialize` : No valid model provider credentials found. Please go to Settings -> Model Provider to complete your provider credentials.\n- `invalid_param` : Knowledge base does not exist. / indexing_technique is required.",
"content": {
"application/json": {
"examples": {
"provider_not_initialize": {
"summary": "provider_not_initialize",
"value": {
"status": 400,
"code": "provider_not_initialize",
"message": "No valid model provider credentials found. Please go to Settings -> Model Provider to complete your provider credentials."
}
},
"invalid_param_dataset": {
"summary": "invalid_param",
"value": {
"status": 400,
"code": "invalid_param",
"message": "Dataset does not exist."
}
},
"invalid_param_indexing": {
"summary": "invalid_param",
"value": {
"status": 400,
"code": "invalid_param",
"message": "indexing_technique is required."
}
}
}
}
}
}
}
}
},
"/datasets/{dataset_id}/document/create-by-file": {
"post": {
"tags": [
"Documents"
],
"summary": "Create Document by File",
"description": "Create a document by uploading a file. Supports common document formats (PDF, TXT, DOCX, etc.). Processing is asynchronous — use the returned `batch` ID with [Get Document Indexing Status](/api-reference/documents/get-document-indexing-status) to track progress.",
"operationId": "createDocumentFromFile",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
}
],
"requestBody": {
"required": true,
"content": {
"multipart/form-data": {
"schema": {
"type": "object",
"required": [
"file"
],
"properties": {
"file": {
"type": "string",
"format": "binary",
"description": "File to upload."
},
"data": {
"type": "string",
"description": "JSON string containing configuration. Accepts the same fields as [Create Document by Text](/api-reference/documents/create-document-by-text) (`indexing_technique`, `doc_form`, `doc_language`, `process_rule`, `retrieval_model`, `embedding_model`, `embedding_model_provider`) except `name` and `text`.",
"example": "{\"indexing_technique\":\"high_quality\",\"doc_form\":\"text_model\",\"doc_language\":\"English\",\"process_rule\":{\"mode\":\"automatic\"}}"
}
}
}
}
}
},
"responses": {
"200": {
"description": "Document created successfully.",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"document": {
"$ref": "#/components/schemas/Document"
},
"batch": {
"type": "string",
"description": "Batch ID for tracking indexing progress."
}
}
},
"examples": {
"success": {
"summary": "Response Example",
"value": {
"document": {
"id": "a8e0e5b5-78c6-4130-a5ce-25feb0e0b4ac",
"position": 1,
"data_source_type": "upload_file",
"data_source_info": {
"upload_file_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
},
"data_source_detail_dict": {
"upload_file": {
"id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
"name": "guide.txt",
"size": 2048,
"extension": "txt",
"mime_type": "text/plain",
"created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4",
"created_at": 1741267200
}
},
"dataset_process_rule_id": "e1f2a3b4-c5d6-7890-ef12-345678901234",
"name": "guide.txt",
"created_from": "api",
"created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4",
"created_at": 1741267200,
"tokens": 0,
"indexing_status": "indexing",
"error": null,
"enabled": true,
"disabled_at": null,
"disabled_by": null,
"archived": false,
"display_status": "indexing",
"word_count": 0,
"hit_count": 0,
"doc_form": "text_model",
"doc_metadata": [],
"summary_index_status": null,
"need_summary": false
},
"batch": "20250306150245647595"
}
}
}
}
}
},
"400": {
"description": "- `no_file_uploaded` : Please upload your file.\n- `too_many_files` : Only one file is allowed.\n- `filename_not_exists_error` : The specified filename does not exist.\n- `provider_not_initialize` : No valid model provider credentials found. Please go to Settings -> Model Provider to complete your provider credentials.\n- `invalid_param` : Knowledge base does not exist, external datasets not supported, file too large, unsupported file type, or missing required fields.",
"content": {
"application/json": {
"examples": {
"no_file_uploaded": {
"summary": "no_file_uploaded",
"value": {
"status": 400,
"code": "no_file_uploaded",
"message": "Please upload your file."
}
},
"too_many_files": {
"summary": "too_many_files",
"value": {
"status": 400,
"code": "too_many_files",
"message": "Only one file is allowed."
}
},
"filename_not_exists_error": {
"summary": "filename_not_exists_error",
"value": {
"status": 400,
"code": "filename_not_exists_error",
"message": "The specified filename does not exist."
}
},
"provider_not_initialize": {
"summary": "provider_not_initialize",
"value": {
"status": 400,
"code": "provider_not_initialize",
"message": "No valid model provider credentials found. Please go to Settings -> Model Provider to complete your provider credentials."
}
},
"invalid_param_dataset": {
"summary": "invalid_param (dataset)",
"value": {
"status": 400,
"code": "invalid_param",
"message": "Dataset does not exist."
}
},
"invalid_param_external": {
"summary": "invalid_param (external)",
"value": {
"status": 400,
"code": "invalid_param",
"message": "External datasets are not supported."
}
},
"invalid_param_file_too_large": {
"summary": "invalid_param (file_too_large)",
"value": {
"status": 400,
"code": "invalid_param",
"message": "File size exceeded."
}
},
"invalid_param_unsupported_file_type": {
"summary": "invalid_param (unsupported_file_type)",
"value": {
"status": 400,
"code": "invalid_param",
"message": "File type not allowed."
}
},
"invalid_param_indexing": {
"summary": "invalid_param (indexing_technique)",
"value": {
"status": 400,
"code": "invalid_param",
"message": "indexing_technique is required."
}
},
"invalid_param_process_rule": {
"summary": "invalid_param (process_rule)",
"value": {
"status": 400,
"code": "invalid_param",
"message": "process_rule is required."
}
}
}
}
}
}
}
}
},
"/datasets/{dataset_id}/documents": {
"get": {
"tags": [
"Documents"
],
"summary": "List Documents",
"description": "Returns a paginated list of documents in the knowledge base. Supports filtering by keyword and indexing status.",
"operationId": "listDocuments",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
},
{
"name": "page",
"in": "query",
"schema": {
"type": "integer",
"default": 1
},
"description": "Page number to retrieve."
},
{
"name": "limit",
"in": "query",
"schema": {
"type": "integer",
"default": 20
},
"description": "Number of items per page. Server caps at `100`."
},
{
"name": "keyword",
"in": "query",
"schema": {
"type": "string"
},
"description": "Search keyword to filter by document name."
},
{
"name": "status",
"in": "query",
"schema": {
"type": "string",
"enum": [
"queuing",
"indexing",
"paused",
"error",
"available",
"disabled",
"archived"
]
},
"description": "Filter by display status."
}
],
"responses": {
"200": {
"description": "List of documents.",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"data": {
"type": "array",
"description": "Array of document objects.",
"items": {
"$ref": "#/components/schemas/Document"
}
},
"has_more": {
"type": "boolean",
"description": "Whether more items exist on the next page."
},
"limit": {
"type": "integer",
"description": "Number of items per page."
},
"total": {
"type": "integer",
"description": "Total number of matching items."
},
"page": {
"type": "integer",
"description": "Current page number."
}
}
},
"examples": {
"success": {
"summary": "Response Example",
"value": {
"data": [
{
"id": "a8e0e5b5-78c6-4130-a5ce-25feb0e0b4ac",
"position": 1,
"data_source_type": "upload_file",
"data_source_info": {
"upload_file_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
},
"data_source_detail_dict": {
"upload_file": {
"id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
"name": "guide.txt",
"size": 2048,
"extension": "txt",
"mime_type": "text/plain",
"created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4",
"created_at": 1741267200
}
},
"dataset_process_rule_id": "e1f2a3b4-c5d6-7890-ef12-345678901234",
"name": "guide.txt",
"created_from": "api",
"created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4",
"created_at": 1741267200,
"tokens": 512,
"indexing_status": "completed",
"error": null,
"enabled": true,
"disabled_at": null,
"disabled_by": null,
"archived": false,
"display_status": "available",
"word_count": 350,
"hit_count": 0,
"doc_form": "text_model",
"doc_metadata": [],
"summary_index_status": null,
"need_summary": false
}
],
"has_more": false,
"limit": 20,
"total": 1,
"page": 1
}
}
}
}
}
},
"404": {
"description": "`not_found` : Knowledge base not found.",
"content": {
"application/json": {
"examples": {
"not_found": {
"summary": "not_found",
"value": {
"status": 404,
"code": "not_found",
"message": "Dataset not found."
}
}
}
}
}
}
}
}
},
"/datasets/{dataset_id}/documents/{document_id}": {
"get": {
"tags": [
"Documents"
],
"summary": "Get Document",
"description": "Retrieve detailed information about a specific document, including its indexing status, metadata, and processing statistics.",
"operationId": "getDocumentDetail",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
},
{
"name": "document_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Document ID."
},
{
"name": "metadata",
"in": "query",
"schema": {
"type": "string",
"enum": [
"all",
"only",
"without"
],
"default": "all"
},
"description": "`all` returns all fields including metadata. `only` returns only `id`, `doc_type`, and `doc_metadata`. `without` returns all fields except `doc_type` and `doc_metadata`."
}
],
"responses": {
"200": {
"description": "Document details. The response shape varies based on the `metadata` query parameter. When `metadata` is `only`, only `id`, `doc_type`, and `doc_metadata` are returned. When `metadata` is `without`, `doc_type` and `doc_metadata` are omitted.",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Document identifier."
},
"position": {
"type": "integer",
"description": "Position index within the knowledge base."
},
"data_source_type": {
"type": "string",
"description": "How the document was uploaded. `upload_file` for file uploads, `notion_import` for Notion imports."
},
"data_source_info": {
"type": "object",
"description": "Raw data source information."
},
"dataset_process_rule_id": {
"type": "string",
"description": "ID of the processing rule applied to this document."
},
"dataset_process_rule": {
"type": "object",
"description": "Knowledge-base-level processing rule configuration."
},
"document_process_rule": {
"type": "object",
"description": "Document-level processing rule configuration."
},
"name": {
"type": "string",
"description": "Document name."
},
"created_from": {
"type": "string",
"description": "Origin of the document. `api` for API creation, `web` for UI creation."
},
"created_by": {
"type": "string",
"description": "ID of the user who created the document."
},
"created_at": {
"type": "number",
"description": "Unix timestamp of document creation."
},
"tokens": {
"type": "integer",
"description": "Number of tokens in the document."
},
"indexing_status": {
"type": "string",
"description": "Current indexing status, e.g. `waiting`, `parsing`, `cleaning`, `splitting`, `indexing`, `completed`, `error`, `paused`."
},
"error": {
"type": "string",
"nullable": true,
"description": "Error message if indexing failed, `null` otherwise."
},
"enabled": {
"type": "boolean",
"description": "Whether the document is enabled for retrieval."
},
"disabled_at": {
"type": "number",
"nullable": true,
"description": "Unix timestamp when the document was disabled, `null` if enabled."
},
"disabled_by": {
"type": "string",
"nullable": true,
"description": "ID of the user who disabled the document, `null` if enabled."
},
"archived": {
"type": "boolean",
"description": "Whether the document is archived."
},
"display_status": {
"type": "string",
"description": "Display-friendly indexing status for the UI."
},
"word_count": {
"type": "integer",
"description": "Total word count of the document."
},
"hit_count": {
"type": "integer",
"description": "Number of times this document has been retrieved."
},
"doc_form": {
"type": "string",
"description": "Document chunking mode. `text_model` for standard text, `hierarchical_model` for parent-child, `qa_model` for QA pairs."
},
"doc_language": {
"type": "string",
"description": "Language of the document content."
},
"doc_type": {
"type": "string",
"nullable": true,
"description": "Document type classification, `null` if not set."
},
"doc_metadata": {
"type": "array",
"description": "Custom metadata key-value pairs for this document.",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Metadata field identifier."
},
"name": {
"type": "string",
"description": "Metadata field name."
},
"type": {
"type": "string",
"description": "Metadata field type."
},
"value": {
"type": "string",
"description": "Metadata field value for this document."
}
}
}
},
"completed_at": {
"type": "number",
"nullable": true,
"description": "Unix timestamp when processing completed, `null` if not yet completed."
},
"updated_at": {
"type": "number",
"nullable": true,
"description": "Unix timestamp of last update, `null` if never updated."
},
"indexing_latency": {
"type": "number",
"nullable": true,
"description": "Time taken for indexing in seconds, `null` if not completed."
},
"segment_count": {
"type": "integer",
"description": "Number of chunks in the document."
},
"average_segment_length": {
"type": "number",
"description": "Average character length of chunks."
},
"summary_index_status": {
"type": "string",
"nullable": true,
"description": "Status of summary indexing, `null` if summary index is not enabled."
},
"need_summary": {
"type": "boolean",
"description": "Whether the document needs summary generation."
}
}
},
"examples": {
"success": {
"summary": "Response Example",
"value": {
"id": "a8e0e5b5-78c6-4130-a5ce-25feb0e0b4ac",
"position": 1,
"data_source_type": "upload_file",
"data_source_info": {
"upload_file_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
},
"dataset_process_rule_id": "e1f2a3b4-c5d6-7890-ef12-345678901234",
"dataset_process_rule": {
"id": "e1f2a3b4-c5d6-7890-ef12-345678901234",
"mode": "custom"
},
"document_process_rule": {
"mode": "custom",
"rules": {
"pre_processing_rules": [],
"segmentation": {
"separator": "###",
"max_tokens": 500,
"chunk_overlap": 50
}
}
},
"name": "guide.txt",
"created_from": "api",
"created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4",
"created_at": 1741267200,
"tokens": 512,
"indexing_status": "completed",
"error": null,
"enabled": true,
"disabled_at": null,
"disabled_by": null,
"archived": false,
"display_status": "available",
"word_count": 350,
"hit_count": 0,
"doc_form": "text_model",
"doc_language": "English",
"doc_type": null,
"doc_metadata": [],
"completed_at": 1741267260,
"updated_at": 1741267260,
"indexing_latency": 60.0,
"segment_count": 5,
"average_segment_length": 70.0,
"summary_index_status": null,
"need_summary": false
}
}
}
}
}
},
"400": {
"description": "`invalid_metadata` : The `metadata` query parameter is not one of `all`, `only`, or `without`.",
"content": {
"application/json": {
"examples": {
"invalid_metadata": {
"summary": "invalid_metadata",
"value": {
"status": 400,
"code": "invalid_metadata",
"message": "Invalid metadata value: {metadata_key}"
}
}
}
}
}
},
"403": {
"description": "`forbidden` : No permission.",
"content": {
"application/json": {
"examples": {
"forbidden": {
"summary": "forbidden",
"value": {
"status": 403,
"code": "forbidden",
"message": "No permission."
}
}
}
}
}
},
"404": {
"description": "`not_found` : Document not found.",
"content": {
"application/json": {
"examples": {
"not_found": {
"summary": "not_found",
"value": {
"status": 404,
"code": "not_found",
"message": "Document not found."
}
}
}
}
}
}
}
},
"delete": {
"tags": [
"Documents"
],
"summary": "Delete Document",
"description": "Permanently delete a document and all its chunks from the knowledge base.",
"operationId": "deleteDocument",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
},
{
"name": "document_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Document ID."
}
],
"responses": {
"204": {
"description": "Success."
},
"400": {
"description": "`document_indexing` : Cannot delete document during indexing.",
"content": {
"application/json": {
"examples": {
"document_indexing": {
"summary": "document_indexing",
"value": {
"status": 400,
"code": "document_indexing",
"message": "Cannot delete document during indexing."
}
}
}
}
}
},
"403": {
"description": "`archived_document_immutable` : The archived document is not editable.",
"content": {
"application/json": {
"examples": {
"archived_document_immutable": {
"summary": "archived_document_immutable",
"value": {
"status": 403,
"code": "archived_document_immutable",
"message": "The archived document is not editable."
}
}
}
}
}
},
"404": {
"description": "`not_found` : Document does not exist.",
"content": {
"application/json": {
"examples": {
"not_found": {
"summary": "not_found",
"value": {
"status": 404,
"code": "not_found",
"message": "Document Not Exists."
}
}
}
}
}
}
}
}
},
"/datasets/{dataset_id}/documents/{document_id}/download": {
"get": {
"tags": [
"Documents"
],
"summary": "Download Document",
"description": "Get a signed download URL for a document's original uploaded file.",
"operationId": "downloadDocument",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
},
{
"name": "document_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Document ID."
}
],
"responses": {
"200": {
"description": "Download URL generated successfully.",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "Signed URL to download the original uploaded file."
}
}
},
"examples": {
"success": {
"summary": "Response Example",
"value": {
"url": "https://storage.example.com/datasets/documents/abc123/original-file.pdf?token=xyz789&expires=1741353600"
}
}
}
}
}
},
"403": {
"description": "`forbidden` : No permission to access this document.",
"content": {
"application/json": {
"examples": {
"forbidden": {
"summary": "forbidden",
"value": {
"status": 403,
"code": "forbidden",
"message": "No permission."
}
}
}
}
}
},
"404": {
"description": "`not_found` : Document not found.",
"content": {
"application/json": {
"examples": {
"not_found": {
"summary": "not_found",
"value": {
"status": 404,
"code": "not_found",
"message": "Document not found."
}
}
}
}
}
}
}
}
},
"/datasets/{dataset_id}/documents/{batch}/indexing-status": {
"get": {
"tags": [
"Documents"
],
"summary": "Get Document Indexing Status",
"description": "Check the indexing progress of documents in a batch. Returns the current processing stage and chunk completion counts for each document. Poll this endpoint until `indexing_status` reaches `completed` or `error`. The status progresses through: `waiting` → `parsing` → `cleaning` → `splitting` → `indexing` → `completed`.",
"operationId": "getDocumentIndexingStatus",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
},
{
"name": "batch",
"in": "path",
"required": true,
"schema": {
"type": "string"
},
"description": "Batch ID returned from document creation."
}
],
"responses": {
"200": {
"description": "Indexing status for documents in the batch.",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"data": {
"type": "array",
"description": "List of indexing status entries.",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Document identifier."
},
"indexing_status": {
"type": "string",
"description": "Current indexing status: `waiting`, `parsing`, `cleaning`, `splitting`, `indexing`, `completed`, or `error`."
},
"processing_started_at": {
"type": "number",
"description": "Unix timestamp when processing started."
},
"parsing_completed_at": {
"type": "number",
"description": "Unix timestamp when parsing completed."
},
"cleaning_completed_at": {
"type": "number",
"description": "Unix timestamp when cleaning completed."
},
"splitting_completed_at": {
"type": "number",
"description": "Unix timestamp when splitting completed."
},
"completed_at": {
"type": "number",
"description": "Unix timestamp when indexing completed."
},
"paused_at": {
"type": "number",
"nullable": true,
"description": "Unix timestamp when indexing was paused. `null` if not paused."
},
"error": {
"type": "string",
"nullable": true,
"description": "Error message if indexing failed. `null` if no error."
},
"stopped_at": {
"type": "number",
"nullable": true,
"description": "Unix timestamp when indexing was stopped. `null` if not stopped."
},
"completed_segments": {
"type": "integer",
"description": "Number of chunks that have been indexed."
},
"total_segments": {
"type": "integer",
"description": "Total number of chunks to be indexed."
}
}
}
}
}
},
"examples": {
"success": {
"summary": "Response Example",
"value": {
"data": [
{
"id": "a8e0e5b5-78c6-4130-a5ce-25feb0e0b4ac",
"indexing_status": "completed",
"processing_started_at": 1741267200,
"parsing_completed_at": 1741267200,
"cleaning_completed_at": 1741267200,
"splitting_completed_at": 1741267200,
"completed_at": 1741267200,
"paused_at": null,
"error": null,
"stopped_at": null,
"completed_segments": 5,
"total_segments": 5
}
]
}
}
}
}
}
},
"404": {
"description": "`not_found` : Knowledge base not found. / Documents not found.",
"content": {
"application/json": {
"examples": {
"dataset_not_found": {
"summary": "not_found (dataset)",
"value": {
"status": 404,
"code": "not_found",
"message": "Dataset not found."
}
},
"documents_not_found": {
"summary": "not_found (documents)",
"value": {
"status": 404,
"code": "not_found",
"message": "Documents not found."
}
}
}
}
}
}
}
}
},
"/datasets/{dataset_id}/documents/{document_id}/update-by-text": {
"post": {
"tags": [
"Documents"
],
"summary": "Update Document by Text",
"description": "Update an existing document's text content, name, or processing configuration. Re-triggers indexing if content changes — use the returned `batch` ID with [Get Document Indexing Status](/api-reference/documents/get-document-indexing-status) to track progress.",
"operationId": "updateDocumentByText",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
},
{
"name": "document_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Document ID."
}
],
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "Document name. Required when `text` is provided."
},
"text": {
"type": "string",
"description": "Document text content."
},
"process_rule": {
"type": "object",
"description": "Processing rules for chunking.",
"required": [
"mode"
],
"properties": {
"mode": {
"type": "string",
"enum": [
"automatic",
"custom",
"hierarchical"
],
"description": "Processing mode. `automatic` uses built-in rules, `custom` allows manual configuration, `hierarchical` enables parent-child chunk structure (use with `doc_form: hierarchical_model`)."
},
"rules": {
"type": "object",
"properties": {
"pre_processing_rules": {
"type": "array",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string",
"enum": [
"remove_stopwords",
"remove_extra_spaces",
"remove_urls_emails"
],
"description": "Rule identifier."
},
"enabled": {
"type": "boolean",
"description": "Whether this preprocessing rule is enabled."
}
}
}
},
"segmentation": {
"type": "object",
"properties": {
"separator": {
"type": "string",
"default": "\n",
"description": "Custom separator for splitting text."
},
"max_tokens": {
"type": "integer",
"description": "Maximum token count per chunk."
},
"chunk_overlap": {
"type": "integer",
"default": 0,
"description": "Token overlap between chunks."
}
}
}
}
}
}
},
"doc_form": {
"type": "string",
"enum": [
"text_model",
"hierarchical_model",
"qa_model"
],
"default": "text_model",
"description": "`text_model` for standard text chunking, `hierarchical_model` for parent-child chunk structure, `qa_model` for question-answer pair extraction."
},
"doc_language": {
"type": "string",
"default": "English",
"description": "Language of the document for processing optimization."
},
"retrieval_model": {
"$ref": "#/components/schemas/RetrievalModel",
"description": "Retrieval model configuration. Controls how chunks are searched and ranked when querying this knowledge base."
}
}
}
}
}
},
"responses": {
"200": {
"description": "Document updated successfully.",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"document": {
"$ref": "#/components/schemas/Document"
},
"batch": {
"type": "string",
"description": "Batch ID for tracking indexing progress."
}
}
},
"examples": {
"success": {
"summary": "Response Example",
"value": {
"document": {
"id": "a8e0e5b5-78c6-4130-a5ce-25feb0e0b4ac",
"position": 1,
"data_source_type": "upload_file",
"data_source_info": {
"upload_file_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
},
"data_source_detail_dict": {
"upload_file": {
"id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
"name": "guide.txt",
"size": 2048,
"extension": "txt",
"mime_type": "text/plain",
"created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4",
"created_at": 1741267200
}
},
"dataset_process_rule_id": "e1f2a3b4-c5d6-7890-ef12-345678901234",
"name": "guide.txt",
"created_from": "api",
"created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4",
"created_at": 1741267200,
"tokens": 512,
"indexing_status": "completed",
"error": null,
"enabled": true,
"disabled_at": null,
"disabled_by": null,
"archived": false,
"display_status": "available",
"word_count": 350,
"hit_count": 0,
"doc_form": "text_model",
"doc_metadata": [],
"summary_index_status": null,
"need_summary": false
},
"batch": "20250306150245647595"
}
}
}
}
}
},
"400": {
"description": "- `provider_not_initialize` : No valid model provider credentials found. Please go to Settings -> Model Provider to complete your provider credentials.\n- `invalid_param` : Knowledge base does not exist, or name is required when text is provided.",
"content": {
"application/json": {
"examples": {
"provider_not_initialize": {
"summary": "provider_not_initialize",
"value": {
"status": 400,
"code": "provider_not_initialize",
"message": "No valid model provider credentials found. Please go to Settings -> Model Provider to complete your provider credentials."
}
},
"invalid_param_dataset": {
"summary": "invalid_param (dataset)",
"value": {
"status": 400,
"code": "invalid_param",
"message": "Dataset does not exist."
}
},
"invalid_param_name_required": {
"summary": "invalid_param (name required)",
"value": {
"status": 400,
"code": "invalid_param",
"message": "name is required when text is provided."
}
}
}
}
}
}
}
}
},
"/datasets/{dataset_id}/documents/{document_id}/update-by-file": {
"post": {
"tags": [
"Documents"
],
"summary": "Update Document by File",
"description": "Update an existing document by uploading a new file. Re-triggers indexing — use the returned `batch` ID with [Get Document Indexing Status](/api-reference/documents/get-document-indexing-status) to track progress.",
"operationId": "updateDocumentByFile",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
},
{
"name": "document_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Document ID."
}
],
"requestBody": {
"required": true,
"content": {
"multipart/form-data": {
"schema": {
"type": "object",
"properties": {
"file": {
"type": "string",
"format": "binary",
"description": "File to upload."
},
"data": {
"type": "string",
"description": "JSON string containing configuration. Accepts the same fields as [Create Document by Text](/api-reference/documents/create-document-by-text) (`indexing_technique`, `doc_form`, `doc_language`, `process_rule`, `retrieval_model`, `embedding_model`, `embedding_model_provider`) except `name` and `text`.",
"example": "{\"indexing_technique\":\"high_quality\",\"doc_form\":\"text_model\",\"doc_language\":\"English\",\"process_rule\":{\"mode\":\"automatic\"}}"
}
}
}
}
}
},
"responses": {
"200": {
"description": "Document updated successfully.",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"document": {
"$ref": "#/components/schemas/Document"
},
"batch": {
"type": "string",
"description": "Batch ID for tracking indexing progress."
}
}
},
"examples": {
"success": {
"summary": "Response Example",
"value": {
"document": {
"id": "a8e0e5b5-78c6-4130-a5ce-25feb0e0b4ac",
"position": 1,
"data_source_type": "upload_file",
"data_source_info": {
"upload_file_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
},
"data_source_detail_dict": {
"upload_file": {
"id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
"name": "guide.txt",
"size": 2048,
"extension": "txt",
"mime_type": "text/plain",
"created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4",
"created_at": 1741267200
}
},
"dataset_process_rule_id": "e1f2a3b4-c5d6-7890-ef12-345678901234",
"name": "guide.txt",
"created_from": "api",
"created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4",
"created_at": 1741267200,
"tokens": 512,
"indexing_status": "completed",
"error": null,
"enabled": true,
"disabled_at": null,
"disabled_by": null,
"archived": false,
"display_status": "available",
"word_count": 350,
"hit_count": 0,
"doc_form": "text_model",
"doc_metadata": [],
"summary_index_status": null,
"need_summary": false
},
"batch": "20250306150245647595"
}
}
}
}
}
},
"400": {
"description": "- `too_many_files` : Only one file is allowed.\n- `filename_not_exists_error` : The specified filename does not exist.\n- `provider_not_initialize` : No valid model provider credentials found. Please go to Settings -> Model Provider to complete your provider credentials.\n- `invalid_param` : Knowledge base does not exist, external datasets not supported, file too large, or unsupported file type.",
"content": {
"application/json": {
"examples": {
"too_many_files": {
"summary": "too_many_files",
"value": {
"status": 400,
"code": "too_many_files",
"message": "Only one file is allowed."
}
},
"filename_not_exists_error": {
"summary": "filename_not_exists_error",
"value": {
"status": 400,
"code": "filename_not_exists_error",
"message": "The specified filename does not exist."
}
},
"provider_not_initialize": {
"summary": "provider_not_initialize",
"value": {
"status": 400,
"code": "provider_not_initialize",
"message": "No valid model provider credentials found. Please go to Settings -> Model Provider to complete your provider credentials."
}
},
"invalid_param_dataset": {
"summary": "invalid_param (dataset)",
"value": {
"status": 400,
"code": "invalid_param",
"message": "Dataset does not exist."
}
},
"invalid_param_external": {
"summary": "invalid_param (external)",
"value": {
"status": 400,
"code": "invalid_param",
"message": "External datasets are not supported."
}
},
"invalid_param_file_too_large": {
"summary": "invalid_param (file_too_large)",
"value": {
"status": 400,
"code": "invalid_param",
"message": "File size exceeded."
}
},
"invalid_param_unsupported_file_type": {
"summary": "invalid_param (unsupported_file_type)",
"value": {
"status": 400,
"code": "invalid_param",
"message": "File type not allowed."
}
}
}
}
}
}
}
}
},
"/datasets/{dataset_id}/documents/download-zip": {
"post": {
"tags": [
"Documents"
],
"summary": "Download Documents as ZIP",
"description": "Download multiple uploaded-file documents as a single ZIP archive. Accepts up to `100` document IDs.",
"operationId": "downloadDocumentsZip",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
}
],
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"type": "object",
"required": [
"document_ids"
],
"properties": {
"document_ids": {
"type": "array",
"minItems": 1,
"maxItems": 100,
"items": {
"type": "string",
"format": "uuid"
},
"description": "Array of document IDs to include in the ZIP archive."
}
}
}
}
}
},
"responses": {
"200": {
"description": "ZIP archive containing the requested documents.",
"content": {
"application/zip": {
"schema": {
"type": "string",
"format": "binary",
"description": "ZIP archive binary stream."
}
}
}
},
"403": {
"description": "`forbidden` : Insufficient permissions.",
"content": {
"application/json": {
"examples": {
"forbidden": {
"summary": "forbidden",
"value": {
"status": 403,
"code": "forbidden",
"message": "Insufficient permissions."
}
}
}
}
}
},
"404": {
"description": "`not_found` : Document or dataset not found.",
"content": {
"application/json": {
"examples": {
"not_found": {
"summary": "not_found",
"value": {
"status": 404,
"code": "not_found",
"message": "Document not found."
}
}
}
}
}
}
}
}
},
"/datasets/{dataset_id}/documents/status/{action}": {
"patch": {
"tags": [
"Documents"
],
"summary": "Update Document Status in Batch",
"description": "Enable, disable, archive, or unarchive multiple documents at once.",
"operationId": "batchUpdateDocumentStatus",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
},
{
"name": "action",
"in": "path",
"required": true,
"schema": {
"type": "string",
"enum": [
"enable",
"disable",
"archive",
"un_archive"
]
},
"description": "`enable` to activate, `disable` to deactivate, `archive` to archive, `un_archive` to restore from archive."
}
],
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"type": "object",
"required": [
"document_ids"
],
"properties": {
"document_ids": {
"type": "array",
"items": {
"type": "string"
},
"description": "List of document IDs to update."
}
}
}
}
}
},
"responses": {
"200": {
"description": "Documents updated successfully.",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"result": {
"type": "string",
"description": "Operation result."
}
}
},
"examples": {
"success": {
"summary": "Response Example",
"value": {
"result": "success"
}
}
}
}
}
},
"400": {
"description": "`invalid_action` : Invalid action.",
"content": {
"application/json": {
"examples": {
"invalid_action": {
"summary": "invalid_action",
"value": {
"status": 400,
"code": "invalid_action",
"message": "Invalid action."
}
}
}
}
}
},
"403": {
"description": "`forbidden` : Insufficient permissions.",
"content": {
"application/json": {
"examples": {
"forbidden": {
"summary": "forbidden",
"value": {
"status": 403,
"code": "forbidden",
"message": "Insufficient permissions."
}
}
}
}
}
},
"404": {
"description": "`not_found` : Knowledge base not found.",
"content": {
"application/json": {
"examples": {
"not_found": {
"summary": "not_found",
"value": {
"status": 404,
"code": "not_found",
"message": "Dataset not found."
}
}
}
}
}
}
}
}
},
"/datasets/{dataset_id}/documents/{document_id}/segments": {
"post": {
"tags": [
"Chunks"
],
"summary": "Create Chunks",
"description": "Create one or more chunks within a document. Each chunk can include optional keywords and an answer field (for QA-mode documents).",
"operationId": "createSegments",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
},
{
"name": "document_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Document ID."
}
],
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"type": "object",
"required": [
"segments"
],
"properties": {
"segments": {
"type": "array",
"items": {
"type": "object",
"properties": {
"content": {
"type": "string",
"description": "Chunk text content."
},
"answer": {
"type": "string",
"description": "Answer content for QA mode."
},
"keywords": {
"type": "array",
"items": {
"type": "string"
},
"description": "Keywords for the chunk."
},
"attachment_ids": {
"type": "array",
"items": {
"type": "string"
},
"description": "Attachment file IDs."
}
}
},
"description": "Array of chunk objects to create."
}
}
}
}
}
},
"responses": {
"200": {
"description": "Chunks created successfully.",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"data": {
"type": "array",
"description": "List of created chunks.",
"items": {
"$ref": "#/components/schemas/Segment"
}
},
"doc_form": {
"type": "string",
"description": "Document chunking mode used by this document."
}
}
},
"examples": {
"success": {
"summary": "Response Example",
"value": {
"data": [
{
"id": "f3d1c7be-9f3a-40d8-8eb8-3a1ef9c3f2c1",
"position": 1,
"document_id": "a8e0e5b5-78c6-4130-a5ce-25feb0e0b4ac",
"content": "Dify is an open-source LLM app development platform.",
"sign_content": "",
"answer": "",
"word_count": 9,
"tokens": 12,
"keywords": [
"dify",
"platform",
"llm"
],
"index_node_id": "a1b2c3d4-e5f6-7890-abcd-000000000001",
"index_node_hash": "abc123def456",
"hit_count": 0,
"enabled": true,
"disabled_at": null,
"disabled_by": null,
"status": "completed",
"created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4",
"created_at": 1741267200,
"updated_at": 1741267200,
"updated_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4",
"indexing_at": 1741267200,
"completed_at": 1741267200,
"error": null,
"stopped_at": null,
"child_chunks": [],
"attachments": [],
"summary": null
}
],
"doc_form": "text_model"
}
}
}
}
}
},
"404": {
"description": "`not_found` : Document is not completed or is disabled.",
"content": {
"application/json": {
"examples": {
"not_found_not_completed": {
"summary": "not_found (not completed)",
"value": {
"status": 404,
"code": "not_found",
"message": "Document is not completed."
}
},
"not_found_disabled": {
"summary": "not_found (disabled)",
"value": {
"status": 404,
"code": "not_found",
"message": "Document is disabled."
}
}
}
}
}
}
}
},
"get": {
"tags": [
"Chunks"
],
"summary": "List Chunks",
"description": "Returns a paginated list of chunks within a document. Supports filtering by keyword and status.",
"operationId": "listSegments",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
},
{
"name": "document_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Document ID."
},
{
"name": "page",
"in": "query",
"schema": {
"type": "integer",
"default": 1
},
"description": "Page number to retrieve."
},
{
"name": "limit",
"in": "query",
"schema": {
"type": "integer",
"default": 20
},
"description": "Number of items per page. Server caps at `100`."
},
{
"name": "status",
"in": "query",
"schema": {
"type": "array",
"items": {
"type": "string"
}
},
"style": "form",
"explode": true,
"description": "Filter chunks by indexing status, e.g. `completed`, `indexing`, `error`."
},
{
"name": "keyword",
"in": "query",
"schema": {
"type": "string"
},
"description": "Search keyword."
}
],
"responses": {
"200": {
"description": "List of chunks.",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"data": {
"type": "array",
"description": "List of chunks.",
"items": {
"$ref": "#/components/schemas/Segment"
}
},
"doc_form": {
"type": "string",
"description": "Document chunking mode used by this document."
},
"total": {
"type": "integer",
"description": "Total number of matching chunks."
},
"has_more": {
"type": "boolean",
"description": "Whether more items exist on the next page."
},
"limit": {
"type": "integer",
"description": "Number of items per page."
},
"page": {
"type": "integer",
"description": "Current page number."
}
}
},
"examples": {
"success": {
"summary": "Response Example",
"value": {
"data": [
{
"id": "f3d1c7be-9f3a-40d8-8eb8-3a1ef9c3f2c1",
"position": 1,
"document_id": "a8e0e5b5-78c6-4130-a5ce-25feb0e0b4ac",
"content": "Dify is an open-source LLM app development platform.",
"sign_content": "",
"answer": "",
"word_count": 9,
"tokens": 12,
"keywords": [
"dify",
"platform",
"llm"
],
"index_node_id": "a1b2c3d4-e5f6-7890-abcd-000000000001",
"index_node_hash": "abc123def456",
"hit_count": 0,
"enabled": true,
"disabled_at": null,
"disabled_by": null,
"status": "completed",
"created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4",
"created_at": 1741267200,
"updated_at": 1741267200,
"updated_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4",
"indexing_at": 1741267200,
"completed_at": 1741267200,
"error": null,
"stopped_at": null,
"child_chunks": [],
"attachments": [],
"summary": null
}
],
"doc_form": "text_model",
"total": 1,
"has_more": false,
"limit": 20,
"page": 1
}
}
}
}
}
}
}
}
},
"/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}": {
"get": {
"tags": [
"Chunks"
],
"summary": "Get Chunk",
"description": "Retrieve detailed information about a specific chunk, including its content, keywords, and indexing status.",
"operationId": "getSegmentDetail",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
},
{
"name": "document_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Document ID."
},
{
"name": "segment_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Chunk ID."
}
],
"responses": {
"200": {
"description": "Chunk details.",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"data": {
"$ref": "#/components/schemas/Segment"
},
"doc_form": {
"type": "string",
"description": "Document chunking mode used by this document."
}
}
},
"examples": {
"success": {
"summary": "Response Example",
"value": {
"data": {
"id": "f3d1c7be-9f3a-40d8-8eb8-3a1ef9c3f2c1",
"position": 1,
"document_id": "a8e0e5b5-78c6-4130-a5ce-25feb0e0b4ac",
"content": "Dify is an open-source LLM app development platform.",
"sign_content": "",
"answer": "",
"word_count": 9,
"tokens": 12,
"keywords": [
"dify",
"platform",
"llm"
],
"index_node_id": "a1b2c3d4-e5f6-7890-abcd-000000000001",
"index_node_hash": "abc123def456",
"hit_count": 0,
"enabled": true,
"disabled_at": null,
"disabled_by": null,
"status": "completed",
"created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4",
"created_at": 1741267200,
"updated_at": 1741267200,
"updated_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4",
"indexing_at": 1741267200,
"completed_at": 1741267200,
"error": null,
"stopped_at": null,
"child_chunks": [],
"attachments": [],
"summary": null
},
"doc_form": "text_model"
}
}
}
}
}
}
}
},
"post": {
"tags": [
"Chunks"
],
"summary": "Update Chunk",
"description": "Update a chunk's content, keywords, or answer. Re-triggers indexing for the modified chunk.",
"operationId": "updateSegment",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
},
{
"name": "document_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Document ID."
},
{
"name": "segment_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Chunk ID."
}
],
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"type": "object",
"required": [
"segment"
],
"properties": {
"segment": {
"type": "object",
"properties": {
"content": {
"type": "string",
"description": "Chunk text content."
},
"answer": {
"type": "string",
"description": "Answer content for QA mode."
},
"keywords": {
"type": "array",
"items": {
"type": "string"
},
"description": "Keywords for the chunk."
},
"enabled": {
"type": "boolean",
"description": "Whether the chunk is enabled."
},
"regenerate_child_chunks": {
"type": "boolean",
"default": false,
"description": "Whether to regenerate child chunks."
},
"attachment_ids": {
"type": "array",
"items": {
"type": "string"
},
"description": "Attachment file IDs."
},
"summary": {
"type": "string",
"nullable": true,
"description": "Summary content for summary index."
}
},
"description": "Chunk data to update."
}
}
}
}
}
},
"responses": {
"200": {
"description": "Chunk updated successfully.",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"data": {
"$ref": "#/components/schemas/Segment"
},
"doc_form": {
"type": "string",
"description": "Document chunking mode used by this document."
}
}
},
"examples": {
"success": {
"summary": "Response Example",
"value": {
"data": {
"id": "f3d1c7be-9f3a-40d8-8eb8-3a1ef9c3f2c1",
"position": 1,
"document_id": "a8e0e5b5-78c6-4130-a5ce-25feb0e0b4ac",
"content": "Dify is an open-source LLM app development platform.",
"sign_content": "",
"answer": "",
"word_count": 9,
"tokens": 12,
"keywords": [
"dify",
"platform",
"llm"
],
"index_node_id": "a1b2c3d4-e5f6-7890-abcd-000000000001",
"index_node_hash": "abc123def456",
"hit_count": 0,
"enabled": true,
"disabled_at": null,
"disabled_by": null,
"status": "completed",
"created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4",
"created_at": 1741267200,
"updated_at": 1741267200,
"updated_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4",
"indexing_at": 1741267200,
"completed_at": 1741267200,
"error": null,
"stopped_at": null,
"child_chunks": [],
"attachments": [],
"summary": null
},
"doc_form": "text_model"
}
}
}
}
}
}
}
},
"delete": {
"tags": [
"Chunks"
],
"summary": "Delete Chunk",
"description": "Permanently delete a chunk from the document.",
"operationId": "deleteSegment",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
},
{
"name": "document_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Document ID."
},
{
"name": "segment_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Chunk ID."
}
],
"responses": {
"204": {
"description": "Success."
}
}
}
},
"/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks": {
"post": {
"tags": [
"Chunks"
],
"summary": "Create Child Chunk",
"description": "Create a child chunk under a parent chunk. Only available for documents using the `hierarchical_model` chunking mode.",
"operationId": "createChildChunk",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
},
{
"name": "document_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Document ID."
},
{
"name": "segment_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Chunk ID."
}
],
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"type": "object",
"required": [
"content"
],
"properties": {
"content": {
"type": "string",
"description": "Child chunk text content."
}
}
}
}
}
},
"responses": {
"200": {
"description": "Child chunk created successfully.",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"data": {
"$ref": "#/components/schemas/ChildChunk"
}
}
},
"examples": {
"success": {
"summary": "Response Example",
"value": {
"data": {
"id": "d7e8f9a0-1b2c-3d4e-5f6a-7b8c9d0e1f2a",
"segment_id": "f3d1c7be-9f3a-40d8-8eb8-3a1ef9c3f2c1",
"content": "Dify is an open-source platform.",
"position": 1,
"word_count": 6,
"type": "automatic",
"created_at": 1741267200,
"updated_at": 1741267200
}
}
}
}
}
}
},
"400": {
"description": "`invalid_param` : Create child chunk index failed.",
"content": {
"application/json": {
"examples": {
"invalid_param": {
"summary": "invalid_param",
"value": {
"status": 400,
"code": "invalid_param",
"message": "Create child chunk index failed: {error details}"
}
}
}
}
}
}
}
},
"get": {
"tags": [
"Chunks"
],
"summary": "List Child Chunks",
"description": "Returns a paginated list of child chunks under a specific parent chunk.",
"operationId": "getChildChunks",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
},
{
"name": "document_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Document ID."
},
{
"name": "segment_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Chunk ID."
},
{
"name": "page",
"in": "query",
"schema": {
"type": "integer",
"default": 1,
"minimum": 1
},
"description": "Page number to retrieve."
},
{
"name": "limit",
"in": "query",
"schema": {
"type": "integer",
"default": 20,
"minimum": 1
},
"description": "Number of items per page. Server caps at `100`."
},
{
"name": "keyword",
"in": "query",
"schema": {
"type": "string"
},
"description": "Search keyword."
}
],
"responses": {
"200": {
"description": "List of child chunks.",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"data": {
"type": "array",
"description": "List of child chunks.",
"items": {
"$ref": "#/components/schemas/ChildChunk"
}
},
"total": {
"type": "integer",
"description": "Total number of child chunks."
},
"total_pages": {
"type": "integer",
"description": "Total number of pages."
},
"page": {
"type": "integer",
"description": "Current page number."
},
"limit": {
"type": "integer",
"description": "Number of items per page."
}
}
},
"examples": {
"success": {
"summary": "Response Example",
"value": {
"data": [
{
"id": "d7e8f9a0-1b2c-3d4e-5f6a-7b8c9d0e1f2a",
"segment_id": "f3d1c7be-9f3a-40d8-8eb8-3a1ef9c3f2c1",
"content": "Dify is an open-source platform.",
"position": 1,
"word_count": 6,
"type": "automatic",
"created_at": 1741267200,
"updated_at": 1741267200
}
],
"total": 1,
"total_pages": 1,
"page": 1,
"limit": 20
}
}
}
}
}
}
}
}
},
"/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}": {
"patch": {
"tags": [
"Chunks"
],
"summary": "Update Child Chunk",
"description": "Update the content of an existing child chunk.",
"operationId": "updateChildChunk",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
},
{
"name": "document_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Document ID."
},
{
"name": "segment_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Chunk ID."
},
{
"name": "child_chunk_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Child chunk ID."
}
],
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"type": "object",
"required": [
"content"
],
"properties": {
"content": {
"type": "string",
"description": "Child chunk text content."
}
}
}
}
}
},
"responses": {
"200": {
"description": "Child chunk updated successfully.",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"data": {
"$ref": "#/components/schemas/ChildChunk"
}
}
},
"examples": {
"success": {
"summary": "Response Example",
"value": {
"data": {
"id": "d7e8f9a0-1b2c-3d4e-5f6a-7b8c9d0e1f2a",
"segment_id": "f3d1c7be-9f3a-40d8-8eb8-3a1ef9c3f2c1",
"content": "Dify is an open-source platform.",
"position": 1,
"word_count": 6,
"type": "automatic",
"created_at": 1741267200,
"updated_at": 1741267200
}
}
}
}
}
}
},
"400": {
"description": "`invalid_param` : Update child chunk index failed.",
"content": {
"application/json": {
"examples": {
"invalid_param": {
"summary": "invalid_param",
"value": {
"status": 400,
"code": "invalid_param",
"message": "Create child chunk index failed: {error details}"
}
}
}
}
}
}
}
},
"delete": {
"tags": [
"Chunks"
],
"summary": "Delete Child Chunk",
"description": "Permanently delete a child chunk from its parent chunk.",
"operationId": "deleteChildChunk",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
},
{
"name": "document_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Document ID."
},
{
"name": "segment_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Chunk ID."
},
{
"name": "child_chunk_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Child chunk ID."
}
],
"responses": {
"204": {
"description": "Success."
},
"400": {
"description": "`invalid_param` : Delete child chunk index failed.",
"content": {
"application/json": {
"examples": {
"invalid_param": {
"summary": "invalid_param",
"value": {
"status": 400,
"code": "invalid_param",
"message": "Delete child chunk index failed: {error details}"
}
}
}
}
}
}
}
}
},
"/datasets/{dataset_id}/retrieve": {
"post": {
"tags": [
"Knowledge Bases"
],
"summary": "Retrieve Chunks from a Knowledge Base / Test Retrieval",
"description": "Performs a search query against a knowledge base to retrieve the most relevant chunks. This endpoint can be used for both production retrieval and test retrieval.",
"operationId": "retrieveSegments",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
}
],
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"type": "object",
"required": [
"query"
],
"properties": {
"query": {
"type": "string",
"maxLength": 250,
"description": "Search query text."
},
"retrieval_model": {
"$ref": "#/components/schemas/RetrievalModel",
"description": "Retrieval model configuration. Controls how chunks are searched and ranked when querying this knowledge base."
},
"external_retrieval_model": {
"type": "object",
"description": "Retrieval settings for external knowledge bases.",
"properties": {
"top_k": {
"type": "integer",
"description": "Maximum number of results to return."
},
"score_threshold": {
"type": "number",
"description": "Minimum relevance score threshold for filtering results."
},
"score_threshold_enabled": {
"type": "boolean",
"description": "Whether score threshold filtering is enabled."
}
}
},
"attachment_ids": {
"type": "array",
"items": {
"type": "string"
},
"nullable": true,
"description": "List of attachment IDs to include in the retrieval context."
}
}
}
}
}
},
"responses": {
"200": {
"description": "Retrieval results.",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"query": {
"type": "object",
"description": "The original query object.",
"properties": {
"content": {
"type": "string",
"description": "The query text."
}
}
},
"records": {
"type": "array",
"description": "List of matched retrieval records.",
"items": {
"type": "object",
"properties": {
"segment": {
"type": "object",
"description": "Matched chunk from the knowledge base.",
"properties": {
"id": {
"type": "string",
"description": "Unique identifier of the chunk."
},
"position": {
"type": "integer",
"description": "Position of the chunk within the document."
},
"document_id": {
"type": "string",
"description": "ID of the document this chunk belongs to."
},
"content": {
"type": "string",
"description": "Text content of the chunk."
},
"sign_content": {
"type": "string",
"description": "Signed content hash for integrity verification."
},
"answer": {
"type": "string",
"description": "Answer content, used in Q&A mode documents."
},
"word_count": {
"type": "integer",
"description": "Word count of the chunk content."
},
"tokens": {
"type": "integer",
"description": "Token count of the chunk content."
},
"keywords": {
"type": "array",
"description": "Keywords associated with this chunk for keyword-based retrieval.",
"items": {
"type": "string"
}
},
"index_node_id": {
"type": "string",
"description": "ID of the index node in the vector store."
},
"index_node_hash": {
"type": "string",
"description": "Hash of the indexed content, used to detect changes."
},
"hit_count": {
"type": "integer",
"description": "Number of times this chunk has been matched in retrieval queries."
},
"enabled": {
"type": "boolean",
"description": "Whether the chunk is enabled for retrieval."
},
"disabled_at": {
"type": "number",
"nullable": true,
"description": "Timestamp when the chunk was disabled. `null` if enabled."
},
"disabled_by": {
"type": "string",
"nullable": true,
"description": "ID of the user who disabled the chunk. `null` if enabled."
},
"status": {
"type": "string",
"description": "Indexing status of the chunk."
},
"created_by": {
"type": "string",
"description": "ID of the user who created the chunk."
},
"created_at": {
"type": "number",
"description": "Creation timestamp (Unix epoch in seconds)."
},
"indexing_at": {
"type": "number",
"nullable": true,
"description": "Timestamp when indexing started. `null` if not yet started."
},
"completed_at": {
"type": "number",
"nullable": true,
"description": "Timestamp when indexing completed. `null` if not yet completed."
},
"error": {
"type": "string",
"nullable": true,
"description": "Error message if indexing failed. `null` when no error."
},
"stopped_at": {
"type": "number",
"nullable": true,
"description": "Timestamp when indexing was stopped. `null` if not stopped."
},
"document": {
"type": "object",
"description": "Parent document information for the matched chunk.",
"properties": {
"id": {
"type": "string",
"description": "Unique identifier of the document."
},
"data_source_type": {
"type": "string",
"description": "How the document was created."
},
"name": {
"type": "string",
"description": "Document name."
},
"doc_type": {
"type": "string",
"nullable": true,
"description": "Document type classification. `null` if not set."
},
"doc_metadata": {
"type": "object",
"nullable": true,
"description": "Metadata values for the document. `null` if no metadata is configured."
}
}
}
}
},
"child_chunks": {
"type": "array",
"description": "Matched child chunks within the chunk, if using hierarchical indexing.",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Unique identifier of the child chunk."
},
"content": {
"type": "string",
"description": "Text content of the child chunk."
},
"position": {
"type": "integer",
"description": "Position of the child chunk within the parent chunk."
},
"score": {
"type": "number",
"description": "Relevance score of the child chunk."
}
}
}
},
"score": {
"type": "number",
"description": "Relevance score."
},
"tsne_position": {
"type": "object",
"nullable": true,
"description": "t-SNE visualization position."
},
"files": {
"type": "array",
"description": "Files attached to this chunk.",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Attachment file identifier."
},
"name": {
"type": "string",
"description": "Original file name."
},
"size": {
"type": "integer",
"description": "File size in bytes."
},
"extension": {
"type": "string",
"description": "File extension."
},
"mime_type": {
"type": "string",
"description": "MIME type of the file."
},
"source_url": {
"type": "string",
"description": "URL to access the attachment."
}
}
}
},
"summary": {
"type": "string",
"nullable": true,
"description": "Summary content if retrieved via summary index."
}
}
}
}
}
},
"examples": {
"success": {
"summary": "Response Example",
"value": {
"query": {
"content": "What is Dify?"
},
"records": [
{
"segment": {
"id": "f3d1c7be-9f3a-40d8-8eb8-3a1ef9c3f2c1",
"position": 1,
"document_id": "a8e0e5b5-78c6-4130-a5ce-25feb0e0b4ac",
"content": "Dify is an open-source LLM app development platform.",
"sign_content": "",
"answer": "",
"word_count": 9,
"tokens": 12,
"keywords": [
"dify",
"platform",
"llm"
],
"index_node_id": "a1b2c3d4-e5f6-7890-abcd-000000000001",
"index_node_hash": "abc123def456",
"hit_count": 1,
"enabled": true,
"disabled_at": null,
"disabled_by": null,
"status": "completed",
"created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4",
"created_at": 1741267200,
"indexing_at": 1741267200,
"completed_at": 1741267200,
"error": null,
"stopped_at": null,
"document": {
"id": "a8e0e5b5-78c6-4130-a5ce-25feb0e0b4ac",
"data_source_type": "upload_file",
"name": "guide.txt",
"doc_type": null,
"doc_metadata": null
}
},
"child_chunks": [],
"score": 0.92,
"tsne_position": null,
"files": [],
"summary": null
}
]
}
}
}
}
}
},
"400": {
"description": "- `dataset_not_initialized` : The dataset is still being initialized or indexing. Please wait a moment.\n- `provider_not_initialize` : No valid model provider credentials found. Please go to Settings -> Model Provider to complete your provider credentials.\n- `provider_quota_exceeded` : Your quota for Dify Hosted OpenAI has been exhausted. Please go to Settings -> Model Provider to complete your own provider credentials.\n- `model_currently_not_support` : Dify Hosted OpenAI trial currently not support the GPT-4 model.\n- `completion_request_error` : Completion request failed.\n- `invalid_param` : Invalid parameter value.",
"content": {
"application/json": {
"examples": {
"dataset_not_initialized": {
"summary": "dataset_not_initialized",
"value": {
"status": 400,
"code": "dataset_not_initialized",
"message": "The dataset is still being initialized or indexing. Please wait a moment."
}
},
"provider_not_initialize": {
"summary": "provider_not_initialize",
"value": {
"status": 400,
"code": "provider_not_initialize",
"message": "No valid model provider credentials found. Please go to Settings -> Model Provider to complete your provider credentials."
}
},
"provider_quota_exceeded": {
"summary": "provider_quota_exceeded",
"value": {
"status": 400,
"code": "provider_quota_exceeded",
"message": "Your quota for Dify Hosted OpenAI has been exhausted. Please go to Settings -> Model Provider to complete your own provider credentials."
}
},
"model_currently_not_support": {
"summary": "model_currently_not_support",
"value": {
"status": 400,
"code": "model_currently_not_support",
"message": "Dify Hosted OpenAI trial currently not support the GPT-4 model."
}
},
"completion_request_error": {
"summary": "completion_request_error",
"value": {
"status": 400,
"code": "completion_request_error",
"message": "Completion request failed."
}
},
"invalid_param": {
"summary": "invalid_param",
"value": {
"status": 400,
"code": "invalid_param",
"message": "Invalid parameter value."
}
}
}
}
}
},
"403": {
"description": "`forbidden` : Insufficient permissions.",
"content": {
"application/json": {
"examples": {
"forbidden": {
"summary": "forbidden",
"value": {
"status": 403,
"code": "forbidden",
"message": "Insufficient permissions."
}
}
}
}
}
},
"404": {
"description": "`not_found` : Knowledge base not found.",
"content": {
"application/json": {
"examples": {
"not_found": {
"summary": "not_found",
"value": {
"status": 404,
"code": "not_found",
"message": "Dataset not found."
}
}
}
}
}
},
"500": {
"description": "`internal_server_error` : An internal error occurred during retrieval.",
"content": {
"application/json": {
"examples": {
"internal_server_error": {
"summary": "internal_server_error",
"value": {
"status": 500,
"code": "internal_server_error",
"message": "An internal error occurred."
}
}
}
}
}
}
}
}
},
"/datasets/tags": {
"post": {
"tags": [
"Tags"
],
"summary": "Create Knowledge Tag",
"description": "Create a new tag for organizing knowledge bases.",
"operationId": "createKnowledgeTag",
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"type": "object",
"required": [
"name"
],
"properties": {
"name": {
"type": "string",
"minLength": 1,
"maxLength": 50,
"description": "Tag name."
}
}
}
}
}
},
"responses": {
"200": {
"description": "Tag created successfully.",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Tag identifier."
},
"name": {
"type": "string",
"description": "Tag display name."
},
"type": {
"type": "string",
"description": "Tag type. Always `knowledge` for knowledge base tags."
},
"binding_count": {
"type": "string",
"nullable": true,
"description": "Number of knowledge bases bound to this tag."
}
}
},
"examples": {
"success": {
"summary": "Response Example",
"value": {
"id": "f4b5c6d7-e8f9-0a1b-2c3d-4e5f6a7b8c9d",
"name": "Product Docs",
"type": "knowledge",
"binding_count": "0"
}
}
}
}
}
}
}
},
"get": {
"tags": [
"Tags"
],
"summary": "List Knowledge Tags",
"description": "Returns the list of all knowledge base tags in the workspace.",
"operationId": "getKnowledgeTags",
"responses": {
"200": {
"description": "List of tags.",
"content": {
"application/json": {
"schema": {
"type": "array",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Tag identifier."
},
"name": {
"type": "string",
"description": "Tag display name."
},
"type": {
"type": "string",
"description": "Tag type. Always `knowledge` for knowledge base tags."
},
"binding_count": {
"type": "string",
"nullable": true,
"description": "Number of knowledge bases bound to this tag."
}
}
}
},
"examples": {
"success": {
"summary": "Response Example",
"value": [
{
"id": "f4b5c6d7-e8f9-0a1b-2c3d-4e5f6a7b8c9d",
"name": "Product Docs",
"type": "knowledge",
"binding_count": "0"
}
]
}
}
}
}
}
}
},
"patch": {
"tags": [
"Tags"
],
"summary": "Update Knowledge Tag",
"description": "Rename an existing knowledge base tag.",
"operationId": "updateKnowledgeTag",
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"type": "object",
"required": [
"tag_id",
"name"
],
"properties": {
"tag_id": {
"type": "string",
"description": "Tag ID to update."
},
"name": {
"type": "string",
"minLength": 1,
"maxLength": 50,
"description": "New tag name."
}
}
}
}
}
},
"responses": {
"200": {
"description": "Tag updated successfully.",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Tag identifier."
},
"name": {
"type": "string",
"description": "Tag display name."
},
"type": {
"type": "string",
"description": "Tag type. Always `knowledge` for knowledge base tags."
},
"binding_count": {
"type": "string",
"nullable": true,
"description": "Number of knowledge bases bound to this tag."
}
}
},
"examples": {
"success": {
"summary": "Response Example",
"value": {
"id": "f4b5c6d7-e8f9-0a1b-2c3d-4e5f6a7b8c9d",
"name": "Product Docs",
"type": "knowledge",
"binding_count": "0"
}
}
}
}
}
}
}
},
"delete": {
"tags": [
"Tags"
],
"summary": "Delete Knowledge Tag",
"description": "Permanently delete a knowledge base tag. Does not delete the knowledge bases that were tagged.",
"operationId": "deleteKnowledgeTag",
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"type": "object",
"required": [
"tag_id"
],
"properties": {
"tag_id": {
"type": "string",
"description": "Tag ID to delete."
}
}
}
}
}
},
"responses": {
"204": {
"description": "Success."
}
}
}
},
"/datasets/tags/binding": {
"post": {
"tags": [
"Tags"
],
"summary": "Create Tag Binding",
"description": "Bind one or more tags to a knowledge base. A knowledge base can have multiple tags.",
"operationId": "bindTagsToDataset",
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"type": "object",
"required": [
"tag_ids",
"target_id"
],
"properties": {
"tag_ids": {
"type": "array",
"items": {
"type": "string"
},
"minItems": 1,
"description": "Tag IDs to bind."
},
"target_id": {
"type": "string",
"description": "Knowledge base ID."
}
}
}
}
}
},
"responses": {
"204": {
"description": "Success."
}
}
}
},
"/datasets/tags/unbinding": {
"post": {
"tags": [
"Tags"
],
"summary": "Delete Tag Binding",
"description": "Remove a tag binding from a knowledge base.",
"operationId": "unbindTagFromDataset",
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"type": "object",
"required": [
"tag_id",
"target_id"
],
"properties": {
"tag_id": {
"type": "string",
"description": "Tag ID to unbind."
},
"target_id": {
"type": "string",
"description": "Knowledge base ID."
}
}
}
}
}
},
"responses": {
"204": {
"description": "Success."
}
}
}
},
"/datasets/{dataset_id}/tags": {
"get": {
"tags": [
"Tags"
],
"summary": "Get Knowledge Base Tags",
"description": "Returns the list of tags bound to a specific knowledge base.",
"operationId": "queryDatasetTags",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
}
],
"responses": {
"200": {
"description": "Tags bound to the knowledge base.",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"data": {
"type": "array",
"description": "List of tags bound to this knowledge base.",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Tag identifier."
},
"name": {
"type": "string",
"description": "Tag display name."
}
}
}
},
"total": {
"type": "integer",
"description": "Total number of tags bound to this knowledge base."
}
}
},
"examples": {
"success": {
"summary": "Response Example",
"value": {
"data": [
{
"id": "f4b5c6d7-e8f9-0a1b-2c3d-4e5f6a7b8c9d",
"name": "Product Docs"
}
],
"total": 1
}
}
}
}
}
}
}
}
},
"/datasets/{dataset_id}/metadata": {
"post": {
"tags": [
"Metadata"
],
"summary": "Create Metadata Field",
"description": "Create a custom metadata field for the knowledge base. Metadata fields can be used to annotate documents with structured information.",
"operationId": "createMetadataField",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
}
],
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"type": "object",
"required": [
"type",
"name"
],
"properties": {
"type": {
"type": "string",
"enum": [
"string",
"number",
"time"
],
"description": "`string` for text values, `number` for numeric values, `time` for date/time values."
},
"name": {
"type": "string",
"description": "Metadata field name."
}
}
}
}
}
},
"responses": {
"201": {
"description": "Metadata field created successfully.",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Metadata field identifier."
},
"name": {
"type": "string",
"description": "Metadata field name."
},
"type": {
"type": "string",
"description": "Metadata field type."
}
}
},
"examples": {
"success": {
"summary": "Response Example",
"value": {
"id": "b5c6d7e8-f9a0-1b2c-3d4e-5f6a7b8c9d0e",
"name": "author",
"type": "string"
}
}
}
}
}
}
}
},
"get": {
"tags": [
"Metadata"
],
"summary": "List Metadata Fields",
"description": "Returns the list of all metadata fields (both custom and built-in) for the knowledge base, along with the count of documents using each field.",
"operationId": "listMetadataFields",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
}
],
"responses": {
"200": {
"description": "Metadata fields for the knowledge base.",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"doc_metadata": {
"type": "array",
"description": "List of metadata field definitions.",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Metadata field identifier."
},
"name": {
"type": "string",
"description": "Metadata field name."
},
"type": {
"type": "string",
"description": "Metadata field type."
},
"count": {
"type": "integer",
"description": "Number of documents using this metadata field."
}
}
}
},
"built_in_field_enabled": {
"type": "boolean",
"description": "Whether built-in metadata fields are enabled for this knowledge base."
}
}
},
"examples": {
"success": {
"summary": "Response Example",
"value": {
"doc_metadata": [
{
"id": "b5c6d7e8-f9a0-1b2c-3d4e-5f6a7b8c9d0e",
"name": "author",
"type": "string",
"count": 3
}
],
"built_in_field_enabled": true
}
}
}
}
}
}
}
}
},
"/datasets/{dataset_id}/metadata/{metadata_id}": {
"patch": {
"tags": [
"Metadata"
],
"summary": "Update Metadata Field",
"description": "Rename a custom metadata field.",
"operationId": "updateMetadataField",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
},
{
"name": "metadata_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Metadata field ID."
}
],
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"type": "object",
"required": [
"name"
],
"properties": {
"name": {
"type": "string",
"description": "New metadata field name."
}
}
}
}
}
},
"responses": {
"200": {
"description": "Metadata field updated successfully.",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Metadata field identifier."
},
"name": {
"type": "string",
"description": "Metadata field name."
},
"type": {
"type": "string",
"description": "Metadata field type: `string`, `number`, or `time`."
}
}
},
"examples": {
"success": {
"summary": "Response Example",
"value": {
"id": "b5c6d7e8-f9a0-1b2c-3d4e-5f6a7b8c9d0e",
"name": "author",
"type": "string"
}
}
}
}
}
}
}
},
"delete": {
"tags": [
"Metadata"
],
"summary": "Delete Metadata Field",
"description": "Permanently delete a custom metadata field. Documents using this field will lose their metadata values for it.",
"operationId": "deleteMetadataField",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
},
{
"name": "metadata_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Metadata field ID."
}
],
"responses": {
"204": {
"description": "Metadata field deleted successfully."
}
}
}
},
"/datasets/{dataset_id}/metadata/built-in": {
"get": {
"tags": [
"Metadata"
],
"summary": "Get Built-in Metadata Fields",
"description": "Returns the list of built-in metadata fields provided by the system (e.g., document name, uploader, upload date).",
"operationId": "getBuiltInMetadataFields",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
}
],
"responses": {
"200": {
"description": "Built-in metadata fields.",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"fields": {
"type": "array",
"description": "List of system-provided metadata fields.",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "Built-in field identifier. `document_name` for the document title, `uploader` for the creator, `upload_date` for creation time, `last_update_date` for last modification time, `source` for the document origin."
},
"type": {
"type": "string",
"description": "Field data type. `string` for text values, `time` for date/time values."
}
}
}
}
}
},
"examples": {
"success": {
"summary": "Response Example",
"value": {
"fields": [
{
"name": "document_name",
"type": "string"
},
{
"name": "uploader",
"type": "string"
},
{
"name": "upload_date",
"type": "time"
},
{
"name": "last_update_date",
"type": "time"
},
{
"name": "source",
"type": "string"
}
]
}
}
}
}
}
}
}
}
},
"/datasets/{dataset_id}/metadata/built-in/{action}": {
"post": {
"tags": [
"Metadata"
],
"summary": "Update Built-in Metadata Field",
"description": "Enable or disable built-in metadata fields for the knowledge base.",
"operationId": "toggleBuiltInMetadataField",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
},
{
"name": "action",
"in": "path",
"required": true,
"schema": {
"type": "string",
"enum": [
"enable",
"disable"
]
},
"description": "`enable` to activate built-in metadata fields, `disable` to deactivate them."
}
],
"responses": {
"200": {
"description": "Built-in metadata field toggled successfully.",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"result": {
"type": "string",
"description": "Operation result."
}
}
},
"examples": {
"success": {
"summary": "Response Example",
"value": {
"result": "success"
}
}
}
}
}
}
}
}
},
"/datasets/{dataset_id}/documents/metadata": {
"post": {
"tags": [
"Metadata"
],
"summary": "Update Document Metadata in Batch",
"description": "Update metadata values for multiple documents at once. Each document in the request receives the specified metadata key-value pairs.",
"operationId": "batchUpdateDocumentMetadata",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
}
],
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"type": "object",
"required": [
"operation_data"
],
"properties": {
"operation_data": {
"type": "array",
"items": {
"type": "object",
"required": [
"document_id",
"metadata_list"
],
"properties": {
"document_id": {
"type": "string",
"description": "ID of the document whose metadata to update."
},
"metadata_list": {
"type": "array",
"items": {
"type": "object",
"required": [
"id",
"name"
],
"properties": {
"id": {
"type": "string",
"description": "Metadata field ID."
},
"name": {
"type": "string",
"description": "Metadata field name."
},
"value": {
"description": "Metadata value. Can be a string, number, or `null`."
}
}
}
},
"partial_update": {
"type": "boolean",
"default": false,
"description": "Whether to partially update metadata, keeping existing values for unspecified fields."
}
}
},
"description": "Array of document metadata update operations. Each entry maps a document ID to its metadata key-value pairs."
}
}
}
}
}
},
"responses": {
"200": {
"description": "Document metadata updated successfully.",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"result": {
"type": "string",
"description": "Operation result."
}
}
},
"examples": {
"success": {
"summary": "Response Example",
"value": {
"result": "success"
}
}
}
}
}
}
}
}
},
"/workspaces/current/models/model-types/{model_type}": {
"get": {
"tags": [
"Models"
],
"summary": "Get Available Models",
"description": "Retrieve the list of available models by type. Primarily used to query `text-embedding` and `rerank` models for knowledge base configuration.",
"operationId": "getAvailableModels",
"parameters": [
{
"name": "model_type",
"in": "path",
"required": true,
"schema": {
"type": "string",
"enum": [
"text-embedding",
"rerank",
"llm",
"tts",
"speech2text",
"moderation"
]
},
"description": "Type of model to retrieve. For knowledge base configuration, use `text-embedding` for embedding models or `rerank` for reranking models."
}
],
"responses": {
"200": {
"description": "Available models for the specified type.",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"data": {
"type": "array",
"description": "List of model providers with their available models.",
"items": {
"type": "object",
"properties": {
"provider": {
"type": "string",
"description": "Model provider identifier, e.g. `openai`, `cohere`."
},
"label": {
"type": "object",
"description": "Localized display name of the provider.",
"properties": {
"en_US": {
"type": "string",
"description": "English display name."
},
"zh_Hans": {
"type": "string",
"description": "Chinese display name."
}
}
},
"icon_small": {
"type": "object",
"description": "Localized URLs of the provider's small icon, keyed by locale.",
"properties": {
"en_US": {
"type": "string",
"description": "Small icon URL."
}
}
},
"icon_large": {
"type": "object",
"description": "Localized URLs of the provider's large icon, keyed by locale.",
"properties": {
"en_US": {
"type": "string",
"description": "Large icon URL."
}
}
},
"status": {
"type": "string",
"description": "Provider status. `active` when credentials are configured and valid."
},
"models": {
"type": "array",
"description": "List of available models from this provider.",
"items": {
"type": "object",
"properties": {
"model": {
"type": "string",
"description": "Model identifier. For `text-embedding` models, use this as the `embedding_model` value when creating or updating a knowledge base."
},
"label": {
"type": "object",
"description": "Localized display name of the model.",
"properties": {
"en_US": {
"type": "string",
"description": "English model name."
},
"zh_Hans": {
"type": "string",
"description": "Chinese model name."
}
}
},
"model_type": {
"type": "string",
"description": "Type of the model, matching the `model_type` path parameter."
},
"features": {
"type": "array",
"nullable": true,
"description": "Supported features of the model, `null` if none.",
"items": {
"type": "string"
}
},
"fetch_from": {
"type": "string",
"description": "Where the model definition comes from. `predefined-model` for built-in models, `customizable-model` for user-configured models."
},
"model_properties": {
"type": "object",
"description": "Model-specific properties such as `context_size`."
},
"status": {
"type": "string",
"description": "Model availability status. `active` when ready to use."
}
}
}
}
}
}
}
}
},
"examples": {
"success": {
"summary": "Response Example",
"value": {
"data": [
{
"provider": "openai",
"label": {
"en_US": "OpenAI",
"zh_Hans": "OpenAI"
},
"icon_small": {
"en_US": "https://example.com/openai-small.svg"
},
"icon_large": {
"en_US": "https://example.com/openai-large.svg"
},
"status": "active",
"models": [
{
"model": "text-embedding-3-small",
"label": {
"en_US": "text-embedding-3-small",
"zh_Hans": "text-embedding-3-small"
},
"model_type": "text-embedding",
"features": null,
"fetch_from": "predefined-model",
"model_properties": {
"context_size": 8191
},
"status": "active"
}
]
}
]
}
}
}
}
}
}
}
}
},
"/datasets/pipeline/file-upload": {
"post": {
"tags": [
"Knowledge Pipeline"
],
"summary": "Upload Pipeline File",
"description": "Upload a file for use in a knowledge pipeline. Accepts a single file via `multipart/form-data`.",
"operationId": "uploadPipelineFile",
"requestBody": {
"required": true,
"content": {
"multipart/form-data": {
"schema": {
"type": "object",
"required": [
"file"
],
"properties": {
"file": {
"type": "string",
"format": "binary",
"description": "The file to upload."
}
}
}
}
}
},
"responses": {
"201": {
"description": "File uploaded successfully.",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Unique identifier of the uploaded file."
},
"name": {
"type": "string",
"description": "Original file name."
},
"size": {
"type": "integer",
"description": "File size in bytes."
},
"extension": {
"type": "string",
"description": "File extension."
},
"mime_type": {
"type": "string",
"description": "MIME type of the file."
},
"created_by": {
"type": "string",
"description": "ID of the user who uploaded the file."
},
"created_at": {
"type": "string",
"description": "Upload timestamp in ISO 8601 format."
}
}
},
"examples": {
"success": {
"summary": "Response Example",
"value": {
"id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
"name": "report.pdf",
"size": 524288,
"extension": "pdf",
"mime_type": "application/pdf",
"created_by": "ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4",
"created_at": "2025-03-06T12:00:00"
}
}
}
}
}
},
"400": {
"description": "- `no_file_uploaded` : Please upload your file.\n- `filename_not_exists_error` : The specified filename does not exist.\n- `too_many_files` : Only one file is allowed.",
"content": {
"application/json": {
"examples": {
"no_file_uploaded": {
"summary": "no_file_uploaded",
"value": {
"status": 400,
"code": "no_file_uploaded",
"message": "Please upload your file."
}
},
"filename_not_exists_error": {
"summary": "filename_not_exists_error",
"value": {
"status": 400,
"code": "filename_not_exists_error",
"message": "The specified filename does not exist."
}
},
"too_many_files": {
"summary": "too_many_files",
"value": {
"status": 400,
"code": "too_many_files",
"message": "Only one file is allowed."
}
}
}
}
}
},
"413": {
"description": "`file_too_large` : File size exceeded.",
"content": {
"application/json": {
"examples": {
"file_too_large": {
"summary": "file_too_large",
"value": {
"status": 413,
"code": "file_too_large",
"message": "File size exceeded."
}
}
}
}
}
},
"415": {
"description": "`unsupported_file_type` : File type not allowed.",
"content": {
"application/json": {
"examples": {
"unsupported_file_type": {
"summary": "unsupported_file_type",
"value": {
"status": 415,
"code": "unsupported_file_type",
"message": "File type not allowed."
}
}
}
}
}
}
}
}
},
"/datasets/{dataset_id}/pipeline/datasource-plugins": {
"get": {
"tags": [
"Knowledge Pipeline"
],
"summary": "List Datasource Plugins",
"description": "List all datasource plugins available for a knowledge pipeline. Returns published or draft plugins depending on the `is_published` query parameter.",
"operationId": "listDatasourcePlugins",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
},
{
"name": "is_published",
"in": "query",
"schema": {
"type": "boolean",
"default": true
},
"description": "Whether to retrieve published or draft datasource plugins. `true` returns published plugins, `false` returns draft plugins."
}
],
"responses": {
"200": {
"description": "List of datasource plugins.",
"content": {
"application/json": {
"schema": {
"type": "array",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Unique identifier of the datasource plugin."
},
"name": {
"type": "string",
"description": "Display name of the datasource plugin."
},
"type": {
"type": "string",
"description": "Type of the datasource plugin."
}
}
}
},
"examples": {
"success": {
"summary": "Response Example",
"value": [
{
"id": "b1c2d3e4-f5a6-7890-abcd-ef1234567890",
"name": "Web Scraper",
"type": "online_document"
}
]
}
}
}
}
},
"404": {
"description": "`not_found` : Dataset not found.",
"content": {
"application/json": {
"examples": {
"not_found": {
"summary": "not_found",
"value": {
"status": 404,
"code": "not_found",
"message": "Dataset not found."
}
}
}
}
}
}
}
}
},
"/datasets/{dataset_id}/pipeline/datasource/nodes/{node_id}/run": {
"post": {
"tags": [
"Knowledge Pipeline"
],
"summary": "Run Datasource Node",
"description": "Execute a single datasource node within the knowledge pipeline. Returns a streaming response with the node execution results.",
"operationId": "runDatasourceNode",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
},
{
"name": "node_id",
"in": "path",
"required": true,
"schema": {
"type": "string"
},
"description": "ID of the datasource node to execute."
}
],
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"type": "object",
"required": [
"inputs",
"datasource_type",
"is_published"
],
"properties": {
"inputs": {
"type": "object",
"additionalProperties": true,
"description": "Input variables for the datasource node."
},
"datasource_type": {
"type": "string",
"enum": [
"online_document",
"local_file",
"website_crawl",
"online_drive"
],
"description": "Type of the datasource."
},
"credential_id": {
"type": "string",
"nullable": true,
"description": "ID of the credential to use for authentication with the datasource."
},
"is_published": {
"type": "boolean",
"description": "Whether to run the published or draft version of the node. `true` runs the published version, `false` runs the draft."
}
}
}
}
}
},
"responses": {
"200": {
"description": "Streaming response with node execution events.",
"content": {
"text/event-stream": {
"schema": {
"type": "string",
"description": "Server-Sent Events stream containing node execution progress and results."
}
}
}
},
"404": {
"description": "`not_found` : Dataset not found.",
"content": {
"application/json": {
"examples": {
"not_found": {
"summary": "not_found",
"value": {
"status": 404,
"code": "not_found",
"message": "Dataset not found."
}
}
}
}
}
}
}
}
},
"/datasets/{dataset_id}/pipeline/run": {
"post": {
"tags": [
"Knowledge Pipeline"
],
"summary": "Run Pipeline",
"description": "Execute the full knowledge pipeline for a knowledge base. Supports both streaming and blocking response modes.",
"operationId": "runPipeline",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
},
"description": "Knowledge base ID."
}
],
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"type": "object",
"required": [
"inputs",
"datasource_type",
"datasource_info_list",
"start_node_id",
"is_published",
"response_mode"
],
"properties": {
"inputs": {
"type": "object",
"additionalProperties": true,
"description": "Input variables for the pipeline."
},
"datasource_type": {
"type": "string",
"enum": [
"online_document",
"local_file",
"website_crawl",
"online_drive"
],
"description": "Type of the datasource."
},
"datasource_info_list": {
"type": "array",
"items": {
"type": "object",
"additionalProperties": true
},
"description": "List of datasource information objects for the pipeline to process."
},
"start_node_id": {
"type": "string",
"description": "ID of the node to start pipeline execution from."
},
"is_published": {
"type": "boolean",
"description": "Whether to run the published or draft version of the pipeline. `true` runs the published version, `false` runs the draft."
},
"response_mode": {
"type": "string",
"enum": [
"streaming",
"blocking"
],
"description": "Response mode for the pipeline execution. `streaming` returns a Server-Sent Events stream, `blocking` waits and returns the complete result."
}
}
}
}
}
},
"responses": {
"200": {
"description": "Pipeline execution result. Format depends on `response_mode`: streaming returns a `text/event-stream`, blocking returns a JSON object.",
"content": {
"text/event-stream": {
"schema": {
"type": "string",
"description": "Server-Sent Events stream containing pipeline execution progress and results. Returned when `response_mode` is `streaming`."
}
},
"application/json": {
"schema": {
"type": "object",
"description": "Complete pipeline execution result. Returned when `response_mode` is `blocking`.",
"additionalProperties": true
},
"examples": {
"success": {
"summary": "Blocking Response Example",
"value": {
"task_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
"workflow_run_id": "f1e2d3c4-b5a6-7890-abcd-ef0987654321",
"data": {
"id": "f1e2d3c4-b5a6-7890-abcd-ef0987654321",
"status": "succeeded",
"outputs": {},
"created_at": 1741267200,
"finished_at": 1741267210
}
}
}
}
}
}
},
"403": {
"description": "`forbidden` : Forbidden.",
"content": {
"application/json": {
"examples": {
"forbidden": {
"summary": "forbidden",
"value": {
"status": 403,
"code": "forbidden",
"message": "Forbidden."
}
}
}
}
}
},
"404": {
"description": "`not_found` : Dataset not found.",
"content": {
"application/json": {
"examples": {
"not_found": {
"summary": "not_found",
"value": {
"status": 404,
"code": "not_found",
"message": "Dataset not found."
}
}
}
}
}
},
"500": {
"description": "`pipeline_run_error` : Pipeline execution failed.",
"content": {
"application/json": {
"examples": {
"pipeline_run_error": {
"summary": "pipeline_run_error",
"value": {
"status": 500,
"code": "pipeline_run_error",
"message": "Pipeline execution failed: connection timeout"
}
}
}
}
}
}
}
}
}
},
"components": {
"schemas": {
"Dataset": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Unique identifier of the knowledge base."
},
"name": {
"type": "string",
"description": "Display name of the knowledge base. Unique within the workspace."
},
"description": {
"type": "string",
"description": "Optional text describing the purpose or contents of the knowledge base."
},
"provider": {
"type": "string",
"description": "Provider type. `vendor` for internally managed, `external` for external knowledge base connections."
},
"permission": {
"type": "string",
"description": "Controls who can access this knowledge base. Possible values: `only_me`, `all_team_members`, `partial_members`."
},
"data_source_type": {
"type": "string",
"description": "Data source type of the documents, `null` if not yet configured."
},
"indexing_technique": {
"type": "string",
"description": "`high_quality` uses embedding models for precise search; `economy` uses keyword-based indexing."
},
"app_count": {
"type": "integer",
"description": "Number of applications currently using this knowledge base."
},
"document_count": {
"type": "integer",
"description": "Total number of documents in the knowledge base."
},
"word_count": {
"type": "integer",
"description": "Total word count across all documents."
},
"created_by": {
"type": "string",
"description": "ID of the user who created the knowledge base."
},
"author_name": {
"type": "string",
"description": "Display name of the creator."
},
"created_at": {
"type": "number",
"description": "Creation timestamp (Unix epoch in seconds)."
},
"updated_by": {
"type": "string",
"description": "ID of the user who last updated the knowledge base."
},
"updated_at": {
"type": "number",
"description": "Last update timestamp (Unix epoch in seconds)."
},
"embedding_model": {
"type": "string",
"description": "Name of the embedding model used for indexing."
},
"embedding_model_provider": {
"type": "string",
"description": "Embedding model provider. Use the `provider` field from [Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`."
},
"embedding_available": {
"type": "boolean",
"description": "Whether the configured embedding model is currently available."
},
"retrieval_model_dict": {
"type": "object",
"description": "Retrieval configuration for the knowledge base.",
"properties": {
"search_method": {
"type": "string",
"description": "Search method used for retrieval. `keyword_search` for keyword matching, `semantic_search` for embedding-based similarity, `full_text_search` for full-text indexing, `hybrid_search` for a combination of semantic and keyword approaches."
},
"reranking_enable": {
"type": "boolean",
"description": "Whether reranking is enabled."
},
"reranking_mode": {
"type": "string",
"nullable": true,
"description": "Reranking mode. `reranking_model` for model-based reranking, `weighted_score` for score-based weighting. `null` if reranking is disabled."
},
"reranking_model": {
"type": "object",
"description": "Reranking model configuration.",
"properties": {
"reranking_provider_name": {
"type": "string",
"description": "Provider name of the reranking model."
},
"reranking_model_name": {
"type": "string",
"description": "Name of the reranking model."
}
}
},
"weights": {
"type": "object",
"nullable": true,
"description": "Weight configuration for hybrid search.",
"properties": {
"weight_type": {
"type": "string",
"description": "Strategy for balancing semantic and keyword search weights."
},
"vector_setting": {
"type": "object",
"description": "Semantic search weight settings.",
"properties": {
"vector_weight": {
"type": "number",
"description": "Weight assigned to semantic (vector) search results."
},
"embedding_provider_name": {
"type": "string",
"description": "Provider of the embedding model used for vector search."
},
"embedding_model_name": {
"type": "string",
"description": "Name of the embedding model used for vector search."
}
}
},
"keyword_setting": {
"type": "object",
"description": "Keyword search weight settings.",
"properties": {
"keyword_weight": {
"type": "number",
"description": "Weight assigned to keyword search results."
}
}
}
}
},
"top_k": {
"type": "integer",
"description": "Maximum number of results to return."
},
"score_threshold_enabled": {
"type": "boolean",
"description": "Whether score threshold filtering is enabled."
},
"score_threshold": {
"type": "number",
"description": "Minimum relevance score for results. Only effective when `score_threshold_enabled` is `true`."
}
}
},
"summary_index_setting": {
"type": "object",
"nullable": true,
"description": "Summary index configuration.",
"properties": {
"enable": {
"type": "boolean",
"description": "Whether summary indexing is enabled."
},
"model_name": {
"type": "string",
"description": "Name of the model used for generating summaries."
},
"model_provider_name": {
"type": "string",
"description": "Provider of the summary generation model."
},
"summary_prompt": {
"type": "string",
"description": "Prompt template used for summary generation."
}
}
},
"tags": {
"type": "array",
"description": "Tags associated with this knowledge base.",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Tag identifier."
},
"name": {
"type": "string",
"description": "Tag name."
},
"type": {
"type": "string",
"description": "Tag type. Always `knowledge` for knowledge base tags."
}
}
}
},
"doc_form": {
"type": "string",
"description": "Document chunking mode. `text_model` for standard text chunking, `hierarchical_model` for parent-child structure, `qa_model` for QA pair extraction."
},
"external_knowledge_info": {
"type": "object",
"nullable": true,
"description": "Connection details for external knowledge bases. Present when `provider` is `external`.",
"properties": {
"external_knowledge_id": {
"type": "string",
"description": "ID of the external knowledge base."
},
"external_knowledge_api_id": {
"type": "string",
"description": "ID of the external knowledge API connection."
},
"external_knowledge_api_name": {
"type": "string",
"description": "Display name of the external knowledge API."
},
"external_knowledge_api_endpoint": {
"type": "string",
"description": "Endpoint URL of the external knowledge API."
}
}
},
"external_retrieval_model": {
"type": "object",
"nullable": true,
"description": "Retrieval settings for external knowledge bases. `null` for internal knowledge bases.",
"properties": {
"top_k": {
"type": "integer",
"description": "Maximum number of results to return from the external knowledge base."
},
"score_threshold": {
"type": "number",
"description": "Minimum relevance score threshold."
},
"score_threshold_enabled": {
"type": "boolean",
"description": "Whether score threshold filtering is enabled."
}
}
},
"doc_metadata": {
"type": "array",
"description": "Metadata field definitions for the knowledge base.",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Metadata field identifier."
},
"name": {
"type": "string",
"description": "Metadata field name."
},
"type": {
"type": "string",
"description": "Metadata field value type."
}
}
}
},
"built_in_field_enabled": {
"type": "boolean",
"description": "Whether built-in metadata fields (e.g., `document_name`, `uploader`) are enabled."
},
"pipeline_id": {
"type": "string",
"nullable": true,
"description": "Pipeline ID, if a custom processing pipeline is configured."
},
"runtime_mode": {
"type": "string",
"nullable": true,
"description": "Runtime processing mode."
},
"chunk_structure": {
"type": "string",
"nullable": true,
"description": "Chunk structure configuration."
},
"icon_info": {
"type": "object",
"nullable": true,
"description": "Icon display configuration for the knowledge base.",
"properties": {
"icon_type": {
"type": "string",
"description": "Type of icon."
},
"icon": {
"type": "string",
"description": "Icon identifier or emoji."
},
"icon_background": {
"type": "string",
"description": "Background color for the icon."
},
"icon_url": {
"type": "string",
"description": "URL of a custom icon image."
}
}
},
"is_published": {
"type": "boolean",
"description": "Whether the knowledge base is published."
},
"total_documents": {
"type": "integer",
"description": "Total number of documents."
},
"total_available_documents": {
"type": "integer",
"description": "Number of documents that are enabled and available."
},
"enable_api": {
"type": "boolean",
"description": "Whether API access is enabled for this knowledge base."
},
"is_multimodal": {
"type": "boolean",
"description": "Whether multimodal content processing is enabled."
}
}
},
"Document": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Unique identifier of the document."
},
"position": {
"type": "integer",
"description": "Display position of the document in the list."
},
"data_source_type": {
"type": "string",
"description": "How the document was created. `upload_file` for file uploads, `notion_import` for Notion imports."
},
"data_source_info": {
"type": "object",
"description": "Raw data source information, varies by `data_source_type`."
},
"data_source_detail_dict": {
"type": "object",
"description": "Detailed data source information including file details."
},
"dataset_process_rule_id": {
"type": "string",
"description": "ID of the processing rule applied to this document."
},
"name": {
"type": "string",
"description": "Document name."
},
"created_from": {
"type": "string",
"description": "Origin of the document. `api` for API creation, `web` for UI creation."
},
"created_by": {
"type": "string",
"description": "ID of the user who created the document."
},
"created_at": {
"type": "number",
"description": "Creation timestamp (Unix epoch in seconds)."
},
"tokens": {
"type": "integer",
"description": "Total number of tokens in the document."
},
"indexing_status": {
"type": "string",
"description": "Current indexing status. `waiting` for queued, `parsing` while extracting content, `cleaning` while removing noise, `splitting` while chunking, `indexing` while building vectors, `completed` when ready, `error` if failed, `paused` if manually paused."
},
"error": {
"type": "string",
"nullable": true,
"description": "Error message if indexing failed. `null` when no error."
},
"enabled": {
"type": "boolean",
"description": "Whether the document is enabled for retrieval."
},
"disabled_at": {
"type": "number",
"nullable": true,
"description": "Timestamp when the document was disabled. `null` if enabled."
},
"disabled_by": {
"type": "string",
"nullable": true,
"description": "ID of the user who disabled the document. `null` if enabled."
},
"archived": {
"type": "boolean",
"description": "Whether the document is archived."
},
"display_status": {
"type": "string",
"description": "User-facing display status derived from `indexing_status` and `enabled` state."
},
"word_count": {
"type": "integer",
"description": "Total word count of the document."
},
"hit_count": {
"type": "integer",
"description": "Number of times the document has been matched in retrieval queries."
},
"doc_form": {
"type": "string",
"description": "Document chunking mode. `text_model` for standard text chunking, `hierarchical_model` for parent-child structure, `qa_model` for QA pair extraction."
},
"doc_metadata": {
"type": "array",
"description": "Metadata values assigned to this document.",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Metadata field identifier."
},
"name": {
"type": "string",
"description": "Metadata field name."
},
"type": {
"type": "string",
"description": "Metadata field value type."
},
"value": {
"type": "string",
"description": "Metadata value for this document."
}
}
}
},
"summary_index_status": {
"type": "string",
"nullable": true,
"description": "Status of the summary index for this document. `null` if summary indexing is not configured."
},
"need_summary": {
"type": "boolean",
"description": "Whether a summary needs to be generated for this document."
}
}
},
"Segment": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Unique identifier of the chunk."
},
"position": {
"type": "integer",
"description": "Position of the chunk within the document."
},
"document_id": {
"type": "string",
"description": "ID of the document this chunk belongs to."
},
"content": {
"type": "string",
"description": "Text content of the chunk."
},
"sign_content": {
"type": "string",
"description": "Chunk content with embedded file and image URLs replaced by signed access URLs."
},
"answer": {
"type": "string",
"description": "Answer content, used in Q&A mode documents."
},
"word_count": {
"type": "integer",
"description": "Word count of the chunk content."
},
"tokens": {
"type": "integer",
"description": "Token count of the chunk content."
},
"keywords": {
"type": "array",
"description": "Keywords associated with this chunk for keyword-based retrieval.",
"items": {
"type": "string"
}
},
"index_node_id": {
"type": "string",
"description": "ID of the index node in the vector store."
},
"index_node_hash": {
"type": "string",
"description": "Hash of the indexed content, used to detect changes."
},
"hit_count": {
"type": "integer",
"description": "Number of times this chunk has been matched in retrieval queries."
},
"enabled": {
"type": "boolean",
"description": "Whether the chunk is enabled for retrieval."
},
"disabled_at": {
"type": "number",
"nullable": true,
"description": "Timestamp when the chunk was disabled (Unix epoch in seconds). `null` if enabled."
},
"disabled_by": {
"type": "string",
"nullable": true,
"description": "ID of the user who disabled the chunk. `null` if enabled."
},
"status": {
"type": "string",
"description": "Current indexing status of the chunk, e.g. `completed`, `indexing`, `error`."
},
"created_by": {
"type": "string",
"description": "ID of the user who created the chunk."
},
"created_at": {
"type": "number",
"description": "Creation timestamp (Unix epoch in seconds)."
},
"updated_at": {
"type": "number",
"description": "Last update timestamp (Unix epoch in seconds)."
},
"updated_by": {
"type": "string",
"description": "ID of the user who last updated the chunk."
},
"indexing_at": {
"type": "number",
"nullable": true,
"description": "Timestamp when indexing started (Unix epoch in seconds). `null` if not yet started."
},
"completed_at": {
"type": "number",
"nullable": true,
"description": "Timestamp when indexing completed (Unix epoch in seconds). `null` if not yet completed."
},
"error": {
"type": "string",
"nullable": true,
"description": "Error message if indexing failed. `null` when no error."
},
"stopped_at": {
"type": "number",
"nullable": true,
"description": "Timestamp when indexing was stopped (Unix epoch in seconds). `null` if not stopped."
},
"child_chunks": {
"type": "array",
"description": "Child chunks belonging to this chunk. Only present for hierarchical mode documents.",
"items": {
"$ref": "#/components/schemas/ChildChunk"
}
},
"attachments": {
"type": "array",
"description": "Files attached to this chunk.",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Attachment file identifier."
},
"name": {
"type": "string",
"description": "Original file name."
},
"size": {
"type": "integer",
"description": "File size in bytes."
},
"extension": {
"type": "string",
"description": "File extension."
},
"mime_type": {
"type": "string",
"description": "MIME type of the file."
},
"source_url": {
"type": "string",
"description": "URL to access the attachment."
}
}
}
},
"summary": {
"type": "string",
"nullable": true,
"description": "AI-generated summary of the chunk content. `null` if summary indexing is not enabled."
}
}
},
"ChildChunk": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Unique identifier of the child chunk."
},
"segment_id": {
"type": "string",
"description": "ID of the parent chunk this child chunk belongs to."
},
"content": {
"type": "string",
"description": "Text content of the child chunk."
},
"position": {
"type": "integer",
"description": "Position of the child chunk within the parent chunk."
},
"word_count": {
"type": "integer",
"description": "Word count of the child chunk content."
},
"type": {
"type": "string",
"description": "How the child chunk was created. `automatic` for system-generated, `customized` for manually created or edited."
},
"created_at": {
"type": "number",
"description": "Creation timestamp (Unix epoch in seconds)."
},
"updated_at": {
"type": "number",
"description": "Last update timestamp (Unix epoch in seconds)."
}
}
},
"RetrievalModel": {
"type": "object",
"required": [
"search_method",
"reranking_enable",
"top_k",
"score_threshold_enabled"
],
"properties": {
"search_method": {
"type": "string",
"description": "Search method used for retrieval.",
"enum": [
"keyword_search",
"semantic_search",
"full_text_search",
"hybrid_search"
]
},
"reranking_enable": {
"type": "boolean",
"description": "Whether reranking is enabled."
},
"reranking_model": {
"type": "object",
"description": "Reranking model configuration.",
"properties": {
"reranking_provider_name": {
"type": "string",
"description": "Provider name of the reranking model."
},
"reranking_model_name": {
"type": "string",
"description": "Name of the reranking model."
}
}
},
"reranking_mode": {
"type": "string",
"enum": [
"reranking_model",
"weighted_score"
],
"nullable": true,
"description": "Reranking mode. Required when `reranking_enable` is `true`."
},
"top_k": {
"type": "integer",
"description": "Maximum number of results to return."
},
"score_threshold_enabled": {
"type": "boolean",
"description": "Whether score threshold filtering is enabled."
},
"score_threshold": {
"type": "number",
"nullable": true,
"description": "Minimum relevance score for results. Only effective when `score_threshold_enabled` is `true`."
},
"weights": {
"type": "object",
"nullable": true,
"description": "Weight configuration for hybrid search.",
"properties": {
"weight_type": {
"type": "string",
"description": "Strategy for balancing semantic and keyword search weights.",
"enum": [
"semantic_first",
"keyword_first",
"customized"
]
},
"vector_setting": {
"type": "object",
"description": "Semantic search weight settings.",
"properties": {
"vector_weight": {
"type": "number",
"description": "Weight assigned to semantic (vector) search results."
},
"embedding_provider_name": {
"type": "string",
"description": "Provider of the embedding model used for vector search."
},
"embedding_model_name": {
"type": "string",
"description": "Name of the embedding model used for vector search."
}
}
},
"keyword_setting": {
"type": "object",
"description": "Keyword search weight settings.",
"properties": {
"keyword_weight": {
"type": "number",
"description": "Weight assigned to keyword search results."
}
}
}
}
}
}
}
},
"securitySchemes": {
"ApiKeyAuth": {
"type": "http",
"scheme": "bearer",
"bearerFormat": "API_KEY",
"description": "API Key authentication. For all API requests, include your API Key in the `Authorization` HTTP header, prefixed with `Bearer `. Example: `Authorization: Bearer {API_KEY}`. **We strongly recommend storing your API Key on the server side and never sharing or storing it on the client side, to avoid API Key leakage that could lead to serious consequences.**"
}
}
}
}