diff --git a/components/changelog/content/config_v1.2.8.mdx b/components/changelog/content/config_v1.2.8.mdx
index 159fc94..4efcca9 100644
--- a/components/changelog/content/config_v1.2.8.mdx
+++ b/components/changelog/content/config_v1.2.8.mdx
@@ -86,4 +86,20 @@
- See [Interface Object Structure - fileSearch](/docs/configuration/librechat_yaml/object_structure/interface#filesearch) for details
- Improved [Model Specs documentation](/docs/configuration/librechat_yaml/object_structure/model_specs) with parameter support updates:
- - Added support for `disableStreaming`, `thinking`, `thinkingBudget`, `web_search`, and other parameters
\ No newline at end of file
+ - Added support for `disableStreaming`, `thinking`, `thinkingBudget`, `web_search`, and other parameters
+
+- Added OCR, text parsing, and STT separation to `fileConfig`:
+ - Added `ocr` configuration to control which file types use OCR processing
+ - Added `text` configuration to control which file types use direct text extraction
+ - Added `stt` configuration to control which audio file types use Speech-to-Text transcription
+ - Separate processing paths for visual documents (OCR), text files (native parsing), and audio files (STT)
+ - Processing precedence when a file matches more than one pattern: OCR > STT > text parsing
+ - Default OCR support: images (JPEG, GIF, PNG, WebP, HEIC, HEIF), PDFs, Office documents, EPUB files
+ - Default text parsing support: all text MIME types and common programming languages
+ - Default STT support: audio formats (MP3, WAV, FLAC, OGG, M4A, WebM, etc.)
+ - See [File Config Object Structure](/docs/configuration/librechat_yaml/object_structure/file_config) for details
+
+- Added `fileTokenLimit` parameter support for all endpoints:
+ - Allows setting a default maximum token limit for file processing, and adjusting it on the fly, to control costs and resource usage
+ - Available as a URL query parameter and in endpoint configuration panels; it can also be configured in the `fileConfig` field of `librechat.yaml`
+ - Runtime behavior: text from attached files is truncated to this limit just before prompt construction (default: 100000)
\ No newline at end of file
diff --git a/pages/docs/configuration/librechat_yaml/object_structure/config.mdx b/pages/docs/configuration/librechat_yaml/object_structure/config.mdx
index 636a7ef..51635aa 100644
--- a/pages/docs/configuration/librechat_yaml/object_structure/config.mdx
+++ b/pages/docs/configuration/librechat_yaml/object_structure/config.mdx
@@ -150,6 +150,10 @@ see: [Web Search Object Structure](/docs/configuration/librechat_yaml/object_str
['serverFileSizeLimit', 'Number', 'The maximum file size (in MB) that the server will accept. Applies globally across all endpoints unless overridden by endpoint-specific settings.', ''],
['avatarSizeLimit', 'Number', 'Maximum size (in MB) for user avatar images.', ''],
['clientImageResize', 'Object', 'Configures client-side image resizing to optimize file uploads and prevent upload errors due to large image sizes.', ''],
+ ['ocr', 'Object', 'Settings for Optical Character Recognition (OCR) file processing.', ''],
+ ['text', 'Object', 'Settings for direct text file parsing.', ''],
+ ['stt', 'Object', 'Settings for Speech-to-Text (STT) audio file processing.', ''],
+ ['fileTokenLimit', 'Number', 'Maximum number of tokens of text extracted from attached files to include in a prompt; text beyond this limit is truncated.', 'fileTokenLimit: 100000'],
]}
/>
diff --git a/pages/docs/configuration/librechat_yaml/object_structure/file_config.mdx b/pages/docs/configuration/librechat_yaml/object_structure/file_config.mdx
index 197301e..0f7f822 100644
--- a/pages/docs/configuration/librechat_yaml/object_structure/file_config.mdx
+++ b/pages/docs/configuration/librechat_yaml/object_structure/file_config.mdx
@@ -4,12 +4,15 @@
The `fileConfig` object allows you to configure file handling settings for the application, including size limits and MIME type restrictions. This section provides a detailed breakdown of the `fileConfig` object structure.
-There are 4 main fields under `fileConfig`:
+There are 8 main fields under `fileConfig`:
 - `endpoints`
 - `serverFileSizeLimit`
 - `avatarSizeLimit`
 - `imageGeneration`
+ - `ocr`
+ - `text`
+ - `stt`
+ - `fileTokenLimit`
**Notes:**
@@ -51,9 +54,23 @@ fileConfig:
- "image/.*"
serverFileSizeLimit: 1000
avatarSizeLimit: 2
+ fileTokenLimit: 100000
imageGeneration:
percentage: 100
px: 1024
+ ocr:
+ supportedMimeTypes:
+ - "^image/(jpeg|gif|png|webp|heic|heif)$"
+ - "^application/pdf$"
+ - "^application/vnd\\.openxmlformats-officedocument\\.(wordprocessingml\\.document|presentationml\\.presentation|spreadsheetml\\.sheet)$"
+ - "^application/vnd\\.ms-(word|powerpoint|excel)$"
+ - "^application/epub\\+zip$"
+ text:
+ supportedMimeTypes:
+ - "^text/(plain|markdown|csv|json|xml|html|css|javascript|typescript|x-python|x-java|x-csharp|x-php|x-ruby|x-go|x-rust|x-kotlin|x-swift|x-scala|x-perl|x-lua|x-shell|x-sql|x-yaml|x-toml)$"
+ stt:
+ supportedMimeTypes:
+ - "^audio/(mp3|mpeg|mpeg3|wav|wave|x-wav|ogg|vorbis|mp4|x-m4a|flac|x-flac|webm)$"
```
## serverFileSizeLimit
@@ -111,6 +128,115 @@ fileConfig:
px: 1024
```
+## fileTokenLimit
+
+
+
+**Description:** The maximum number of tokens of text from attached files to include in a prompt. When attached files contain text, LibreChat truncates that text to the configured token limit at runtime, just before the prompt is constructed.
+
+**Default:** `100000`
+
+```yaml filename="fileConfig / fileTokenLimit"
+fileConfig:
+ fileTokenLimit: 100000
+```
+
+## ocr
+
+
+
+**Description:** The `ocr` section configures which file types should be processed using OCR functionality for extracting text from visual documents.
+
+### supportedMimeTypes
+
+
+
+**Default:** Images, PDFs, Office documents, and EPUB files
+
+```yaml filename="fileConfig / ocr / supportedMimeTypes"
+fileConfig:
+ ocr:
+ supportedMimeTypes:
+ - "^image/(jpeg|gif|png|webp|heic|heif)$"
+ - "^application/pdf$"
+ - "^application/vnd\\.openxmlformats-officedocument\\.(wordprocessingml\\.document|presentationml\\.presentation|spreadsheetml\\.sheet)$"
+ - "^application/vnd\\.ms-(word|powerpoint|excel)$"
+ - "^application/epub\\+zip$"
+```
+
+## text
+
+
+
+**Description:** The `text` section configures which file types should be processed using direct text extraction.
+
+### supportedMimeTypes
+
+
+
+**Default:** All text files and common programming languages
+
+```yaml filename="fileConfig / text / supportedMimeTypes"
+fileConfig:
+ text:
+ supportedMimeTypes:
+ - "^text/(plain|markdown|csv|json|xml|html|css|javascript|typescript|x-python|x-java|x-csharp|x-php|x-ruby|x-go|x-rust|x-kotlin|x-swift|x-scala|x-perl|x-lua|x-shell|x-sql|x-yaml|x-toml)$"
+```
+
+## stt
+
+
+
+**Description:** The `stt` section configures which audio file types should be processed using Speech-to-Text functionality for converting audio to text.
+
+### supportedMimeTypes
+
+
+
+**Default:** Common audio formats
+
+```yaml filename="fileConfig / stt / supportedMimeTypes"
+fileConfig:
+ stt:
+ supportedMimeTypes:
+ - "^audio/(mp3|mpeg|mpeg3|wav|wave|x-wav|ogg|vorbis|mp4|x-m4a|flac|x-flac|webm)$"
+```
+
+**Notes:**
+- Files matching `text` patterns are processed with simple text extraction
+- Files matching `ocr` patterns are processed with the provided OCR service
+- Files matching `stt` patterns are processed with Speech-to-Text transcription
+- Processing precedence when a file matches more than one pattern: OCR > STT > text parsing
+- Files not matching any pattern will not be processed
+
## endpoints