From 306c50704e5fa13d4e56d8423a0602378c2927e7 Mon Sep 17 00:00:00 2001
From: YuTengjing <ytj2713151713@gmail.com>
Date: Thu, 26 Feb 2026 22:59:10 +0800
Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20fix:=20improve=20crawler=20error?=
 =?UTF-8?q?=20handling=20and=20timeout=20cancellation=20(#12487)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .agents/skills/pr/SKILL.md                    |  55 ++++
 .agents/skills/upstash-workflow/SKILL.md      |   5 +
 AGENTS.md                                     |   3 +-
 CLAUDE.md                                     |   2 +
 GEMINI.md                                     |   2 +
 docs/self-hosting/advanced/online-search.mdx  |  18 ++
 .../advanced/online-search.zh-CN.mdx          |  18 ++
 .../web-crawler/src/__tests__/crawler.test.ts |  21 +-
 .../crawImpl/__tests__/browserless.test.ts    |  50 ++--
 .../src/crawImpl/__tests__/exa.test.ts        | 163 ++++++------
 .../src/crawImpl/__tests__/firecrawl.test.ts  | 207 +++++++--------
 .../src/crawImpl/__tests__/jina.test.ts       | 133 +++++-----
 .../src/crawImpl/__tests__/naive.test.ts      |  53 ++--
 .../src/crawImpl/__tests__/search1api.test.ts |  78 +++---
 .../src/crawImpl/__tests__/tavily.test.ts     | 244 ++++++++----------
 .../web-crawler/src/crawImpl/browserless.ts   |  91 ++++---
 packages/web-crawler/src/crawImpl/exa.ts      |  84 +++---
 .../web-crawler/src/crawImpl/firecrawl.ts     |  91 +++----
 packages/web-crawler/src/crawImpl/jina.ts     |  78 ++++--
 packages/web-crawler/src/crawImpl/naive.ts    |  34 ++-
 .../web-crawler/src/crawImpl/search1api.ts    |  70 +++--
 packages/web-crawler/src/crawImpl/tavily.ts   |  84 +++---
 packages/web-crawler/src/crawler.ts           |  11 +-
 packages/web-crawler/src/test-utils.ts        |  25 ++
 .../utils/{ => __tests__}/appUrlRules.test.ts |   2 +-
 .../src/utils/__tests__/errorType.test.ts     |  45 +++-
 .../src/utils/__tests__/response.test.ts      | 102 ++++++++
 .../src/utils/__tests__/withTimeout.test.ts   |  80 ++++--
 packages/web-crawler/src/utils/errorType.ts   |  31 +++
 .../src/utils/htmlToMarkdown.test.ts          |  27 +-
 .../web-crawler/src/utils/htmlToMarkdown.ts   |   6 +-
 packages/web-crawler/src/utils/response.ts    |  49 ++++
 packages/web-crawler/src/utils/withTimeout.ts |  21 +-
 src/envs/tools.ts                             |  10 +
 src/server/routers/tools/search.test.ts       |  21 ++
 src/server/routers/tools/search.ts            |   5 +-
 src/server/services/search/index.test.ts      | 153 ++++++++++-
 src/server/services/search/index.ts           |  74 +++++-
 38 files changed, 1462 insertions(+), 784 deletions(-)
 create mode 100644 .agents/skills/pr/SKILL.md
 create mode 100644 packages/web-crawler/src/test-utils.ts
 rename packages/web-crawler/src/utils/{ => __tests__}/appUrlRules.test.ts (98%)
 create mode 100644 packages/web-crawler/src/utils/__tests__/response.test.ts
 create mode 100644 packages/web-crawler/src/utils/response.ts

diff --git a/.agents/skills/pr/SKILL.md b/.agents/skills/pr/SKILL.md
new file mode 100644
index 0000000000..b751d5a0eb
--- /dev/null
+++ b/.agents/skills/pr/SKILL.md
@@ -0,0 +1,55 @@
+---
+name: pr
+description: "Create a PR for the current branch. Use when the user asks to create a pull request, submit PR, or says 'pr'."
+user_invocable: true
+---
+
+# Create Pull Request
+
+## Branch Strategy
+
+- **Target branch**: `canary` (development branch, cloud production)
+- `main` is the release branch — never PR directly to main
+
+## Steps
+
+1. **Gather context** (run in parallel):
+   - `git branch --show-current` — current branch name
+   - `git rev-parse --abbrev-ref @{u} 2>/dev/null` — remote tracking status
+   - `git log --oneline @{u}..HEAD 2>/dev/null` — unpushed commits (relative to the upstream branch, if one exists)
+   - `gh pr list --head "$(git branch --show-current)" --json number,title,state,url` — existing PR
+   - `git log --oneline origin/canary..HEAD` — commit history for PR title
+   - `git diff --stat --stat-count=20 origin/canary..HEAD` — change summary
+
+2. **Push if needed**:
+   - No upstream: `git push -u origin $(git branch --show-current)`
+   - Has upstream: `git push origin $(git branch --show-current)`
+
+3. **Search related GitHub issues**:
+   - `gh issue list --search "<keywords>" --state all --limit 10`
+   - Only link issues with matching scope (avoid large umbrella issues)
+   - Skip if no matching issue found
+
+4. **Create PR** with `gh pr create --base canary`:
+   - Title: `<gitmoji> <type>(<scope>): <description>`
+   - Body: based on PR template (`.github/PULL_REQUEST_TEMPLATE.md`), fill checkboxes
+   - Link related GitHub issues using magic keywords (`Fixes #123`, `Closes #123`)
+   - Link Linear issues if applicable (`Fixes LOBE-xxx`)
+   - Use HEREDOC for body to preserve formatting
+
+5. **Open in browser**: `gh pr view --web`
+
+## PR Template
+
+Use `.github/PULL_REQUEST_TEMPLATE.md` as the body structure. Key sections:
+
+- **Change Type**: Check the appropriate gitmoji type
+- **Related Issue**: Link GitHub/Linear issues with magic keywords
+- **Description of Change**: Summarize what and why
+- **How to Test**: Describe test approach, check relevant boxes
+
+## Notes
+
+- **Release impact**: PR titles starting with `✨ feat` or `🐛 fix` trigger releases — use carefully
+- **Language**: All PR content must be in English
+- If a PR already exists for the branch, inform the user instead of creating a duplicate
diff --git a/.agents/skills/upstash-workflow/SKILL.md b/.agents/skills/upstash-workflow/SKILL.md
index 1d2178302f..1f410280ce 100644
--- a/.agents/skills/upstash-workflow/SKILL.md
+++ b/.agents/skills/upstash-workflow/SKILL.md
@@ -1,3 +1,8 @@
+---
+name: upstash-workflow
+description: 'Upstash Workflow implementation guide. Use when creating async workflows with QStash, implementing fan-out patterns, or building 3-layer workflow architecture (process → paginate → execute).'
+---
+
 # Upstash Workflow Implementation Guide
 
 This guide covers the standard patterns for implementing Upstash Workflow + QStash async workflows in the LobeHub codebase.
diff --git a/AGENTS.md b/AGENTS.md
index 9df160c082..722c995404 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -38,7 +38,8 @@ lobe-chat/
 
 ### Git Workflow
 
-- The current release branch is `next` until v2.0.0 is officially released
+- **Branch strategy**: `canary` is the development branch (cloud production); `main` is the release branch (periodically cherry-picks from canary)
+- New branches should be created from `canary`; PRs should target `canary`
 - Use rebase for git pull
 - Git commit messages should prefix with gitmoji
 - Git branch name format: `username/feat/feature-name`
diff --git a/CLAUDE.md b/CLAUDE.md
index dc4a72b7d8..83f683670c 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -33,6 +33,8 @@ lobe-chat/
 
 ### Git Workflow
 
+- **Branch strategy**: `canary` is the development branch (cloud production); `main` is the release branch (periodically cherry-picks from canary)
+- New branches should be created from `canary`; PRs should target `canary`
 - Use rebase for `git pull`
 - Commit messages: prefix with gitmoji
 - Branch format: `<type>/<feature-name>`
diff --git a/GEMINI.md b/GEMINI.md
index b4cd3a15b7..c1bdab8da2 100644
--- a/GEMINI.md
+++ b/GEMINI.md
@@ -33,6 +33,8 @@ lobe-chat/
 
 ### Git Workflow
 
+- **Branch strategy**: `canary` is the development branch (cloud production); `main` is the release branch (periodically cherry-picks from canary)
+- New branches should be created from `canary`; PRs should target `canary`
 - Use rebase for `git pull`
 - Commit messages: prefix with gitmoji
 - Branch format: `<type>/<feature-name>`
diff --git a/docs/self-hosting/advanced/online-search.mdx b/docs/self-hosting/advanced/online-search.mdx
index 6e7862fdc5..23b9e7fa85 100644
--- a/docs/self-hosting/advanced/online-search.mdx
+++ b/docs/self-hosting/advanced/online-search.mdx
@@ -51,6 +51,24 @@ Supported crawler types are listed below:
 
 ---
 
+## `CRAWL_CONCURRENCY`
+
+Controls crawler concurrency per crawl task. The default is `3`. On low-resource servers, use `1` to reduce CPU spikes.
+
+```env
+CRAWL_CONCURRENCY=3
+```
+
+## `CRAWLER_RETRY`
+
+Controls retry attempts per URL on crawl failures. The default is `1` (up to 2 attempts total).
+
+```env
+CRAWLER_RETRY=1
+```
+
+---
+
 ## `SEARCH_PROVIDERS`
 
 Configure which search engine providers to use for web search.
diff --git a/docs/self-hosting/advanced/online-search.zh-CN.mdx b/docs/self-hosting/advanced/online-search.zh-CN.mdx
index a9af246822..3b599c34be 100644
--- a/docs/self-hosting/advanced/online-search.zh-CN.mdx
+++ b/docs/self-hosting/advanced/online-search.zh-CN.mdx
@@ -46,6 +46,24 @@ CRAWLER_IMPLS="naive,search1api"
 
 ---
 
+## `CRAWL_CONCURRENCY`
+
+控制单次网页抓取任务的并发数量，默认值为 `3`。在低配置服务器上建议设置为 `1` 以降低 CPU 峰值。
+
+```env
+CRAWL_CONCURRENCY=3
+```
+
+## `CRAWLER_RETRY`
+
+控制单个 URL 的抓取失败重试次数，默认值为 `1`（即最多尝试 2 次）。
+
+```env
+CRAWLER_RETRY=1
+```
+
+---
+
 ## `SEARCH_PROVIDERS`
 
 配置联网搜索使用的搜索引擎提供商。
diff --git a/packages/web-crawler/src/__tests__/crawler.test.ts b/packages/web-crawler/src/__tests__/crawler.test.ts
index a7868f7762..ba971ca7e7 100644
--- a/packages/web-crawler/src/__tests__/crawler.test.ts
+++ b/packages/web-crawler/src/__tests__/crawler.test.ts
@@ -1,4 +1,4 @@
-import { describe, expect, it, vi } from 'vitest';
+import { beforeEach, describe, expect, it, vi } from 'vitest';
 
 import { Crawler } from '../crawler';
 
@@ -19,6 +19,16 @@ vi.mock('../utils/appUrlRules', () => ({
 }));
 
 describe('Crawler', () => {
+  beforeEach(async () => {
+    vi.clearAllMocks();
+    // Reset applyUrlRules to default (no impls override)
+    const { applyUrlRules } = await import('../utils/appUrlRules');
+    vi.mocked(applyUrlRules).mockReturnValue({
+      transformedUrl: 'https://example.com',
+      filterOptions: {},
+    });
+  });
+
   const crawler = new Crawler();
 
   it('should crawl successfully with default impls', async () => {
@@ -194,11 +204,12 @@ describe('Crawler', () => {
     });
 
     expect(result).toEqual({
-      crawler: undefined,
+      crawler: 'browserless',
       data: {
-        content: 'Fail to crawl the page. Error type: UnknownError, error message: undefined',
-        errorMessage: undefined,
-        errorType: 'UnknownError',
+        content:
+          'Fail to crawl the page. Error type: EmptyCrawlResultError, error message: browserless returned empty or short content',
+        errorMessage: 'browserless returned empty or short content',
+        errorType: 'EmptyCrawlResultError',
       },
       originalUrl: 'https://example.com',
       transformedUrl: undefined,
diff --git a/packages/web-crawler/src/crawImpl/__tests__/browserless.test.ts b/packages/web-crawler/src/crawImpl/__tests__/browserless.test.ts
index 563c7e29f8..63006744bb 100644
--- a/packages/web-crawler/src/crawImpl/__tests__/browserless.test.ts
+++ b/packages/web-crawler/src/crawImpl/__tests__/browserless.test.ts
@@ -1,7 +1,13 @@
 import { describe, expect, it, vi } from 'vitest';
 
+import * as withTimeoutModule from '../../utils/withTimeout';
 import { browserless } from '../browserless';
 
+// Mock withTimeout to just call the factory function directly (bypassing real timeout)
+vi.spyOn(withTimeoutModule, 'withTimeout').mockImplementation((fn) =>
+  fn(new AbortController().signal),
+);
+
 describe('browserless', () => {
   it('should throw BrowserlessInitError when env vars not set', async () => {
     const originalEnv = { ...process.env };
@@ -16,17 +22,22 @@ describe('browserless', () => {
     process.env = originalEnv;
   });
 
-  it('should return undefined on fetch error', async () => {
+  it('should throw NetworkConnectionError on fetch failed', async () => {
     process.env.BROWSERLESS_TOKEN = 'test-token';
-    global.fetch = vi.fn().mockRejectedValue(new Error('Fetch error'));
+    global.fetch = vi.fn().mockRejectedValue(new TypeError('fetch failed'));
 
-    const result = await browserless('https://example.com', { filterOptions: {} });
-    expect(result).toBeUndefined();
+    const { NetworkConnectionError } = await import('../../utils/errorType');
+    await expect(browserless('https://example.com', { filterOptions: {} })).rejects.toThrow(
+      NetworkConnectionError,
+    );
   });
 
   it('should return undefined when content is empty', async () => {
     process.env.BROWSERLESS_TOKEN = 'test-token';
     global.fetch = vi.fn().mockResolvedValue({
+      ok: true,
+      status: 200,
+      statusText: 'OK',
       text: vi.fn().mockResolvedValue('<html></html>'),
     } as any);
 
@@ -37,6 +48,9 @@ describe('browserless', () => {
   it('should return undefined when title is "Just a moment..."', async () => {
     process.env.BROWSERLESS_TOKEN = 'test-token';
     global.fetch = vi.fn().mockResolvedValue({
+      ok: true,
+      status: 200,
+      statusText: 'OK',
       text: vi.fn().mockResolvedValue('<html><title>Just a moment...</title></html>'),
     } as any);
 
@@ -46,7 +60,12 @@ describe('browserless', () => {
 
   it('should return crawl result on successful fetch', async () => {
     process.env.BROWSERLESS_TOKEN = 'test-token';
+    const longContent =
+      'This is a test paragraph with enough content to pass the length check. '.repeat(3);
     global.fetch = vi.fn().mockResolvedValue({
+      ok: true,
+      status: 200,
+      statusText: 'OK',
       text: vi.fn().mockResolvedValue(`
         <html>
           <head>
@@ -54,7 +73,7 @@ describe('browserless', () => {
             <meta name="description" content="Test Description">
           </head>
           <body>
-            <h1>Test Content</h1>
+            <p>${longContent}</p>
           </body>
         </html>
       `),
@@ -76,6 +95,9 @@ describe('browserless', () => {
   it('should include rejectRequestPattern in request payload', async () => {
     process.env.BROWSERLESS_TOKEN = 'test-token';
     const fetchMock = vi.fn().mockResolvedValue({
+      ok: true,
+      status: 200,
+      statusText: 'OK',
       text: vi.fn().mockResolvedValue('<html><title>Test</title></html>'),
     });
     global.fetch = fetchMock;
@@ -90,9 +112,7 @@ describe('browserless', () => {
 
   it('should allow requests to permitted file types', async () => {
     const allowedExtensions = ['html', 'css', 'js', 'json', 'xml', 'webmanifest', 'txt', 'md'];
-    const pattern = new RegExp(
-      '.*\\.(?!(html|css|js|json|xml|webmanifest|txt|md)(\\?|#|$))[\\w-]+(?:[?#].*)?$',
-    );
+    const pattern = /.*\.(?!(html|css|js|json|xml|webmanifest|txt|md)(\?|#|$))[\w-]+(?:[?#].*)?$/;
 
     allowedExtensions.forEach((ext) => {
       expect(`file.${ext}`).not.toMatch(pattern);
@@ -103,9 +123,7 @@ describe('browserless', () => {
 
   it('should reject requests to non-permitted file types', async () => {
     const rejectedExtensions = ['jpg', 'png', 'gif', 'pdf', 'doc', 'mp4', 'wav'];
-    const pattern = new RegExp(
-      '.*\\.(?!(html|css|js|json|xml|webmanifest|txt|md)(\\?|#|$))[\\w-]+(?:[?#].*)?$',
-    );
+    const pattern = /.*\.(?!(html|css|js|json|xml|webmanifest|txt|md)(\?|#|$))[\w-]+(?:[?#].*)?$/;
 
     rejectedExtensions.forEach((ext) => {
       expect(`file.${ext}`).toMatch(pattern);
@@ -114,14 +132,16 @@ describe('browserless', () => {
     });
   });
 
-  it('should use correct URL when BROWSERLESS_URL is provided', async () => {
-    const customUrl = 'https://custom.browserless.io';
+  it('should call fetch with the base URL and content path', async () => {
     const originalEnv = { ...process.env };
     process.env.BROWSERLESS_TOKEN = 'test-token';
-    process.env.BROWSERLESS_URL = customUrl;
     global.fetch = vi.fn().mockImplementation((url) => {
-      expect(url).toContain(customUrl);
+      // BASE_URL is captured at module load time, so we verify fetch is called with /content path
+      expect(url).toContain('/content');
       return Promise.resolve({
+        ok: true,
+        status: 200,
+        statusText: 'OK',
         text: () => Promise.resolve('<html><title>Test</title></html>'),
       });
     });
diff --git a/packages/web-crawler/src/crawImpl/__tests__/exa.test.ts b/packages/web-crawler/src/crawImpl/__tests__/exa.test.ts
index 23dddb03e5..05872a823e 100644
--- a/packages/web-crawler/src/crawImpl/__tests__/exa.test.ts
+++ b/packages/web-crawler/src/crawImpl/__tests__/exa.test.ts
@@ -1,5 +1,6 @@
 import { beforeEach, describe, expect, it, vi } from 'vitest';
 
+import { createMockResponse } from '../../test-utils';
 import { NetworkConnectionError, PageNotFoundError, TimeoutError } from '../../utils/errorType';
 import { exa } from '../exa';
 
@@ -18,23 +19,20 @@ describe('exa crawler', () => {
   it('should successfully crawl content with API key', async () => {
     process.env.EXA_API_KEY = 'test-api-key';
 
-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        requestId: 'test-request-id',
-        results: [
-          {
-            id: 'test-id',
-            title: 'Test Article',
-            url: 'https://example.com',
-            text: 'This is a test article with enough content to pass the length check. '.repeat(3),
-            author: 'Test Author',
-            publishedDate: '2023-01-01',
-            summary: 'Test summary',
-          },
-        ],
-      }),
-    };
+    const mockResponse = createMockResponse({
+      requestId: 'test-request-id',
+      results: [
+        {
+          id: 'test-id',
+          title: 'Test Article',
+          url: 'https://example.com',
+          text: 'This is a test article with enough content to pass the length check. '.repeat(3),
+          author: 'Test Author',
+          publishedDate: '2023-01-01',
+          summary: 'Test summary',
+        },
+      ],
+    });
 
     const { withTimeout } = await import('../../utils/withTimeout');
     vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@@ -51,23 +49,20 @@ describe('exa crawler', () => {
       url: 'https://example.com',
     });
 
-    expect(withTimeout).toHaveBeenCalledWith(expect.any(Promise), 30000);
+    expect(withTimeout).toHaveBeenCalledWith(expect.any(Function), 30000);
   });
 
   it('should handle missing API key', async () => {
     // API key is undefined
-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        results: [
-          {
-            title: 'Test Article',
-            url: 'https://example.com',
-            text: 'Test content with sufficient length. '.repeat(5),
-          },
-        ],
-      }),
-    };
+    const mockResponse = createMockResponse({
+      results: [
+        {
+          title: 'Test Article',
+          url: 'https://example.com',
+          text: 'Test content with sufficient length. '.repeat(5),
+        },
+      ],
+    });
 
     const { withTimeout } = await import('../../utils/withTimeout');
     vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@@ -75,19 +70,16 @@ describe('exa crawler', () => {
     await exa('https://example.com', { filterOptions: {} });
 
     // Check that fetch was called with empty API key header
-    expect(withTimeout).toHaveBeenCalledWith(expect.any(Promise), 30000);
+    expect(withTimeout).toHaveBeenCalledWith(expect.any(Function), 30000);
   });
 
   it('should return undefined when no results are returned', async () => {
     process.env.EXA_API_KEY = 'test-api-key';
 
-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        requestId: 'test-request-id',
-        results: [],
-      }),
-    };
+    const mockResponse = createMockResponse({
+      requestId: 'test-request-id',
+      results: [],
+    });
 
     const { withTimeout } = await import('../../utils/withTimeout');
     vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@@ -108,18 +100,15 @@ describe('exa crawler', () => {
   it('should return undefined for short content', async () => {
     process.env.EXA_API_KEY = 'test-api-key';
 
-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        results: [
-          {
-            title: 'Test Article',
-            url: 'https://example.com',
-            text: 'Short', // Content too short
-          },
-        ],
-      }),
-    };
+    const mockResponse = createMockResponse({
+      results: [
+        {
+          title: 'Test Article',
+          url: 'https://example.com',
+          text: 'Short', // Content too short
+        },
+      ],
+    });
 
     const { withTimeout } = await import('../../utils/withTimeout');
     vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@@ -132,11 +121,11 @@ describe('exa crawler', () => {
   it('should throw PageNotFoundError for 404 status', async () => {
     process.env.EXA_API_KEY = 'test-api-key';
 
-    const mockResponse = {
+    const mockResponse = createMockResponse('Not Found', {
       ok: false,
       status: 404,
       statusText: 'Not Found',
-    };
+    });
 
     const { withTimeout } = await import('../../utils/withTimeout');
     vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@@ -149,11 +138,11 @@ describe('exa crawler', () => {
   it('should throw error for other HTTP errors', async () => {
     process.env.EXA_API_KEY = 'test-api-key';
 
-    const mockResponse = {
+    const mockResponse = createMockResponse('', {
       ok: false,
       status: 500,
       statusText: 'Internal Server Error',
-    };
+    });
 
     const { withTimeout } = await import('../../utils/withTimeout');
     vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@@ -167,7 +156,7 @@ describe('exa crawler', () => {
     process.env.EXA_API_KEY = 'test-api-key';
 
     const { withTimeout } = await import('../../utils/withTimeout');
-    vi.mocked(withTimeout).mockRejectedValue(new Error('fetch failed'));
+    vi.mocked(withTimeout).mockRejectedValue(new TypeError('fetch failed'));
 
     await expect(exa('https://example.com', { filterOptions: {} })).rejects.toThrow(
       NetworkConnectionError,
@@ -198,42 +187,37 @@ describe('exa crawler', () => {
     );
   });
 
-  it('should return undefined when JSON parsing fails', async () => {
+  it('should throw ResponseBodyParseError when JSON parsing fails', async () => {
     process.env.EXA_API_KEY = 'test-api-key';
 
-    const mockResponse = {
-      ok: true,
+    const mockResponse = createMockResponse('not json', { ok: true });
+    mockResponse.json = vi.fn().mockRejectedValue(new Error('Invalid JSON'));
+    mockResponse.clone.mockReturnValue({
+      ...mockResponse,
       json: vi.fn().mockRejectedValue(new Error('Invalid JSON')),
-    };
+      text: vi.fn().mockResolvedValue('not json'),
+    });
 
     const { withTimeout } = await import('../../utils/withTimeout');
     vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
 
-    const consoleSpy = vi.spyOn(console, 'error').mockImplementation(() => {});
-
-    const result = await exa('https://example.com', { filterOptions: {} });
-
-    expect(result).toBeUndefined();
-    expect(consoleSpy).toHaveBeenCalled();
-
-    consoleSpy.mockRestore();
+    await expect(exa('https://example.com', { filterOptions: {} })).rejects.toThrow(
+      'Exa returned non-JSON response: not json',
+    );
   });
 
   it('should use result URL when available', async () => {
     process.env.EXA_API_KEY = 'test-api-key';
 
-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        results: [
-          {
-            title: 'Test Article',
-            url: 'https://redirected.example.com',
-            text: 'Test content with sufficient length. '.repeat(5),
-          },
-        ],
-      }),
-    };
+    const mockResponse = createMockResponse({
+      results: [
+        {
+          title: 'Test Article',
+          url: 'https://redirected.example.com',
+          text: 'Test content with sufficient length. '.repeat(5),
+        },
+      ],
+    });
 
     const { withTimeout } = await import('../../utils/withTimeout');
     vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@@ -246,18 +230,15 @@ describe('exa crawler', () => {
   it('should fallback to original URL when result URL is missing', async () => {
     process.env.EXA_API_KEY = 'test-api-key';
 
-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        results: [
-          {
-            title: 'Test Article',
-            text: 'Test content with sufficient length. '.repeat(5),
-            // url is missing
-          },
-        ],
-      }),
-    };
+    const mockResponse = createMockResponse({
+      results: [
+        {
+          title: 'Test Article',
+          text: 'Test content with sufficient length. '.repeat(5),
+          // url is missing
+        },
+      ],
+    });
 
     const { withTimeout } = await import('../../utils/withTimeout');
     vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
diff --git a/packages/web-crawler/src/crawImpl/__tests__/firecrawl.test.ts b/packages/web-crawler/src/crawImpl/__tests__/firecrawl.test.ts
index 8cd3d27427..5b8216363e 100644
--- a/packages/web-crawler/src/crawImpl/__tests__/firecrawl.test.ts
+++ b/packages/web-crawler/src/crawImpl/__tests__/firecrawl.test.ts
@@ -1,5 +1,6 @@
 import { beforeEach, describe, expect, it, vi } from 'vitest';
 
+import { createMockResponse } from '../../test-utils';
 import { NetworkConnectionError, PageNotFoundError, TimeoutError } from '../../utils/errorType';
 import { firecrawl } from '../firecrawl';
 
@@ -19,25 +20,23 @@ describe('firecrawl crawler', () => {
   it('should successfully crawl content with API key', async () => {
     process.env.FIRECRAWL_API_KEY = 'test-api-key';
 
-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        success: true,
-        data: {
-          markdown:
-            'This is a test markdown content with enough length to pass validation. '.repeat(3),
-          metadata: {
-            title: 'Test Article',
-            description: 'Test description',
-            sourceURL: 'https://example.com',
-            statusCode: 200,
-            language: 'en',
-            keywords: 'test',
-            robots: 'index',
-          },
+    const mockResponse = createMockResponse({
+      success: true,
+      data: {
+        markdown: 'This is a test markdown content with enough length to pass validation. '.repeat(
+          3,
+        ),
+        metadata: {
+          title: 'Test Article',
+          description: 'Test description',
+          sourceURL: 'https://example.com',
+          statusCode: 200,
+          language: 'en',
+          keywords: 'test',
+          robots: 'index',
         },
-      }),
-    };
+      },
+    });
 
     const { withTimeout } = await import('../../utils/withTimeout');
     vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@@ -55,58 +54,52 @@ describe('firecrawl crawler', () => {
       url: 'https://example.com',
     });
 
-    expect(withTimeout).toHaveBeenCalledWith(expect.any(Promise), 30000);
+    expect(withTimeout).toHaveBeenCalledWith(expect.any(Function), 30000);
   });
 
   it('should handle missing API key', async () => {
-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        success: true,
-        data: {
-          markdown: 'Test content with sufficient length. '.repeat(5),
-          metadata: {
-            title: 'Test',
-            description: 'Test',
-            sourceURL: 'https://example.com',
-            statusCode: 200,
-            language: 'en',
-            keywords: 'test',
-            robots: 'index',
-          },
+    const mockResponse = createMockResponse({
+      success: true,
+      data: {
+        markdown: 'Test content with sufficient length. '.repeat(5),
+        metadata: {
+          title: 'Test',
+          description: 'Test',
+          sourceURL: 'https://example.com',
+          statusCode: 200,
+          language: 'en',
+          keywords: 'test',
+          robots: 'index',
         },
-      }),
-    };
+      },
+    });
 
     const { withTimeout } = await import('../../utils/withTimeout');
     vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
 
     await firecrawl('https://example.com', { filterOptions: {} });
 
-    expect(withTimeout).toHaveBeenCalledWith(expect.any(Promise), 30000);
+    expect(withTimeout).toHaveBeenCalledWith(expect.any(Function), 30000);
   });
 
   it('should return undefined for short content', async () => {
     process.env.FIRECRAWL_API_KEY = 'test-api-key';
 
-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        success: true,
-        data: {
-          markdown: 'Short', // Content too short
-          metadata: {
-            title: 'Test',
-            description: 'Test',
-            sourceURL: 'https://example.com',
-            statusCode: 200,
-            language: 'en',
-            keywords: 'test',
-            robots: 'index',
-          },
+    const mockResponse = createMockResponse({
+      success: true,
+      data: {
+        markdown: 'Short', // Content too short
+        metadata: {
+          title: 'Test',
+          description: 'Test',
+          sourceURL: 'https://example.com',
+          statusCode: 200,
+          language: 'en',
+          keywords: 'test',
+          robots: 'index',
         },
-      }),
-    };
+      },
+    });
 
     const { withTimeout } = await import('../../utils/withTimeout');
     vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@@ -119,24 +112,21 @@ describe('firecrawl crawler', () => {
   it('should return undefined when markdown is missing', async () => {
     process.env.FIRECRAWL_API_KEY = 'test-api-key';
 
-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        success: true,
-        data: {
-          // markdown is missing
-          metadata: {
-            title: 'Test',
-            description: 'Test',
-            sourceURL: 'https://example.com',
-            statusCode: 200,
-            language: 'en',
-            keywords: 'test',
-            robots: 'index',
-          },
+    const mockResponse = createMockResponse({
+      success: true,
+      data: {
+        // markdown is missing
+        metadata: {
+          title: 'Test',
+          description: 'Test',
+          sourceURL: 'https://example.com',
+          statusCode: 200,
+          language: 'en',
+          keywords: 'test',
+          robots: 'index',
         },
-      }),
-    };
+      },
+    });
 
     const { withTimeout } = await import('../../utils/withTimeout');
     vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@@ -149,11 +139,11 @@ describe('firecrawl crawler', () => {
   it('should throw PageNotFoundError for 404 status', async () => {
     process.env.FIRECRAWL_API_KEY = 'test-api-key';
 
-    const mockResponse = {
+    const mockResponse = createMockResponse('Not Found', {
       ok: false,
       status: 404,
       statusText: 'Not Found',
-    };
+    });
 
     const { withTimeout } = await import('../../utils/withTimeout');
     vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@@ -166,11 +156,11 @@ describe('firecrawl crawler', () => {
   it('should throw error for other HTTP errors', async () => {
     process.env.FIRECRAWL_API_KEY = 'test-api-key';
 
-    const mockResponse = {
+    const mockResponse = createMockResponse('', {
       ok: false,
       status: 500,
       statusText: 'Internal Server Error',
-    };
+    });
 
     const { withTimeout } = await import('../../utils/withTimeout');
     vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@@ -184,7 +174,7 @@ describe('firecrawl crawler', () => {
     process.env.FIRECRAWL_API_KEY = 'test-api-key';
 
     const { withTimeout } = await import('../../utils/withTimeout');
-    vi.mocked(withTimeout).mockRejectedValue(new Error('fetch failed'));
+    vi.mocked(withTimeout).mockRejectedValue(new TypeError('fetch failed'));
 
     await expect(firecrawl('https://example.com', { filterOptions: {} })).rejects.toThrow(
       NetworkConnectionError,
@@ -217,54 +207,49 @@ describe('firecrawl crawler', () => {
     );
   });
 
-  it('should return undefined when JSON parsing fails', async () => {
+  it('should throw ResponseBodyParseError when JSON parsing fails', async () => {
     process.env.FIRECRAWL_API_KEY = 'test-api-key';
 
-    const mockResponse = {
-      ok: true,
+    const mockResponse = createMockResponse('not json', { ok: true });
+    mockResponse.json = vi.fn().mockRejectedValue(new Error('Invalid JSON'));
+    mockResponse.clone.mockReturnValue({
+      ...mockResponse,
       json: vi.fn().mockRejectedValue(new Error('Invalid JSON')),
-    };
+      text: vi.fn().mockResolvedValue('not json'),
+    });
 
     const { withTimeout } = await import('../../utils/withTimeout');
     vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
 
-    const consoleSpy = vi.spyOn(console, 'error').mockImplementation(() => {});
-
-    const result = await firecrawl('https://example.com', { filterOptions: {} });
-
-    expect(result).toBeUndefined();
-    expect(consoleSpy).toHaveBeenCalled();
-
-    consoleSpy.mockRestore();
+    await expect(firecrawl('https://example.com', { filterOptions: {} })).rejects.toThrow(
+      'Firecrawl returned non-JSON response: not json',
+    );
   });
 
   it('should handle metadata with all optional fields', async () => {
     process.env.FIRECRAWL_API_KEY = 'test-api-key';
 
-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        success: true,
-        data: {
-          markdown: 'Complete test content with all metadata fields provided. '.repeat(3),
-          metadata: {
-            title: 'Complete Test Article',
-            description: 'Complete test description',
-            keywords: 'test,complete,article',
-            language: 'en',
-            ogDescription: 'OG description',
-            ogImage: 'https://example.com/image.jpg',
-            ogLocaleAlternate: ['en-US', 'fr-FR'],
-            ogSiteName: 'Example Site',
-            ogTitle: 'OG Title',
-            ogUrl: 'https://example.com/og',
-            robots: 'index,follow',
-            statusCode: 200,
-            sourceURL: 'https://example.com',
-          },
+    const mockResponse = createMockResponse({
+      success: true,
+      data: {
+        markdown: 'Complete test content with all metadata fields provided. '.repeat(3),
+        metadata: {
+          title: 'Complete Test Article',
+          description: 'Complete test description',
+          keywords: 'test,complete,article',
+          language: 'en',
+          ogDescription: 'OG description',
+          ogImage: 'https://example.com/image.jpg',
+          ogLocaleAlternate: ['en-US', 'fr-FR'],
+          ogSiteName: 'Example Site',
+          ogTitle: 'OG Title',
+          ogUrl: 'https://example.com/og',
+          robots: 'index,follow',
+          statusCode: 200,
+          sourceURL: 'https://example.com',
         },
-      }),
-    };
+      },
+    });
 
     const { withTimeout } = await import('../../utils/withTimeout');
     vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
diff --git a/packages/web-crawler/src/crawImpl/__tests__/jina.test.ts b/packages/web-crawler/src/crawImpl/__tests__/jina.test.ts
index 6798c2896c..fdf8efe4e4 100644
--- a/packages/web-crawler/src/crawImpl/__tests__/jina.test.ts
+++ b/packages/web-crawler/src/crawImpl/__tests__/jina.test.ts
@@ -1,29 +1,44 @@
 import { beforeEach, describe, expect, it, vi } from 'vitest';
 
+import { createMockResponse } from '../../test-utils';
+import * as withTimeoutModule from '../../utils/withTimeout';
 import { jina } from '../jina';
 
+// Mock withTimeout to just call the factory function directly (bypassing real timeout)
+vi.spyOn(withTimeoutModule, 'withTimeout').mockImplementation((fn) =>
+  fn(new AbortController().signal),
+);
+
 describe('jina crawler', () => {
   const mockFetch = vi.fn();
   global.fetch = mockFetch;
 
   beforeEach(() => {
     vi.resetAllMocks();
+    // Re-apply the withTimeout spy after resetAllMocks
+    vi.spyOn(withTimeoutModule, 'withTimeout').mockImplementation((fn) =>
+      fn(new AbortController().signal),
+    );
   });
 
   it('should crawl url successfully', async () => {
-    const mockResponse = {
-      ok: true,
-      json: () =>
-        Promise.resolve({
-          code: 200,
-          data: {
-            content: 'test content',
-            description: 'test description',
-            siteName: 'test site',
-            title: 'test title',
-          },
-        }),
-    };
+    const testContent =
+      'This is a test content that is long enough to pass the minimum length validation check. '.repeat(
+        2,
+      );
+
+    const mockResponse = createMockResponse(
+      {
+        code: 200,
+        data: {
+          content: testContent,
+          description: 'test description',
+          siteName: 'test site',
+          title: 'test title',
+        },
+      },
+      { ok: true },
+    );
 
     mockFetch.mockResolvedValue(mockResponse);
 
@@ -38,13 +53,14 @@ describe('jina crawler', () => {
         'Authorization': 'Bearer test-key',
         'x-send-from': 'LobeChat Community',
       },
+      signal: expect.any(AbortSignal),
     });
 
     expect(result).toEqual({
-      content: 'test content',
+      content: testContent,
       contentType: 'text',
       description: 'test description',
-      length: 12,
+      length: testContent.length,
       siteName: 'test site',
       title: 'test title',
       url: 'https://example.com',
@@ -54,16 +70,15 @@ describe('jina crawler', () => {
   it('should use JINA_READER_API_KEY from env if apiKey not provided', async () => {
     process.env.JINA_READER_API_KEY = 'env-reader-key';
 
-    const mockResponse = {
-      ok: true,
-      json: () =>
-        Promise.resolve({
-          code: 200,
-          data: {
-            content: 'test content',
-          },
-        }),
-    };
+    const mockResponse = createMockResponse(
+      {
+        code: 200,
+        data: {
+          content: 'test content',
+        },
+      },
+      { ok: true },
+    );
 
     mockFetch.mockResolvedValue(mockResponse);
 
@@ -75,6 +90,7 @@ describe('jina crawler', () => {
         'Authorization': 'Bearer env-reader-key',
         'x-send-from': 'LobeChat Community',
       },
+      signal: expect.any(AbortSignal),
     });
 
     delete process.env.JINA_READER_API_KEY;
@@ -83,16 +99,15 @@ describe('jina crawler', () => {
   it('should use JINA_API_KEY from env if apiKey and JINA_READER_API_KEY not provided', async () => {
     process.env.JINA_API_KEY = 'env-key';
 
-    const mockResponse = {
-      ok: true,
-      json: () =>
-        Promise.resolve({
-          code: 200,
-          data: {
-            content: 'test content',
-          },
-        }),
-    };
+    const mockResponse = createMockResponse(
+      {
+        code: 200,
+        data: {
+          content: 'test content',
+        },
+      },
+      { ok: true },
+    );
 
     mockFetch.mockResolvedValue(mockResponse);
 
@@ -104,22 +119,22 @@ describe('jina crawler', () => {
         'Authorization': 'Bearer env-key',
         'x-send-from': 'LobeChat Community',
       },
+      signal: expect.any(AbortSignal),
     });
 
     delete process.env.JINA_API_KEY;
   });
 
   it('should send empty Authorization header if no api key provided', async () => {
-    const mockResponse = {
-      ok: true,
-      json: () =>
-        Promise.resolve({
-          code: 200,
-          data: {
-            content: 'test content',
-          },
-        }),
-    };
+    const mockResponse = createMockResponse(
+      {
+        code: 200,
+        data: {
+          content: 'test content',
+        },
+      },
+      { ok: true },
+    );
 
     mockFetch.mockResolvedValue(mockResponse);
 
@@ -131,11 +146,14 @@ describe('jina crawler', () => {
         'Authorization': '',
         'x-send-from': 'LobeChat Community',
       },
+      signal: expect.any(AbortSignal),
     });
   });
 
   it('should return undefined if response is not ok', async () => {
-    mockFetch.mockResolvedValue({ ok: false });
+    mockFetch.mockResolvedValue(
+      createMockResponse(null, { ok: false, status: 500, statusText: 'Internal Server Error' }),
+    );
 
     const result = await jina('https://example.com', { filterOptions: {} });
 
@@ -143,14 +161,13 @@ describe('jina crawler', () => {
   });
 
   it('should return undefined if response code is not 200', async () => {
-    const mockResponse = {
-      ok: true,
-      json: () =>
-        Promise.resolve({
-          code: 400,
-          message: 'Bad Request',
-        }),
-    };
+    const mockResponse = createMockResponse(
+      {
+        code: 400,
+        message: 'Bad Request',
+      },
+      { ok: true },
+    );
 
     mockFetch.mockResolvedValue(mockResponse);
 
@@ -159,11 +176,11 @@ describe('jina crawler', () => {
     expect(result).toBeUndefined();
   });
 
-  it('should return undefined if fetch throws error', async () => {
+  it('should throw error if fetch throws non-fetch-failed error', async () => {
     mockFetch.mockRejectedValue(new Error('Network error'));
 
-    const result = await jina('https://example.com', { filterOptions: {} });
-
-    expect(result).toBeUndefined();
+    await expect(jina('https://example.com', { filterOptions: {} })).rejects.toThrow(
+      'Network error',
+    );
   });
 });
diff --git a/packages/web-crawler/src/crawImpl/__tests__/naive.test.ts b/packages/web-crawler/src/crawImpl/__tests__/naive.test.ts
index 1848c95b9d..464a5653de 100644
--- a/packages/web-crawler/src/crawImpl/__tests__/naive.test.ts
+++ b/packages/web-crawler/src/crawImpl/__tests__/naive.test.ts
@@ -22,9 +22,10 @@ describe('naive crawler', () => {
     vi.clearAllMocks();
   });
 
-  it('should return undefined for normal pages (due to cloudflare logic)', async () => {
+  it('should return content for normal pages', async () => {
     const mockResponse = {
       status: 200,
+      ok: true,
       headers: new Map([['content-type', 'text/html']]),
       text: vi.fn().mockResolvedValue('<html><body>Test content</body></html>'),
     };
@@ -34,8 +35,8 @@ describe('naive crawler', () => {
 
     const { htmlToMarkdown } = await import('../../utils/htmlToMarkdown');
     vi.mocked(htmlToMarkdown).mockReturnValue({
-      content: 'Test content'.padEnd(101, ' '), // Ensure length > 100
-      title: 'Normal Page Title', // Not "Just a moment..." so it returns undefined
+      content: 'Test content'.padEnd(101, ' '),
+      title: 'Normal Page Title',
       description: 'Test description',
       siteName: 'Test Site',
       length: 101,
@@ -43,13 +44,22 @@ describe('naive crawler', () => {
 
     const result = await naive('https://example.com', { filterOptions: {} });
 
-    expect(result).toBeUndefined();
+    expect(result).toEqual({
+      content: 'Test content'.padEnd(101, ' '),
+      contentType: 'text',
+      description: 'Test description',
+      length: 101,
+      siteName: 'Test Site',
+      title: 'Normal Page Title',
+      url: 'https://example.com',
+    });
   });
 
   it('should successfully crawl JSON content', async () => {
     const mockJsonData = { message: 'Hello world', data: [1, 2, 3] };
     const mockResponse = {
       status: 200,
+      ok: true,
       headers: new Map([['content-type', 'application/json']]),
       clone: () => ({
         json: vi.fn().mockResolvedValue(mockJsonData),
@@ -74,6 +84,7 @@ describe('naive crawler', () => {
     const mockText = '{"invalid": json}';
     const mockResponse = {
       status: 200,
+      ok: true,
       headers: new Map([['content-type', 'application/json']]),
       clone: () => ({
         json: vi.fn().mockRejectedValue(new Error('Invalid JSON')),
@@ -97,6 +108,7 @@ describe('naive crawler', () => {
   it('should return undefined for short content', async () => {
     const mockResponse = {
       status: 200,
+      ok: true,
       headers: new Map([['content-type', 'text/html']]),
       text: vi.fn().mockResolvedValue('<html><body>Short</body></html>'),
     };
@@ -116,9 +128,10 @@ describe('naive crawler', () => {
     expect(result).toBeUndefined();
   });
 
-  it('should return content when NOT blocked by Cloudflare', async () => {
+  it('should return undefined when blocked by Cloudflare', async () => {
     const mockResponse = {
       status: 200,
+      ok: true,
       headers: new Map([['content-type', 'text/html']]),
       text: vi.fn().mockResolvedValue('<html><body>Normal content</body></html>'),
     };
@@ -129,7 +142,7 @@ describe('naive crawler', () => {
     const { htmlToMarkdown } = await import('../../utils/htmlToMarkdown');
     vi.mocked(htmlToMarkdown).mockReturnValue({
       content: 'Test content'.padEnd(101, ' '),
-      title: 'Just a moment...', // Cloudflare blocking page - this will cause return
+      title: 'Just a moment...', // Cloudflare blocking page
       description: 'Test description',
       siteName: 'Test Site',
       length: 101,
@@ -137,15 +150,21 @@ describe('naive crawler', () => {
 
     const result = await naive('https://example.com', { filterOptions: {} });
 
-    expect(result).toEqual({
-      content: 'Test content'.padEnd(101, ' '),
-      contentType: 'text',
-      description: 'Test description',
-      length: 101,
-      siteName: 'Test Site',
-      title: 'Just a moment...',
-      url: 'https://example.com',
-    });
+    expect(result).toBeUndefined();
+  });
+
+  it('should throw error for non-ok status codes', async () => {
+    const mockResponse = {
+      status: 500,
+      ok: false,
+      statusText: 'Internal Server Error',
+      text: vi.fn().mockResolvedValue('Server Error'),
+    };
+
+    const { withTimeout } = await import('../../utils/withTimeout');
+    vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
+
+    await expect(naive('https://example.com', { filterOptions: {} })).rejects.toThrow(/500/);
   });
 
   it('should throw PageNotFoundError for 404 status', async () => {
@@ -164,7 +183,7 @@ describe('naive crawler', () => {
 
   it('should throw NetworkConnectionError for fetch failures', async () => {
     const { withTimeout } = await import('../../utils/withTimeout');
-    vi.mocked(withTimeout).mockRejectedValue(new Error('fetch failed'));
+    vi.mocked(withTimeout).mockRejectedValue(new TypeError('fetch failed'));
 
     await expect(naive('https://example.com', { filterOptions: {} })).rejects.toThrow(
       NetworkConnectionError,
@@ -194,6 +213,7 @@ describe('naive crawler', () => {
   it('should return undefined when HTML processing fails', async () => {
     const mockResponse = {
       status: 200,
+      ok: true,
       headers: new Map([['content-type', 'text/html']]),
       text: vi.fn().mockRejectedValue(new Error('Failed to read text')),
     };
@@ -209,6 +229,7 @@ describe('naive crawler', () => {
   it('should pass filter options to htmlToMarkdown', async () => {
     const mockResponse = {
       status: 200,
+      ok: true,
       headers: new Map([['content-type', 'text/html']]),
       text: vi.fn().mockResolvedValue('<html><body>Test content</body></html>'),
     };
diff --git a/packages/web-crawler/src/crawImpl/__tests__/search1api.test.ts b/packages/web-crawler/src/crawImpl/__tests__/search1api.test.ts
index 7c22728154..2e2981f71c 100644
--- a/packages/web-crawler/src/crawImpl/__tests__/search1api.test.ts
+++ b/packages/web-crawler/src/crawImpl/__tests__/search1api.test.ts
@@ -1,5 +1,6 @@
 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
 
+import { createMockResponse } from '../../test-utils';
 import { NetworkConnectionError, PageNotFoundError, TimeoutError } from '../../utils/errorType';
 import * as withTimeoutModule from '../../utils/withTimeout';
 import { search1api } from '../search1api';
@@ -17,8 +18,10 @@ describe('search1api crawler', () => {
     originalEnv = { ...process.env };
     process.env.SEARCH1API_API_KEY = 'test-api-key';
 
-    // Mock withTimeout to directly return the promise
-    vi.spyOn(withTimeoutModule, 'withTimeout').mockImplementation((promise) => promise);
+    // Mock withTimeout to call the factory function directly (bypassing real timeout)
+    vi.spyOn(withTimeoutModule, 'withTimeout').mockImplementation((fn) =>
+      fn(new AbortController().signal),
+    );
   });
 
   afterEach(() => {
@@ -26,7 +29,7 @@ describe('search1api crawler', () => {
   });
 
   it('should throw NetworkConnectionError when fetch fails', async () => {
-    mockFetch.mockRejectedValue(new Error('fetch failed'));
+    mockFetch.mockRejectedValue(new TypeError('fetch failed'));
 
     await expect(search1api('https://example.com', { filterOptions: {} })).rejects.toThrow(
       NetworkConnectionError,
@@ -48,11 +51,13 @@ describe('search1api crawler', () => {
   });
 
   it('should throw PageNotFoundError when status is 404', async () => {
-    mockFetch.mockResolvedValue({
-      ok: false,
-      status: 404,
-      statusText: 'Not Found',
-    });
+    mockFetch.mockResolvedValue(
+      createMockResponse('Not Found', {
+        ok: false,
+        status: 404,
+        statusText: 'Not Found',
+      }),
+    );
 
     await expect(search1api('https://example.com', { filterOptions: {} })).rejects.toThrow(
       PageNotFoundError,
@@ -60,11 +65,13 @@ describe('search1api crawler', () => {
   });
 
   it('should throw error for other failed responses', async () => {
-    mockFetch.mockResolvedValue({
-      ok: false,
-      status: 500,
-      statusText: 'Internal Server Error',
-    });
+    mockFetch.mockResolvedValue(
+      createMockResponse('', {
+        ok: false,
+        status: 500,
+        statusText: 'Internal Server Error',
+      }),
+    );
 
     await expect(search1api('https://example.com', { filterOptions: {} })).rejects.toThrow(
       'Search1API request failed with status 500: Internal Server Error',
@@ -72,18 +79,19 @@ describe('search1api crawler', () => {
   });
 
   it('should return undefined when content is too short', async () => {
-    mockFetch.mockResolvedValue({
-      ok: true,
-      json: () =>
-        Promise.resolve({
+    mockFetch.mockResolvedValue(
+      createMockResponse(
+        {
           crawlParameters: { url: 'https://example.com' },
           results: {
             title: 'Test Title',
             link: 'https://example.com',
             content: 'Short', // Less than 100 characters
           },
-        }),
-    });
+        },
+        { ok: true },
+      ),
+    );
 
     const result = await search1api('https://example.com', { filterOptions: {} });
     expect(result).toBeUndefined();
@@ -92,18 +100,19 @@ describe('search1api crawler', () => {
   it('should return crawl result on successful fetch', async () => {
     const mockContent = 'This is a test content that is longer than 100 characters. '.repeat(3);
 
-    mockFetch.mockResolvedValue({
-      ok: true,
-      json: () =>
-        Promise.resolve({
+    mockFetch.mockResolvedValue(
+      createMockResponse(
+        {
           crawlParameters: { url: 'https://example.com' },
           results: {
             title: 'Test Title',
             link: 'https://example.com',
             content: mockContent,
           },
-        }),
-    });
+        },
+        { ok: true },
+      ),
+    );
 
     const result = await search1api('https://example.com', { filterOptions: {} });
 
@@ -116,6 +125,7 @@ describe('search1api crawler', () => {
       body: JSON.stringify({
         url: 'https://example.com',
       }),
+      signal: expect.any(AbortSignal),
     });
 
     expect(result).toEqual({
@@ -130,12 +140,17 @@ describe('search1api crawler', () => {
   });
 
   it('should handle JSON parse errors', async () => {
-    mockFetch.mockResolvedValue({
-      ok: true,
-      json: () => Promise.reject(new Error('Invalid JSON')),
-    });
+    // Build a response whose json() rejects, simulating a non-JSON body
+    const response = createMockResponse('invalid json', { ok: true });
+    response.json = () => Promise.reject(new Error('Invalid JSON'));
+    // clone should also return a response whose text() works for error reporting
+    response.clone = () => {
+      const cloned = createMockResponse('invalid json', { ok: true });
+      cloned.json = () => Promise.reject(new Error('Invalid JSON'));
+      return cloned;
+    };
+    mockFetch.mockResolvedValue(response);
 
-    const result = await search1api('https://example.com', { filterOptions: {} });
-    expect(result).toBeUndefined();
+    await expect(search1api('https://example.com', { filterOptions: {} })).rejects.toThrow();
   });
 });
diff --git a/packages/web-crawler/src/crawImpl/__tests__/tavily.test.ts b/packages/web-crawler/src/crawImpl/__tests__/tavily.test.ts
index 3be81eab47..d8fee80e80 100644
--- a/packages/web-crawler/src/crawImpl/__tests__/tavily.test.ts
+++ b/packages/web-crawler/src/crawImpl/__tests__/tavily.test.ts
@@ -1,5 +1,6 @@
 import { beforeEach, describe, expect, it, vi } from 'vitest';
 
+import { createMockResponse } from '../../test-utils';
 import { NetworkConnectionError, PageNotFoundError, TimeoutError } from '../../utils/errorType';
 import { tavily } from '../tavily';
 
@@ -19,21 +20,18 @@ describe('tavily crawler', () => {
   it('should successfully crawl content with API key', async () => {
     process.env.TAVILY_API_KEY = 'test-api-key';
 
-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        base_url: 'https://api.tavily.com',
-        response_time: 1.5,
-        results: [
-          {
-            url: 'https://example.com',
-            raw_content:
-              'This is a test raw content with sufficient length to pass validation. '.repeat(3),
-            images: ['https://example.com/image1.jpg', 'https://example.com/image2.jpg'],
-          },
-        ],
-      }),
-    };
+    const mockResponse = createMockResponse({
+      base_url: 'https://api.tavily.com',
+      response_time: 1.5,
+      results: [
+        {
+          url: 'https://example.com',
+          raw_content:
+            'This is a test raw content with sufficient length to pass validation. '.repeat(3),
+          images: ['https://example.com/image1.jpg', 'https://example.com/image2.jpg'],
+        },
+      ],
+    });
 
     const { withTimeout } = await import('../../utils/withTimeout');
     vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@@ -50,69 +48,60 @@ describe('tavily crawler', () => {
       url: 'https://example.com',
     });
 
-    expect(withTimeout).toHaveBeenCalledWith(expect.any(Promise), 30000);
+    expect(withTimeout).toHaveBeenCalledWith(expect.any(Function), 30000);
   });
 
   it('should use custom extract depth when provided', async () => {
     process.env.TAVILY_API_KEY = 'test-api-key';
     process.env.TAVILY_EXTRACT_DEPTH = 'advanced';
 
-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        base_url: 'https://api.tavily.com',
-        response_time: 2.1,
-        results: [
-          {
-            url: 'https://example.com',
-            raw_content: 'Advanced extraction content with more details. '.repeat(5),
-          },
-        ],
-      }),
-    };
+    const mockResponse = createMockResponse({
+      base_url: 'https://api.tavily.com',
+      response_time: 2.1,
+      results: [
+        {
+          url: 'https://example.com',
+          raw_content: 'Advanced extraction content with more details. '.repeat(5),
+        },
+      ],
+    });
 
     const { withTimeout } = await import('../../utils/withTimeout');
     vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
 
     await tavily('https://example.com', { filterOptions: {} });
 
-    expect(withTimeout).toHaveBeenCalledWith(expect.any(Promise), 30000);
+    expect(withTimeout).toHaveBeenCalledWith(expect.any(Function), 30000);
   });
 
   it('should handle missing API key', async () => {
-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        base_url: 'https://api.tavily.com',
-        response_time: 1.2,
-        results: [
-          {
-            url: 'https://example.com',
-            raw_content: 'Test content with sufficient length. '.repeat(5),
-          },
-        ],
-      }),
-    };
+    const mockResponse = createMockResponse({
+      base_url: 'https://api.tavily.com',
+      response_time: 1.2,
+      results: [
+        {
+          url: 'https://example.com',
+          raw_content: 'Test content with sufficient length. '.repeat(5),
+        },
+      ],
+    });
 
     const { withTimeout } = await import('../../utils/withTimeout');
     vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
 
     await tavily('https://example.com', { filterOptions: {} });
 
-    expect(withTimeout).toHaveBeenCalledWith(expect.any(Promise), 30000);
+    expect(withTimeout).toHaveBeenCalledWith(expect.any(Function), 30000);
   });
 
   it('should return undefined when no results are returned', async () => {
     process.env.TAVILY_API_KEY = 'test-api-key';
 
-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        base_url: 'https://api.tavily.com',
-        response_time: 0.8,
-        results: [],
-      }),
-    };
+    const mockResponse = createMockResponse({
+      base_url: 'https://api.tavily.com',
+      response_time: 0.8,
+      results: [],
+    });
 
     const { withTimeout } = await import('../../utils/withTimeout');
     vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@@ -133,19 +122,16 @@ describe('tavily crawler', () => {
   it('should return undefined for short content', async () => {
     process.env.TAVILY_API_KEY = 'test-api-key';
 
-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        base_url: 'https://api.tavily.com',
-        response_time: 1.1,
-        results: [
-          {
-            url: 'https://example.com',
-            raw_content: 'Short', // Content too short
-          },
-        ],
-      }),
-    };
+    const mockResponse = createMockResponse({
+      base_url: 'https://api.tavily.com',
+      response_time: 1.1,
+      results: [
+        {
+          url: 'https://example.com',
+          raw_content: 'Short', // Content too short
+        },
+      ],
+    });
 
     const { withTimeout } = await import('../../utils/withTimeout');
     vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@@ -158,20 +144,17 @@ describe('tavily crawler', () => {
   it('should return undefined when raw_content is missing', async () => {
     process.env.TAVILY_API_KEY = 'test-api-key';
 
-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        base_url: 'https://api.tavily.com',
-        response_time: 1,
-        results: [
-          {
-            url: 'https://example.com',
-            // raw_content is missing
-            images: ['https://example.com/image.jpg'],
-          },
-        ],
-      }),
-    };
+    const mockResponse = createMockResponse({
+      base_url: 'https://api.tavily.com',
+      response_time: 1,
+      results: [
+        {
+          url: 'https://example.com',
+          // raw_content is missing
+          images: ['https://example.com/image.jpg'],
+        },
+      ],
+    });
 
     const { withTimeout } = await import('../../utils/withTimeout');
     vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@@ -184,11 +167,11 @@ describe('tavily crawler', () => {
   it('should throw PageNotFoundError for 404 status', async () => {
     process.env.TAVILY_API_KEY = 'test-api-key';
 
-    const mockResponse = {
+    const mockResponse = createMockResponse('Not Found', {
       ok: false,
       status: 404,
       statusText: 'Not Found',
-    };
+    });
 
     const { withTimeout } = await import('../../utils/withTimeout');
     vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@@ -201,11 +184,11 @@ describe('tavily crawler', () => {
   it('should throw error for other HTTP errors', async () => {
     process.env.TAVILY_API_KEY = 'test-api-key';
 
-    const mockResponse = {
+    const mockResponse = createMockResponse('', {
       ok: false,
       status: 500,
       statusText: 'Internal Server Error',
-    };
+    });
 
     const { withTimeout } = await import('../../utils/withTimeout');
     vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@@ -219,7 +202,7 @@ describe('tavily crawler', () => {
     process.env.TAVILY_API_KEY = 'test-api-key';
 
     const { withTimeout } = await import('../../utils/withTimeout');
-    vi.mocked(withTimeout).mockRejectedValue(new Error('fetch failed'));
+    vi.mocked(withTimeout).mockRejectedValue(new TypeError('fetch failed'));
 
     await expect(tavily('https://example.com', { filterOptions: {} })).rejects.toThrow(
       NetworkConnectionError,
@@ -252,43 +235,38 @@ describe('tavily crawler', () => {
     );
   });
 
-  it('should return undefined when JSON parsing fails', async () => {
+  it('should throw ResponseBodyParseError when JSON parsing fails', async () => {
     process.env.TAVILY_API_KEY = 'test-api-key';
 
-    const mockResponse = {
-      ok: true,
+    const mockResponse = createMockResponse('not json', { ok: true });
+    mockResponse.json = vi.fn().mockRejectedValue(new Error('Invalid JSON'));
+    mockResponse.clone.mockReturnValue({
+      ...mockResponse,
       json: vi.fn().mockRejectedValue(new Error('Invalid JSON')),
-    };
+      text: vi.fn().mockResolvedValue('not json'),
+    });
 
     const { withTimeout } = await import('../../utils/withTimeout');
     vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
 
-    const consoleSpy = vi.spyOn(console, 'error').mockImplementation(() => {});
-
-    const result = await tavily('https://example.com', { filterOptions: {} });
-
-    expect(result).toBeUndefined();
-    expect(consoleSpy).toHaveBeenCalled();
-
-    consoleSpy.mockRestore();
+    await expect(tavily('https://example.com', { filterOptions: {} })).rejects.toThrow(
+      'Tavily returned non-JSON response: not json',
+    );
   });
 
   it('should use result URL when available', async () => {
     process.env.TAVILY_API_KEY = 'test-api-key';
 
-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        base_url: 'https://api.tavily.com',
-        response_time: 1.3,
-        results: [
-          {
-            url: 'https://redirected.example.com',
-            raw_content: 'Test content with sufficient length. '.repeat(5),
-          },
-        ],
-      }),
-    };
+    const mockResponse = createMockResponse({
+      base_url: 'https://api.tavily.com',
+      response_time: 1.3,
+      results: [
+        {
+          url: 'https://redirected.example.com',
+          raw_content: 'Test content with sufficient length. '.repeat(5),
+        },
+      ],
+    });
 
     const { withTimeout } = await import('../../utils/withTimeout');
     vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@@ -301,19 +279,16 @@ describe('tavily crawler', () => {
   it('should fallback to original URL when result URL is missing', async () => {
     process.env.TAVILY_API_KEY = 'test-api-key';
 
-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        base_url: 'https://api.tavily.com',
-        response_time: 1.4,
-        results: [
-          {
-            raw_content: 'Test content with sufficient length. '.repeat(5),
-            // url is missing
-          },
-        ],
-      }),
-    };
+    const mockResponse = createMockResponse({
+      base_url: 'https://api.tavily.com',
+      response_time: 1.4,
+      results: [
+        {
+          raw_content: 'Test content with sufficient length. '.repeat(5),
+          // url is missing
+        },
+      ],
+    });
 
     const { withTimeout } = await import('../../utils/withTimeout');
     vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@@ -326,20 +301,17 @@ describe('tavily crawler', () => {
   it('should handle failed results in response', async () => {
     process.env.TAVILY_API_KEY = 'test-api-key';
 
-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        base_url: 'https://api.tavily.com',
-        response_time: 1.6,
-        results: [],
-        failed_results: [
-          {
-            url: 'https://example.com',
-            error: 'Page not accessible',
-          },
-        ],
-      }),
-    };
+    const mockResponse = createMockResponse({
+      base_url: 'https://api.tavily.com',
+      response_time: 1.6,
+      results: [],
+      failed_results: [
+        {
+          url: 'https://example.com',
+          error: 'Page not accessible',
+        },
+      ],
+    });
 
     const { withTimeout } = await import('../../utils/withTimeout');
     vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
diff --git a/packages/web-crawler/src/crawImpl/browserless.ts b/packages/web-crawler/src/crawImpl/browserless.ts
index b07f11c6d1..44f4464983 100644
--- a/packages/web-crawler/src/crawImpl/browserless.ts
+++ b/packages/web-crawler/src/crawImpl/browserless.ts
@@ -2,7 +2,10 @@ import qs from 'query-string';
 import urlJoin from 'url-join';
 
 import type { CrawlImpl, CrawlSuccessResult } from '../type';
+import { PageNotFoundError, toFetchError } from '../utils/errorType';
 import { htmlToMarkdown } from '../utils/htmlToMarkdown';
+import { createHTTPStatusError } from '../utils/response';
+import { DEFAULT_TIMEOUT, withTimeout } from '../utils/withTimeout';
 
 const BASE_URL = process.env.BROWSERLESS_URL ?? 'https://chrome.browserless.io';
 // Allowed file types: html, css, js, json, xml, webmanifest, txt, md
@@ -31,46 +34,62 @@ export const browserless: CrawlImpl = async (url, { filterOptions }) => {
     url,
   };
 
+  let res: Response;
+
   try {
-    const res = await fetch(
-      qs.stringifyUrl({
-        query: {
-          blockAds: BROWSERLESS_BLOCK_ADS,
-          launch: JSON.stringify({ stealth: BROWSERLESS_STEALTH_MODE }),
-          token: BROWSERLESS_TOKEN,
-        },
-        url: urlJoin(BASE_URL, '/content'),
-      }),
-      {
-        body: JSON.stringify(input),
-        headers: {
-          'Content-Type': 'application/json',
-        },
-        method: 'POST',
-      },
+    res = await withTimeout(
+      (signal) =>
+        fetch(
+          qs.stringifyUrl({
+            query: {
+              blockAds: BROWSERLESS_BLOCK_ADS,
+              launch: JSON.stringify({ stealth: BROWSERLESS_STEALTH_MODE }),
+              token: BROWSERLESS_TOKEN,
+            },
+            url: urlJoin(BASE_URL, '/content'),
+          }),
+          {
+            body: JSON.stringify(input),
+            headers: {
+              'Content-Type': 'application/json',
+            },
+            method: 'POST',
+            signal,
+          },
+        ),
+      DEFAULT_TIMEOUT,
     );
-    const html = await res.text();
+  } catch (e) {
+    throw toFetchError(e);
+  }
 
-    const result = htmlToMarkdown(html, { filterOptions, url });
-
-    if (
-      !!result.content &&
-      result.title &&
-      // "Just a moment..." indicates being blocked by CloudFlare
-      result.title.trim() !== 'Just a moment...'
-    ) {
-      return {
-        content: result.content,
-        contentType: 'text',
-        description: result?.description,
-        length: result.length,
-        siteName: result?.siteName,
-        title: result?.title,
-        url,
-      } satisfies CrawlSuccessResult;
+  if (!res.ok) {
+    if (res.status === 404) {
+      throw new PageNotFoundError(res.statusText);
     }
-  } catch (error) {
-    console.error(error);
+
+    throw await createHTTPStatusError(res, 'Browserless');
+  }
+
+  const html = await res.text();
+  const result = htmlToMarkdown(html, { filterOptions, url });
+
+  if (
+    !!result.content &&
+    result.content.length > 100 &&
+    result.title &&
+    // "Just a moment..." indicates being blocked by CloudFlare
+    result.title.trim() !== 'Just a moment...'
+  ) {
+    return {
+      content: result.content,
+      contentType: 'text',
+      description: result?.description,
+      length: result.length,
+      siteName: result?.siteName,
+      title: result?.title,
+      url,
+    } satisfies CrawlSuccessResult;
   }
 
   return;
diff --git a/packages/web-crawler/src/crawImpl/exa.ts b/packages/web-crawler/src/crawImpl/exa.ts
index dd4e031b85..f1e15a770b 100644
--- a/packages/web-crawler/src/crawImpl/exa.ts
+++ b/packages/web-crawler/src/crawImpl/exa.ts
@@ -1,5 +1,6 @@
 import type { CrawlImpl, CrawlSuccessResult } from '../type';
-import { NetworkConnectionError, PageNotFoundError, TimeoutError } from '../utils/errorType';
+import { PageNotFoundError, toFetchError } from '../utils/errorType';
+import { createHTTPStatusError, parseJSONResponse } from '../utils/response';
 import { DEFAULT_TIMEOUT, withTimeout } from '../utils/withTimeout';
 
 interface ExaResults {
@@ -27,31 +28,24 @@ export const exa: CrawlImpl = async (url) => {
 
   try {
     res = await withTimeout(
-      fetch('https://api.exa.ai/contents', {
-        body: JSON.stringify({
-          livecrawl: 'fallback', // always, fallback
-          text: true,
-          urls: [url],
+      (signal) =>
+        fetch('https://api.exa.ai/contents', {
+          body: JSON.stringify({
+            livecrawl: 'fallback', // always, fallback
+            text: true,
+            urls: [url],
+          }),
+          headers: {
+            'Content-Type': 'application/json',
+            'x-api-key': !apiKey ? '' : apiKey,
+          },
+          method: 'POST',
+          signal,
         }),
-        headers: {
-          'Content-Type': 'application/json',
-          'x-api-key': !apiKey ? '' : apiKey,
-        },
-        method: 'POST',
-      }),
       DEFAULT_TIMEOUT,
     );
   } catch (e) {
-    const error = e as Error;
-    if (error.message === 'fetch failed') {
-      throw new NetworkConnectionError();
-    }
-
-    if (error instanceof TimeoutError) {
-      throw error;
-    }
-
-    throw e;
+    throw toFetchError(e);
   }
 
   if (!res.ok) {
@@ -59,35 +53,29 @@ export const exa: CrawlImpl = async (url) => {
       throw new PageNotFoundError(res.statusText);
     }
 
-    throw new Error(`Exa request failed with status ${res.status}: ${res.statusText}`);
+    throw await createHTTPStatusError(res, 'Exa');
   }
 
-  try {
-    const data = (await res.json()) as ExaResponse;
+  const data = await parseJSONResponse<ExaResponse>(res, 'Exa');
 
-    if (!data.results || data.results.length === 0) {
-      console.warn('Exa API returned no results for URL:', url);
-      return;
-    }
-
-    const firstResult = data.results[0];
-
-    // Check if content is empty or too short
-    if (!firstResult.text || firstResult.text.length < 100) {
-      return;
-    }
-
-    return {
-      content: firstResult.text,
-      contentType: 'text',
-      length: firstResult.text.length,
-      siteName: new URL(url).hostname,
-      title: firstResult.title,
-      url: firstResult.url || url,
-    } satisfies CrawlSuccessResult;
-  } catch (error) {
-    console.error(error);
+  if (!data.results || data.results.length === 0) {
+    console.warn('Exa API returned no results for URL:', url);
+    return;
   }
 
-  return;
+  const firstResult = data.results[0];
+
+  // Check if content is empty or too short
+  if (!firstResult.text || firstResult.text.length < 100) {
+    return;
+  }
+
+  return {
+    content: firstResult.text,
+    contentType: 'text',
+    length: firstResult.text.length,
+    siteName: new URL(url).hostname,
+    title: firstResult.title,
+    url: firstResult.url || url,
+  } satisfies CrawlSuccessResult;
 };
diff --git a/packages/web-crawler/src/crawImpl/firecrawl.ts b/packages/web-crawler/src/crawImpl/firecrawl.ts
index e63e1b7338..74d9902be1 100644
--- a/packages/web-crawler/src/crawImpl/firecrawl.ts
+++ b/packages/web-crawler/src/crawImpl/firecrawl.ts
@@ -1,5 +1,6 @@
 import type { CrawlImpl, CrawlSuccessResult } from '../type';
-import { NetworkConnectionError, PageNotFoundError, TimeoutError } from '../utils/errorType';
+import { PageNotFoundError, toFetchError } from '../utils/errorType';
+import { createHTTPStatusError, parseJSONResponse } from '../utils/response';
 import { DEFAULT_TIMEOUT, withTimeout } from '../utils/withTimeout';
 
 interface FirecrawlMetadata {
@@ -57,30 +58,23 @@ export const firecrawl: CrawlImpl = async (url) => {
 
   try {
     res = await withTimeout(
-      fetch(`${baseUrl}/scrape`, {
-        body: JSON.stringify({
-          formats: ['markdown'], // ["markdown", "html"]
-          url,
+      (signal) =>
+        fetch(`${baseUrl}/scrape`, {
+          body: JSON.stringify({
+            formats: ['markdown'], // ["markdown", "html"]
+            url,
+          }),
+          headers: {
+            'Authorization': !apiKey ? '' : `Bearer ${apiKey}`,
+            'Content-Type': 'application/json',
+          },
+          method: 'POST',
+          signal,
         }),
-        headers: {
-          'Authorization': !apiKey ? '' : `Bearer ${apiKey}`,
-          'Content-Type': 'application/json',
-        },
-        method: 'POST',
-      }),
       DEFAULT_TIMEOUT,
     );
   } catch (e) {
-    const error = e as Error;
-    if (error.message === 'fetch failed') {
-      throw new NetworkConnectionError();
-    }
-
-    if (error instanceof TimeoutError) {
-      throw error;
-    }
-
-    throw e;
+    throw toFetchError(e);
   }
 
   if (!res.ok) {
@@ -88,37 +82,34 @@ export const firecrawl: CrawlImpl = async (url) => {
       throw new PageNotFoundError(res.statusText);
     }
 
-    throw new Error(`Firecrawl request failed with status ${res.status}: ${res.statusText}`);
+    throw await createHTTPStatusError(res, 'Firecrawl');
   }
 
-  try {
-    const data = (await res.json()) as FirecrawlResponse;
-
-    if (data.data.warning) {
-      console.warn('[Firecrawl] Warning:', data.data.warning);
-    }
-
-    if (data.data.metadata.error) {
-      console.error('[Firecrawl] Metadata error:', data.data.metadata.error);
-    }
-
-    // Check if content is empty or too short
-    if (!data.data.markdown || data.data.markdown.length < 100) {
-      return;
-    }
-
-    return {
-      content: data.data.markdown,
-      contentType: 'text',
-      description: data.data.metadata.description || '',
-      length: data.data.markdown.length,
-      siteName: new URL(url).hostname,
-      title: data.data.metadata.title || '',
-      url: url,
-    } satisfies CrawlSuccessResult;
-  } catch (error) {
-    console.error('[Firecrawl] Parse error:', error);
+  const data = await parseJSONResponse<FirecrawlResponse>(res, 'Firecrawl');
+  if (!data.data) {
+    throw new Error('Firecrawl response missing data field');
   }
 
-  return;
+  if (data.data.warning) {
+    console.warn('[Firecrawl] Warning:', data.data.warning);
+  }
+
+  if (data.data.metadata.error) {
+    console.error('[Firecrawl] Metadata error:', data.data.metadata.error);
+  }
+
+  // Check if content is empty or too short
+  if (!data.data.markdown || data.data.markdown.length < 100) {
+    return;
+  }
+
+  return {
+    content: data.data.markdown,
+    contentType: 'text',
+    description: data.data.metadata.description || '',
+    length: data.data.markdown.length,
+    siteName: new URL(url).hostname,
+    title: data.data.metadata.title || '',
+    url,
+  } satisfies CrawlSuccessResult;
 };
diff --git a/packages/web-crawler/src/crawImpl/jina.ts b/packages/web-crawler/src/crawImpl/jina.ts
index d4fe9ba7b8..ccdb9998ef 100644
--- a/packages/web-crawler/src/crawImpl/jina.ts
+++ b/packages/web-crawler/src/crawImpl/jina.ts
@@ -1,37 +1,59 @@
 import type { CrawlImpl } from '../type';
+import { toFetchError } from '../utils/errorType';
+import { parseJSONResponse } from '../utils/response';
+import { DEFAULT_TIMEOUT, withTimeout } from '../utils/withTimeout';
 
 export const jina: CrawlImpl<{ apiKey?: string }> = async (url, params) => {
   const token = params.apiKey ?? process.env.JINA_READER_API_KEY ?? process.env.JINA_API_KEY;
+  let res: Response;
 
   try {
-    const res = await fetch(`https://r.jina.ai/${url}`, {
-      headers: {
-        'Accept': 'application/json',
-        'Authorization': token ? `Bearer ${token}` : '',
-        'x-send-from': 'LobeChat Community',
-      },
-    });
-
-    if (res.ok) {
-      const json = await res.json();
-      if (json.code === 200) {
-        const result = json.data;
-        return {
-          content: result.content,
-          contentType: 'text',
-          description: result?.description,
-          length: result.content.length,
-          siteName: result?.siteName,
-          title: result?.title,
-          url: url,
-        };
-      }
-
-      throw json;
-    }
-  } catch (error) {
-    console.error(error);
+    res = await withTimeout(
+      (signal) =>
+        fetch(`https://r.jina.ai/${url}`, {
+          headers: {
+            'Accept': 'application/json',
+            'Authorization': token ? `Bearer ${token}` : '',
+            'x-send-from': 'LobeChat Community',
+          },
+          signal,
+        }),
+      DEFAULT_TIMEOUT,
+    );
+  } catch (e) {
+    throw toFetchError(e);
   }
 
-  return;
+  if (!res.ok) {
+    return;
+  }
+
+  const json = await parseJSONResponse<{
+    code: number;
+    data: {
+      content: string;
+      description?: string;
+      siteName?: string;
+      title?: string;
+    };
+  }>(res, 'Jina');
+
+  if (json.code !== 200) {
+    return;
+  }
+
+  const result = json.data;
+  if (!result?.content || result.content.length < 100) {
+    return;
+  }
+
+  return {
+    content: result.content,
+    contentType: 'text',
+    description: result?.description,
+    length: result.content.length,
+    siteName: result?.siteName,
+    title: result?.title,
+    url,
+  };
 };
diff --git a/packages/web-crawler/src/crawImpl/naive.ts b/packages/web-crawler/src/crawImpl/naive.ts
index f0af72f5ba..05f780b06e 100755
--- a/packages/web-crawler/src/crawImpl/naive.ts
+++ b/packages/web-crawler/src/crawImpl/naive.ts
@@ -1,8 +1,9 @@
 import { ssrfSafeFetch } from '@lobechat/ssrf-safe-fetch';
 
 import type { CrawlImpl, CrawlSuccessResult } from '../type';
-import { NetworkConnectionError, PageNotFoundError, TimeoutError } from '../utils/errorType';
+import { PageNotFoundError, toFetchError } from '../utils/errorType';
 import { htmlToMarkdown } from '../utils/htmlToMarkdown';
+import { createHTTPStatusError } from '../utils/response';
 import { DEFAULT_TIMEOUT, withTimeout } from '../utils/withTimeout';
 
 const mixinHeaders = {
@@ -39,28 +40,25 @@ export const naive: CrawlImpl = async (url, { filterOptions }) => {
 
   try {
     res = await withTimeout(
-      ssrfSafeFetch(url, {
-        headers: mixinHeaders,
-        signal: new AbortController().signal,
-      }),
+      (signal) =>
+        ssrfSafeFetch(url, {
+          headers: mixinHeaders,
+          signal,
+        }),
       DEFAULT_TIMEOUT,
     );
   } catch (e) {
-    const error = e as Error;
-    if (error.message === 'fetch failed') {
-      throw new NetworkConnectionError();
-    }
-
-    if (error instanceof TimeoutError) {
-      throw error;
-    }
-
-    throw e;
+    throw toFetchError(e);
   }
 
   if (res.status === 404) {
     throw new PageNotFoundError(res.statusText);
   }
+
+  if (!res.ok) {
+    throw await createHTTPStatusError(res, 'Naive');
+  }
+
   const type = res.headers.get('content-type');
 
   if (type?.includes('application/json')) {
@@ -74,7 +72,7 @@ export const naive: CrawlImpl = async (url, { filterOptions }) => {
     }
 
     return {
-      content: content,
+      content,
       contentType: 'json',
       length: content.length,
       url,
@@ -91,8 +89,8 @@ export const naive: CrawlImpl = async (url, { filterOptions }) => {
       return;
     }
 
-    // it's blocked by cloudflare
-    if (result.title !== 'Just a moment...') {
+    // It's blocked by Cloudflare.
+    if (result.title === 'Just a moment...') {
       return;
     }
 
diff --git a/packages/web-crawler/src/crawImpl/search1api.ts b/packages/web-crawler/src/crawImpl/search1api.ts
index 11e826a5a1..edc6f690f1 100644
--- a/packages/web-crawler/src/crawImpl/search1api.ts
+++ b/packages/web-crawler/src/crawImpl/search1api.ts
@@ -1,5 +1,6 @@
 import type { CrawlImpl, CrawlSuccessResult } from '../type';
-import { NetworkConnectionError, PageNotFoundError, TimeoutError } from '../utils/errorType';
+import { PageNotFoundError, toFetchError } from '../utils/errorType';
+import { createHTTPStatusError, parseJSONResponse } from '../utils/response';
 import { DEFAULT_TIMEOUT, withTimeout } from '../utils/withTimeout';
 
 interface Search1ApiResponse {
@@ -21,29 +22,22 @@ export const search1api: CrawlImpl = async (url) => {
 
   try {
     res = await withTimeout(
-      fetch('https://api.search1api.com/crawl', {
-        body: JSON.stringify({
-          url,
+      (signal) =>
+        fetch('https://api.search1api.com/crawl', {
+          body: JSON.stringify({
+            url,
+          }),
+          headers: {
+            'Authorization': !apiKey ? '' : `Bearer ${apiKey}`,
+            'Content-Type': 'application/json',
+          },
+          method: 'POST',
+          signal,
         }),
-        headers: {
-          'Authorization': !apiKey ? '' : `Bearer ${apiKey}`,
-          'Content-Type': 'application/json',
-        },
-        method: 'POST',
-      }),
       DEFAULT_TIMEOUT,
     );
   } catch (e) {
-    const error = e as Error;
-    if (error.message === 'fetch failed') {
-      throw new NetworkConnectionError();
-    }
-
-    if (error instanceof TimeoutError) {
-      throw error;
-    }
-
-    throw e;
+    throw toFetchError(e);
   }
 
   if (!res.ok) {
@@ -51,30 +45,24 @@ export const search1api: CrawlImpl = async (url) => {
       throw new PageNotFoundError(res.statusText);
     }
 
-    throw new Error(`Search1API request failed with status ${res.status}: ${res.statusText}`);
+    throw await createHTTPStatusError(res, 'Search1API');
   }
 
-  try {
-    const data = (await res.json()) as Search1ApiResponse;
+  const data = await parseJSONResponse<Search1ApiResponse>(res, 'Search1API');
 
-    // Check if content is empty or too short
-    if (!data.results.content || data.results.content.length < 100) {
-      return;
-    }
-
-    return {
-      content: data.results.content,
-      contentType: 'text',
-      description: data.results.title,
-      // Using title as description since API doesn't provide a separate description
-      length: data.results.content.length,
-      siteName: new URL(url).hostname,
-      title: data.results.title,
-      url: data.results.link || url,
-    } satisfies CrawlSuccessResult;
-  } catch (error) {
-    console.error(error);
+  // Check if content is empty or too short
+  if (!data.results?.content || data.results.content.length < 100) {
+    return;
   }
 
-  return;
+  return {
+    content: data.results.content,
+    contentType: 'text',
+    description: data.results?.title,
+    // Using title as description since API doesn't provide a separate description
+    length: data.results.content.length,
+    siteName: new URL(url).hostname,
+    title: data.results?.title,
+    url: data.results?.link || url,
+  } satisfies CrawlSuccessResult;
 };
diff --git a/packages/web-crawler/src/crawImpl/tavily.ts b/packages/web-crawler/src/crawImpl/tavily.ts
index 4adbb7eebc..95e6d56b2b 100644
--- a/packages/web-crawler/src/crawImpl/tavily.ts
+++ b/packages/web-crawler/src/crawImpl/tavily.ts
@@ -1,5 +1,6 @@
 import type { CrawlImpl, CrawlSuccessResult } from '../type';
-import { NetworkConnectionError, PageNotFoundError, TimeoutError } from '../utils/errorType';
+import { PageNotFoundError, toFetchError } from '../utils/errorType';
+import { createHTTPStatusError, parseJSONResponse } from '../utils/response';
 import { DEFAULT_TIMEOUT, withTimeout } from '../utils/withTimeout';
 
 interface TavilyResults {
@@ -28,31 +29,24 @@ export const tavily: CrawlImpl = async (url) => {
 
   try {
     res = await withTimeout(
-      fetch('https://api.tavily.com/extract', {
-        body: JSON.stringify({
-          extract_depth: process.env.TAVILY_EXTRACT_DEPTH || 'basic', // basic or advanced
-          include_images: false,
-          urls: url,
+      (signal) =>
+        fetch('https://api.tavily.com/extract', {
+          body: JSON.stringify({
+            extract_depth: process.env.TAVILY_EXTRACT_DEPTH || 'basic', // basic or advanced
+            include_images: false,
+            urls: url,
+          }),
+          headers: {
+            'Authorization': !apiKey ? '' : `Bearer ${apiKey}`,
+            'Content-Type': 'application/json',
+          },
+          method: 'POST',
+          signal,
         }),
-        headers: {
-          'Authorization': !apiKey ? '' : `Bearer ${apiKey}`,
-          'Content-Type': 'application/json',
-        },
-        method: 'POST',
-      }),
       DEFAULT_TIMEOUT,
     );
   } catch (e) {
-    const error = e as Error;
-    if (error.message === 'fetch failed') {
-      throw new NetworkConnectionError();
-    }
-
-    if (error instanceof TimeoutError) {
-      throw error;
-    }
-
-    throw e;
+    throw toFetchError(e);
   }
 
   if (!res.ok) {
@@ -60,35 +54,29 @@ export const tavily: CrawlImpl = async (url) => {
       throw new PageNotFoundError(res.statusText);
     }
 
-    throw new Error(`Tavily request failed with status ${res.status}: ${res.statusText}`);
+    throw await createHTTPStatusError(res, 'Tavily');
   }
 
-  try {
-    const data = (await res.json()) as TavilyResponse;
+  const data = await parseJSONResponse<TavilyResponse>(res, 'Tavily');
 
-    if (!data.results || data.results.length === 0) {
-      console.warn('Tavily API returned no results for URL:', url);
-      return;
-    }
-
-    const firstResult = data.results[0];
-
-    // Check if content is empty or too short
-    if (!firstResult.raw_content || firstResult.raw_content.length < 100) {
-      return;
-    }
-
-    return {
-      content: firstResult.raw_content,
-      contentType: 'text',
-      length: firstResult.raw_content.length,
-      siteName: new URL(url).hostname,
-      title: new URL(url).hostname,
-      url: firstResult.url || url,
-    } satisfies CrawlSuccessResult;
-  } catch (error) {
-    console.error(error);
+  if (!data.results || data.results.length === 0) {
+    console.warn('Tavily API returned no results for URL:', url);
+    return;
   }
 
-  return;
+  const firstResult = data.results[0];
+
+  // Check if content is empty or too short
+  if (!firstResult.raw_content || firstResult.raw_content.length < 100) {
+    return;
+  }
+
+  return {
+    content: firstResult.raw_content,
+    contentType: 'text',
+    length: firstResult.raw_content.length,
+    siteName: new URL(url).hostname,
+    title: new URL(url).hostname,
+    url: firstResult.url || url,
+  } satisfies CrawlSuccessResult;
 };
diff --git a/packages/web-crawler/src/crawler.ts b/packages/web-crawler/src/crawler.ts
index d14669c3f6..14dcf1362f 100644
--- a/packages/web-crawler/src/crawler.ts
+++ b/packages/web-crawler/src/crawler.ts
@@ -59,13 +59,18 @@ export class Crawler {
       try {
         const res = await crawlImpls[impl](transformedUrl, { filterOptions: mergedFilterOptions });
 
-        if (res && res.content && res.content?.length > 100)
+        if (res && res.content && res.content.length > 100) {
           return {
             crawler: impl,
             data: res,
             originalUrl: url,
             transformedUrl: transformedUrl !== url ? transformedUrl : undefined,
           };
+        }
+
+        finalError = new Error(`${impl} returned empty or short content`);
+        finalError.name = 'EmptyCrawlResultError';
+        finalCrawler = impl;
       } catch (error) {
         console.error(error);
         finalError = error as Error;
@@ -77,10 +82,10 @@ export class Crawler {
     const errorMessage = finalError?.message;
 
     return {
-      crawler: finalCrawler!,
+      crawler: finalCrawler || finalImpls.at(-1) || 'unknown',
       data: {
         content: `Fail to crawl the page. Error type: ${errorType}, error message: ${errorMessage}`,
-        errorMessage: errorMessage,
+        errorMessage,
         errorType,
       },
       originalUrl: url,
diff --git a/packages/web-crawler/src/test-utils.ts b/packages/web-crawler/src/test-utils.ts
new file mode 100644
index 0000000000..0fc80b4b64
--- /dev/null
+++ b/packages/web-crawler/src/test-utils.ts
@@ -0,0 +1,25 @@
+import { vi } from 'vitest';
+
+/**
+ * Build a lightweight fake of a fetch `Response` for use in crawler tests.
+ * `json`, `text`, and `clone` are `vi.fn()` mocks, so each test can re-stub them as needed.
+ */
+export const createMockResponse = (
+  body: any,
+  opts: { ok: boolean; status?: number; statusText?: string } = { ok: true },
+) => {
+  const serialized = typeof body === 'string' ? body : JSON.stringify(body);
+  const bodyReaders = () => ({
+    json: vi.fn().mockResolvedValue(body),
+    text: vi.fn().mockResolvedValue(serialized),
+  });
+  const self: any = {
+    ok: opts.ok,
+    status: opts.status ?? (opts.ok ? 200 : 500),
+    statusText: opts.statusText ?? (opts.ok ? 'OK' : 'Internal Server Error'),
+    ...bodyReaders(),
+    clone: vi.fn(),
+  };
+  self.clone.mockReturnValue({ ...self, ...bodyReaders() });
+  return self;
+};
diff --git a/packages/web-crawler/src/utils/appUrlRules.test.ts b/packages/web-crawler/src/utils/__tests__/appUrlRules.test.ts
similarity index 98%
rename from packages/web-crawler/src/utils/appUrlRules.test.ts
rename to packages/web-crawler/src/utils/__tests__/appUrlRules.test.ts
index abc1ff971a..e3ecb33a20 100644
--- a/packages/web-crawler/src/utils/appUrlRules.test.ts
+++ b/packages/web-crawler/src/utils/__tests__/appUrlRules.test.ts
@@ -1,6 +1,6 @@
 import { describe, expect, it } from 'vitest';
 
-import { applyUrlRules } from './appUrlRules';
+import { applyUrlRules } from '../appUrlRules';
 
 describe('applyUrlRules', () => {
   // @gru-agent github file rules 不要改
diff --git a/packages/web-crawler/src/utils/__tests__/errorType.test.ts b/packages/web-crawler/src/utils/__tests__/errorType.test.ts
index ba06111b25..d2ad87c793 100644
--- a/packages/web-crawler/src/utils/__tests__/errorType.test.ts
+++ b/packages/web-crawler/src/utils/__tests__/errorType.test.ts
@@ -1,6 +1,12 @@
 import { describe, expect, it } from 'vitest';
 
-import { NetworkConnectionError, PageNotFoundError, TimeoutError } from '../errorType';
+import {
+  isFetchNetworkError,
+  NetworkConnectionError,
+  PageNotFoundError,
+  TimeoutError,
+  toFetchError,
+} from '../errorType';
 
 describe('errorType', () => {
   describe('PageNotFoundError', () => {
@@ -170,6 +176,43 @@ describe('errorType', () => {
     });
   });
 
+  describe('isFetchNetworkError', () => {
+    it('should return true for TypeError with "fetch failed" message', () => {
+      expect(isFetchNetworkError(new TypeError('fetch failed'))).toBe(true);
+    });
+
+    it('should return false for plain Error with "fetch failed" message', () => {
+      expect(isFetchNetworkError(new Error('fetch failed'))).toBe(false);
+    });
+
+    it('should return false for TypeError with different message', () => {
+      expect(isFetchNetworkError(new TypeError('something else'))).toBe(false);
+    });
+
+    it('should return false for non-error values', () => {
+      expect(isFetchNetworkError('fetch failed')).toBe(false);
+      expect(isFetchNetworkError(null)).toBe(false);
+      expect(isFetchNetworkError(undefined)).toBe(false);
+    });
+  });
+
+  describe('toFetchError', () => {
+    it('should return NetworkConnectionError for fetch network errors', () => {
+      const result = toFetchError(new TypeError('fetch failed'));
+      expect(result).toBeInstanceOf(NetworkConnectionError);
+    });
+
+    it('should return TimeoutError as-is', () => {
+      const timeout = new TimeoutError('Request timeout after 10000ms');
+      expect(toFetchError(timeout)).toBe(timeout);
+    });
+
+    it('should return unknown errors unchanged', () => {
+      const unknown = new Error('something unexpected');
+      expect(toFetchError(unknown)).toBe(unknown);
+    });
+  });
+
   describe('error catching scenarios', () => {
     it('should allow catching specific error types', () => {
       const testErrors = [
diff --git a/packages/web-crawler/src/utils/__tests__/response.test.ts b/packages/web-crawler/src/utils/__tests__/response.test.ts
new file mode 100644
index 0000000000..cbd434e732
--- /dev/null
+++ b/packages/web-crawler/src/utils/__tests__/response.test.ts
@@ -0,0 +1,102 @@
+import { describe, expect, it } from 'vitest';
+
+import { createHTTPStatusError, parseJSONResponse, ResponseBodyParseError } from '../response';
+
+const createMockResponse = (
+  body: string,
+  options: { ok?: boolean; status?: number; statusText?: string } = {},
+) => {
+  const { ok = true, status = 200, statusText = 'OK' } = options;
+  return new Response(body, {
+    status,
+    statusText,
+    headers: { 'Content-Type': ok ? 'application/json' : 'text/html' },
+  });
+};
+
+describe('ResponseBodyParseError', () => {
+  it('should create error with provider and body snippet', () => {
+    const error = new ResponseBodyParseError('Jina', '<html>error</html>');
+    expect(error.message).toBe('Jina returned non-JSON response: <html>error</html>');
+    expect(error.name).toBe('ResponseBodyParseError');
+  });
+
+  it('should create error without body snippet', () => {
+    const error = new ResponseBodyParseError('Firecrawl');
+    expect(error.message).toBe('Firecrawl returned non-JSON response');
+  });
+});
+
+describe('parseJSONResponse', () => {
+  it('should parse valid JSON response', async () => {
+    const data = { code: 200, results: ['a', 'b'] };
+    const response = createMockResponse(JSON.stringify(data));
+
+    const result = await parseJSONResponse<typeof data>(response, 'TestProvider');
+
+    expect(result).toEqual(data);
+  });
+
+  it('should throw ResponseBodyParseError for non-JSON response', async () => {
+    const response = createMockResponse('<html><body>Error</body></html>');
+
+    await expect(parseJSONResponse(response, 'Jina')).rejects.toThrow(ResponseBodyParseError);
+    await expect(
+      parseJSONResponse(createMockResponse('<html><body>Error</body></html>'), 'Jina'),
+    ).rejects.toThrow('Jina returned non-JSON response');
+  });
+
+  it('should include body snippet in error for non-JSON response', async () => {
+    const htmlBody = '<html><body>Internal Server Error</body></html>';
+    const response = createMockResponse(htmlBody);
+
+    await expect(parseJSONResponse(response, 'Firecrawl')).rejects.toThrow(
+      /Firecrawl returned non-JSON response: .*Internal Server Error/,
+    );
+  });
+
+  it('should handle empty response body', async () => {
+    const response = createMockResponse('');
+
+    await expect(parseJSONResponse(response, 'TestProvider')).rejects.toThrow(
+      'TestProvider returned non-JSON response',
+    );
+  });
+});
+
+describe('createHTTPStatusError', () => {
+  it('should create error with status and body snippet', async () => {
+    const response = createMockResponse('Not Found', {
+      ok: false,
+      status: 404,
+      statusText: 'Not Found',
+    });
+
+    const error = await createHTTPStatusError(response, 'Exa');
+
+    expect(error.message).toContain('Exa request failed with status 404: Not Found');
+    expect(error.message).toContain('Not Found');
+  });
+
+  it('should omit the body snippet when the response body is empty', async () => {
+    const response = createMockResponse('', {
+      ok: false,
+      status: 500,
+      statusText: 'Internal Server Error',
+    });
+
+    const error = await createHTTPStatusError(response, 'Tavily');
+
+    expect(error.message).toBe('Tavily request failed with status 500: Internal Server Error');
+  });
+
+  it('should truncate long body snippets', async () => {
+    const longBody = 'x'.repeat(500);
+    const response = createMockResponse(longBody, { ok: false, status: 500, statusText: 'Error' });
+
+    const error = await createHTTPStatusError(response, 'Test');
+
+    // A message shorter than the raw body proves the snippet was truncated.
+    expect(error.message.length).toBeLessThan(longBody.length);
+  });
+});
diff --git a/packages/web-crawler/src/utils/__tests__/withTimeout.test.ts b/packages/web-crawler/src/utils/__tests__/withTimeout.test.ts
index 3b52aee4cf..2445b75522 100644
--- a/packages/web-crawler/src/utils/__tests__/withTimeout.test.ts
+++ b/packages/web-crawler/src/utils/__tests__/withTimeout.test.ts
@@ -12,18 +12,18 @@ describe('withTimeout', () => {
     vi.useRealTimers();
   });
 
-  it('should resolve when promise resolves before timeout', async () => {
-    const promise = Promise.resolve('success');
-    const result = await withTimeout(promise, 1000);
+  it('should resolve when factory function resolves before timeout', async () => {
+    const result = await withTimeout(() => Promise.resolve('success'), 1000);
     expect(result).toBe('success');
   });
 
-  it('should reject with TimeoutError when promise takes too long', async () => {
-    const slowPromise = new Promise((resolve) => {
-      setTimeout(() => resolve('too late'), 200);
-    });
+  it('should reject with TimeoutError when factory takes too long', async () => {
+    const fn = () =>
+      new Promise((resolve) => {
+        setTimeout(() => resolve('too late'), 200);
+      });
 
-    const timeoutPromise = withTimeout(slowPromise, 100);
+    const timeoutPromise = withTimeout(fn, 100);
     vi.advanceTimersByTime(100);
 
     await expect(timeoutPromise).rejects.toThrow(TimeoutError);
@@ -31,32 +31,70 @@ describe('withTimeout', () => {
   });
 
   it('should use DEFAULT_TIMEOUT when no timeout specified', async () => {
-    const slowPromise = new Promise((resolve) => {
-      setTimeout(() => resolve('success'), DEFAULT_TIMEOUT + 100);
-    });
+    const fn = () =>
+      new Promise((resolve) => {
+        setTimeout(() => resolve('success'), DEFAULT_TIMEOUT + 100);
+      });
 
-    const timeoutPromise = withTimeout(slowPromise);
+    const timeoutPromise = withTimeout(fn);
     vi.advanceTimersByTime(DEFAULT_TIMEOUT);
 
     await expect(timeoutPromise).rejects.toThrow(TimeoutError);
     await expect(timeoutPromise).rejects.toThrow(`Request timeout after ${DEFAULT_TIMEOUT}ms`);
   });
 
-  it('should reject with original error if promise rejects before timeout', async () => {
+  it('should reject with original error if factory rejects before timeout', async () => {
     const error = new Error('Original error');
-    const failingPromise = Promise.reject(error);
+    const fn = () => Promise.reject(error);
 
-    await expect(withTimeout(failingPromise, 1000)).rejects.toThrow('Original error');
+    await expect(withTimeout(fn, 1000)).rejects.toThrow('Original error');
   });
 
-  it('should abort controller when timeout occurs', async () => {
-    const slowPromise = new Promise((resolve) => {
-      setTimeout(() => resolve('too late'), 2000);
-    });
+  it('should pass AbortSignal to the factory function', async () => {
+    const factoryFn = vi.fn().mockResolvedValue('result');
+    await withTimeout(factoryFn, 1000);
 
-    const timeoutPromise = withTimeout(slowPromise, 1000);
-    vi.advanceTimersByTime(1000);
+    expect(factoryFn).toHaveBeenCalledTimes(1);
+    const signal = factoryFn.mock.calls[0][0];
+    expect(signal).toBeInstanceOf(AbortSignal);
+    expect(signal.aborted).toBe(false);
+  });
 
+  it('should abort the signal when timeout occurs', async () => {
+    let capturedSignal: AbortSignal | undefined;
+    const fn = (signal: AbortSignal) => {
+      capturedSignal = signal;
+      return new Promise((resolve) => {
+        setTimeout(() => resolve('too late'), 2000);
+      });
+    };
+
+    const timeoutPromise = withTimeout(fn, 100);
+    expect(capturedSignal!.aborted).toBe(false);
+
+    vi.advanceTimersByTime(100);
     await expect(timeoutPromise).rejects.toThrow(TimeoutError);
+
+    expect(capturedSignal!.aborted).toBe(true);
+  });
+
+  it('should clear timeout timer when promise resolves successfully', async () => {
+    const clearTimeoutSpy = vi.spyOn(globalThis, 'clearTimeout');
+
+    await withTimeout(() => Promise.resolve('success'), 5000);
+
+    expect(clearTimeoutSpy).toHaveBeenCalled();
+    clearTimeoutSpy.mockRestore();
+  });
+
+  it('should clear timeout timer when promise rejects', async () => {
+    const clearTimeoutSpy = vi.spyOn(globalThis, 'clearTimeout');
+
+    await expect(withTimeout(() => Promise.reject(new Error('fail')), 5000)).rejects.toThrow(
+      'fail',
+    );
+
+    expect(clearTimeoutSpy).toHaveBeenCalled();
+    clearTimeoutSpy.mockRestore();
   });
 });
diff --git a/packages/web-crawler/src/utils/errorType.ts b/packages/web-crawler/src/utils/errorType.ts
index c3ade9c806..fc4e4fbb3d 100644
--- a/packages/web-crawler/src/utils/errorType.ts
+++ b/packages/web-crawler/src/utils/errorType.ts
@@ -17,3 +17,34 @@ export class TimeoutError extends Error {
     this.name = 'TimeoutError';
   }
 }
+
+/**
+ * Detects the network-level failure raised by Node.js `fetch` (undici):
+ * a `TypeError` whose message is exactly "fetch failed".
+ */
+export const isFetchNetworkError = (error: unknown): boolean =>
+  error instanceof TypeError ? error.message === 'fetch failed' : false;
+
+/**
+ * Map an error caught around `fetch` onto the crawler's typed errors.
+ *
+ * Network failures (see `isFetchNetworkError`) become a fresh `NetworkConnectionError`;
+ * every other value, `TimeoutError` included, is handed back untouched.
+ * The result is returned, not thrown, so call sites `throw` it themselves.
+ *
+ * @example
+ * ```ts
+ * } catch (e) {
+ *   throw toFetchError(e);
+ * }
+ * ```
+ */
+export const toFetchError = (error: unknown): Error => {
+  // Only undici's "fetch failed" TypeError is rewritten
+  if (isFetchNetworkError(error)) {
+    return new NetworkConnectionError();
+  }
+
+  // TimeoutError needs no special case: it falls through like any other value
+  return error as Error;
+};
diff --git a/packages/web-crawler/src/utils/htmlToMarkdown.test.ts b/packages/web-crawler/src/utils/htmlToMarkdown.test.ts
index 7452541829..611d44743b 100644
--- a/packages/web-crawler/src/utils/htmlToMarkdown.test.ts
+++ b/packages/web-crawler/src/utils/htmlToMarkdown.test.ts
@@ -1,5 +1,5 @@
 import { readFileSync } from 'node:fs';
-import * as path from 'node:path';
+import path from 'node:path';
 
 import { describe, expect, it } from 'vitest';
 
@@ -33,4 +33,29 @@ describe('htmlToMarkdown', () => {
       expect(data).toMatchSnapshot();
     }, 20000);
   });
+
+  it('should truncate HTML exceeding 1 MB', () => {
+    // Create HTML slightly over 1 MB
+    const maxSize = 1024 * 1024;
+    const largeContent = 'x'.repeat(maxSize + 1000);
+    const html = `<html><body><p>${largeContent}</p></body></html>`;
+
+    // Should not throw - the function handles large HTML by truncating
+    const result = htmlToMarkdown(html, { url: 'https://example.com', filterOptions: {} });
+
+    // Verify content was produced (truncated HTML is still parseable)
+    expect(result).toBeDefined();
+    expect(result.content).toBeDefined();
+    // Compare with the text payload, not `html`: tag stripping alone never shrinks the text
+    expect(result.content.length).toBeLessThan(largeContent.length);
+  }, 20000);
+
+  it('should not truncate HTML under 1 MB', () => {
+    const html = '<html><body><p>Small content</p></body></html>';
+
+    const result = htmlToMarkdown(html, { url: 'https://example.com', filterOptions: {} });
+
+    expect(result).toBeDefined();
+    expect(result.content).toContain('Small content');
+  });
 });
diff --git a/packages/web-crawler/src/utils/htmlToMarkdown.ts b/packages/web-crawler/src/utils/htmlToMarkdown.ts
index 66a899f835..c964b352d4 100644
--- a/packages/web-crawler/src/utils/htmlToMarkdown.ts
+++ b/packages/web-crawler/src/utils/htmlToMarkdown.ts
@@ -5,6 +5,9 @@ import { NodeHtmlMarkdown } from 'node-html-markdown';
 
 import type { FilterOptions } from '../type';
 
+/** Cap HTML at 1 Mi UTF-16 code units (~1 MB for ASCII) before DOM parsing to avoid CPU spikes */
+const MAX_HTML_SIZE = 1024 * 1024;
+
 const cleanObj = <T extends object>(
   obj: T,
 ): {
@@ -24,9 +27,10 @@ interface HtmlToMarkdownOutput {
 }
 
 export const htmlToMarkdown = (
-  html: string,
+  rawHtml: string,
   { url, filterOptions }: { filterOptions: FilterOptions; url: string },
 ): HtmlToMarkdownOutput => {
+  const html = rawHtml.length > MAX_HTML_SIZE ? rawHtml.slice(0, MAX_HTML_SIZE) : rawHtml;
   const window = new Window({ url });
 
   const document = window.document;
diff --git a/packages/web-crawler/src/utils/response.ts b/packages/web-crawler/src/utils/response.ts
new file mode 100644
index 0000000000..901ffffdcb
--- /dev/null
+++ b/packages/web-crawler/src/utils/response.ts
@@ -0,0 +1,76 @@
+/** Maximum number of body characters quoted in an error message. */
+const ERROR_BODY_SNIPPET_LIMIT = 200;
+
+/** Collapse each whitespace run into one space and trim, keeping the snippet on a single line. */
+const normalizeBodySnippet = (body: string) => body.replaceAll(/\s+/g, ' ').trim();
+
+/**
+ * Raised when a provider response that was expected to be JSON cannot be parsed.
+ * The message names the provider and, when available, quotes a snippet of the body.
+ */
+export class ResponseBodyParseError extends Error {
+  constructor(provider: string, bodySnippet?: string) {
+    super(
+      bodySnippet
+        ? `${provider} returned non-JSON response: ${bodySnippet}`
+        : `${provider} returned non-JSON response`,
+    );
+    this.name = 'ResponseBodyParseError';
+  }
+}
+
+/**
+ * Read the response body as text and return a normalized snippet of at most
+ * `ERROR_BODY_SNIPPET_LIMIT` characters. Resolves to `undefined` when the body is empty
+ * (or whitespace only) or cannot be read.
+ */
+const getBodySnippet = async (response: Response): Promise<string | undefined> => {
+  try {
+    const body = await response.text();
+    const snippet = normalizeBodySnippet(body).slice(0, ERROR_BODY_SNIPPET_LIMIT);
+
+    return snippet.length > 0 ? snippet : undefined;
+  } catch {
+    // Diagnostics only: an unreadable body must not mask the failure being reported
+    return undefined;
+  }
+};
+
+/**
+ * Parse a provider response as JSON.
+ *
+ * @param response Response whose body has not been consumed yet
+ * @param provider Provider name used in the error message
+ * @throws ResponseBodyParseError when `response.json()` fails
+ */
+export const parseJSONResponse = async <T>(response: Response, provider: string): Promise<T> => {
+  // `json()` consumes the body, so keep a clone to quote the raw text if parsing fails
+  const clonedResponse = response.clone();
+
+  try {
+    return (await response.json()) as T;
+  } catch {
+    const bodySnippet = await getBodySnippet(clonedResponse);
+    throw new ResponseBodyParseError(provider, bodySnippet);
+  }
+};
+
+/**
+ * Build an `Error` describing a non-OK HTTP response. The error is returned, not thrown.
+ * The message holds the status code, the status text when there is one (it is empty
+ * over HTTP/2), and a snippet of the body when one can be read.
+ *
+ * @param response Non-OK response; its body is read to build the snippet
+ * @param provider Provider name used in the error message
+ */
+export const createHTTPStatusError = async (
+  response: Response,
+  provider: string,
+): Promise<Error> => {
+  const bodySnippet = await getBodySnippet(response);
+  // Omit the ": <statusText>" part entirely rather than leaving a dangling colon
+  const statusText = response.statusText ? `: ${response.statusText}` : '';
+  const summary = `${provider} request failed with status ${response.status}${statusText}`;
+
+  return new Error(bodySnippet ? `${summary}. Response: ${bodySnippet}` : summary);
+};
diff --git a/packages/web-crawler/src/utils/withTimeout.ts b/packages/web-crawler/src/utils/withTimeout.ts
index 9f295fa43b..bcc8d62829 100644
--- a/packages/web-crawler/src/utils/withTimeout.ts
+++ b/packages/web-crawler/src/utils/withTimeout.ts
@@ -3,19 +3,32 @@ import { TimeoutError } from './errorType';
 export const DEFAULT_TIMEOUT = 10_000;
 
 /**
- * Wraps a promise with a timeout
- * @param promise Promise to wrap
+ * Wraps a factory function with a timeout and abort support.
+ * The factory receives an AbortSignal that is aborted on timeout,
+ * allowing the underlying request (e.g. fetch) to be properly cancelled.
+ * A synchronous throw from the factory is surfaced as a rejection of the returned promise.
+ * @param fn Factory function that receives an AbortSignal and returns a Promise
  * @param ms Timeout in milliseconds
- * @returns Promise that will be rejected if it takes longer than ms to resolve
+ * @returns Promise settling with the factory's outcome, or rejecting with TimeoutError after `ms`
  */
-export const withTimeout = <T>(promise: Promise<T>, ms: number = DEFAULT_TIMEOUT): Promise<T> => {
+export const withTimeout = <T>(
+  fn: (signal: AbortSignal) => Promise<T>,
+  ms: number = DEFAULT_TIMEOUT,
+): Promise<T> => {
   const controller = new AbortController();
+  let timeoutId: ReturnType<typeof setTimeout>;
+
   const timeoutPromise = new Promise<T>((_, reject) => {
-    setTimeout(() => {
+    timeoutId = setTimeout(() => {
       controller.abort();
       reject(new TimeoutError(`Request timeout after ${ms}ms`));
     }, ms);
   });
 
-  return Promise.race([promise, timeoutPromise]);
+  // Call `fn` inside an async wrapper: a synchronous throw then becomes a rejection that
+  // still runs `finally`. Thrown directly, it would skip `Promise.race`, leaving the timer
+  // armed and `timeoutPromise` to reject later with nothing handling it.
+  const task = (async () => fn(controller.signal))().finally(() => clearTimeout(timeoutId));
+
+  return Promise.race([task, timeoutPromise]);
 };
diff --git a/src/envs/tools.ts b/src/envs/tools.ts
index 3fc2054815..753a236622 100644
--- a/src/envs/tools.ts
+++ b/src/envs/tools.ts
@@ -1,15 +1,31 @@
 import { createEnv } from '@t3-oss/env-nextjs';
 import { z } from 'zod';
 
+/**
+ * Schema for an optional integer env var constrained to `[min, max]`.
+ * Blank values (`''`, whitespace-only, `null`) count as unset; otherwise `z.coerce`
+ * would turn them into `0` via `Number()` rather than leaving the default in place.
+ */
+const optionalNumberEnv = (min: number, max: number) =>
+  z.preprocess(
+    (value) =>
+      value === null || (typeof value === 'string' && value.trim() === '') ? undefined : value,
+    z.coerce.number().int().max(max).min(min).optional(),
+  );
+
 export const getToolsConfig = () => {
   return createEnv({
     runtimeEnv: {
+      CRAWL_CONCURRENCY: process.env.CRAWL_CONCURRENCY,
+      CRAWLER_RETRY: process.env.CRAWLER_RETRY,
       CRAWLER_IMPLS: process.env.CRAWLER_IMPLS,
       SEARCH_PROVIDERS: process.env.SEARCH_PROVIDERS,
       SEARXNG_URL: process.env.SEARXNG_URL,
     },
 
     server: {
+      CRAWL_CONCURRENCY: optionalNumberEnv(1, 10),
+      CRAWLER_RETRY: optionalNumberEnv(0, 3),
       CRAWLER_IMPLS: z.string().optional(),
       SEARCH_PROVIDERS: z.string().optional(),
       SEARXNG_URL: z.string().url().optional(),
diff --git a/src/server/routers/tools/search.test.ts b/src/server/routers/tools/search.test.ts
index 42bef5b943..ba79581727 100644
--- a/src/server/routers/tools/search.test.ts
+++ b/src/server/routers/tools/search.test.ts
@@ -46,6 +46,27 @@ describe('searchRouter', () => {
       expect(result.results[1]).toEqual({ content: 'test content' });
     });
 
+    it('should accept all supported crawler implementations', async () => {
+      // Every value the `impls` input enum is expected to let through
+      const supportedImpls = [
+        'browserless',
+        'exa',
+        'firecrawl',
+        'jina',
+        'naive',
+        'search1api',
+        'tavily',
+      ] as const;
+      const caller = searchRouter.createCaller(mockContext as any);
+
+      // Sequential on purpose: a rejected value fails at its own call
+      for (const impl of supportedImpls) {
+        const input = { impls: [impl], urls: ['http://test.com'] };
+        const { results } = await caller.crawlPages(input);
+        expect(results).toHaveLength(1);
+      }
+    });
+
     it('should work without specifying impls', async () => {
       const caller = searchRouter.createCaller(mockContext as any);
 
diff --git a/src/server/routers/tools/search.ts b/src/server/routers/tools/search.ts
index 67c0f72315..682d4de58f 100644
--- a/src/server/routers/tools/search.ts
+++ b/src/server/routers/tools/search.ts
@@ -9,7 +9,10 @@ export const searchRouter = router({
   crawlPages: searchProcedure
     .input(
       z.object({
-        impls: z.enum(['jina', 'naive', 'browserless']).array().optional(),
+        impls: z
+          .enum(['browserless', 'exa', 'firecrawl', 'jina', 'naive', 'search1api', 'tavily'])
+          .array()
+          .optional(),
         urls: z.string().array(),
       }),
     )
diff --git a/src/server/services/search/index.test.ts b/src/server/services/search/index.test.ts
index 50730a16c1..a7092c9252 100644
--- a/src/server/services/search/index.test.ts
+++ b/src/server/services/search/index.test.ts
@@ -3,7 +3,7 @@ import { beforeEach, describe, expect, it, vi } from 'vitest';
 
 import { toolsEnv } from '@/envs/tools';
 
-import { createSearchServiceImpl,SearchImplType } from './impls';
+import { createSearchServiceImpl, SearchImplType } from './impls';
 import { SearchService } from './index';
 
 // Mock dependencies
@@ -11,7 +11,9 @@ vi.mock('@lobechat/web-crawler');
 vi.mock('./impls');
 vi.mock('@/envs/tools', () => ({
   toolsEnv: {
+    CRAWL_CONCURRENCY: undefined,
     CRAWLER_IMPLS: '',
+    CRAWLER_RETRY: undefined,
     SEARCH_PROVIDERS: '',
   },
 }));
@@ -279,10 +281,9 @@ describe('SearchService', () => {
   describe('crawlPages', () => {
     it('should crawl multiple pages concurrently', async () => {
       const mockCrawlResult = {
-        content: 'Page content',
-        description: 'Page description',
-        title: 'Page title',
-        url: 'https://example.com',
+        crawler: 'naive',
+        data: { content: 'Page content', contentType: 'text' },
+        originalUrl: 'https://example.com',
       };
 
       const mockCrawler = {
@@ -304,8 +305,13 @@ describe('SearchService', () => {
     it('should use crawler implementations from env', async () => {
       vi.mocked(toolsEnv).CRAWLER_IMPLS = 'jina,reader';
 
+      const mockSuccessResult = {
+        crawler: 'jina',
+        data: { content: 'ok', contentType: 'text' },
+        originalUrl: 'https://example.com',
+      };
       const mockCrawler = {
-        crawl: vi.fn().mockResolvedValue({}),
+        crawl: vi.fn().mockResolvedValue(mockSuccessResult),
       };
       vi.mocked(Crawler).mockImplementation(() => mockCrawler as any);
 
@@ -317,8 +323,13 @@ describe('SearchService', () => {
     });
 
     it('should pass impls parameter to crawler.crawl', async () => {
+      const mockSuccessResult = {
+        crawler: 'jina',
+        data: { content: 'ok', contentType: 'text' },
+        originalUrl: 'https://example.com',
+      };
       const mockCrawler = {
-        crawl: vi.fn().mockResolvedValue({}),
+        crawl: vi.fn().mockResolvedValue(mockSuccessResult),
       };
       vi.mocked(Crawler).mockImplementation(() => mockCrawler as any);
 
@@ -334,5 +345,133 @@ describe('SearchService', () => {
         url: 'https://example.com',
       });
     });
+
+    it('should use CRAWL_CONCURRENCY from env', async () => {
+      vi.mocked(toolsEnv).CRAWL_CONCURRENCY = 1;
+
+      const mockCrawler = {
+        crawl: vi.fn().mockResolvedValue({
+          crawler: 'naive',
+          data: { content: 'ok', contentType: 'text' },
+          originalUrl: 'https://example.com',
+        }),
+      };
+      vi.mocked(Crawler).mockImplementation(() => mockCrawler as any);
+
+      searchService = new SearchService();
+      const urls = ['https://a.com', 'https://b.com'];
+      await searchService.crawlPages({ urls });
+
+      // All URLs should still be crawled
+      expect(mockCrawler.crawl).toHaveBeenCalledTimes(2);
+    });
+
+    it('should retry on failed crawl results', async () => {
+      vi.mocked(toolsEnv).CRAWLER_RETRY = 1;
+
+      const failedResult = {
+        crawler: 'naive',
+        data: { content: 'Fail', errorType: 'NetworkError', errorMessage: 'timeout' },
+        originalUrl: 'https://example.com',
+      };
+      const successResult = {
+        crawler: 'naive',
+        data: { content: 'Page content', contentType: 'text' },
+        originalUrl: 'https://example.com',
+      };
+
+      const mockCrawler = {
+        crawl: vi.fn().mockResolvedValueOnce(failedResult).mockResolvedValueOnce(successResult),
+      };
+      vi.mocked(Crawler).mockImplementation(() => mockCrawler as any);
+
+      searchService = new SearchService();
+      const result = await searchService.crawlPages({ urls: ['https://example.com'] });
+
+      expect(mockCrawler.crawl).toHaveBeenCalledTimes(2);
+      expect(result.results[0]).toBe(successResult);
+    });
+
+    it('should return last failed result after all retries exhausted', async () => {
+      vi.mocked(toolsEnv).CRAWLER_RETRY = 1;
+
+      const failedResult = {
+        crawler: 'naive',
+        data: { content: 'Fail', errorType: 'NetworkError', errorMessage: 'timeout' },
+        originalUrl: 'https://example.com',
+      };
+
+      const mockCrawler = {
+        crawl: vi.fn().mockResolvedValue(failedResult),
+      };
+      vi.mocked(Crawler).mockImplementation(() => mockCrawler as any);
+
+      searchService = new SearchService();
+      const result = await searchService.crawlPages({ urls: ['https://example.com'] });
+
+      expect(mockCrawler.crawl).toHaveBeenCalledTimes(2); // 1 + 1 retry
+      expect(result.results[0]).toBe(failedResult);
+    });
+
+    it('should not retry when CRAWLER_RETRY is 0', async () => {
+      vi.mocked(toolsEnv).CRAWLER_RETRY = 0;
+
+      const failedResult = {
+        crawler: 'naive',
+        data: { content: 'Fail', errorType: 'Error', errorMessage: 'fail' },
+        originalUrl: 'https://example.com',
+      };
+
+      const mockCrawler = {
+        crawl: vi.fn().mockResolvedValue(failedResult),
+      };
+      vi.mocked(Crawler).mockImplementation(() => mockCrawler as any);
+
+      searchService = new SearchService();
+      const result = await searchService.crawlPages({ urls: ['https://example.com'] });
+
+      expect(mockCrawler.crawl).toHaveBeenCalledTimes(1);
+      expect(result.results[0]).toBe(failedResult);
+    });
+
+    it('should handle crawl exceptions during retry', async () => {
+      vi.mocked(toolsEnv).CRAWLER_RETRY = 1;
+
+      const mockCrawler = {
+        crawl: vi.fn().mockRejectedValue(new Error('Network error')),
+      };
+      vi.mocked(Crawler).mockImplementation(() => mockCrawler as any);
+
+      searchService = new SearchService();
+      const result = await searchService.crawlPages({ urls: ['https://example.com'] });
+
+      expect(mockCrawler.crawl).toHaveBeenCalledTimes(2);
+      expect(result.results[0].data).toMatchObject({
+        errorType: 'Error',
+        errorMessage: 'Network error',
+      });
+    });
+
+    it('should detect successful results by contentType presence', async () => {
+      vi.mocked(toolsEnv).CRAWLER_RETRY = 1;
+
+      const successResult = {
+        crawler: 'naive',
+        data: { content: 'Page content', contentType: 'text' },
+        originalUrl: 'https://example.com',
+      };
+
+      const mockCrawler = {
+        crawl: vi.fn().mockResolvedValue(successResult),
+      };
+      vi.mocked(Crawler).mockImplementation(() => mockCrawler as any);
+
+      searchService = new SearchService();
+      const result = await searchService.crawlPages({ urls: ['https://example.com'] });
+
+      // Should not retry since result has contentType (successful)
+      expect(mockCrawler.crawl).toHaveBeenCalledTimes(1);
+      expect(result.results[0]).toBe(successResult);
+    });
   });
 });
diff --git a/src/server/services/search/index.ts b/src/server/services/search/index.ts
index 2da4066656..fa18db53af 100644
--- a/src/server/services/search/index.ts
+++ b/src/server/services/search/index.ts
@@ -1,5 +1,5 @@
-import { type SearchParams, type SearchQuery } from '@lobechat/types';
-import { type CrawlImplType } from '@lobechat/web-crawler';
+import type { SearchParams, SearchQuery } from '@lobechat/types';
+import type { Crawler, CrawlImplType, CrawlUniformResult } from '@lobechat/web-crawler';
 import pMap from 'p-map';
 
 import { toolsEnv } from '@/envs/tools';
@@ -7,6 +7,9 @@ import { toolsEnv } from '@/envs/tools';
 import { type SearchImplType, type SearchServiceImpl } from './impls';
 import { createSearchServiceImpl } from './impls';
 
+const DEFAULT_CRAWL_CONCURRENCY = 3;
+const DEFAULT_CRAWLER_RETRY = 1;
+
 const parseImplEnv = (envString: string = '') => {
   // Handle full-width commas and extra whitespace
   const envValue = envString.replaceAll('，', ',').trim();
@@ -24,6 +27,14 @@ export class SearchService {
     return parseImplEnv(toolsEnv.CRAWLER_IMPLS);
   }
 
+  private get crawlConcurrency() {
+    return toolsEnv.CRAWL_CONCURRENCY ?? DEFAULT_CRAWL_CONCURRENCY;
+  }
+
+  private get crawlerRetry() {
+    return toolsEnv.CRAWLER_RETRY ?? DEFAULT_CRAWLER_RETRY;
+  }
+
   constructor() {
     const impls = this.searchImpls;
     // TODO: need use turn mode
@@ -37,14 +48,59 @@ export class SearchService {
     const results = await pMap(
       input.urls,
       async (url) => {
-        return await crawler.crawl({ impls: input.impls, url });
+        return await this.crawlWithRetry(crawler, url, input.impls);
       },
-      { concurrency: 3 },
+      { concurrency: this.crawlConcurrency },
     );
 
     return { results };
   }
 
+  private async crawlWithRetry(
+    crawler: Crawler,
+    url: string,
+    impls?: CrawlImplType[],
+  ): Promise<CrawlUniformResult> {
+    // One initial attempt plus `crawlerRetry` retries; failed results and throws both retry
+    const maxAttempts = this.crawlerRetry + 1;
+    let lastResult: CrawlUniformResult | undefined;
+    let lastError: Error | undefined;
+
+    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
+      try {
+        lastResult = await crawler.crawl({ impls, url });
+        if (!this.isFailedCrawlResult(lastResult)) {
+          return lastResult;
+        }
+      } catch (error) {
+        // Wrap non-Error throwables so the fallback never reports "undefined"
+        lastError = error instanceof Error ? error : new Error(String(error));
+      }
+    }
+
+    // A failure reported by the crawler wins over the synthesized one below
+    if (lastResult) {
+      return lastResult;
+    }
+    return {
+      crawler: 'unknown',
+      data: {
+        content: `Fail to crawl the page. Error type: ${lastError?.name || 'UnknownError'}, error message: ${lastError?.message}`,
+        errorMessage: lastError?.message,
+        errorType: lastError?.name || 'UnknownError',
+      },
+      originalUrl: url,
+    };
+  }
+
+  /**
+   * Failure check based on the shape of `result.data`: successful crawls carry
+   * `contentType` (e.g. 'text', 'json'), failed ones carry `errorType`/`errorMessage`.
+   */
+  private isFailedCrawlResult({ data }: CrawlUniformResult): boolean {
+    return !('contentType' in data);
+  }
+
   private get searchImpls() {
     return parseImplEnv(toolsEnv.SEARCH_PROVIDERS) as SearchImplType[];
   }
@@ -58,17 +114,17 @@ export class SearchService {
 
   async webSearch({ query, searchCategories, searchEngines, searchTimeRange }: SearchQuery) {
     let data = await this.query(query, {
-      searchCategories: searchCategories,
-      searchEngines: searchEngines,
-      searchTimeRange: searchTimeRange,
+      searchCategories,
+      searchEngines,
+      searchTimeRange,
     });
 
     // First retry: remove search engine restrictions if no results found
     if (data.results.length === 0 && searchEngines && searchEngines?.length > 0) {
       const paramsExcludeSearchEngines = {
-        searchCategories: searchCategories,
+        searchCategories,
         searchEngines: undefined,
-        searchTimeRange: searchTimeRange,
+        searchTimeRange,
       };
       data = await this.query(query, paramsExcludeSearchEngines);
     }