🐛 fix(pdf): upgrade pdfjs-dist and react-pdf to v5.x (#11686)

* 🐛 fix(pdf): upgrade pdfjs-dist and react-pdf to v5.x

Resolves: LOBE-2658

- Upgrade pdfjs-dist from 4.x to 5.4.530
- Upgrade react-pdf from 9.x to 10.3.0
- Fix PDF worker loading using import.meta.url pattern
- Add @napi-rs/canvas dependency for react-pdf renderer
- Fix typo: ResouceManagerMode → ResourceManagerMode
- Clean up meaningless comments in ListItem component
- Simplify next config by removing unused isDesktop logic

* chore: update claude

Signed-off-by: Innei <tukon479@gmail.com>

* 🐛 fix(pdf): update PDF version in snapshots to 5.4.530

- Updated pdfVersion in PDF loader snapshots to reflect the new version 5.4.530.

Signed-off-by: Innei <tukon479@gmail.com>

* ✨ feat(file-loaders): implement lazy loading for file loaders

- Refactored file loader imports to use dynamic loading, improving performance by preventing heavy dependencies from being loaded until needed.
- Introduced `getFileLoader` function to manage loader retrieval based on file type.
- Updated logging and fallback mechanisms for unsupported file types.

This change enhances the efficiency of file loading operations.

Signed-off-by: Innei <tukon479@gmail.com>

* ✨ feat(config): enhance next configuration for improved package handling

- Updated `nextConfig` to include `@napi-rs/canvas` and `pdfjs-dist` in `serverExternalPackages` to address bundling issues with Turbopack.
- Removed unused `isDesktop` logic and simplified the configuration structure.
- Adjusted `transpilePackages` to exclude `pdfjs-dist`, reflecting recent upgrades.

This change optimizes the configuration for better compatibility and performance.

Signed-off-by: Innei <tukon479@gmail.com>

* 🐛 fix: use CDN pdfjs worker

---------

Signed-off-by: Innei <tukon479@gmail.com>
This commit is contained in:
Innei
2026-01-23 15:20:34 +08:00
committed by GitHub
parent 0047ffe770
commit 2b620dfc99
19 changed files with 140 additions and 90 deletions

View File

@@ -61,9 +61,11 @@ see @.cursor/rules/typescript.mdc
- **Dev**: Translate `locales/zh-CN/namespace.json` and `locales/en-US/namespace.json` locales file only for dev preview
- DON'T run `pnpm i18n`, let CI auto handle it
## Linear Issue Management(ignore if not installed linear mcp)
## Linear Issue Management (search tools first; ignore if not installed)
Read @.cursor/rules/linear.mdc when working with Linear issues.
ClaudeCode may not inject MCP tools until they are discovered/used.\
Before applying Linear workflows, **use tool search** to confirm `linear-server` exists (e.g. search `linear` / `mcp__linear-server__`). If not found, treat it as not installed.\
Then read `@.cursor/rules/linear.mdc` when working with Linear issues.
## Rules Index

View File

@@ -35,12 +35,12 @@
"prebuild": "tsx scripts/prebuild.mts && npm run lint",
"build": "cross-env NODE_OPTIONS=--max-old-space-size=8192 next build --webpack",
"postbuild": "npm run build-sitemap && npm run build-migrate-db",
"build-migrate-db": "bun run db:migrate",
"build-sitemap": "tsx ./scripts/buildSitemapIndex/index.ts",
"build:analyze": "NODE_OPTIONS=--max-old-space-size=81920 ANALYZE=true next build --webpack",
"build:docker": "npm run prebuild && NODE_OPTIONS=--max-old-space-size=8192 DOCKER=true next build --webpack && npm run build-sitemap",
"build:electron": "cross-env NODE_OPTIONS=--max-old-space-size=8192 NEXT_PUBLIC_IS_DESKTOP_APP=1 tsx scripts/electronWorkflow/buildNextApp.mts",
"build:vercel": "npm run prebuild && cross-env NODE_OPTIONS=--max-old-space-size=6144 next build --webpack && npm run postbuild",
"build-migrate-db": "bun run db:migrate",
"build-sitemap": "tsx ./scripts/buildSitemapIndex/index.ts",
"clean:node_modules": "bash -lc 'set -e; echo \"Removing all node_modules...\"; rm -rf node_modules; pnpm -r exec rm -rf node_modules; rm -rf apps/desktop/node_modules; echo \"All node_modules removed.\"'",
"db:generate": "drizzle-kit generate && npm run workflow:dbml",
"db:migrate": "MIGRATION_DB=1 tsx ./scripts/migrateServerDB/index.ts",
@@ -87,11 +87,11 @@
"start": "next start -p 3210",
"stylelint": "stylelint \"src/**/*.{js,jsx,ts,tsx}\" --fix",
"test": "npm run test-app && npm run test-server",
"test-app": "vitest run",
"test-app:coverage": "vitest --coverage --silent='passed-only'",
"test:e2e": "pnpm --filter @lobechat/e2e-tests test",
"test:e2e:smoke": "pnpm --filter @lobechat/e2e-tests test:smoke",
"test:update": "vitest -u",
"test-app": "vitest run",
"test-app:coverage": "vitest --coverage --silent='passed-only'",
"tunnel:cloudflare": "cloudflared tunnel --url http://localhost:3010",
"tunnel:ngrok": "ngrok http http://localhost:3011",
"type-check": "tsgo --noEmit",
@@ -133,6 +133,7 @@
]
},
"overrides": {
"pdfjs-dist": "5.4.530",
"stylelint-config-clean-order": "7.0.0"
},
"dependencies": {
@@ -206,11 +207,12 @@
"@lobehub/tts": "^4.0.2",
"@lobehub/ui": "^4.27.4",
"@modelcontextprotocol/sdk": "^1.25.1",
"@napi-rs/canvas": "^0.1.88",
"@neondatabase/serverless": "^1.0.2",
"@next/third-parties": "^16.1.1",
"@opentelemetry/exporter-jaeger": "^2.2.0",
"@opentelemetry/winston-transport": "^0.19.0",
"@react-pdf/renderer": "^4.3.1",
"@react-pdf/renderer": "^4.3.2",
"@react-three/drei": "^10.7.7",
"@react-three/fiber": "^9.4.2",
"@saintno/comfyui-sdk": "^0.2.49",
@@ -296,7 +298,7 @@
"path-browserify-esm": "^1.0.6",
"pathe": "^2.0.3",
"pdf-parse": "^1.1.4",
"pdfjs-dist": "4.8.69",
"pdfjs-dist": "5.4.530",
"pdfkit": "^0.17.2",
"pg": "^8.16.3",
"pino": "^10.1.0",
@@ -316,7 +318,7 @@
"react-hotkeys-hook": "^5.2.1",
"react-i18next": "^16.5.0",
"react-lazy-load": "^4.0.1",
"react-pdf": "^9.2.1",
"react-pdf": "^10.3.0",
"react-responsive": "^10.0.1",
"react-rnd": "^10.5.2",
"react-router-dom": "^7.11.0",

View File

@@ -30,7 +30,7 @@
"debug": "^4.4.3",
"mammoth": "^1.11.0",
"officeparser": "5.1.1",
"pdfjs-dist": "4.10.38",
"pdfjs-dist": "5.4.530",
"word-extractor": "^1.0.4",
"xlsx": "https://cdn.sheetjs.com/xlsx-0.20.3/xlsx-0.20.3.tgz",
"yauzl": "^3.2.0"

View File

@@ -2,10 +2,8 @@ import debug from 'debug';
import { stat } from 'node:fs/promises';
import * as path from 'node:path';
import { fileLoaders } from './loaders';
import { TextLoader } from './loaders/text';
import { FileDocument, FileMetadata, SupportedFileType } from './types';
import type { DocumentPage, FileLoaderInterface } from './types';
import { getFileLoader } from './loaders';
import type { DocumentPage, FileDocument, FileMetadata, SupportedFileType } from './types';
import { isTextReadableFile } from './utils/isTextReadableFile';
const log = debug('file-loaders:loadFile');
@@ -64,9 +62,6 @@ const getFileType = (filePath: string): SupportedFileType | undefined => {
}
};
// Default fallback loader class
const DefaultLoader = TextLoader;
/**
* Loads a file from the specified path, automatically detecting the file type
* and using the appropriate loader class.
@@ -113,18 +108,18 @@ export const loadFile = async (
source,
});
const paserType = getFileType(filePath);
log('Parser type determined as:', paserType);
const parserType = getFileType(filePath);
log('Parser type determined as:', parserType);
// Select the loader CLASS based on the determined fileType, fallback to DefaultLoader
const LoaderClass: new () => FileLoaderInterface = paserType
? fileLoaders[paserType]
: DefaultLoader;
// Use lazy loading to get the loader class - this prevents heavy dependencies
// like pdfjs-dist from being loaded until they're actually needed
const loaderType = parserType ?? 'txt';
const LoaderClass = await getFileLoader(loaderType);
log('Selected loader class:', LoaderClass.name);
if (!paserType) {
if (!parserType) {
console.warn(
`No specific loader found for file type '${fileType}'. Using default loader (${DefaultLoader.name}) as fallback.`,
`No specific loader found for file type '${fileType}'. Using default loader (TextLoader) as fallback.`,
);
}

View File

@@ -1,21 +1,70 @@
import { FileLoaderInterface, SupportedFileType } from '../types';
import { DocLoader } from './doc';
import { DocxLoader } from './docx';
// import { EpubLoader } from './epub';
import { ExcelLoader } from './excel';
import { PdfLoader } from './pdf';
import { PptxLoader } from './pptx';
import { TextLoader } from './text';
import type { FileLoaderInterface, SupportedFileType } from '../types';
// Loader configuration map
// Key: file extension (lowercase, without leading dot) or specific type name
// Value: Loader Class implementing FileLoaderInterface
export const fileLoaders: Record<SupportedFileType, new () => FileLoaderInterface> = {
doc: DocLoader,
docx: DocxLoader,
// epub: EpubLoader,
excel: ExcelLoader,
pdf: PdfLoader,
pptx: PptxLoader,
txt: TextLoader,
// Lazy loader factory type - returns a Promise that resolves to the loader class
type LazyLoaderFactory = () => Promise<new () => FileLoaderInterface>;

/**
 * Best-effort polyfill of the DOM geometry/canvas globals needed before
 * importing pdfjs-dist outside a browser.
 *
 * pdfjs-dist 5.x uses DOMMatrix at module initialization, which doesn't exist
 * in Node.js. When DOMMatrix is missing, the implementations shipped with
 * `@napi-rs/canvas` are installed on `globalThis`. Globals that are already
 * defined are left untouched so an existing implementation is never replaced.
 */
const polyfillDomGlobalsForPdfjs = (): void => {
  // Nothing to do when the environment already provides DOMMatrix
  if (typeof globalThis.DOMMatrix !== 'undefined') return;

  try {
    // eslint-disable-next-line @typescript-eslint/no-require-imports
    const canvas = require('@napi-rs/canvas');

    globalThis.DOMMatrix = canvas.DOMMatrix;
    // Only fill in what is actually missing; do not clobber existing globals
    if (typeof globalThis.DOMPoint === 'undefined') globalThis.DOMPoint = canvas.DOMPoint;
    if (typeof globalThis.DOMRect === 'undefined') globalThis.DOMRect = canvas.DOMRect;
    if (typeof globalThis.Path2D === 'undefined') globalThis.Path2D = canvas.Path2D;
  } catch {
    // @napi-rs/canvas not available, pdfjs-dist may fail if DOMMatrix is needed.
    // Deliberately swallowed: the polyfill is optional and must not break loading.
  }
};

/**
 * Loader configuration map using lazy imports.
 *
 * Key: supported file type. Value: factory that dynamically imports the loader
 * module and resolves to its loader class. Nothing is imported until a factory
 * is called, so pdfjs-dist is not loaded at module initialization and is only
 * loaded when PDF files need to be processed.
 */
const lazyFileLoaders: Record<SupportedFileType, LazyLoaderFactory> = {
  doc: async () => {
    const { DocLoader } = await import('./doc');
    return DocLoader;
  },
  docx: async () => {
    const { DocxLoader } = await import('./docx');
    return DocxLoader;
  },
  excel: async () => {
    const { ExcelLoader } = await import('./excel');
    return ExcelLoader;
  },
  pdf: async () => {
    // Must run before the import below: the polyfill has to be in place when
    // the pdf loader module (and with it pdfjs-dist) is first evaluated.
    polyfillDomGlobalsForPdfjs();
    const { PdfLoader } = await import('./pdf');
    return PdfLoader;
  },
  pptx: async () => {
    const { PptxLoader } = await import('./pptx');
    return PptxLoader;
  },
  txt: async () => {
    const { TextLoader } = await import('./text');
    return TextLoader;
  },
};
/**
 * Get a file loader class for the specified file type.
 *
 * Uses dynamic imports to avoid loading heavy dependencies (like pdfjs-dist)
 * until a loader is actually requested.
 *
 * @param fileType - A supported file type key (e.g. `pdf`, `docx`) or any other string.
 * @returns The loader class registered for `fileType`, or `TextLoader` when no
 * loader is registered under that exact key.
 */
export const getFileLoader = async (
  fileType: SupportedFileType | string,
): Promise<new () => FileLoaderInterface> => {
  // Only consult the map's own keys. A plain property read would also match
  // inherited members such as `constructor` or `toString`, which are truthy
  // but are not loader factories, so the fallback would be skipped.
  const isRegistered = Object.prototype.hasOwnProperty.call(lazyFileLoaders, fileType);

  // Fallback to TextLoader for unsupported file types
  const loaderFactory = isRegistered
    ? lazyFileLoaders[fileType as SupportedFileType]
    : lazyFileLoaders.txt;

  return loaderFactory();
};
// Exposes the lazy factory map under the name `fileLoaderFactories`.
// Importing it does NOT load any loader module: every entry is a factory that
// performs its dynamic import only when called. Prefer getFileLoader, which also
// handles the TextLoader fallback.
// NOTE(review): values are async factories, not loader classes, so this is not a
// drop-in replacement for a map of loader classes - confirm callers expect that.
export { lazyFileLoaders as fileLoaderFactories };

View File

@@ -52,7 +52,7 @@ exports[`PdfLoader > should attach document metadata correctly 1`] = `
"Title": "test",
},
"pdfMetadata": null,
"pdfVersion": "4.10.38",
"pdfVersion": "5.4.530",
}
`;

View File

@@ -25,7 +25,7 @@ exports[`loadFile Integration Tests > PDF Handling > should load content from a
"Title": "test",
},
"pdfMetadata": null,
"pdfVersion": "4.10.38",
"pdfVersion": "5.4.530",
},
},
"pages": [

View File

@@ -13,6 +13,7 @@ overrides:
'@swagger-api/apidom-reference': 1.1.0
jose: ^6.1.3
stylelint-config-clean-order: 7.0.0
pdfjs-dist: 5.4.530
patchedDependencies:
'@swagger-api/apidom-reference': patches/@swagger-api__apidom-reference.patch

View File

@@ -1,6 +1,6 @@
import { type StateCreator } from 'zustand/vanilla';
import { type ResouceManagerMode } from '@/features/ResourceManager';
import { type ResourceManagerMode } from '@/features/ResourceManager';
import { type FilesTabs, SortType } from '@/types/files';
import { type State, type ViewMode, initialState } from './initialState';
@@ -67,7 +67,7 @@ export interface Action {
/**
* Set the view mode
*/
setMode: (mode: ResouceManagerMode) => void;
setMode: (mode: ResourceManagerMode) => void;
/**
* Set the pending rename item ID
*/

View File

@@ -1,4 +1,4 @@
import { type ResouceManagerMode } from '@/features/ResourceManager';
import { type ResourceManagerMode } from '@/features/ResourceManager';
import { FilesTabs, SortType } from '@/types/files';
export type ViewMode = 'list' | 'masonry';
@@ -39,7 +39,7 @@ export interface State {
/**
* View mode for displaying resources
*/
mode: ResouceManagerMode;
mode: ResourceManagerMode;
/**
* ID of item currently being renamed (for inline editing)
*/

View File

@@ -1,6 +1,6 @@
import { type StateCreator } from 'zustand/vanilla';
import { type ResouceManagerMode } from '@/features/ResourceManager';
import { type ResourceManagerMode } from '@/features/ResourceManager';
import { type State, initialState } from './initialState';
@@ -12,7 +12,7 @@ export interface Action {
/**
* Set the view mode
*/
setMode: (mode: ResouceManagerMode) => void;
setMode: (mode: ResourceManagerMode) => void;
/**
* Set selected file IDs
*/

View File

@@ -1,8 +1,8 @@
import { type ResouceManagerMode } from '@/features/ResourceManager';
import { type ResourceManagerMode } from '@/features/ResourceManager';
export interface State {
currentViewItemId?: string;
mode: ResouceManagerMode;
mode: ResourceManagerMode;
selectedFileIds: string[];
}

View File

@@ -1,22 +1,19 @@
'use client';
import { Flexbox } from '@lobehub/ui';
import type { PDFDocumentProxy } from 'pdfjs-dist';
import { Fragment, memo, useCallback, useState } from 'react';
import { Document, Page, pdfjs } from 'react-pdf';
import 'react-pdf/dist/esm/Page/AnnotationLayer.css';
import 'react-pdf/dist/esm/Page/TextLayer.css';
import 'react-pdf/dist/Page/AnnotationLayer.css';
import 'react-pdf/dist/Page/TextLayer.css';
import NeuralNetworkLoading from '@/components/NeuralNetworkLoading';
import '@/libs/pdfjs/worker';
import { lambdaQuery } from '@/libs/trpc/client';
import HighlightLayer from './HighlightLayer';
import { styles } from './style';
import useResizeObserver from './useResizeObserver';
// 如果海外的地址: https://unpkg.com/pdfjs-dist@${pdfjs.version}/build/pdf.worker.min.mjs
pdfjs.GlobalWorkerOptions.workerSrc = `https://registry.npmmirror.com/pdfjs-dist/${pdfjs.version}/files/build/pdf.worker.min.mjs`;
const options = {
cMapUrl: `https://registry.npmmirror.com/pdfjs-dist/${pdfjs.version}/files/cmaps/`,
standardFontDataUrl: `https://registry.npmmirror.com/pdfjs-dist/${pdfjs.version}/files/standard_fonts/`,
@@ -46,8 +43,8 @@ const PDFViewer = memo<PDFViewerProps>(({ url, fileId }) => {
useResizeObserver(containerRef, onResize);
const onDocumentLoadSuccess = ({ numPages: nextNumPages }: PDFDocumentProxy) => {
setNumPages(nextNumPages);
const onDocumentLoadSuccess = (document: unknown) => {
setNumPages((document as { numPages: number }).numPages);
setIsLoaded(true);
};

View File

@@ -41,7 +41,6 @@ const styles = createStaticStyles(({ css }) => {
cursor: pointer;
min-width: 800px;
/* Hover effect for individual rows */
&:hover {
background: ${cssVar.colorFillTertiary};
}
@@ -194,7 +193,6 @@ const FileListItem = memo<FileListItemProps>(
const [isDragging, setIsDragging] = useState(false);
const [isOver, setIsOver] = useState(false);
// Memoize computed values that don't change
const computedValues = useMemo(() => {
const isPDF = fileType?.toLowerCase() === 'pdf' || name?.toLowerCase().endsWith('.pdf');
return {
@@ -208,7 +206,6 @@ const FileListItem = memo<FileListItemProps>(
const { isSupportedForChunking, isPage, isFolder, emoji } = computedValues;
// Memoize drag data to prevent recreation
const dragData = useMemo(
() => ({
fileType,
@@ -219,7 +216,6 @@ const FileListItem = memo<FileListItemProps>(
[fileType, isFolder, name, sourceType],
);
// Native HTML5 drag event handlers
const handleDragStart = useCallback(
(e: DragEvent) => {
if (!resourceManagerState.libraryId) {
@@ -264,7 +260,6 @@ const FileListItem = memo<FileListItemProps>(
}, []);
const handleDrop = useCallback(() => {
// Clear the highlight after drop
setIsOver(false);
}, []);
@@ -359,7 +354,6 @@ const FileListItem = memo<FileListItemProps>(
{ replace: true },
);
} else {
// Set mode to file and store the file ID
resourceManagerState.setCurrentViewItemId(id);
resourceManagerState.setMode('editor');
// Also update URL query parameter for shareable links
@@ -378,7 +372,6 @@ const FileListItem = memo<FileListItemProps>(
useEffect(() => {
if (pendingRenameItemId === id && isFolder && !isRenaming) {
handleRenameStart();
// Clear the pending rename item after triggering
resourceManagerState.setPendingRenameItemId(null);
}
}, [pendingRenameItemId, id, isFolder, resourceManagerState]);
@@ -575,7 +568,6 @@ const FileListItem = memo<FileListItemProps>(
},
// Custom comparison function to prevent unnecessary re-renders
(prevProps, nextProps) => {
// Only re-render if these critical props change
return (
prevProps.id === nextProps.id &&
prevProps.name === nextProps.name &&

View File

@@ -48,7 +48,7 @@ const styles = createStaticStyles(({ css, cssVar }) => {
};
});
export type ResouceManagerMode = 'editor' | 'explorer' | 'page';
export type ResourceManagerMode = 'editor' | 'explorer' | 'page';
/**
* Manage resources. Can be from a certain library.

View File

@@ -1,3 +1,5 @@
'use client';
import { LoadingOutlined } from '@ant-design/icons';
import { Button, Flexbox } from '@lobehub/ui';
import { Input, Modal, Spin } from 'antd';
@@ -5,15 +7,13 @@ import { createStaticStyles, cx } from 'antd-style';
import { ChevronLeft, ChevronRight, Expand, FileText } from 'lucide-react';
import { memo, useState } from 'react';
import { useTranslation } from 'react-i18next';
import { Document, Page, pdfjs } from 'react-pdf';
import { Document, Page } from 'react-pdf';
import { useIsMobile } from '@/hooks/useIsMobile';
import '@/libs/pdfjs/worker';
import { containerStyles } from '../style';
// Set PDF.js worker
pdfjs.GlobalWorkerOptions.workerSrc = `https://registry.npmmirror.com/pdfjs-dist/${pdfjs.version}/files/build/pdf.worker.min.mjs`;
const styles = createStaticStyles(({ css }) => ({
containerWrapper: css`
position: relative;

View File

@@ -17,16 +17,14 @@ interface CustomNextConfig {
export function defineConfig(config: CustomNextConfig) {
const isProd = process.env.NODE_ENV === 'production';
const buildWithDocker = process.env.DOCKER === 'true';
const isDesktop = process.env.NEXT_PUBLIC_IS_DESKTOP_APP === '1';
const enableReactScan = !!process.env.REACT_SCAN_MONITOR_API_KEY;
const shouldUseCSP = process.env.ENABLED_CSP === '1';
const isTest =
process.env.NODE_ENV === 'test' || process.env.TEST === '1' || process.env.E2E === '1';
// if you need to proxy the api endpoint to remote server
const isStandaloneMode = buildWithDocker || isDesktop;
const isStandaloneMode = buildWithDocker || process.env.NEXT_BUILD_STANDALONE === '1';
const standaloneConfig: NextConfig = {
output: 'standalone',
@@ -38,6 +36,7 @@ export function defineConfig(config: CustomNextConfig) {
const nextConfig: NextConfig = {
...(isStandaloneMode ? standaloneConfig : {}),
assetPrefix,
compiler: {
emotion: true,
},
@@ -321,13 +320,14 @@ export function defineConfig(config: CustomNextConfig) {
},
...(config.redirects ?? []),
],
// when external packages in dev mode with turbopack, this config will lead to bundle error
// @napi-rs/canvas is a native module that can't be bundled by Turbopack
// pdfjs-dist uses @napi-rs/canvas for DOMMatrix polyfill in Node.js environment
serverExternalPackages: config.serverExternalPackages
? config.serverExternalPackages
: ['pdfkit'],
: ['pdfkit', '@napi-rs/canvas', 'pdfjs-dist'],
transpilePackages: ['pdfjs-dist', 'mermaid', 'better-auth-harmony'],
transpilePackages: ['mermaid', 'better-auth-harmony'],
turbopack: {
rules: isTest
? void 0
@@ -406,14 +406,13 @@ export function defineConfig(config: CustomNextConfig) {
const withBundleAnalyzer = process.env.ANALYZE === 'true' ? analyzer() : noWrapper;
const withPWA =
isProd && !isDesktop
? withSerwistInit({
register: false,
swDest: 'public/sw.js',
swSrc: 'src/app/sw.ts',
})
: noWrapper;
const withPWA = isProd
? withSerwistInit({
register: false,
swDest: 'public/sw.js',
swSrc: 'src/app/sw.ts',
})
: noWrapper;
return withBundleAnalyzer(withPWA(nextConfig as NextConfig));
}

View File

@@ -0,0 +1 @@
import 'pdfjs-dist/build/pdf.worker.min.mjs';

12
src/libs/pdfjs/worker.ts Normal file
View File

@@ -0,0 +1,12 @@
'use client';
import { pdfjs } from 'react-pdf';
// Side-effect module: importing it configures the pdf.js worker for react-pdf.
// The worker script is fetched from the npmmirror registry, pinned to the exact
// pdfjs version bundled with react-pdf so the API and worker versions match.
const pdfWorkerUrl = `https://registry.npmmirror.com/pdfjs-dist/${pdfjs.version}/files/build/pdf.worker.min.mjs`;
pdfjs.GlobalWorkerOptions.workerSrc = pdfWorkerUrl;

// TODO: Re-enable module worker when fully on Turbopack.
// if (typeof Worker !== 'undefined' && !pdfjs.GlobalWorkerOptions.workerPort) {
//   pdfjs.GlobalWorkerOptions.workerPort = new Worker(new URL('./pdf.worker.ts', import.meta.url), {
//     type: 'module',
//   });
// }