diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index bd342385..bf2256a7 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -17,7 +17,7 @@ body: description: > Please search the MinerU [Readme](https://github.com/opendatalab/MinerU), [Issues](https://github.com/opendatalab/MinerU/issues) and [Discussions](https://github.com/opendatalab/MinerU/discussions) to see if a similar bug report already exists. options: - - label: I have searched the MinerU [Docs](https://github.com/opendatalab/MinerU) and found no similar bug report. + - label: I have searched the MinerU [Readme](https://github.com/opendatalab/MinerU) and found no similar bug report. required: true - label: I have searched the MinerU [Issues](https://github.com/opendatalab/MinerU/issues) and found no similar bug report. required: true diff --git a/magic_pdf/data/read_api.py b/magic_pdf/data/read_api.py index c76212e7..9e52af6d 100644 --- a/magic_pdf/data/read_api.py +++ b/magic_pdf/data/read_api.py @@ -116,7 +116,7 @@ def read_local_office(path: str) -> list[PymuDocDataset]: shutil.rmtree(temp_dir) return ret -def read_local_images(path: str, suffixes: list[str]=['.png', '.jpg']) -> list[ImageDataset]: +def read_local_images(path: str, suffixes: list[str]=['.png', '.jpg', '.jpeg']) -> list[ImageDataset]: """Read images from path or directory. Args: diff --git a/projects/README.md b/projects/README.md index 3eca3cec..915af527 100644 --- a/projects/README.md +++ b/projects/README.md @@ -4,6 +4,6 @@ - [llama_index_rag](./llama_index_rag/README.md): Build a lightweight RAG system based on llama_index - [gradio_app](./gradio_app/README.md): Build a web app based on gradio -- [web_demo](./web_demo/README.md): MinerU online [demo](https://opendatalab.com/OpenSourceTools/Extractor/PDF/) localized deployment version +- ~~[web_demo](./web_demo/README.md): MinerU online [demo](https://opendatalab.com/OpenSourceTools/Extractor/PDF/) localized deployment version~~(Deprecated) - [web_api](./web_api/README.md): Web API Based on FastAPI - [multi_gpu](./multi_gpu/README.md): Multi-GPU parallel processing based on LitServe diff --git a/projects/README_zh-CN.md b/projects/README_zh-CN.md index 96374cd3..870b5ea7 100644 --- a/projects/README_zh-CN.md +++ b/projects/README_zh-CN.md @@ -4,6 +4,6 @@ - [llama_index_rag](./llama_index_rag/README_zh-CN.md): 基于 llama_index 构建轻量级 RAG 系统 - [gradio_app](./gradio_app/README_zh-CN.md): 基于 Gradio 的 Web 应用 -- [web_demo](./web_demo/README_zh-CN.md): MinerU在线[demo](https://opendatalab.com/OpenSourceTools/Extractor/PDF/)本地化部署版本 +- ~~[web_demo](./web_demo/README_zh-CN.md): MinerU在线[demo](https://opendatalab.com/OpenSourceTools/Extractor/PDF/)本地化部署版本~~(已过时) - [web_api](./web_api/README.md): 基于 FastAPI 的 Web API - [multi_gpu](./multi_gpu/README.md): 基于 LitServe 的多 GPU 并行处理 diff --git a/projects/web_api/app.py b/projects/web_api/app.py index 5a67bdf7..2980e6a0 100644 --- a/projects/web_api/app.py +++ b/projects/web_api/app.py @@ -28,7 +28,7 @@ app = FastAPI() pdf_extensions = [".pdf"] office_extensions = [".ppt", ".pptx", ".doc", ".docx"] -image_extensions = [".png", ".jpg"] +image_extensions = [".png", ".jpg", ".jpeg"] class MemoryDataWriter(DataWriter): def __init__(self): @@ -128,7 +128,7 @@ def process_file( Tuple[InferenceResult, PipeResult]: Returns inference result and pipeline result """ - ds = Union[PymuDocDataset, ImageDataset] + ds: Union[PymuDocDataset, ImageDataset] = None if file_extension in pdf_extensions: ds = PymuDocDataset(file_bytes) elif file_extension in office_extensions: