diff --git a/.gitignore b/.gitignore index b6ab4538..be92906a 100644 --- a/.gitignore +++ b/.gitignore @@ -48,3 +48,6 @@ debug_utils/ # sphinx docs _build/ + + +output/ \ No newline at end of file diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 09a7b6b1..974d040f 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -10,7 +10,7 @@ formats: python: install: - - requirements: next_docs/zh_cn/requirements.txt + - requirements: docs/zh_cn/requirements.txt sphinx: - configuration: next_docs/zh_cn/conf.py + configuration: docs/zh_cn/conf.py diff --git a/README.md b/README.md index 6ad661a1..5dcb386a 100644 --- a/README.md +++ b/README.md @@ -110,9 +110,7 @@ https://github.com/user-attachments/assets/4bea02c9-6d54-4cd6-97ed-dff14340982c ## Quick Start -If you encounter any installation issues, please first consult the FAQ. -If the parsing results are not as expected, refer to the Known Issues. -There are three different ways to experience MinerU: +There are multiple different ways to experience MinerU: - [Online Demo (No Installation Required)](#online-demo) - [Quick CPU Demo (Windows, Linux, Mac)](#quick-cpu-demo) @@ -172,7 +170,6 @@ You can modify certain configurations in this file to enable or disable features } ``` - ## Usage ### API diff --git a/README_zh-CN.md b/README_zh-CN.md index 2918bb5e..2c69307e 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -76,12 +76,10 @@
| 操作系统 | -|||||
| Ubuntu 22.04 LTS | -Windows 10 / 11 | -macOS 11+ | -|||
| CPU | -x86_64(暂不支持ARM Linux) | -x86_64(暂不支持ARM Windows) | -x86_64 / arm64 | -||
| 内存 | -大于等于16GB,推荐32G以上 | -||||
| python版本 | -3.10 (请务必通过conda创建3.10虚拟环境) | -||||
| Nvidia Driver 版本 | -latest(专有驱动) | -latest | -None | -||
| CUDA环境 | -自动安装[12.1(pytorch)+11.8(paddle)] | -11.8(手动安装)+cuDNN v8.7.0(手动安装) | -None | -||
| GPU硬件支持列表 | -最低要求 8G+显存 | -3060ti/3070/4060 - 8G显存可开启layout、公式识别和ocr加速 |
- None | -||
| 推荐配置 10G+显存 | -3080/3080ti/3090/3090ti/4070/4070ti/4070tisuper/4080/4090 - 10G显存及以上可以同时开启layout、公式识别和ocr加速和表格识别加速 - |
- ||||
+ 一站式、高质量的开源文档提取工具 + +
+ + + + +项目介绍 +-------------------- + +MinerU是一款将PDF转化为机器可读格式的工具(如markdown、json),可以很方便地抽取为任意格式。 +MinerU诞生于\ `书生-浦语| 操作系统 | +|||||
| Ubuntu 22.04 LTS | +Windows 10 / 11 | +macOS 11+ | +|||
| CPU | +x86_64(暂不支持ARM Linux) | +x86_64(暂不支持ARM Windows) | +x86_64 / arm64 | +||
| 内存 | +大于等于16GB,推荐32G以上 | +||||
| python版本 | +3.10 (请务必通过conda创建3.10虚拟环境) | +||||
| Nvidia Driver 版本 | +latest(专有驱动) | +latest | +None | +||
| CUDA环境 | +自动安装[12.1(pytorch)+11.8(paddle)] | +11.8(手动安装)+cuDNN v8.7.0(手动安装) | +None | +||
| GPU硬件支持列表 | +最低要求 8G+显存 | +3060ti/3070/4060 + 8G显存可开启layout、公式识别和ocr加速 |
+ None | +||
| 推荐配置 10G+显存 | +3080/3080ti/3090/3090ti/4070/4070ti/4070tisuper/4080/4090 + 10G显存及以上可以同时开启layout、公式识别和ocr加速和表格识别加速 + |
+ ||||
- 一站式开源高质量数据提取工具 - -
- - diff --git a/docs/FAQ_en_us.md b/old_docs/FAQ_en_us.md similarity index 100% rename from docs/FAQ_en_us.md rename to old_docs/FAQ_en_us.md diff --git a/docs/FAQ_zh_cn.md b/old_docs/FAQ_zh_cn.md similarity index 100% rename from docs/FAQ_zh_cn.md rename to old_docs/FAQ_zh_cn.md diff --git a/docs/README_Ubuntu_CUDA_Acceleration_en_US.md b/old_docs/README_Ubuntu_CUDA_Acceleration_en_US.md similarity index 100% rename from docs/README_Ubuntu_CUDA_Acceleration_en_US.md rename to old_docs/README_Ubuntu_CUDA_Acceleration_en_US.md diff --git a/docs/README_Ubuntu_CUDA_Acceleration_zh_CN.md b/old_docs/README_Ubuntu_CUDA_Acceleration_zh_CN.md similarity index 100% rename from docs/README_Ubuntu_CUDA_Acceleration_zh_CN.md rename to old_docs/README_Ubuntu_CUDA_Acceleration_zh_CN.md diff --git a/docs/README_Windows_CUDA_Acceleration_en_US.md b/old_docs/README_Windows_CUDA_Acceleration_en_US.md similarity index 100% rename from docs/README_Windows_CUDA_Acceleration_en_US.md rename to old_docs/README_Windows_CUDA_Acceleration_en_US.md diff --git a/docs/README_Windows_CUDA_Acceleration_zh_CN.md b/old_docs/README_Windows_CUDA_Acceleration_zh_CN.md similarity index 100% rename from docs/README_Windows_CUDA_Acceleration_zh_CN.md rename to old_docs/README_Windows_CUDA_Acceleration_zh_CN.md diff --git a/docs/chemical_knowledge_introduction/introduction.pdf b/old_docs/chemical_knowledge_introduction/introduction.pdf similarity index 100% rename from docs/chemical_knowledge_introduction/introduction.pdf rename to old_docs/chemical_knowledge_introduction/introduction.pdf diff --git a/docs/chemical_knowledge_introduction/introduction.xmind b/old_docs/chemical_knowledge_introduction/introduction.xmind similarity index 100% rename from docs/chemical_knowledge_introduction/introduction.xmind rename to old_docs/chemical_knowledge_introduction/introduction.xmind diff --git a/docs/download_models.py b/old_docs/download_models.py similarity index 100% rename from docs/download_models.py rename to old_docs/download_models.py diff --git a/docs/download_models_hf.py b/old_docs/download_models_hf.py similarity index 100% rename from docs/download_models_hf.py rename to old_docs/download_models_hf.py diff --git a/docs/how_to_download_models_en.md b/old_docs/how_to_download_models_en.md similarity index 100% rename from docs/how_to_download_models_en.md rename to old_docs/how_to_download_models_en.md diff --git a/docs/how_to_download_models_zh_cn.md b/old_docs/how_to_download_models_zh_cn.md similarity index 100% rename from docs/how_to_download_models_zh_cn.md rename to old_docs/how_to_download_models_zh_cn.md diff --git a/old_docs/images/MinerU-logo-hq.png b/old_docs/images/MinerU-logo-hq.png new file mode 100644 index 00000000..7b33fcda Binary files /dev/null and b/old_docs/images/MinerU-logo-hq.png differ diff --git a/old_docs/images/MinerU-logo.png b/old_docs/images/MinerU-logo.png new file mode 100644 index 00000000..2e6fdf3a Binary files /dev/null and b/old_docs/images/MinerU-logo.png differ diff --git a/old_docs/images/datalab_logo.png b/old_docs/images/datalab_logo.png new file mode 100644 index 00000000..5019ae7c Binary files /dev/null and b/old_docs/images/datalab_logo.png differ diff --git a/old_docs/images/flowchart_en.png b/old_docs/images/flowchart_en.png new file mode 100644 index 00000000..b490011e Binary files /dev/null and b/old_docs/images/flowchart_en.png differ diff --git a/old_docs/images/flowchart_zh_cn.png b/old_docs/images/flowchart_zh_cn.png new file mode 100644 index 00000000..32e0a142 Binary files /dev/null and b/old_docs/images/flowchart_zh_cn.png differ diff --git a/old_docs/images/layout_example.png b/old_docs/images/layout_example.png new file mode 100644 index 00000000..14e2116f Binary files /dev/null and b/old_docs/images/layout_example.png differ diff --git a/old_docs/images/poly.png b/old_docs/images/poly.png new file mode 100644 index 00000000..14af7726 Binary files /dev/null and b/old_docs/images/poly.png differ diff --git a/old_docs/images/project_panorama_en.png b/old_docs/images/project_panorama_en.png new file mode 100644 index 00000000..19616da6 Binary files /dev/null and b/old_docs/images/project_panorama_en.png differ diff --git a/old_docs/images/project_panorama_zh_cn.png b/old_docs/images/project_panorama_zh_cn.png new file mode 100644 index 00000000..3cd6843e Binary files /dev/null and b/old_docs/images/project_panorama_zh_cn.png differ diff --git a/old_docs/images/spans_example.png b/old_docs/images/spans_example.png new file mode 100644 index 00000000..14de87ed Binary files /dev/null and b/old_docs/images/spans_example.png differ diff --git a/old_docs/images/web_demo_1.png b/old_docs/images/web_demo_1.png new file mode 100644 index 00000000..04adff26 Binary files /dev/null and b/old_docs/images/web_demo_1.png differ diff --git a/docs/output_file_en_us.md b/old_docs/output_file_en_us.md similarity index 100% rename from docs/output_file_en_us.md rename to old_docs/output_file_en_us.md diff --git a/docs/output_file_zh_cn.md b/old_docs/output_file_zh_cn.md similarity index 100% rename from docs/output_file_zh_cn.md rename to old_docs/output_file_zh_cn.md