mirror of
https://github.com/opendatalab/MinerU.git
synced 2026-03-27 19:18:34 +07:00
Compare commits
38 Commits
release-2.
...
release-2.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e853563182 | ||
|
|
238bf86e6f | ||
|
|
e387233c7d | ||
|
|
868a7a5402 | ||
|
|
9b28ed8a7a | ||
|
|
5fe068d441 | ||
|
|
cdd7bef996 | ||
|
|
4156a2b89d | ||
|
|
2c702890a4 | ||
|
|
3c8385c2c6 | ||
|
|
d29cf4e076 | ||
|
|
ec85af39dc | ||
|
|
b40c432741 | ||
|
|
1cd683b944 | ||
|
|
6162ae2be1 | ||
|
|
fa9aaaa7b7 | ||
|
|
ac5db5d455 | ||
|
|
0031981e60 | ||
|
|
c47faa4d4f | ||
|
|
5c579d8919 | ||
|
|
e8865a679a | ||
|
|
68e8a00d8b | ||
|
|
c0cf62e4cc | ||
|
|
21ff17a65d | ||
|
|
cdf6e0cfd0 | ||
|
|
ec3adde809 | ||
|
|
d58b24b5dd | ||
|
|
bd5252d946 | ||
|
|
b398a2d2b8 | ||
|
|
bfaf07c69f | ||
|
|
c8904da6d3 | ||
|
|
3854bd0fa0 | ||
|
|
38dfe835e4 | ||
|
|
5b26a38726 | ||
|
|
80b5e4fe8a | ||
|
|
45a282fa4e | ||
|
|
e9175b1937 | ||
|
|
8dae3ff1ad |
7
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
7
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
@@ -109,14 +109,11 @@ body:
|
||||
- type: dropdown
|
||||
id: software_version
|
||||
attributes:
|
||||
label: Software version | 软件版本 (magic-pdf --version)
|
||||
label: Software version | 软件版本 (mineru --version)
|
||||
#multiple: false
|
||||
options:
|
||||
-
|
||||
- "1.0.x"
|
||||
- "1.1.x"
|
||||
- "1.2.x"
|
||||
- "1.3.x"
|
||||
- "2.0.x"
|
||||
validations:
|
||||
required: true
|
||||
|
||||
|
||||
21
README.md
21
README.md
@@ -10,16 +10,13 @@
|
||||
[](https://github.com/opendatalab/MinerU)
|
||||
[](https://github.com/opendatalab/MinerU/issues)
|
||||
[](https://github.com/opendatalab/MinerU/issues)
|
||||
|
||||
[](https://pypi.org/project/mineru/)
|
||||
[](https://pypi.org/project/mineru/)
|
||||
[](https://pepy.tech/project/mineru)
|
||||
[](https://pepy.tech/project/mineru)
|
||||
|
||||
[](https://mineru.net/OpenSourceTools/Extractor?source=github)
|
||||
[](https://huggingface.co/spaces/opendatalab/MinerU)
|
||||
[](https://www.modelscope.cn/studios/OpenDataLab/MinerU)
|
||||
|
||||
[](https://huggingface.co/spaces/opendatalab/mineru2)
|
||||
[](https://colab.research.google.com/gist/myhloli/3b3a00a4a0a61577b6c30f989092d20d/mineru_demo.ipynb)
|
||||
[](https://arxiv.org/abs/2409.18839)
|
||||
@@ -51,8 +48,12 @@ Easier to use: Just grab MinerU Desktop. No coding, no login, just a simple inte
|
||||
</div>
|
||||
|
||||
# Changelog
|
||||
- 2025/06/17 2.0.4 Released
|
||||
- 2025/06/20 2.0.6 Released
|
||||
- Fixed occasional parsing interruptions caused by invalid block content in `vlm` mode
|
||||
- Fixed parsing interruptions caused by incomplete table structures in `vlm` mode
|
||||
- 2025/06/17 2.0.5 Released
|
||||
- Fixed the issue where models were still required to be downloaded in the `sglang-client` mode
|
||||
- Fixed the issue where the `sglang-client` mode unnecessarily depended on packages like `torch` at runtime
|
||||
- Fixed the issue where only the first instance would take effect when attempting to launch multiple `sglang-client` instances via multiple URLs within the same process
|
||||
- 2025/06/15 2.0.3 Released
|
||||
- Fixed a configuration file key-value update error that occurred when downloading model type was set to `all`
|
||||
@@ -501,7 +502,11 @@ cd MinerU
|
||||
uv pip install -e .[core]
|
||||
```
|
||||
|
||||
#### 1.3 Install the Full Version (Supports sglang Acceleration)
|
||||
> [!TIP]
|
||||
> Linux and macOS systems automatically support CUDA/MPS acceleration after installation. For Windows users who want to use CUDA acceleration,
|
||||
> please visit the [PyTorch official website](https://pytorch.org/get-started/locally/) to install PyTorch with the appropriate CUDA version.
|
||||
|
||||
#### 1.3 Install Full Version (supports sglang acceleration) (requires a GPU with Ampere or newer architecture and at least 24GB of VRAM)
|
||||
|
||||
If you need to use **sglang to accelerate VLM model inference**, you can choose any of the following methods to install the full version:
|
||||
|
||||
@@ -660,6 +665,12 @@ mineru -p <input_path> -o <output_path> -b vlm-sglang-engine
|
||||
mineru-sglang-server --port 30000
|
||||
```
|
||||
|
||||
> [!TIP]
|
||||
> sglang acceleration requires a GPU with Ampere architecture or newer, and at least 24GB VRAM. If you have two 12GB or 16GB GPUs, you can use Tensor Parallelism (TP) mode:
|
||||
> `mineru-sglang-server --port 30000 --tp 2`
|
||||
>
|
||||
> If you still encounter out-of-memory errors with two GPUs, or if you need to improve throughput or inference speed using multi-GPU parallelism, please refer to the [sglang official documentation](https://docs.sglang.ai/backend/server_arguments.html#common-launch-commands).
|
||||
|
||||
2. Use Client in another terminal:
|
||||
|
||||
```bash
|
||||
|
||||
@@ -10,16 +10,13 @@
|
||||
[](https://github.com/opendatalab/MinerU)
|
||||
[](https://github.com/opendatalab/MinerU/issues)
|
||||
[](https://github.com/opendatalab/MinerU/issues)
|
||||
|
||||
[](https://pypi.org/project/mineru/)
|
||||
[](https://pypi.org/project/mineru/)
|
||||
[](https://pepy.tech/project/mineru)
|
||||
[](https://pepy.tech/project/mineru)
|
||||
|
||||
[](https://mineru.net/OpenSourceTools/Extractor?source=github)
|
||||
[](https://www.modelscope.cn/studios/OpenDataLab/MinerU)
|
||||
[](https://huggingface.co/spaces/opendatalab/MinerU)
|
||||
|
||||
[](https://huggingface.co/spaces/opendatalab/mineru2)
|
||||
[](https://colab.research.google.com/gist/myhloli/3b3a00a4a0a61577b6c30f989092d20d/mineru_demo.ipynb)
|
||||
[](https://arxiv.org/abs/2409.18839)
|
||||
@@ -50,8 +47,12 @@
|
||||
</div>
|
||||
|
||||
# 更新记录
|
||||
- 2025/06/17 2.0.4发布
|
||||
- 2025/06/20 2.0.6发布
|
||||
- 修复`vlm`模式下,某些偶发的无效块内容导致解析中断问题
|
||||
- 修复`vlm`模式下,某些不完整的表结构导致的解析中断问题
|
||||
- 2025/06/17 2.0.5发布
|
||||
- 修复了`sglang-client`模式下依然需要下载模型的问题
|
||||
- 修复了`sglang-client`模式需要依赖`torch`等实际运行不需要的包的问题
|
||||
- 修复了同一进程内尝试通过多个url启动多个`sglang-client`实例时,只有第一个生效的问题
|
||||
- 2025/06/15 2.0.3发布
|
||||
- 修复了当下载模型类型设置为`all`时,配置文件出现键值更新错误的问题
|
||||
@@ -491,7 +492,11 @@ cd MinerU
|
||||
uv pip install -e .[core] -i https://mirrors.aliyun.com/pypi/simple
|
||||
```
|
||||
|
||||
#### 1.3 安装完整版(支持 sglang 加速)
|
||||
> [!TIP]
|
||||
> Linux和macOS系统安装后自动支持cuda/mps加速,Windows用户如需使用cuda加速,
|
||||
> 请前往 [PyTorch官网](https://pytorch.org/get-started/locally/) 选择合适的CUDA版本安装PyTorch。
|
||||
|
||||
#### 1.3 安装完整版(支持 sglang 加速)(需确保设备有Ampere及以后架构,24G显存及以上显卡)
|
||||
|
||||
如需使用 **sglang 加速 VLM 模型推理**,请选择合适的方式安装完整版本:
|
||||
|
||||
@@ -649,6 +654,12 @@ mineru -p <input_path> -o <output_path> -b vlm-sglang-engine
|
||||
mineru-sglang-server --port 30000
|
||||
```
|
||||
|
||||
> [!TIP]
|
||||
> sglang加速需设备有Ampere及以后架构,24G显存及以上显卡,如您有两张12G或16G显卡,可以通过张量并行(TP)模式使用:
|
||||
> `mineru-sglang-server --port 30000 --tp 2`
|
||||
>
|
||||
> 如使用两张卡仍出现显存不足错误或需要使用多卡并行增加吞吐量或推理速度,请参考 [sglang官方文档](https://docs.sglang.ai/backend/server_arguments.html#common-launch-commands)。
|
||||
|
||||
2. 在另一个终端中使用 Client 调用:
|
||||
|
||||
```bash
|
||||
|
||||
@@ -2,15 +2,12 @@ import os
|
||||
import time
|
||||
from typing import List, Tuple
|
||||
import PIL.Image
|
||||
import torch
|
||||
from loguru import logger
|
||||
|
||||
from .model_init import MineruPipelineModel
|
||||
from mineru.utils.config_reader import get_device
|
||||
from ...utils.pdf_classify import classify
|
||||
from ...utils.pdf_image_tools import load_images_from_pdf
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from ...utils.model_utils import get_vram, clean_memory
|
||||
|
||||
|
||||
@@ -166,7 +163,7 @@ def batch_image_analyze(
|
||||
try:
|
||||
import torch_npu
|
||||
if torch_npu.npu.is_available():
|
||||
torch.npu.set_compile_mode(jit_compile=False)
|
||||
torch_npu.npu.set_compile_mode(jit_compile=False)
|
||||
except Exception as e:
|
||||
raise RuntimeError(
|
||||
"NPU is selected as device, but torch_npu is not available. "
|
||||
|
||||
@@ -8,7 +8,6 @@ from mineru.utils.pdf_image_tools import load_images_from_pdf
|
||||
from .base_predictor import BasePredictor
|
||||
from .predictor import get_predictor
|
||||
from .token_to_middle_json import result_to_middle_json
|
||||
from ...utils.enum_class import ModelPath
|
||||
from ...utils.models_download_utils import auto_download_and_get_model_root_path
|
||||
|
||||
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
import re
|
||||
from typing import Literal
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from mineru.utils.boxbase import bbox_distance, is_in
|
||||
from mineru.utils.enum_class import ContentType, BlockType, SplitFlag
|
||||
from mineru.backend.vlm.vlm_middle_json_mkcontent import merge_para_with_text
|
||||
@@ -22,25 +24,30 @@ class MagicModel:
|
||||
# 解析每个块
|
||||
for index, block_info in enumerate(block_infos):
|
||||
block_bbox = block_info[0].strip()
|
||||
x1, y1, x2, y2 = map(int, block_bbox.split())
|
||||
x_1, y_1, x_2, y_2 = (
|
||||
int(x1 * width / 1000),
|
||||
int(y1 * height / 1000),
|
||||
int(x2 * width / 1000),
|
||||
int(y2 * height / 1000),
|
||||
)
|
||||
if x_2 < x_1:
|
||||
x_1, x_2 = x_2, x_1
|
||||
if y_2 < y_1:
|
||||
y_1, y_2 = y_2, y_1
|
||||
block_bbox = (x_1, y_1, x_2, y_2)
|
||||
block_type = block_info[1].strip()
|
||||
block_content = block_info[2].strip()
|
||||
try:
|
||||
x1, y1, x2, y2 = map(int, block_bbox.split())
|
||||
x_1, y_1, x_2, y_2 = (
|
||||
int(x1 * width / 1000),
|
||||
int(y1 * height / 1000),
|
||||
int(x2 * width / 1000),
|
||||
int(y2 * height / 1000),
|
||||
)
|
||||
if x_2 < x_1:
|
||||
x_1, x_2 = x_2, x_1
|
||||
if y_2 < y_1:
|
||||
y_1, y_2 = y_2, y_1
|
||||
block_bbox = (x_1, y_1, x_2, y_2)
|
||||
block_type = block_info[1].strip()
|
||||
block_content = block_info[2].strip()
|
||||
|
||||
# print(f"坐标: {block_bbox}")
|
||||
# print(f"类型: {block_type}")
|
||||
# print(f"内容: {block_content}")
|
||||
# print("-" * 50)
|
||||
# print(f"坐标: {block_bbox}")
|
||||
# print(f"类型: {block_type}")
|
||||
# print(f"内容: {block_content}")
|
||||
# print("-" * 50)
|
||||
except Exception as e:
|
||||
# 如果解析失败,可能是因为格式不正确,跳过这个块
|
||||
logger.warning(f"Invalid block format: {block_info}, error: {e}")
|
||||
continue
|
||||
|
||||
span_type = "unknown"
|
||||
if block_type in [
|
||||
|
||||
@@ -9,7 +9,6 @@ from mineru.utils.model_utils import get_vram
|
||||
from ..version import __version__
|
||||
from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
|
||||
|
||||
|
||||
@click.command()
|
||||
@click.version_option(__version__,
|
||||
'--version',
|
||||
@@ -139,25 +138,26 @@ from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
|
||||
|
||||
def main(input_path, output_dir, method, backend, lang, server_url, start_page_id, end_page_id, formula_enable, table_enable, device_mode, virtual_vram, model_source):
|
||||
|
||||
def get_device_mode() -> str:
|
||||
if device_mode is not None:
|
||||
return device_mode
|
||||
else:
|
||||
return get_device()
|
||||
if os.getenv('MINERU_DEVICE_MODE', None) is None:
|
||||
os.environ['MINERU_DEVICE_MODE'] = get_device_mode()
|
||||
if not backend.endswith('-client'):
|
||||
def get_device_mode() -> str:
|
||||
if device_mode is not None:
|
||||
return device_mode
|
||||
else:
|
||||
return get_device()
|
||||
if os.getenv('MINERU_DEVICE_MODE', None) is None:
|
||||
os.environ['MINERU_DEVICE_MODE'] = get_device_mode()
|
||||
|
||||
def get_virtual_vram_size() -> int:
|
||||
if virtual_vram is not None:
|
||||
return virtual_vram
|
||||
if get_device_mode().startswith("cuda") or get_device_mode().startswith("npu"):
|
||||
return round(get_vram(get_device_mode()))
|
||||
return 1
|
||||
if os.getenv('MINERU_VIRTUAL_VRAM_SIZE', None) is None:
|
||||
os.environ['MINERU_VIRTUAL_VRAM_SIZE']= str(get_virtual_vram_size())
|
||||
def get_virtual_vram_size() -> int:
|
||||
if virtual_vram is not None:
|
||||
return virtual_vram
|
||||
if get_device_mode().startswith("cuda") or get_device_mode().startswith("npu"):
|
||||
return round(get_vram(get_device_mode()))
|
||||
return 1
|
||||
if os.getenv('MINERU_VIRTUAL_VRAM_SIZE', None) is None:
|
||||
os.environ['MINERU_VIRTUAL_VRAM_SIZE']= str(get_virtual_vram_size())
|
||||
|
||||
if os.getenv('MINERU_MODEL_SOURCE', None) is None:
|
||||
os.environ['MINERU_MODEL_SOURCE'] = model_source
|
||||
if os.getenv('MINERU_MODEL_SOURCE', None) is None:
|
||||
os.environ['MINERU_MODEL_SOURCE'] = model_source
|
||||
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
|
||||
|
||||
@@ -8,15 +8,12 @@ from pathlib import Path
|
||||
import pypdfium2 as pdfium
|
||||
from loguru import logger
|
||||
|
||||
from mineru.backend.pipeline.pipeline_middle_json_mkcontent import union_make as pipeline_union_make
|
||||
from mineru.backend.pipeline.model_json_to_middle_json import result_to_middle_json as pipeline_result_to_middle_json
|
||||
from mineru.backend.vlm.vlm_middle_json_mkcontent import union_make as vlm_union_make
|
||||
from mineru.backend.vlm.vlm_analyze import doc_analyze as vlm_doc_analyze
|
||||
from mineru.backend.pipeline.pipeline_analyze import doc_analyze as pipeline_doc_analyze
|
||||
from mineru.data.data_reader_writer import FileBasedDataWriter
|
||||
from mineru.utils.draw_bbox import draw_layout_bbox, draw_span_bbox
|
||||
from mineru.utils.enum_class import MakeMode
|
||||
from mineru.utils.pdf_image_tools import images_bytes_to_pdf_bytes
|
||||
from mineru.backend.vlm.vlm_middle_json_mkcontent import union_make as vlm_union_make
|
||||
from mineru.backend.vlm.vlm_analyze import doc_analyze as vlm_doc_analyze
|
||||
|
||||
pdf_suffixes = [".pdf"]
|
||||
image_suffixes = [".png", ".jpeg", ".jpg"]
|
||||
@@ -99,6 +96,11 @@ def do_parse(
|
||||
):
|
||||
|
||||
if backend == "pipeline":
|
||||
|
||||
from mineru.backend.pipeline.pipeline_middle_json_mkcontent import union_make as pipeline_union_make
|
||||
from mineru.backend.pipeline.model_json_to_middle_json import result_to_middle_json as pipeline_result_to_middle_json
|
||||
from mineru.backend.pipeline.pipeline_analyze import doc_analyze as pipeline_doc_analyze
|
||||
|
||||
for idx, pdf_bytes in enumerate(pdf_bytes_list):
|
||||
new_pdf_bytes = convert_pdf_bytes_to_bytes_by_pypdfium2(pdf_bytes, start_page_id, end_page_id)
|
||||
pdf_bytes_list[idx] = new_pdf_bytes
|
||||
@@ -163,6 +165,7 @@ def do_parse(
|
||||
|
||||
logger.info(f"local output dir is {local_md_dir}")
|
||||
else:
|
||||
|
||||
if backend.startswith("vlm-"):
|
||||
backend = backend[4:]
|
||||
|
||||
|
||||
@@ -58,7 +58,7 @@ class PytorchPaddleOCR(TextSystem):
|
||||
|
||||
device = get_device()
|
||||
if device == 'cpu' and self.lang in ['ch', 'ch_server', 'japan', 'chinese_cht']:
|
||||
logger.warning("The current device in use is CPU. To ensure the speed of parsing, the language is automatically switched to ch_lite.")
|
||||
# logger.warning("The current device in use is CPU. To ensure the speed of parsing, the language is automatically switched to ch_lite.")
|
||||
self.lang = 'ch_lite'
|
||||
|
||||
if self.lang in latin_lang:
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import os
|
||||
import html
|
||||
import cv2
|
||||
import numpy as np
|
||||
from loguru import logger
|
||||
@@ -8,6 +9,11 @@ from mineru.utils.enum_class import ModelPath
|
||||
from mineru.utils.models_download_utils import auto_download_and_get_model_root_path
|
||||
|
||||
|
||||
def escape_html(input_string):
|
||||
"""Escape HTML Entities."""
|
||||
return html.escape(input_string)
|
||||
|
||||
|
||||
class RapidTableModel(object):
|
||||
def __init__(self, ocr_engine):
|
||||
slanet_plus_model_path = os.path.join(auto_download_and_get_model_root_path(ModelPath.slanet_plus), ModelPath.slanet_plus)
|
||||
@@ -63,7 +69,7 @@ class RapidTableModel(object):
|
||||
# Continue with OCR on potentially rotated image
|
||||
ocr_result = self.ocr_engine.ocr(bgr_image)[0]
|
||||
if ocr_result:
|
||||
ocr_result = [[item[0], item[1][0], item[1][1]] for item in ocr_result if
|
||||
ocr_result = [[item[0], escape_html(item[1][0]), item[1][1]] for item in ocr_result if
|
||||
len(item) == 2 and isinstance(item[1], tuple)]
|
||||
else:
|
||||
ocr_result = None
|
||||
|
||||
@@ -62,7 +62,7 @@ class Mineru2QwenForCausalLM(nn.Module):
|
||||
|
||||
# load vision tower
|
||||
mm_vision_tower = self.config.mm_vision_tower
|
||||
model_root_path = auto_download_and_get_model_root_path("/", "vlm")
|
||||
model_root_path = auto_download_and_get_model_root_path(mm_vision_tower, "vlm")
|
||||
mm_vision_tower = f"{model_root_path}/{mm_vision_tower}"
|
||||
|
||||
if "clip" in mm_vision_tower:
|
||||
|
||||
@@ -1,10 +1,15 @@
|
||||
# Copyright (c) Opendatalab. All rights reserved.
|
||||
import json
|
||||
import os
|
||||
|
||||
import torch
|
||||
from loguru import logger
|
||||
|
||||
try:
|
||||
import torch
|
||||
import torch_npu
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
# 定义配置文件名常量
|
||||
CONFIG_FILE_NAME = os.getenv('MINERU_TOOLS_CONFIG_JSON', 'mineru.json')
|
||||
|
||||
@@ -78,7 +83,6 @@ def get_device():
|
||||
return "mps"
|
||||
else:
|
||||
try:
|
||||
import torch_npu
|
||||
if torch_npu.npu.is_available():
|
||||
return "npu"
|
||||
except Exception as e:
|
||||
|
||||
@@ -132,6 +132,35 @@ def otsl_parse_texts(texts, tokens):
|
||||
r_idx = 0
|
||||
c_idx = 0
|
||||
|
||||
# Check and complete the matrix
|
||||
if split_row_tokens:
|
||||
max_cols = max(len(row) for row in split_row_tokens)
|
||||
|
||||
# Insert additional <ecel> to tags
|
||||
for row_idx, row in enumerate(split_row_tokens):
|
||||
while len(row) < max_cols:
|
||||
row.append(OTSL_ECEL)
|
||||
|
||||
# Insert additional <ecel> to texts
|
||||
new_texts = []
|
||||
text_idx = 0
|
||||
|
||||
for row_idx, row in enumerate(split_row_tokens):
|
||||
for col_idx, token in enumerate(row):
|
||||
new_texts.append(token)
|
||||
if text_idx < len(texts) and texts[text_idx] == token:
|
||||
text_idx += 1
|
||||
if (text_idx < len(texts) and
|
||||
texts[text_idx] not in [OTSL_NL, OTSL_FCEL, OTSL_ECEL, OTSL_LCEL, OTSL_UCEL, OTSL_XCEL]):
|
||||
new_texts.append(texts[text_idx])
|
||||
text_idx += 1
|
||||
|
||||
new_texts.append(OTSL_NL)
|
||||
if text_idx < len(texts) and texts[text_idx] == OTSL_NL:
|
||||
text_idx += 1
|
||||
|
||||
texts = new_texts
|
||||
|
||||
def count_right(tokens, c_idx, r_idx, which_tokens):
|
||||
span = 0
|
||||
c_idx_iter = c_idx
|
||||
@@ -235,10 +264,11 @@ def export_to_html(table_data: TableData):
|
||||
|
||||
body = ""
|
||||
|
||||
grid = table_data.grid
|
||||
for i in range(nrows):
|
||||
body += "<tr>"
|
||||
for j in range(ncols):
|
||||
cell: TableCell = table_data.grid[i][j]
|
||||
cell: TableCell = grid[i][j]
|
||||
|
||||
rowspan, rowstart = (
|
||||
cell.row_span,
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import time
|
||||
import torch
|
||||
import gc
|
||||
from PIL import Image
|
||||
from loguru import logger
|
||||
@@ -7,6 +6,12 @@ import numpy as np
|
||||
|
||||
from mineru.utils.boxbase import get_minbox_if_overlap_by_ratio
|
||||
|
||||
try:
|
||||
import torch
|
||||
import torch_npu
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
def crop_img(input_res, input_img, crop_paste_x=0, crop_paste_y=0):
|
||||
|
||||
@@ -303,7 +308,6 @@ def clean_memory(device='cuda'):
|
||||
torch.cuda.empty_cache()
|
||||
torch.cuda.ipc_collect()
|
||||
elif str(device).startswith("npu"):
|
||||
import torch_npu
|
||||
if torch_npu.npu.is_available():
|
||||
torch_npu.npu.empty_cache()
|
||||
elif str(device).startswith("mps"):
|
||||
@@ -325,7 +329,6 @@ def get_vram(device):
|
||||
total_memory = torch.cuda.get_device_properties(device).total_memory / (1024 ** 3) # 将字节转换为 GB
|
||||
return total_memory
|
||||
elif str(device).startswith("npu"):
|
||||
import torch_npu
|
||||
if torch_npu.npu.is_available():
|
||||
total_memory = torch_npu.npu.get_device_properties(device).total_memory / (1024 ** 3) # 转为 GB
|
||||
return total_memory
|
||||
|
||||
@@ -57,8 +57,12 @@ def auto_download_and_get_model_root_path(relative_path: str, repo_mode='pipelin
|
||||
relative_path = relative_path.strip('/')
|
||||
cache_dir = snapshot_download(repo, allow_patterns=[relative_path, relative_path+"/*"])
|
||||
elif repo_mode == 'vlm':
|
||||
# VLM 模式下,直接下载整个模型目录
|
||||
cache_dir = snapshot_download(repo)
|
||||
# VLM 模式下,根据 relative_path 的不同处理方式
|
||||
if relative_path == "/":
|
||||
cache_dir = snapshot_download(repo)
|
||||
else:
|
||||
relative_path = relative_path.strip('/')
|
||||
cache_dir = snapshot_download(repo, allow_patterns=[relative_path, relative_path+"/*"])
|
||||
|
||||
if not cache_dir:
|
||||
raise FileNotFoundError(f"Failed to download model: {relative_path} from {repo}")
|
||||
|
||||
@@ -1 +1 @@
|
||||
__version__ = "2.0.3"
|
||||
__version__ = "2.0.5"
|
||||
|
||||
@@ -311,6 +311,22 @@
|
||||
"created_at": "2025-06-13T14:02:16Z",
|
||||
"repoId": 765083837,
|
||||
"pullRequestNo": 2634
|
||||
},
|
||||
{
|
||||
"name": "hotelll",
|
||||
"id": 45009029,
|
||||
"comment_id": 2978780331,
|
||||
"created_at": "2025-06-17T03:09:54Z",
|
||||
"repoId": 765083837,
|
||||
"pullRequestNo": 2676
|
||||
},
|
||||
{
|
||||
"name": "hsia",
|
||||
"id": 654127,
|
||||
"comment_id": 2979415817,
|
||||
"created_at": "2025-06-17T17:35:10Z",
|
||||
"repoId": 765083837,
|
||||
"pullRequestNo": 2699
|
||||
}
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user