feat: remove outdated dependencies from pipeline in pyproject.toml

This commit is contained in:
myhloli
2026-03-20 02:19:35 +08:00
parent fa3bc49f3a
commit 07701638ed
4 changed files with 0 additions and 237 deletions

View File

@@ -1,119 +0,0 @@
import os
from typing import List, Dict, Union
from doclayout_yolo import YOLOv10
from tqdm import tqdm
import numpy as np
from PIL import Image, ImageDraw
from mineru.utils.enum_class import ModelPath
from mineru.utils.models_download_utils import auto_download_and_get_model_root_path
class DocLayoutYOLOModel:
def __init__(
self,
weight: str,
device: str = "cuda",
imgsz: int = 1280,
conf: float = 0.1,
iou: float = 0.45,
):
self.model = YOLOv10(weight).to(device)
self.device = device
self.imgsz = imgsz
self.conf = conf
self.iou = iou
def _parse_prediction(self, prediction) -> List[Dict]:
layout_res = []
# 容错处理
if not hasattr(prediction, "boxes") or prediction.boxes is None:
return layout_res
for xyxy, conf, cls in zip(
prediction.boxes.xyxy.cpu(),
prediction.boxes.conf.cpu(),
prediction.boxes.cls.cpu(),
):
coords = list(map(int, xyxy.tolist()))
xmin, ymin, xmax, ymax = coords
layout_res.append({
"category_id": int(cls.item()),
"poly": [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax],
"score": round(float(conf.item()), 3),
})
return layout_res
def predict(self, image: Union[np.ndarray, Image.Image]) -> List[Dict]:
prediction = self.model.predict(
image,
imgsz=self.imgsz,
conf=self.conf,
iou=self.iou,
verbose=False
)[0]
return self._parse_prediction(prediction)
def batch_predict(
self,
images: List[Union[np.ndarray, Image.Image]],
batch_size: int = 4
) -> List[List[Dict]]:
results = []
with tqdm(total=len(images), desc="Layout Predict") as pbar:
for idx in range(0, len(images), batch_size):
batch = images[idx: idx + batch_size]
if batch_size == 1:
conf = 0.9 * self.conf
else:
conf = self.conf
predictions = self.model.predict(
batch,
imgsz=self.imgsz,
conf=conf,
iou=self.iou,
verbose=False,
)
for pred in predictions:
results.append(self._parse_prediction(pred))
pbar.update(len(batch))
return results
def visualize(
self,
image: Union[np.ndarray, Image.Image],
results: List
) -> Image.Image:
if isinstance(image, np.ndarray):
image = Image.fromarray(image)
draw = ImageDraw.Draw(image)
for res in results:
poly = res['poly']
xmin, ymin, xmax, ymax = poly[0], poly[1], poly[4], poly[5]
print(
f"Detected box: {xmin}, {ymin}, {xmax}, {ymax}, Category ID: {res['category_id']}, Score: {res['score']}")
# 使用PIL在图像上画框
draw.rectangle([xmin, ymin, xmax, ymax], outline="red", width=2)
# 在框旁边画置信度
draw.text((xmax + 10, ymin + 10), f"{res['score']:.2f}", fill="red", font_size=22)
return image
if __name__ == '__main__':
image_path = r"C:\Users\zhaoxiaomeng\Downloads\下载1.jpg"
doclayout_yolo_weights = os.path.join(auto_download_and_get_model_root_path(ModelPath.doclayout_yolo), ModelPath.doclayout_yolo)
device = 'cuda'
model = DocLayoutYOLOModel(
weight=doclayout_yolo_weights,
device=device,
)
image = Image.open(image_path)
results = model.predict(image)
image = model.visualize(image, results)
image.show() # 显示图像

View File

@@ -1 +0,0 @@
# Copyright (c) Opendatalab. All rights reserved.

View File

@@ -1,114 +0,0 @@
import os
from typing import List, Union
import torch
from tqdm import tqdm
from ultralytics import YOLO
import numpy as np
from PIL import Image, ImageDraw
from mineru.utils.enum_class import ModelPath
from mineru.utils.models_download_utils import auto_download_and_get_model_root_path
class YOLOv8MFDModel:
def __init__(
self,
weight: str,
device: str = "cpu",
imgsz: int = 1888,
conf: float = 0.25,
iou: float = 0.45,
):
self.device = torch.device(device)
self.model = YOLO(weight).to(self.device)
self.imgsz = imgsz
self.conf = conf
self.iou = iou
def _run_predict(
self,
inputs: Union[np.ndarray, Image.Image, List],
is_batch: bool = False,
conf: float = None,
) -> List:
preds = self.model.predict(
inputs,
imgsz=self.imgsz,
conf=conf if conf is not None else self.conf,
iou=self.iou,
verbose=False,
device=self.device
)
return [pred.cpu() for pred in preds] if is_batch else preds[0].cpu()
def predict(
self,
image: Union[np.ndarray, Image.Image],
conf: float = None,
):
return self._run_predict(image, is_batch=False, conf=conf)
def batch_predict(
self,
images: List[Union[np.ndarray, Image.Image]],
batch_size: int = 4,
conf: float = None,
) -> List:
results = []
with tqdm(total=len(images), desc="MFD Predict") as pbar:
for idx in range(0, len(images), batch_size):
batch = images[idx: idx + batch_size]
batch_preds = self._run_predict(batch, is_batch=True, conf=conf)
results.extend(batch_preds)
pbar.update(len(batch))
return results
def visualize(
self,
image: Union[np.ndarray, Image.Image],
results: List
) -> Image.Image:
if isinstance(image, np.ndarray):
image = Image.fromarray(image)
formula_list = []
for xyxy, conf, cla in zip(
results.boxes.xyxy.cpu(), results.boxes.conf.cpu(), results.boxes.cls.cpu()
):
xmin, ymin, xmax, ymax = [int(p.item()) for p in xyxy]
new_item = {
"category_id": 13 + int(cla.item()),
"poly": [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax],
"score": round(float(conf.item()), 2),
}
formula_list.append(new_item)
draw = ImageDraw.Draw(image)
for res in formula_list:
poly = res['poly']
xmin, ymin, xmax, ymax = poly[0], poly[1], poly[4], poly[5]
print(
f"Detected box: {xmin}, {ymin}, {xmax}, {ymax}, Category ID: {res['category_id']}, Score: {res['score']}")
# 使用PIL在图像上画框
draw.rectangle([xmin, ymin, xmax, ymax], outline="red", width=2)
# 在框旁边画置信度
draw.text((xmax + 10, ymin + 10), f"{res['score']:.2f}", fill="red", font_size=22)
return image
if __name__ == '__main__':
image_path = r"C:\Users\zhaoxiaomeng\Downloads\screenshot-20250821-192948.png"
yolo_v8_mfd_weights = os.path.join(auto_download_and_get_model_root_path(ModelPath.yolo_v8_mfd),
ModelPath.yolo_v8_mfd)
device = 'cuda'
model = YOLOv8MFDModel(
weight=yolo_v8_mfd_weights,
device=device,
)
image = Image.open(image_path)
results = model.predict(image)
image = model.visualize(image, results)
image.show() # 显示图像

View File

@@ -73,9 +73,6 @@ mlx = [
"mlx-vlm>=0.3.3,<0.4",
]
pipeline = [
"matplotlib>=3.10,<4",
"ultralytics>=8.3.48,<9",
"doclayout_yolo==0.0.4",
"dill>=0.3.8,<1",
"PyYAML>=6.0.1,<7",
"ftfy>=6.3.1,<7",