feat: remove outdated dependencies from pipeline in pyproject.toml and delete the DocLayout-YOLO and YOLOv8 MFD model modules

2026-03-27 11:08:32 +07:00 · 2026-03-20 02:19:35 +08:00
parent fa3bc49f3a
commit 07701638ed
4 changed files with 0 additions and 237 deletions
--- a/mineru/model/layout/doclayoutyolo.py
+++ b/mineru/model/layout/doclayoutyolo.py
@@ -1,119 +0,0 @@
-import os
-from typing import List, Dict, Union
-
-from doclayout_yolo import YOLOv10
-from tqdm import tqdm
-import numpy as np
-from PIL import Image, ImageDraw
-
-from mineru.utils.enum_class import ModelPath
-from mineru.utils.models_download_utils import auto_download_and_get_model_root_path
-
-
-class DocLayoutYOLOModel:
-    def __init__(
-        self,
-        weight: str,
-        device: str = "cuda",
-        imgsz: int = 1280,
-        conf: float = 0.1,
-        iou: float = 0.45,
-    ):
-        self.model = YOLOv10(weight).to(device)
-        self.device = device
-        self.imgsz = imgsz
-        self.conf = conf
-        self.iou = iou
-
-    def _parse_prediction(self, prediction) -> List[Dict]:
-        layout_res = []
-
-        # 容错处理
-        if not hasattr(prediction, "boxes") or prediction.boxes is None:
-            return layout_res
-
-        for xyxy, conf, cls in zip(
-            prediction.boxes.xyxy.cpu(),
-            prediction.boxes.conf.cpu(),
-            prediction.boxes.cls.cpu(),
-        ):
-            coords = list(map(int, xyxy.tolist()))
-            xmin, ymin, xmax, ymax = coords
-            layout_res.append({
-                "category_id": int(cls.item()),
-                "poly": [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax],
-                "score": round(float(conf.item()), 3),
-            })
-        return layout_res
-
-    def predict(self, image: Union[np.ndarray, Image.Image]) -> List[Dict]:
-        prediction = self.model.predict(
-            image,
-            imgsz=self.imgsz,
-            conf=self.conf,
-            iou=self.iou,
-            verbose=False
-        )[0]
-        return self._parse_prediction(prediction)
-
-    def batch_predict(
-        self,
-        images: List[Union[np.ndarray, Image.Image]],
-        batch_size: int = 4
-    ) -> List[List[Dict]]:
-        results = []
-        with tqdm(total=len(images), desc="Layout Predict") as pbar:
-            for idx in range(0, len(images), batch_size):
-                batch = images[idx: idx + batch_size]
-                if batch_size == 1:
-                    conf = 0.9 * self.conf
-                else:
-                    conf = self.conf
-                predictions = self.model.predict(
-                    batch,
-                    imgsz=self.imgsz,
-                    conf=conf,
-                    iou=self.iou,
-                    verbose=False,
-                )
-                for pred in predictions:
-                    results.append(self._parse_prediction(pred))
-                pbar.update(len(batch))
-        return results
-
-    def visualize(
-            self,
-            image: Union[np.ndarray, Image.Image],
-            results: List
-    ) -> Image.Image:
-
-        if isinstance(image, np.ndarray):
-            image = Image.fromarray(image)
-
-        draw = ImageDraw.Draw(image)
-        for res in results:
-            poly = res['poly']
-            xmin, ymin, xmax, ymax = poly[0], poly[1], poly[4], poly[5]
-            print(
-                f"Detected box: {xmin}, {ymin}, {xmax}, {ymax}, Category ID: {res['category_id']}, Score: {res['score']}")
-            # 使用PIL在图像上画框
-            draw.rectangle([xmin, ymin, xmax, ymax], outline="red", width=2)
-            # 在框旁边画置信度
-            draw.text((xmax + 10, ymin + 10), f"{res['score']:.2f}", fill="red", font_size=22)
-        return image
-
-
-if __name__ == '__main__':
-    image_path = r"C:\Users\zhaoxiaomeng\Downloads\下载1.jpg"
-    doclayout_yolo_weights = os.path.join(auto_download_and_get_model_root_path(ModelPath.doclayout_yolo), ModelPath.doclayout_yolo)
-    device = 'cuda'
-    model = DocLayoutYOLOModel(
-        weight=doclayout_yolo_weights,
-        device=device,
-    )
-    image = Image.open(image_path)
-    results = model.predict(image)
-
-    image = model.visualize(image, results)
-
-    image.show()  # 显示图像
--- a/mineru/model/mfd/__init__.py
+++ b/mineru/model/mfd/__init__.py
@@ -1 +0,0 @@
-# Copyright (c) Opendatalab. All rights reserved.
--- a/mineru/model/mfd/yolo_v8.py
+++ b/mineru/model/mfd/yolo_v8.py
@@ -1,114 +0,0 @@
-import os
-from typing import List, Union
-
-import torch
-from tqdm import tqdm
-from ultralytics import YOLO
-import numpy as np
-from PIL import Image, ImageDraw
-
-from mineru.utils.enum_class import ModelPath
-from mineru.utils.models_download_utils import auto_download_and_get_model_root_path
-
-
-class YOLOv8MFDModel:
-    def __init__(
-        self,
-        weight: str,
-        device: str = "cpu",
-        imgsz: int = 1888,
-        conf: float = 0.25,
-        iou: float = 0.45,
-    ):
-        self.device = torch.device(device)
-        self.model = YOLO(weight).to(self.device)
-        self.imgsz = imgsz
-        self.conf = conf
-        self.iou = iou
-
-    def _run_predict(
-        self,
-        inputs: Union[np.ndarray, Image.Image, List],
-        is_batch: bool = False,
-        conf: float = None,
-    ) -> List:
-        preds = self.model.predict(
-            inputs,
-            imgsz=self.imgsz,
-            conf=conf if conf is not None else self.conf,
-            iou=self.iou,
-            verbose=False,
-            device=self.device
-        )
-        return [pred.cpu() for pred in preds] if is_batch else preds[0].cpu()
-
-    def predict(
-            self,
-            image: Union[np.ndarray, Image.Image],
-            conf: float = None,
-    ):
-        return self._run_predict(image, is_batch=False, conf=conf)
-
-    def batch_predict(
-        self,
-        images: List[Union[np.ndarray, Image.Image]],
-        batch_size: int = 4,
-        conf: float = None,
-    ) -> List:
-        results = []
-        with tqdm(total=len(images), desc="MFD Predict") as pbar:
-            for idx in range(0, len(images), batch_size):
-                batch = images[idx: idx + batch_size]
-                batch_preds = self._run_predict(batch, is_batch=True, conf=conf)
-                results.extend(batch_preds)
-                pbar.update(len(batch))
-        return results
-
-    def visualize(
-        self,
-        image: Union[np.ndarray, Image.Image],
-        results: List
-    ) -> Image.Image:
-
-        if isinstance(image, np.ndarray):
-            image = Image.fromarray(image)
-
-        formula_list = []
-        for xyxy, conf, cla in zip(
-                results.boxes.xyxy.cpu(), results.boxes.conf.cpu(), results.boxes.cls.cpu()
-        ):
-            xmin, ymin, xmax, ymax = [int(p.item()) for p in xyxy]
-            new_item = {
-                "category_id": 13 + int(cla.item()),
-                "poly": [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax],
-                "score": round(float(conf.item()), 2),
-            }
-            formula_list.append(new_item)
-
-        draw = ImageDraw.Draw(image)
-        for res in formula_list:
-            poly = res['poly']
-            xmin, ymin, xmax, ymax = poly[0], poly[1], poly[4], poly[5]
-            print(
-                f"Detected box: {xmin}, {ymin}, {xmax}, {ymax}, Category ID: {res['category_id']}, Score: {res['score']}")
-            # 使用PIL在图像上画框
-            draw.rectangle([xmin, ymin, xmax, ymax], outline="red", width=2)
-            # 在框旁边画置信度
-            draw.text((xmax + 10, ymin + 10), f"{res['score']:.2f}", fill="red", font_size=22)
-        return image
-
-if __name__ == '__main__':
-    image_path = r"C:\Users\zhaoxiaomeng\Downloads\screenshot-20250821-192948.png"
-    yolo_v8_mfd_weights = os.path.join(auto_download_and_get_model_root_path(ModelPath.yolo_v8_mfd),
-                                          ModelPath.yolo_v8_mfd)
-    device = 'cuda'
-    model = YOLOv8MFDModel(
-        weight=yolo_v8_mfd_weights,
-        device=device,
-    )
-    image = Image.open(image_path)
-    results = model.predict(image)
-
-    image = model.visualize(image, results)
-
-    image.show()  # 显示图像
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -73,9 +73,6 @@ mlx = [
    "mlx-vlm>=0.3.3,<0.4",
 ]
 pipeline = [
-    "matplotlib>=3.10,<4",
-    "ultralytics>=8.3.48,<9",
-    "doclayout_yolo==0.0.4",
    "dill>=0.3.8,<1",
    "PyYAML>=6.0.1,<7",
    "ftfy>=6.3.1,<7",
				`@@ -1 +0,0 @@`
				`# Copyright (c) Opendatalab. All rights reserved.`