Compare commits

..

8 Commits

Author SHA1 Message Date
Xiaomeng Zhao
af53a46311 Merge pull request #2264 from myhloli/dev
refactor(office_to_pdf): simplify font checking and add logging
2025-04-17 11:29:20 +08:00
myhloli
2e5e55cfe2 refactor(office_to_pdf): simplify font checking and add logging
- Remove specific Chinese font list and detailed font checking
- Add logging warning if no Chinese fonts are detected
- Make font checking more robust and less platform-specific
2025-04-17 10:52:08 +08:00
myhloli
658e6bc768 refactor(utils): comment out Chinese font check on Windows
- Temporarily disable Chinese font check for Windows systems
- This change allows bypassing the font check when the required fonts are not present
2025-04-17 00:54:28 +08:00
myhloli
4641264e12 build(docker): update magic-pdf installation and add dependencies
- Update magic-pdf installation to include specific version with full dependencies
- Add numpy, decorator, attrs, absl-py, cloudpickle, ml-dtypes, tornado, and einops as separate packages
- Specify numpy version to be less than 2
2025-04-17 00:16:20 +08:00
Xiaomeng Zhao
4bd3381c92 Merge pull request #2256 from myhloli/dev
fix(test_table): update image path to use relative path
2025-04-16 18:24:37 +08:00
myhloli
f5a56bf157 fix(test_table): update image path to use relative path
- Replace hardcoded image path with dynamic path generation
- Use os.path.join to create platform-independent file paths
- Improve code maintainability and portability across different environments
2025-04-16 18:23:13 +08:00
Xiaomeng Zhao
78d11172e3 Merge pull request #2255 from opendatalab/master
master->dev
2025-04-16 18:12:06 +08:00
myhloli
a2b07bfde4 Update version.py with new version
2025-04-16 10:02:13 +00:00
4 changed files with 17 additions and 25 deletions

View File

@@ -36,7 +36,7 @@ RUN /bin/bash -c "wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/m
source /opt/mineru_venv/bin/activate && \
pip3 install --upgrade pip -i https://mirrors.aliyun.com/pypi/simple && \
pip3 install torch==2.3.1 torchvision==0.18.1 -i https://mirrors.aliyun.com/pypi/simple && \
pip3 install -U magic-pdf[full] -i https://mirrors.aliyun.com/pypi/simple && \
pip3 install -U magic-pdf[full] 'numpy<2' decorator attrs absl-py cloudpickle ml-dtypes tornado einops -i https://mirrors.aliyun.com/pypi/simple && \
wget https://gitee.com/ascend/pytorch/releases/download/v6.0.rc2-pytorch2.3.1/torch_npu-2.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl && \
pip3 install torch_npu-2.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl"

View File

@@ -1 +1 @@
__version__ = "1.3.3"
__version__ = "1.3.4"

View File

@@ -4,6 +4,8 @@ import platform
from pathlib import Path
import shutil
from loguru import logger
class ConvertToPdfError(Exception):
def __init__(self, msg):
@@ -11,35 +13,24 @@ class ConvertToPdfError(Exception):
super().__init__(self.msg)
# Chinese font list
REQUIRED_CHS_FONTS = ['SimSun', 'Microsoft YaHei', 'Noto Sans CJK SC']
def check_fonts_installed():
"""Check if required Chinese fonts are installed."""
system_type = platform.system()
if system_type == 'Windows':
# Windows: check fonts via registry or system font folder
font_dir = Path("C:/Windows/Fonts")
installed_fonts = [f.name for f in font_dir.glob("*.ttf")]
if any(font for font in REQUIRED_CHS_FONTS if any(font in f for f in installed_fonts)):
return True
raise EnvironmentError(
f"Missing Chinese font. Please install at least one of: {', '.join(REQUIRED_CHS_FONTS)}"
)
if system_type in ['Windows', 'Darwin']:
pass
else:
# Linux/macOS: use fc-list
# Linux: use fc-list
try:
output = subprocess.check_output(['fc-list', ':lang=zh'], encoding='utf-8')
for font in REQUIRED_CHS_FONTS:
if font in output:
return True
raise EnvironmentError(
f"Missing Chinese font. Please install at least one of: {', '.join(REQUIRED_CHS_FONTS)}"
)
except Exception as e:
raise EnvironmentError(f"Font detection failed. Please install 'fontconfig' and fonts: {str(e)}")
if output.strip(): # 只要有任何输出(非空)
return True
else:
logger.warning(
f"No Chinese fonts were detected, the converted document may not display Chinese content properly."
)
except Exception:
pass
def get_soffice_command():

View File

@@ -1,4 +1,5 @@
import unittest
import os
from PIL import Image
from lxml import etree
@@ -8,7 +9,7 @@ from magic_pdf.model.sub_modules.table.rapidtable.rapid_table import RapidTableM
class TestppTableModel(unittest.TestCase):
def test_image2html(self):
img = Image.open("assets/table.jpg")
img = Image.open(os.path.join(os.path.dirname(__file__), "assets/table.jpg"))
atom_model_manager = AtomModelSingleton()
ocr_engine = atom_model_manager.get_atom_model(
atom_model_name='ocr',