mirror of
https://github.com/opendatalab/MinerU.git
synced 2026-03-27 02:58:54 +07:00
feat: add magic-pdf-dev case
This commit is contained in:
3
.github/workflows/cli.yml
vendored
3
.github/workflows/cli.yml
vendored
@@ -37,7 +37,8 @@ jobs:
|
||||
run: |
|
||||
echo $GITHUB_WORKSPACE && sh tests/retry_env.sh
|
||||
- name: unit test
|
||||
run: |
|
||||
run: |
|
||||
cd $GITHUB_WORKSPACE && python tests/clean_covrage.py
|
||||
cd $GITHUB_WORKSPACE && export PYTHONPATH=. && coverage run -m pytest tests/test_unit.py --cov=magic_pdf/ --cov-report term-missing --cov-report html
|
||||
cd $GITHUB_WORKSPACE && python tests/get_coverage.py
|
||||
- name: cli test
|
||||
|
||||
BIN
magic_pdf/__pycache__/__init__.cpython-39.pyc
Normal file
BIN
magic_pdf/__pycache__/__init__.cpython-39.pyc
Normal file
Binary file not shown.
BIN
magic_pdf/libs/__pycache__/__init__.cpython-39.pyc
Normal file
BIN
magic_pdf/libs/__pycache__/__init__.cpython-39.pyc
Normal file
Binary file not shown.
BIN
magic_pdf/libs/__pycache__/version.cpython-39.pyc
Normal file
BIN
magic_pdf/libs/__pycache__/version.cpython-39.pyc
Normal file
Binary file not shown.
24
tests/clean_covrage.py
Normal file
24
tests/clean_covrage.py
Normal file
@@ -0,0 +1,24 @@
|
||||
"""
|
||||
clean coverage
|
||||
"""
|
||||
import os
|
||||
import shutil
|
||||
|
||||
def delete_file(path):
    """Delete the file or directory tree at ``path`` if it exists.

    A regular file is removed with ``os.remove``; a directory is removed
    recursively with ``shutil.rmtree``. A path that does not exist is
    ignored, so the function can be called unconditionally.

    Args:
        path: Filesystem path of the file or directory to delete.

    Returns:
        None. The outcome is printed to stdout; a failed deletion is
        reported there instead of being raised.
    """
    # isfile()/isdir() are both False for a missing path, so no separate
    # existence check is needed (and a negated one would make the branches
    # below unreachable).
    if os.path.isfile(path):
        try:
            os.remove(path)
            print(f"File '{path}' deleted.")
        except OSError as e:
            # os.remove signals permission/locking problems as OSError.
            print(f"Error deleting file '{path}': {e}")
    elif os.path.isdir(path):
        try:
            shutil.rmtree(path)
            print(f"Directory '{path}' and its contents deleted.")
        except OSError as e:
            # shutil.rmtree propagates OSError when ignore_errors is False.
            print(f"Error deleting directory '{path}': {e}")
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: remove the "htmlcov" path, resolved against the
    # current working directory.
    delete_file("htmlcov")
|
||||
@@ -2,7 +2,7 @@
|
||||
get cov
|
||||
"""
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
import shutil
|
||||
def get_covrage():
|
||||
"""get covrage"""
|
||||
# 发送请求获取网页内容
|
||||
|
||||
1472
tests/test_cli/pdf_dev/test_model.json
Normal file
1472
tests/test_cli/pdf_dev/test_model.json
Normal file
File diff suppressed because it is too large
Load Diff
@@ -182,7 +182,7 @@ class TestCli:
|
||||
def test_pdf_dev_cli_local_jsonl_txt(self):
|
||||
"""magic_pdf_dev cli local txt."""
|
||||
jsonl_path = os.path.join(pdf_dev_path, 'line1.jsonl')
|
||||
cmd = 'magic-pdf-dev --jsonl %s -m %s' % (jsonl_path, "txt")
|
||||
cmd = 'magic-pdf-dev --jsonl %s --method %s' % (jsonl_path, "txt")
|
||||
logging.info(cmd)
|
||||
os.system(cmd)
|
||||
|
||||
@@ -191,7 +191,7 @@ class TestCli:
|
||||
def test_pdf_dev_cli_local_jsonl_ocr(self):
|
||||
"""magic_pdf_dev cli local ocr."""
|
||||
jsonl_path = os.path.join(pdf_dev_path, 'line1.jsonl')
|
||||
cmd = 'magic-pdf-dev --jsonl %s -m %s' % (jsonl_path, 'ocr')
|
||||
cmd = 'magic-pdf-dev --jsonl %s --method %s' % (jsonl_path, 'ocr')
|
||||
logging.info(cmd)
|
||||
os.system(cmd)
|
||||
|
||||
@@ -199,7 +199,7 @@ class TestCli:
|
||||
def test_pdf_dev_cli_local_jsonl_auto(self):
|
||||
"""magic_pdf_dev cli local auto."""
|
||||
jsonl_path = os.path.join(pdf_dev_path, 'line1.jsonl')
|
||||
cmd = 'magic-pdf-dev --jsonl %s -m %s' % (jsonl_path, 'auto')
|
||||
cmd = 'magic-pdf-dev --jsonl %s --method %s' % (jsonl_path, 'auto')
|
||||
logging.info(cmd)
|
||||
os.system(cmd)
|
||||
|
||||
@@ -207,7 +207,7 @@ class TestCli:
|
||||
def test_pdf_dev_cli_s3_jsonl_txt(self):
|
||||
"""magic_pdf_dev cli s3 txt."""
|
||||
jsonl_path = os.path.join(pdf_dev_path, 'line1.jsonl')
|
||||
cmd = 'magic-pdf-dev --jsonl %s -m %s' % (jsonl_path, "txt")
|
||||
cmd = 'magic-pdf-dev --jsonl %s --method %s' % (jsonl_path, "txt")
|
||||
logging.info(cmd)
|
||||
os.system(cmd)
|
||||
|
||||
@@ -216,7 +216,7 @@ class TestCli:
|
||||
def test_pdf_dev_cli_s3_jsonl_ocr(self):
|
||||
"""magic_pdf_dev cli s3 ocr."""
|
||||
jsonl_path = os.path.join(pdf_dev_path, 'line1.jsonl')
|
||||
cmd = 'magic-pdf-dev --jsonl %s -m %s' % (jsonl_path, 'ocr')
|
||||
cmd = 'magic-pdf-dev --jsonl %s --method %s' % (jsonl_path, 'ocr')
|
||||
logging.info(cmd)
|
||||
os.system(cmd)
|
||||
|
||||
@@ -229,5 +229,26 @@ class TestCli:
|
||||
os.system(cmd)
|
||||
|
||||
|
||||
@pytest.mark.P1
def test_pdf_dev_cli_pdf_json_auto(self):
    """Run magic-pdf-dev on a local PDF plus model JSON with method auto.

    The command line is logged and executed through ``os.system``; its exit
    status is not inspected.
    """
    sample_pdf = os.path.join(
        pdf_dev_path, 'pdf',
        'research_report_1f978cd81fb7260c8f7644039ec2c054.pdf')
    model_json = os.path.join(pdf_dev_path, 'test_model.json')
    cli_args = (sample_pdf, model_json, 'auto')
    cmd = 'magic-pdf-dev --pdf %s --json %s --method %s' % cli_args
    logging.info(cmd)
    os.system(cmd)
|
||||
|
||||
@pytest.mark.P1
def test_pdf_dev_cli_pdf_json_ocr(self):
    """Run magic-pdf-dev on a local PDF plus model JSON with method ocr.

    The command line is logged and executed through ``os.system``; its exit
    status is not inspected.
    """
    json_path = os.path.join(pdf_dev_path, 'test_model.json')
    pdf_path = os.path.join(
        pdf_dev_path, 'pdf',
        'research_report_1f978cd81fb7260c8f7644039ec2c054.pdf')
    # Pass 'ocr' so this case exercises the OCR method rather than repeating
    # the auto-method test.
    cmd = 'magic-pdf-dev --pdf %s --json %s --method %s' % (pdf_path, json_path, 'ocr')
    logging.info(cmd)
    os.system(cmd)
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main()
|
||||
|
||||
Binary file not shown.
Reference in New Issue
Block a user