mirror of
https://github.com/opendatalab/MinerU.git
synced 2026-03-27 11:08:32 +07:00
test: update test.pdf
This commit is contained in:
Binary file not shown.
@@ -72,7 +72,7 @@ def test_pipeline_with_two_config():
|
||||
res_json_path = (
|
||||
Path(__file__).parent / "output" / "test" / "txt" / "test_content_list.json"
|
||||
).as_posix()
|
||||
assert_content(res_json_path)
|
||||
assert_content(res_json_path, parse_method="txt")
|
||||
infer_results, all_image_lists, all_pdf_docs, lang_list, ocr_enabled_list = (
|
||||
pipeline_doc_analyze(
|
||||
pdf_bytes_list,
|
||||
@@ -93,7 +93,7 @@ def test_pipeline_with_two_config():
|
||||
res_json_path = (
|
||||
Path(__file__).parent / "output" / "test" / "ocr" / "test_content_list.json"
|
||||
).as_posix()
|
||||
assert_content(res_json_path)
|
||||
assert_content(res_json_path, parse_method="ocr")
|
||||
|
||||
|
||||
def test_vlm_transformers_with_default_config():
|
||||
@@ -164,7 +164,7 @@ def test_vlm_transformers_with_default_config():
|
||||
res_json_path = (
|
||||
Path(__file__).parent / "output" / "test" / "vlm" / "test_content_list.json"
|
||||
).as_posix()
|
||||
assert_content(res_json_path)
|
||||
assert_content(res_json_path, parse_method="vlm")
|
||||
|
||||
|
||||
def write_infer_result(
|
||||
@@ -238,7 +238,7 @@ def validate_html(html_content):
|
||||
return False
|
||||
|
||||
|
||||
def assert_content(content_path):
|
||||
def assert_content(content_path, parse_method="txt"):
|
||||
content_list = []
|
||||
with open(content_path, "r", encoding="utf-8") as file:
|
||||
content_list = json.load(file)
|
||||
@@ -267,19 +267,15 @@ def assert_content(content_path):
|
||||
)
|
||||
assert validate_html(content_dict["table_body"])
|
||||
target_str_list = [
|
||||
"Linear Regression",
|
||||
"Model",
|
||||
"Testing",
|
||||
"Error",
|
||||
"Linear",
|
||||
"Regression",
|
||||
"0.98740",
|
||||
"1321.2",
|
||||
"2-order Polynomial",
|
||||
"0.99906",
|
||||
"26.4",
|
||||
"3-order Polynomial",
|
||||
"0.99913",
|
||||
"101.2",
|
||||
"4-order Polynomial",
|
||||
"0.99914",
|
||||
"94.1",
|
||||
"Gray Prediction",
|
||||
"Gray",
|
||||
"Prediction",
|
||||
"0.00617",
|
||||
"687",
|
||||
]
|
||||
@@ -287,8 +283,12 @@ def assert_content(content_path):
|
||||
for target_str in target_str_list:
|
||||
if target_str in content_dict["table_body"]:
|
||||
correct_count += 1
|
||||
|
||||
assert correct_count > 0.9 * len(target_str_list)
|
||||
if parse_method == "txt" or parse_method == "ocr":
|
||||
assert correct_count > 0.9 * len(target_str_list)
|
||||
elif parse_method == "vlm":
|
||||
assert correct_count > 0.7 * len(target_str_list)
|
||||
else:
|
||||
assert False
|
||||
# 公式校验,检测是否含有公式元素
|
||||
case "equation":
|
||||
type_set.add("equation")
|
||||
|
||||
Reference in New Issue
Block a user