test: update test.pdf

This commit is contained in:
Sidney233
2025-08-28 12:07:11 +08:00
parent 43e5b8da0e
commit 98a7d66d28
2 changed files with 17 additions and 17 deletions

Binary file not shown.

View File

@@ -72,7 +72,7 @@ def test_pipeline_with_two_config():
res_json_path = (
Path(__file__).parent / "output" / "test" / "txt" / "test_content_list.json"
).as_posix()
assert_content(res_json_path)
assert_content(res_json_path, parse_method="txt")
infer_results, all_image_lists, all_pdf_docs, lang_list, ocr_enabled_list = (
pipeline_doc_analyze(
pdf_bytes_list,
@@ -93,7 +93,7 @@ def test_pipeline_with_two_config():
res_json_path = (
Path(__file__).parent / "output" / "test" / "ocr" / "test_content_list.json"
).as_posix()
assert_content(res_json_path)
assert_content(res_json_path, parse_method="ocr")
def test_vlm_transformers_with_default_config():
@@ -164,7 +164,7 @@ def test_vlm_transformers_with_default_config():
res_json_path = (
Path(__file__).parent / "output" / "test" / "vlm" / "test_content_list.json"
).as_posix()
assert_content(res_json_path)
assert_content(res_json_path, parse_method="vlm")
def write_infer_result(
@@ -238,7 +238,7 @@ def validate_html(html_content):
return False
def assert_content(content_path):
def assert_content(content_path, parse_method="txt"):
content_list = []
with open(content_path, "r", encoding="utf-8") as file:
content_list = json.load(file)
@@ -267,19 +267,15 @@ def assert_content(content_path):
)
assert validate_html(content_dict["table_body"])
target_str_list = [
"Linear Regression",
"Model",
"Testing",
"Error",
"Linear",
"Regression",
"0.98740",
"1321.2",
"2-order Polynomial",
"0.99906",
"26.4",
"3-order Polynomial",
"0.99913",
"101.2",
"4-order Polynomial",
"0.99914",
"94.1",
"Gray Prediction",
"Gray",
"Prediction",
"0.00617",
"687",
]
@@ -287,8 +283,12 @@ def assert_content(content_path):
for target_str in target_str_list:
if target_str in content_dict["table_body"]:
correct_count += 1
assert correct_count > 0.9 * len(target_str_list)
if parse_method == "txt" or parse_method == "ocr":
assert correct_count > 0.9 * len(target_str_list)
elif parse_method == "vlm":
assert correct_count > 0.7 * len(target_str_list)
else:
assert False
# Equation check: detect whether an equation element is present
case "equation":
type_set.add("equation")