diff --git a/.github/workflows/huigui.yml b/.github/workflows/huigui.yml index 95ceffc8..4d2f5a25 100644 --- a/.github/workflows/huigui.yml +++ b/.github/workflows/huigui.yml @@ -12,7 +12,7 @@ on: - "**.md" jobs: cli-test: - if: github.repository == 'opendatalab/MinerU' + # if: github.repository == 'opendatalab/MinerU' runs-on: pdf timeout-minutes: 240 strategy: @@ -30,32 +30,32 @@ jobs: conda env list pip show coverage cd $GITHUB_WORKSPACE && sh tests/retry_env.sh - # cd $GITHUB_WORKSPACE && python tests/clean_coverage.py - # cd $GITHUB_WORKSPACE && coverage run -m pytest tests/unittest/ --cov=magic_pdf/ --cov-report html --cov-report term-missing - # cd $GITHUB_WORKSPACE && python tests/get_coverage.py + cd $GITHUB_WORKSPACE && python tests/clean_coverage.py + cd $GITHUB_WORKSPACE && coverage run + cd $GITHUB_WORKSPACE && python tests/get_coverage.py cd $GITHUB_WORKSPACE && pytest -s -v tests/test_cli/test_cli_sdk.py - notify_to_feishu: - if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure')}} - needs: cli-test - runs-on: pdf - steps: - - name: get_actor - run: | - metion_list="dt-yy" - echo $GITHUB_ACTOR - if [[ $GITHUB_ACTOR == "drunkpig" ]]; then - metion_list="xuchao" - elif [[ $GITHUB_ACTOR == "myhloli" ]]; then - metion_list="zhaoxiaomeng" - elif [[ $GITHUB_ACTOR == "icecraft" ]]; then - metion_list="xurui1" - fi - echo $metion_list - echo "METIONS=$metion_list" >> "$GITHUB_ENV" - echo ${{ env.METIONS }} - - - name: notify - run: | - #echo ${{ secrets.USER_ID }} - curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"'${{ github.repository }}' GitHubAction Failed","content":[[{"tag":"text","text":""},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'$USER_ID'"}]]}}}}' $WEBHOOK_URL +# notify_to_feishu: +# if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure')}} +# needs: cli-test +# runs-on: pdf +# steps: +# - name: get_actor +# run: | +# metion_list="dt-yy" +# echo $GITHUB_ACTOR +# if [[ $GITHUB_ACTOR == "drunkpig" ]]; then +# metion_list="xuchao" +# elif [[ $GITHUB_ACTOR == "myhloli" ]]; then +# metion_list="zhaoxiaomeng" +# elif [[ $GITHUB_ACTOR == "icecraft" ]]; then +# metion_list="xurui1" +# fi +# echo $metion_list +# echo "METIONS=$metion_list" >> "$GITHUB_ENV" +# echo ${{ env.METIONS }} +# +# - name: notify +# run: | +# #echo ${{ secrets.USER_ID }} +# curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"'${{ github.repository }}' GitHubAction Failed","content":[[{"tag":"text","text":""},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'$USER_ID'"}]]}}}}' $WEBHOOK_URL diff --git a/pyproject.toml b/pyproject.toml index 17444785..e1cc7d45 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta" [project] name = "mineru" dynamic = ["version"] -license = {text = "AGPL-3.0"} +license = { text = "AGPL-3.0" } description = "A practical tool for converting PDF to Markdown" readme = "README.md" requires-python = ">=3.10,<3.14" @@ -38,6 +38,12 @@ dependencies = [ ] [project.optional-dependencies] +test = [ + "mineru[core]", + "pytest", + "pytest-cov", + "beautifulsoup4" +] vlm = [ "transformers>=4.51.1", "torch>=2.6.0", @@ -112,7 +118,7 @@ mineru-api = "mineru.cli.fast_api:main" mineru-gradio = "mineru.cli.gradio_app:main" [tool.setuptools.dynamic] -version = {attr = "mineru.version.__version__"} +version = { attr = "mineru.version.__version__" } [tool.setuptools.packages.find] include = ["mineru*"] @@ -125,3 +131,35 @@ namespaces = false [tool.setuptools] include-package-data = true zip-safe = false + +[tool.pytest.ini_options] +addopts = "-s --cov=mineru --cov-report html" + +[tool.coverage.run] +command_line = "-m pytest tests/unittest/test_e2e.py" +source = ["mineru/"] +omit = [ + "*/vlm_sglang_model/*", + "*/gradio_app.py", + "*/models_download.py", + "*/fast_api.py", + "*/cli/client.py", + "*/sglang_engine_predictor.py", + "*/vlm_sglang_server.py", + "*/cli_parser.py", + "*/run_async.py" +] +[tool.coverage.html] +exclude_also = [ + 'def __repr__', + 'if self.debug:', + 'if settings.DEBUG', + 'raise AssertionError', + 'raise NotImplementedError', + 'if 0:', + 'if __name__ == .__main__.:', + 'if TYPE_CHECKING:', + 'class .*\bProtocol\):', + '@(abc\.)?abstractmethod', +] +directory = "tests/htmelcov" \ No newline at end of file diff --git a/tests/retry_env.sh b/tests/retry_env.sh deleted file mode 100644 index 0fbecbd3..00000000 --- a/tests/retry_env.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash - -max_retries=5 -retry_count=0 - -while true; do - # prepare env - #python -m pip install -r requirements-qa.txt - #python -m pip install -U magic-pdf[full] --extra-index-url https://wheels.myhloli.com -i https://mirrors.aliyun.com/pypi/simple - pip install -e . - python -m pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/ - pip install modelscope - wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/scripts/download_models.py -O download_models.py - python download_models.py - exit_code=$? - if [ $exit_code -eq 0 ]; then - echo "test.sh 成功执行!" - break - else - let retry_count+=1 - if [ $retry_count -ge $max_retries ]; then - echo "达到最大重试次数 ($max_retries),放弃重试。" - exit 1 - fi - echo "test.sh 执行失败 (退出码: $exit_code)。尝试第 $retry_count 次重试..." - sleep 5 - fi -done diff --git a/tests/test_cli/conf/__init__py b/tests/test_cli/conf/__init__py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/test_cli/conf/conf.py b/tests/test_cli/conf/conf.py deleted file mode 100644 index 628a1088..00000000 --- a/tests/test_cli/conf/conf.py +++ /dev/null @@ -1,10 +0,0 @@ -import os -conf = { -"code_path": os.environ.get('GITHUB_WORKSPACE'), -"pdf_dev_path" : os.environ.get('GITHUB_WORKSPACE') + "/tests/test_cli/pdf_dev", -#"code_path": "/home/quyuan/ci/actions-runner/MinerU", -#"pdf_dev_path": "/home/quyuan/ci/actions-runner/MinerU/tests/test_cli/pdf_dev", -"pdf_res_path": "/tmp/magic-pdf", -"jsonl_path": "s3://llm-qatest-pnorm/mineru/test/line1.jsonl", -"s3_pdf_path": "s3://llm-qatest-pnorm/mineru/test/test_rearch_report.pdf" -} diff --git a/tests/test_cli/conftest.py b/tests/test_cli/conftest.py deleted file mode 100644 index 373b5867..00000000 --- a/tests/test_cli/conftest.py +++ /dev/null @@ -1,10 +0,0 @@ -import pytest -import torch - -def clear_gpu_memory(): - ''' - clear GPU memory - ''' - torch.cuda.empty_cache() - print("GPU memory cleared.") - diff --git a/tests/test_cli/lib/__init__.py b/tests/test_cli/lib/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/test_cli/lib/calculate_score.py b/tests/test_cli/lib/calculate_score.py deleted file mode 100644 index a7c140d6..00000000 --- a/tests/test_cli/lib/calculate_score.py +++ /dev/null @@ -1,116 +0,0 @@ -""" -calculate_score -""" -import os -import re -import json -from Levenshtein import distance -from lib import scoring -from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction -from nltk.tokenize import word_tokenize -import nltk -nltk.download('punkt') - -class Scoring: - """ - calculate_score - """ - def __init__(self, result_path): - """ - init - """ - self.edit_distances = [] - self.bleu_scores = [] - self.sim_scores = [] - self.filenames = [] - self.score_dict = {} - self.anntion_cnt = 0 - self.fw = open(result_path, "w+", encoding='utf-8') - - def simple_bleu_score(self, candidate, reference): - """ - get bleu score - """ - candidate_tokens = word_tokenize(candidate) - reference_tokens = word_tokenize(reference) - return sentence_bleu([reference_tokens], candidate_tokens, smoothing_function=SmoothingFunction().method1) - - - def preprocess_string(self, s): - """ - preprocess_string - """ - sub_enter = re.sub(r'\n+', '\n', s) - return re.sub(r' ', ' ', sub_enter) - - def calculate_similarity(self, annotion, actual, tool_type): - """ - calculate_similarity - """ - class_dict = {} - edit_distances = [] - bleu_scores = [] - sim_scores = list() - total_file = 0 - for filename in os.listdir(annotion): - if filename.endswith('.md') and not filename.startswith('.'): - total_file = total_file + 1 - with open(os.path.join(annotion, filename), 'r', encoding='utf-8') as file_a: - content_a = file_a.read() - self.anntion_cnt = self.anntion_cnt + 1 - filepath_b = os.path.join(actual, filename) - if os.path.exists(filepath_b): - with open(filepath_b, 'r', encoding='utf-8') as file_b: - content_b = file_b.read() - self.filenames.append(filename) - edit_dist = distance(self.preprocess_string(content_b),self.preprocess_string(content_a)) / max(len(content_a), len(content_b)) - self.edit_distances.append(edit_dist) - edit_distances.append(edit_dist) - bleu_score = self.simple_bleu_score(content_b, content_a) - bleu_scores.append(bleu_score) - self.bleu_scores.append(bleu_score) - score = scoring.score_text(content_b, content_a) - sim_scores.append(score) - self.sim_scores.append(score) - class_dict[filename] = {"edit_dist": edit_dist, "bleu_score": bleu_score, "sim_score": score} - self.score_dict[filename] = {"edit_dist": edit_dist, "bleu_score": bleu_score, "sim_score": score} - else: - print(f"File {filename} not found in actual directory.") - class_average_edit_distance = sum(edit_distances) / len(edit_distances) if edit_distances else 0 - class_average_bleu_score = sum(bleu_scores) / len(bleu_scores) if bleu_scores else 0 - class_average_sim_score = sum(sim_scores) / len(sim_scores) if sim_scores else 0 - self.fw.write(json.dumps(class_dict, ensure_ascii=False) + "\n") - ratio = len(class_dict)/total_file - self.fw.write(f"{tool_type} extract ratio: {ratio}" + "\n") - self.fw.write(f"{tool_type} Average Levenshtein Distance: {class_average_edit_distance}" + "\n") - self.fw.write(f"{tool_type} Average BLEU Score: {class_average_bleu_score}" + "\n") - self.fw.write(f"{tool_type} Average Sim Score: {class_average_sim_score}" + "\n") - print (f"{tool_type} extract ratio: {ratio}") - print (f"{tool_type} Average Levenshtein Distance: {class_average_edit_distance}") - print (f"{tool_type} Average BLEU Score: {class_average_bleu_score}") - print (f"{tool_type} Average Sim Score: {class_average_sim_score}") - return self.score_dict - - def summary_scores(self): - """ - calculate the average of edit distance, bleu score and sim score - """ - over_all_dict = dict() - average_edit_distance = sum(self.edit_distances) / len(self.edit_distances) if self.edit_distances else 0 - average_bleu_score = sum(self.bleu_scores) / len(self.bleu_scores) if self.bleu_scores else 0 - average_sim_score = sum(self.sim_scores) / len(self.sim_scores) if self.sim_scores else 0 - over_all_dict["average_edit_distance"] = average_edit_distance - over_all_dict["average_bleu_score"] = average_bleu_score - over_all_dict["average_sim_score"] = average_sim_score - self.fw.write(json.dumps(over_all_dict, ensure_ascii=False) + "\n") - return over_all_dict - - def calculate_similarity_total(self, tool_type, download_dir): - """ - calculate the average of edit distance, bleu score and sim score - """ - annotion = os.path.join(download_dir, "annotations", "cleaned") - actual = os.path.join(download_dir, tool_type, "cleaned") - score = self.calculate_similarity(annotion, actual, tool_type) - return score - diff --git a/tests/test_cli/lib/common.py b/tests/test_cli/lib/common.py deleted file mode 100644 index 439acf1a..00000000 --- a/tests/test_cli/lib/common.py +++ /dev/null @@ -1,90 +0,0 @@ -"""common definitions.""" -import os -import shutil -import re -import json -import torch - -def clear_gpu_memory(): - ''' - clear GPU memory - ''' - torch.cuda.empty_cache() - print("GPU memory cleared.") - -def check_shell(cmd): - """shell successful.""" - res = os.system(cmd) - assert res == 0 - -def update_config_file(file_path, key, value): - """update config file.""" - with open(file_path, 'r', encoding="utf-8") as fr: - config = json.loads(fr.read()) - config[key] = value - # 保存修改后的内容 - with open(file_path, 'w', encoding='utf-8') as fw: - json.dump(config, fw, ensure_ascii=False, indent=4) - -def cli_count_folders_and_check_contents(file_path): - """" count cli files.""" - if os.path.exists(file_path): - for files in os.listdir(file_path): - folder_count = os.path.getsize(os.path.join(file_path, files)) - assert folder_count > 0 - assert len(os.listdir(file_path)) > 5 - -def sdk_count_folders_and_check_contents(file_path): - """count folders.""" - if os.path.exists(file_path): - file_count = os.path.getsize(file_path) - assert file_count > 0 - else: - exit(1) - - - -def delete_file(path): - """delete file.""" - if not os.path.exists(path): - if os.path.isfile(path): - try: - os.remove(path) - print(f"File '{path}' deleted.") - except TypeError as e: - print(f"Error deleting file '{path}': {e}") - elif os.path.isdir(path): - try: - shutil.rmtree(path) - print(f"Directory '{path}' and its contents deleted.") - except TypeError as e: - print(f"Error deleting directory '{path}': {e}") - -def check_latex_table_exists(file_path): - """check latex table exists.""" - pattern = r'\\begin\{tabular\}.*?\\end\{tabular\}' - with open(file_path, 'r', encoding='utf-8') as file: - content = file.read() - matches = re.findall(pattern, content, re.DOTALL) - return len(matches) > 0 - -def check_html_table_exists(file_path): - """check html table exists.""" - pattern = r'
| element" - - # 检查具体的表格内容 - headers = tree.xpath('//table/tr[1]/td') - assert len(headers) == 5, "Thead should have 5 columns" - assert headers[0].text and headers[0].text.strip() == "Methods", "First header should be 'Methods'" - assert headers[1].text and headers[1].text.strip() == "R", "Second header should be 'R'" - assert headers[2].text and headers[2].text.strip() == "P", "Third header should be 'P'" - assert headers[3].text and headers[3].text.strip() == "F", "Fourth header should be 'F'" - assert headers[4].text and headers[4].text.strip() == "FPS", "Fifth header should be 'FPS'" - - # 检查第一行数据 - first_row = tree.xpath('//table/tr[2]/td') - assert len(first_row) == 5, "First row should have 5 cells" - assert first_row[0].text and 'SegLink' in first_row[0].text.strip(), "First cell should be 'SegLink [26]'" - assert first_row[1].text and first_row[1].text.strip() == "70.0", "Second cell should be '70.0'" - assert first_row[2].text and first_row[2].text.strip() == "86.0", "Third cell should be '86.0'" - assert first_row[3].text and first_row[3].text.strip() == "77.0", "Fourth cell should be '77.0'" - assert first_row[4].text and first_row[4].text.strip() == "8.9", "Fifth cell should be '8.9'" - - # 检查倒数第二行数据 - second_last_row = tree.xpath('//table/tr[position()=last()-1]/td') - assert len(second_last_row) == 5, "second_last_row should have 5 cells" - assert second_last_row[0].text and second_last_row[0].text.strip() == "Ours (SynText)", "First cell should be 'Ours (SynText)'" - assert second_last_row[1].text and second_last_row[1].text.strip() == "80.68", "Second cell should be '80.68'" - assert second_last_row[2].text and second_last_row[2].text.strip() == "85.40", "Third cell should be '85.40'" - # assert second_last_row[3].text and second_last_row[3].text.strip() == "82.97", "Fourth cell should be '82.97'" - # assert second_last_row[3].text and second_last_row[4].text.strip() == "12.68", "Fifth cell should be '12.68'" - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unittest/test_tools/__init__.py b/tests/unittest/test_tools/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/unittest/test_tools/assets/cli/path/cli_test_01.pdf b/tests/unittest/test_tools/assets/cli/path/cli_test_01.pdf deleted file mode 100644 index 229be9ce..00000000 Binary files a/tests/unittest/test_tools/assets/cli/path/cli_test_01.pdf and /dev/null differ diff --git a/tests/unittest/test_tools/assets/cli/path/cli_test_02.pdf b/tests/unittest/test_tools/assets/cli/path/cli_test_02.pdf deleted file mode 100644 index 1adcc01c..00000000 Binary files a/tests/unittest/test_tools/assets/cli/path/cli_test_02.pdf and /dev/null differ diff --git a/tests/unittest/test_tools/assets/cli/pdf/cli_test_01.pdf b/tests/unittest/test_tools/assets/cli/pdf/cli_test_01.pdf deleted file mode 100644 index 229be9ce..00000000 Binary files a/tests/unittest/test_tools/assets/cli/pdf/cli_test_01.pdf and /dev/null differ diff --git a/tests/unittest/test_tools/assets/cli_dev/cli_test_01.jsonl b/tests/unittest/test_tools/assets/cli_dev/cli_test_01.jsonl deleted file mode 100644 index 9bcbbe88..00000000 --- a/tests/unittest/test_tools/assets/cli_dev/cli_test_01.jsonl +++ /dev/null @@ -1 +0,0 @@ -{"file_location":"tests/unittest/test_tools/assets/cli_dev/cli_test_01.pdf","doc_layout_result":[{"layout_dets":[{"category_id":1,"poly":[882.4013061523438,169.93817138671875,1552.350341796875,169.93817138671875,1552.350341796875,625.8263549804688,882.4013061523438,625.8263549804688],"score":0.999992311000824},{"category_id":1,"poly":[882.474853515625,1450.92822265625,1551.4490966796875,1450.92822265625,1551.4490966796875,1877.5712890625,882.474853515625,1877.5712890625],"score":0.9999903440475464},{"category_id":1,"poly":[881.6513061523438,626.2058715820312,1552.1400146484375,626.2058715820312,1552.1400146484375,1450.604736328125,881.6513061523438,1450.604736328125],"score":0.9999856352806091},{"category_id":1,"poly":[149.41075134277344,232.1595001220703,819.0465087890625,232.1595001220703,819.0465087890625,625.8865356445312,149.41075134277344,625.8865356445312],"score":0.99998539686203},{"category_id":1,"poly":[149.3945770263672,1215.5172119140625,817.8850708007812,1215.5172119140625,817.8850708007812,1304.873291015625,149.3945770263672,1304.873291015625],"score":0.9999765157699585},{"category_id":1,"poly":[882.6979370117188,1880.13916015625,1552.15185546875,1880.13916015625,1552.15185546875,2031.339599609375,882.6979370117188,2031.339599609375],"score":0.9999744892120361},{"category_id":1,"poly":[148.96054077148438,743.3055419921875,818.6231689453125,743.3055419921875,818.6231689453125,1074.2369384765625,148.96054077148438,1074.2369384765625],"score":0.9999669790267944},{"category_id":1,"poly":[148.8435516357422,1791.14306640625,818.6885375976562,1791.14306640625,818.6885375976562,2030.794189453125,148.8435516357422,2030.794189453125],"score":0.9999618530273438},{"category_id":0,"poly":[150.7009735107422,684.0087890625,623.5106201171875,684.0087890625,623.5106201171875,717.03662109375,150.7009735107422,717.03662109375],"score":0.9999415278434753},{"category_id":8,"poly":[146.48068237304688,1331.6737060546875,317.2640075683594,1331.6737060546875,317.2640075683594,1400.1722412109375,146.48068237304688,1400.1722412109375],"score":0.9998958110809326},{"category_id":1,"poly":[149.42420959472656,1430.8782958984375,818.9042358398438,1430.8782958984375,818.9042358398438,1672.7386474609375,149.42420959472656,1672.7386474609375],"score":0.9998599290847778},{"category_id":1,"poly":[149.18746948242188,172.10252380371094,818.5662231445312,172.10252380371094,818.5662231445312,230.4594268798828,149.18746948242188,230.4594268798828],"score":0.9997718334197998},{"category_id":0,"poly":[149.0175018310547,1732.1090087890625,702.1005859375,1732.1090087890625,702.1005859375,1763.6046142578125,149.0175018310547,1763.6046142578125],"score":0.9997085928916931},{"category_id":2,"poly":[1519.802490234375,98.59099578857422,1551.985107421875,98.59099578857422,1551.985107421875,119.48420715332031,1519.802490234375,119.48420715332031],"score":0.9995552897453308},{"category_id":8,"poly":[146.9109649658203,1100.156494140625,544.2803344726562,1100.156494140625,544.2803344726562,1184.929443359375,146.9109649658203,1184.929443359375],"score":0.9995207786560059},{"category_id":2,"poly":[148.11611938476562,99.87767791748047,318.926025390625,99.87767791748047,318.926025390625,120.70393371582031,148.11611938476562,120.70393371582031],"score":0.999351441860199},{"category_id":9,"poly":[791.7642211914062,1130.056396484375,818.6940307617188,1130.056396484375,818.6940307617188,1161.1080322265625,791.7642211914062,1161.1080322265625],"score":0.9908884763717651},{"category_id":9,"poly":[788.37060546875,1346.8450927734375,818.5010986328125,1346.8450927734375,818.5010986328125,1377.370361328125,788.37060546875,1377.370361328125],"score":0.9873985052108765},{"category_id":14,"poly":[146,1103,543,1103,543,1184,146,1184],"score":0.94,"latex":"E\\!\\left(W\\right)\\!=\\!\\frac{E\\!\\left[H^{2}\\right]}{2E\\!\\left[H\\right]}\\!=\\!\\frac{E\\!\\left[H\\right]}{2}\\!\\!\\left(1\\!+\\!\\operatorname{CV}\\!\\left(H\\right)^{2}\\right)"},{"category_id":13,"poly":[1196,354,1278,354,1278,384,1196,384],"score":0.91,"latex":"p(1-q)"},{"category_id":13,"poly":[881,415,1020,415,1020,444,881,444],"score":0.91,"latex":"(1-p)(1-q)"},{"category_id":14,"poly":[147,1333,318,1333,318,1400,147,1400],"score":0.91,"latex":"\\mathrm{CV}\\big(H\\big)\\!=\\!\\frac{\\sigma_{_H}}{E\\big[H\\big]}"},{"category_id":13,"poly":[1197,657,1263,657,1263,686,1197,686],"score":0.9,"latex":"(1-p)"},{"category_id":13,"poly":[213,1217,263,1217,263,1244,213,1244],"score":0.88,"latex":"E[X]"},{"category_id":13,"poly":[214,1434,245,1434,245,1459,214,1459],"score":0.87,"latex":"\\upsigma_{H}"},{"category_id":13,"poly":[324,2002,373,2002,373,2028,324,2028],"score":0.84,"latex":"30\\%"},{"category_id":13,"poly":[1209,693,1225,693,1225,717,1209,717],"score":0.83,"latex":"p"},{"category_id":13,"poly":[990,449,1007,449,1007,474,990,474],"score":0.81,"latex":"p"},{"category_id":13,"poly":[346,1277,369,1277,369,1301,346,1301],"score":0.81,"latex":"H"},{"category_id":13,"poly":[1137,661,1154,661,1154,686,1137,686],"score":0.81,"latex":"p"},{"category_id":13,"poly":[522,1432,579,1432,579,1459,522,1459],"score":0.81,"latex":"H\\left(4\\right)"},{"category_id":13,"poly":[944,540,962,540,962,565,944,565],"score":0.8,"latex":"p"},{"category_id":13,"poly":[1444,936,1461,936,1461,961,1444,961],"score":0.79,"latex":"p"},{"category_id":13,"poly":[602,1247,624,1247,624,1270,602,1270],"score":0.78,"latex":"H"},{"category_id":13,"poly":[147,1247,167,1247,167,1271,147,1271],"score":0.77,"latex":"X"},{"category_id":13,"poly":[210,1246,282,1246,282,1274,210,1274],"score":0.77,"latex":"\\mathrm{CV}(H)"},{"category_id":13,"poly":[1346,268,1361,268,1361,292,1346,292],"score":0.76,"latex":"q"},{"category_id":13,"poly":[215,957,238,957,238,981,215,981],"score":0.74,"latex":"H"},{"category_id":13,"poly":[149,956,173,956,173,981,149,981],"score":0.63,"latex":"W"},{"category_id":13,"poly":[924,841,1016,841,1016,868,924,868],"score":0.56,"latex":"8{\\mathrm{:}}00\\;\\mathrm{a.m}"},{"category_id":13,"poly":[956,871,1032,871,1032,898,956,898],"score":0.43,"latex":"20\\ \\mathrm{min}"},{"category_id":13,"poly":[1082,781,1112,781,1112,808,1082,808],"score":0.41,"latex":"(I)"},{"category_id":13,"poly":[697,1821,734,1821,734,1847,697,1847],"score":0.3,"latex":"1\\,\\mathrm{~h~}"},{"category_id":15,"poly":[881.0,174.0,1552.0,174.0,1552.0,204.0,881.0,204.0],"score":1.0,"text":"model. They also found that the empirical distributions of passenger"},{"category_id":15,"poly":[880.0,205.0,1552.0,205.0,1552.0,236.0,880.0,236.0],"score":0.99,"text":"incidence times (by time of day) had peaks just before the respec-"},{"category_id":15,"poly":[880.0,234.0,1553.0,234.0,1553.0,264.0,880.0,264.0],"score":0.99,"text":"tive average bus departure times. They hypothesized the existence"},{"category_id":15,"poly":[881.0,264.0,1345.0,264.0,1345.0,296.0,881.0,296.0],"score":0.98,"text":"of three classes of passengers: with proportion"},{"category_id":15,"poly":[1362.0,264.0,1552.0,264.0,1552.0,296.0,1362.0,296.0],"score":0.95,"text":"passengers whose"},{"category_id":15,"poly":[880.0,295.0,1552.0,295.0,1552.0,325.0,880.0,325.0],"score":1.0,"text":"time of incidence is causally coincident with that of a bus departure"},{"category_id":15,"poly":[880.0,326.0,1555.0,326.0,1555.0,355.0,880.0,355.0],"score":0.99,"text":"(e.g., because they saw the approaching bus from their home or a"},{"category_id":15,"poly":[881.0,356.0,1195.0,356.0,1195.0,388.0,881.0,388.0],"score":0.99,"text":"shop window); with proportion"},{"category_id":15,"poly":[1279.0,356.0,1553.0,356.0,1553.0,388.0,1279.0,388.0],"score":0.99,"text":", passengers who time their"},{"category_id":15,"poly":[882.0,388.0,1552.0,388.0,1552.0,416.0,882.0,416.0],"score":0.99,"text":"arrivals to minimize expected waiting time; and with proportion"},{"category_id":15,"poly":[1021.0,418.0,1553.0,418.0,1553.0,447.0,1021.0,447.0],"score":1.0,"text":", passengers who are randomly incident. The authors"},{"category_id":15,"poly":[881.0,448.0,989.0,448.0,989.0,477.0,881.0,477.0],"score":1.0,"text":"found that"},{"category_id":15,"poly":[1008.0,448.0,1553.0,448.0,1553.0,477.0,1008.0,477.0],"score":1.0,"text":"was positively correlated with the potential reduction"},{"category_id":15,"poly":[880.0,479.0,1552.0,479.0,1552.0,507.0,880.0,507.0],"score":1.0,"text":"in waiting time (compared with arriving randomly) that resulted"},{"category_id":15,"poly":[882.0,510.0,1551.0,510.0,1551.0,536.0,882.0,536.0],"score":0.97,"text":"from knowledge of the timetable and of service reliability. They also"},{"category_id":15,"poly":[881.0,539.0,943.0,539.0,943.0,568.0,881.0,568.0],"score":1.0,"text":"found"},{"category_id":15,"poly":[963.0,539.0,1553.0,539.0,1553.0,568.0,963.0,568.0],"score":0.99,"text":"to be higher in the peak commuting periods rather than in"},{"category_id":15,"poly":[881.0,568.0,1554.0,568.0,1554.0,599.0,881.0,599.0],"score":0.98,"text":"the off-peak periods, indicating more awareness of the timetable or"},{"category_id":15,"poly":[881.0,599.0,1323.0,599.0,1323.0,627.0,881.0,627.0],"score":0.98,"text":"historical reliability, or both, by commuters."},{"category_id":15,"poly":[905.0,1452.0,1551.0,1452.0,1551.0,1483.0,905.0,1483.0],"score":0.99,"text":"Furth and Muller study the issue in a theoretical context and gener-"},{"category_id":15,"poly":[883.0,1485.0,1553.0,1485.0,1553.0,1514.0,883.0,1514.0],"score":1.0,"text":"ally agree with the above findings (2). They are primarily concerned"},{"category_id":15,"poly":[882.0,1513.0,1553.0,1513.0,1553.0,1545.0,882.0,1545.0],"score":0.99,"text":"with the use of data from automatic vehicle-tracking systems to assess"},{"category_id":15,"poly":[880.0,1545.0,1553.0,1545.0,1553.0,1574.0,880.0,1574.0],"score":0.99,"text":"the impacts of reliability on passenger incidence behavior and wait-"},{"category_id":15,"poly":[881.0,1577.0,1551.0,1577.0,1551.0,1606.0,881.0,1606.0],"score":0.98,"text":"ing times. They propose that passengers will react to unreliability by"},{"category_id":15,"poly":[883.0,1608.0,1551.0,1608.0,1551.0,1637.0,883.0,1637.0],"score":1.0,"text":"departing earlier than they would with reliable services. Randomly"},{"category_id":15,"poly":[880.0,1636.0,1554.0,1636.0,1554.0,1669.0,880.0,1669.0],"score":1.0,"text":"incident unaware passengers will experience unreliability as a more"},{"category_id":15,"poly":[882.0,1669.0,1553.0,1669.0,1553.0,1697.0,882.0,1697.0],"score":0.99,"text":"dispersed distribution of headways and simply allocate additional"},{"category_id":15,"poly":[880.0,1699.0,1551.0,1699.0,1551.0,1726.0,880.0,1726.0],"score":0.97,"text":"time to their trip plan to improve the chance of arriving at their des-"},{"category_id":15,"poly":[881.0,1730.0,1551.0,1730.0,1551.0,1759.0,881.0,1759.0],"score":0.98,"text":"tination on time. Aware passengers, whose incidence is not entirely"},{"category_id":15,"poly":[880.0,1760.0,1552.0,1760.0,1552.0,1789.0,880.0,1789.0],"score":0.99,"text":"random, will react by timing their incidence somewhat earlier than"},{"category_id":15,"poly":[882.0,1792.0,1550.0,1792.0,1550.0,1818.0,882.0,1818.0],"score":0.99,"text":"the scheduled departure time to increase their chance of catching the"},{"category_id":15,"poly":[883.0,1823.0,1552.0,1823.0,1552.0,1849.0,883.0,1849.0],"score":0.99,"text":"desired service. The authors characterize these reactions as the costs"},{"category_id":15,"poly":[883.0,1853.0,1031.0,1853.0,1031.0,1880.0,883.0,1880.0],"score":0.95,"text":"of unreliability."},{"category_id":15,"poly":[907.0,630.0,1553.0,630.0,1553.0,658.0,907.0,658.0],"score":1.0,"text":"Bowman and Turnquist built on the concept of aware and unaware"},{"category_id":15,"poly":[881.0,662.0,1136.0,662.0,1136.0,690.0,881.0,690.0],"score":0.99,"text":"passengers of proportions"},{"category_id":15,"poly":[1155.0,662.0,1196.0,662.0,1196.0,690.0,1155.0,690.0],"score":1.0,"text":"and"},{"category_id":15,"poly":[1264.0,662.0,1553.0,662.0,1553.0,690.0,1264.0,690.0],"score":0.99,"text":",respectively. They proposed"},{"category_id":15,"poly":[881.0,692.0,1208.0,692.0,1208.0,719.0,881.0,719.0],"score":0.99,"text":"a utility-based model to estimate"},{"category_id":15,"poly":[1226.0,692.0,1552.0,692.0,1552.0,719.0,1226.0,719.0],"score":1.0,"text":"and the distribution of incidence"},{"category_id":15,"poly":[880.0,721.0,1554.0,721.0,1554.0,751.0,880.0,751.0],"score":0.99,"text":"times, and thus the mean waiting time, of aware passengers over"},{"category_id":15,"poly":[880.0,752.0,1553.0,752.0,1553.0,780.0,880.0,780.0],"score":0.98,"text":"a given headway as a function of the headway and reliability of"},{"category_id":15,"poly":[880.0,782.0,1081.0,782.0,1081.0,812.0,880.0,812.0],"score":0.99,"text":"bus departure times"},{"category_id":15,"poly":[1113.0,782.0,1552.0,782.0,1552.0,812.0,1113.0,812.0],"score":0.99,"text":". They observed seven bus stops in Chicago,"},{"category_id":15,"poly":[882.0,813.0,1553.0,813.0,1553.0,841.0,882.0,841.0],"score":0.98,"text":"Illinois, each served by a single (different) bus route, between 6:00"},{"category_id":15,"poly":[882.0,844.0,923.0,844.0,923.0,871.0,882.0,871.0],"score":1.0,"text":"and"},{"category_id":15,"poly":[1017.0,844.0,1550.0,844.0,1550.0,871.0,1017.0,871.0],"score":0.97,"text":".for 5 to 10 days each. The bus routes had headways"},{"category_id":15,"poly":[882.0,874.0,955.0,874.0,955.0,902.0,882.0,902.0],"score":0.95,"text":"of 5to"},{"category_id":15,"poly":[1033.0,874.0,1553.0,874.0,1553.0,902.0,1033.0,902.0],"score":0.98,"text":"and a range of reliabilities. The authors found that"},{"category_id":15,"poly":[882.0,906.0,1553.0,906.0,1553.0,933.0,882.0,933.0],"score":0.99,"text":"actual average waiting time was substantially less than predicted"},{"category_id":15,"poly":[881.0,935.0,1443.0,935.0,1443.0,963.0,881.0,963.0],"score":1.0,"text":"by the random incidence model. They estimated that"},{"category_id":15,"poly":[1462.0,935.0,1553.0,935.0,1553.0,963.0,1462.0,963.0],"score":0.96,"text":"was not"},{"category_id":15,"poly":[881.0,966.0,1552.0,966.0,1552.0,994.0,881.0,994.0],"score":0.98,"text":"statistically significantly different from 1.0, which they explain by"},{"category_id":15,"poly":[880.0,994.0,1552.0,994.0,1552.0,1025.0,880.0,1025.0],"score":0.99,"text":"the fact that all observations were taken during peak commuting"},{"category_id":15,"poly":[880.0,1027.0,1552.0,1027.0,1552.0,1054.0,880.0,1054.0],"score":0.99,"text":"times. Their model predicts that the longer the headway and the"},{"category_id":15,"poly":[881.0,1058.0,1554.0,1058.0,1554.0,1086.0,881.0,1086.0],"score":0.99,"text":"more reliable the departures, the more peaked the distribution of"},{"category_id":15,"poly":[881.0,1088.0,1553.0,1088.0,1553.0,1115.0,881.0,1115.0],"score":0.98,"text":"incidence times will be and the closer that peak will be to the next"},{"category_id":15,"poly":[882.0,1119.0,1552.0,1119.0,1552.0,1148.0,882.0,1148.0],"score":1.0,"text":"scheduled departure time. This prediction demonstrates what they"},{"category_id":15,"poly":[882.0,1149.0,1552.0,1149.0,1552.0,1176.0,882.0,1176.0],"score":0.99,"text":"refer to as a safety margin that passengers add to reduce the chance"},{"category_id":15,"poly":[883.0,1181.0,1552.0,1181.0,1552.0,1206.0,883.0,1206.0],"score":0.98,"text":"of missing their bus when the service is known to be somewhat"},{"category_id":15,"poly":[882.0,1210.0,1551.0,1210.0,1551.0,1238.0,882.0,1238.0],"score":0.98,"text":"unreliable. Such a safety margin can also result from unreliability in"},{"category_id":15,"poly":[881.0,1242.0,1553.0,1242.0,1553.0,1269.0,881.0,1269.0],"score":0.99,"text":"passengers' journeys to the public transport stop or station. Bowman"},{"category_id":15,"poly":[882.0,1271.0,1553.0,1271.0,1553.0,1299.0,882.0,1299.0],"score":0.99,"text":"and Turnquist conclude from their model that the random incidence"},{"category_id":15,"poly":[880.0,1301.0,1551.0,1301.0,1551.0,1331.0,880.0,1331.0],"score":0.99,"text":"model underestimates the waiting time benefits of improving reli-"},{"category_id":15,"poly":[882.0,1332.0,1552.0,1332.0,1552.0,1362.0,882.0,1362.0],"score":0.99,"text":"ability and overestimates the waiting time benefits of increasing ser-"},{"category_id":15,"poly":[883.0,1363.0,1552.0,1363.0,1552.0,1392.0,883.0,1392.0],"score":0.99,"text":"vice frequency. This is because as reliability increases passengers"},{"category_id":15,"poly":[882.0,1394.0,1552.0,1394.0,1552.0,1422.0,882.0,1422.0],"score":0.99,"text":"can better predict departure times and so can time their incidence to"},{"category_id":15,"poly":[882.0,1423.0,1159.0,1423.0,1159.0,1452.0,882.0,1452.0],"score":0.99,"text":"decrease their waiting time."},{"category_id":15,"poly":[175.0,235.0,819.0,235.0,819.0,264.0,175.0,264.0],"score":0.99,"text":"After briefly introducing the random incidence model, which is"},{"category_id":15,"poly":[149.0,265.0,818.0,265.0,818.0,295.0,149.0,295.0],"score":0.98,"text":"often assumed to hold at short headways, the balance of this section"},{"category_id":15,"poly":[148.0,298.0,818.0,298.0,818.0,324.0,148.0,324.0],"score":0.98,"text":"reviews six studies of passenger incidence behavior that are moti-"},{"category_id":15,"poly":[148.0,327.0,818.0,327.0,818.0,356.0,148.0,356.0],"score":1.0,"text":"vated by understanding the relationships between service headway,"},{"category_id":15,"poly":[146.0,355.0,820.0,355.0,820.0,388.0,146.0,388.0],"score":0.99,"text":"service reliability, passenger incidence behavior, and passenger"},{"category_id":15,"poly":[149.0,388.0,818.0,388.0,818.0,414.0,149.0,414.0],"score":1.0,"text":"waiting time in a more nuanced fashion than is embedded in the"},{"category_id":15,"poly":[149.0,419.0,818.0,419.0,818.0,445.0,149.0,445.0],"score":1.0,"text":"random incidence assumption (2). Three of these studies depend on"},{"category_id":15,"poly":[147.0,447.0,818.0,447.0,818.0,477.0,147.0,477.0],"score":0.99,"text":"manually collected data, two studies use data from AFC systems,"},{"category_id":15,"poly":[148.0,479.0,819.0,479.0,819.0,507.0,148.0,507.0],"score":0.99,"text":"and one study analyzes the issue purely theoretically. These studies"},{"category_id":15,"poly":[147.0,509.0,819.0,509.0,819.0,537.0,147.0,537.0],"score":0.99,"text":"reveal much about passenger incidence behavior, but all are found"},{"category_id":15,"poly":[147.0,538.0,820.0,538.0,820.0,567.0,147.0,567.0],"score":0.99,"text":"to be limited in their general applicability by the methods with"},{"category_id":15,"poly":[150.0,569.0,818.0,569.0,818.0,597.0,150.0,597.0],"score":0.99,"text":"which they collect information about passengers and the services"},{"category_id":15,"poly":[147.0,599.0,458.0,599.0,458.0,630.0,147.0,630.0],"score":1.0,"text":"those passengers intend to use."},{"category_id":15,"poly":[150.0,1219.0,212.0,1219.0,212.0,1247.0,150.0,1247.0],"score":1.0,"text":"where"},{"category_id":15,"poly":[264.0,1219.0,817.0,1219.0,817.0,1247.0,264.0,1247.0],"score":0.99,"text":"is the probabilistic expectation of some random variable"},{"category_id":15,"poly":[168.0,1248.0,209.0,1248.0,209.0,1275.0,168.0,1275.0],"score":1.0,"text":"and"},{"category_id":15,"poly":[283.0,1248.0,601.0,1248.0,601.0,1275.0,283.0,1275.0],"score":0.97,"text":"is the coefficient of variation of"},{"category_id":15,"poly":[625.0,1248.0,818.0,1248.0,818.0,1275.0,625.0,1275.0],"score":0.96,"text":".a unitless measure"},{"category_id":15,"poly":[148.0,1277.0,345.0,1277.0,345.0,1307.0,148.0,1307.0],"score":0.97,"text":"of the variability of"},{"category_id":15,"poly":[370.0,1277.0,477.0,1277.0,477.0,1307.0,370.0,1307.0],"score":0.99,"text":"defined as"},{"category_id":15,"poly":[906.0,1883.0,1552.0,1883.0,1552.0,1910.0,906.0,1910.0],"score":0.98,"text":"Luethi et al. continued with the analysis of manually collected"},{"category_id":15,"poly":[880.0,1909.0,1552.0,1909.0,1552.0,1945.0,880.0,1945.0],"score":0.99,"text":"data on actual passenger behavior (6). They use the language"},{"category_id":15,"poly":[883.0,1945.0,1552.0,1945.0,1552.0,1972.0,883.0,1972.0],"score":0.99,"text":"of probability to describe two classes of passengers. The first is"},{"category_id":15,"poly":[881.0,1973.0,1552.0,1973.0,1552.0,2003.0,881.0,2003.0],"score":1.0,"text":"timetable-dependent passengers (i.e., the aware passengers), whose"},{"category_id":15,"poly":[881.0,2006.0,1552.0,2006.0,1552.0,2033.0,881.0,2033.0],"score":1.0,"text":"incidence behavior is affected by awareness (possibly gained"},{"category_id":15,"poly":[149.0,748.0,817.0,748.0,817.0,774.0,149.0,774.0],"score":1.0,"text":"One characterization of passenger incidence behavior is that of ran-"},{"category_id":15,"poly":[148.0,777.0,818.0,777.0,818.0,806.0,148.0,806.0],"score":0.99,"text":"dom incidence (3). The key assumption underlying the random inci-"},{"category_id":15,"poly":[148.0,807.0,818.0,807.0,818.0,836.0,148.0,836.0],"score":0.99,"text":"dence model is that the process of passenger arrivals to the public"},{"category_id":15,"poly":[148.0,837.0,819.0,837.0,819.0,866.0,148.0,866.0],"score":0.99,"text":"transport service is independent from the vehicle departure process"},{"category_id":15,"poly":[148.0,868.0,818.0,868.0,818.0,897.0,148.0,897.0],"score":1.0,"text":"of the service. This implies that passengers become incident to the"},{"category_id":15,"poly":[149.0,899.0,817.0,899.0,817.0,925.0,149.0,925.0],"score":0.99,"text":"service at a random time, and thus the instantaneous rate of passen-"},{"category_id":15,"poly":[148.0,928.0,820.0,928.0,820.0,957.0,148.0,957.0],"score":1.0,"text":"ger arrivals to the service is uniform over a given period of time. Let"},{"category_id":15,"poly":[174.0,956.0,214.0,956.0,214.0,990.0,174.0,990.0],"score":1.0,"text":"and"},{"category_id":15,"poly":[239.0,956.0,818.0,956.0,818.0,990.0,239.0,990.0],"score":0.99,"text":"be random variables representing passenger waiting times"},{"category_id":15,"poly":[148.0,988.0,818.0,988.0,818.0,1016.0,148.0,1016.0],"score":1.0,"text":"and service headways, respectively. Under the random incidence"},{"category_id":15,"poly":[149.0,1019.0,818.0,1019.0,818.0,1048.0,149.0,1048.0],"score":0.98,"text":"assumption and the assumption that vehicle capacity is not a binding"},{"category_id":15,"poly":[149.0,1050.0,726.0,1050.0,726.0,1076.0,149.0,1076.0],"score":0.99,"text":"constraint, a classic result of transportation science is that"},{"category_id":15,"poly":[146.0,1793.0,818.0,1793.0,818.0,1822.0,146.0,1822.0],"score":0.98,"text":" Jolliffe and Hutchinson studied bus passenger incidence in South"},{"category_id":15,"poly":[147.0,1825.0,696.0,1825.0,696.0,1852.0,147.0,1852.0],"score":0.97,"text":"London suburbs (5). They observed 10 bus stops for"},{"category_id":15,"poly":[735.0,1825.0,817.0,1825.0,817.0,1852.0,735.0,1852.0],"score":1.0,"text":"perday"},{"category_id":15,"poly":[148.0,1855.0,819.0,1855.0,819.0,1881.0,148.0,1881.0],"score":1.0,"text":"over 8 days, recording the times of passenger incidence and actual"},{"category_id":15,"poly":[148.0,1884.0,819.0,1884.0,819.0,1912.0,148.0,1912.0],"score":0.98,"text":"and scheduled bus departures. They limited their stop selection to"},{"category_id":15,"poly":[146.0,1913.0,819.0,1913.0,819.0,1945.0,146.0,1945.0],"score":1.0,"text":"those served by only a single bus route with a single service pat-"},{"category_id":15,"poly":[147.0,1945.0,819.0,1945.0,819.0,1974.0,147.0,1974.0],"score":0.98,"text":"tern so as to avoid ambiguity about which service a passenger was"},{"category_id":15,"poly":[147.0,1972.0,820.0,1972.0,820.0,2006.0,147.0,2006.0],"score":0.98,"text":"waiting for. The authors found that the actual average passenger"},{"category_id":15,"poly":[149.0,2005.0,323.0,2005.0,323.0,2033.0,149.0,2033.0],"score":0.96,"text":"waitingtimewas"},{"category_id":15,"poly":[374.0,2005.0,819.0,2005.0,819.0,2033.0,374.0,2033.0],"score":1.0,"text":"less than predicted by the random incidence"},{"category_id":15,"poly":[148.0,686.0,625.0,686.0,625.0,721.0,148.0,721.0],"score":0.99,"text":"Random Passenger Incidence Behavior"},{"category_id":15,"poly":[151.0,1434.0,213.0,1434.0,213.0,1462.0,151.0,1462.0],"score":0.99,"text":"where"},{"category_id":15,"poly":[246.0,1434.0,521.0,1434.0,521.0,1462.0,246.0,1462.0],"score":0.98,"text":"is the standard deviation of"},{"category_id":15,"poly":[580.0,1434.0,816.0,1434.0,816.0,1462.0,580.0,1462.0],"score":0.96,"text":".The second expression"},{"category_id":15,"poly":[148.0,1466.0,819.0,1466.0,819.0,1493.0,148.0,1493.0],"score":0.99,"text":"in Equation 1 is particularly useful because it expresses the mean"},{"category_id":15,"poly":[146.0,1496.0,819.0,1496.0,819.0,1525.0,146.0,1525.0],"score":0.99,"text":"passenger waiting time as the sum of two components: the waiting"},{"category_id":15,"poly":[148.0,1526.0,818.0,1526.0,818.0,1553.0,148.0,1553.0],"score":0.98,"text":"time caused by the mean headway (i.e., the reciprocal of service fre-"},{"category_id":15,"poly":[147.0,1557.0,819.0,1557.0,819.0,1584.0,147.0,1584.0],"score":0.99,"text":"quency) and the waiting time caused by the variability of the head-"},{"category_id":15,"poly":[148.0,1588.0,818.0,1588.0,818.0,1612.0,148.0,1612.0],"score":0.97,"text":"ways (which is one measure of service reliability). When the service"},{"category_id":15,"poly":[148.0,1617.0,817.0,1617.0,817.0,1644.0,148.0,1644.0],"score":1.0,"text":"is perfectly reliable with constant headways, the mean waiting time"},{"category_id":15,"poly":[148.0,1646.0,472.0,1646.0,472.0,1677.0,148.0,1677.0],"score":0.99,"text":"will be simply half the headway."},{"category_id":15,"poly":[151.0,176.0,817.0,176.0,817.0,204.0,151.0,204.0],"score":0.99,"text":"dependent on the service headway and the reliability of the departure"},{"category_id":15,"poly":[147.0,205.0,652.0,205.0,652.0,236.0,147.0,236.0],"score":0.99,"text":"time of the service to which passengers are incident."},{"category_id":15,"poly":[149.0,1735.0,702.0,1735.0,702.0,1767.0,149.0,1767.0],"score":0.98,"text":"More Behaviorally Realistic Incidence Models"},{"category_id":15,"poly":[1519.0,98.0,1554.0,98.0,1554.0,125.0,1519.0,125.0],"score":1.0,"text":"53"},{"category_id":15,"poly":[148.0,98.0,322.0,98.0,322.0,123.0,148.0,123.0],"score":1.0,"text":"Frumin and Zhao"}],"page_info":{"page_no":0,"height":2200,"width":1700}}]} diff --git a/tests/unittest/test_tools/assets/cli_dev/cli_test_01.model.json b/tests/unittest/test_tools/assets/cli_dev/cli_test_01.model.json deleted file mode 100644 index 522b78dc..00000000 --- a/tests/unittest/test_tools/assets/cli_dev/cli_test_01.model.json +++ /dev/null @@ -1 +0,0 @@ -[{"layout_dets":[{"category_id":1,"poly":[882.4013061523438,169.93817138671875,1552.350341796875,169.93817138671875,1552.350341796875,625.8263549804688,882.4013061523438,625.8263549804688],"score":0.999992311000824},{"category_id":1,"poly":[882.474853515625,1450.92822265625,1551.4490966796875,1450.92822265625,1551.4490966796875,1877.5712890625,882.474853515625,1877.5712890625],"score":0.9999903440475464},{"category_id":1,"poly":[881.6513061523438,626.2058715820312,1552.1400146484375,626.2058715820312,1552.1400146484375,1450.604736328125,881.6513061523438,1450.604736328125],"score":0.9999856352806091},{"category_id":1,"poly":[149.41075134277344,232.1595001220703,819.0465087890625,232.1595001220703,819.0465087890625,625.8865356445312,149.41075134277344,625.8865356445312],"score":0.99998539686203},{"category_id":1,"poly":[149.3945770263672,1215.5172119140625,817.8850708007812,1215.5172119140625,817.8850708007812,1304.873291015625,149.3945770263672,1304.873291015625],"score":0.9999765157699585},{"category_id":1,"poly":[882.6979370117188,1880.13916015625,1552.15185546875,1880.13916015625,1552.15185546875,2031.339599609375,882.6979370117188,2031.339599609375],"score":0.9999744892120361},{"category_id":1,"poly":[148.96054077148438,743.3055419921875,818.6231689453125,743.3055419921875,818.6231689453125,1074.2369384765625,148.96054077148438,1074.2369384765625],"score":0.9999669790267944},{"category_id":1,"poly":[148.8435516357422,1791.14306640625,818.6885375976562,1791.14306640625,818.6885375976562,2030.794189453125,148.8435516357422,2030.794189453125],"score":0.9999618530273438},{"category_id":0,"poly":[150.7009735107422,684.0087890625,623.5106201171875,684.0087890625,623.5106201171875,717.03662109375,150.7009735107422,717.03662109375],"score":0.9999415278434753},{"category_id":8,"poly":[146.48068237304688,1331.6737060546875,317.2640075683594,1331.6737060546875,317.2640075683594,1400.1722412109375,146.48068237304688,1400.1722412109375],"score":0.9998958110809326},{"category_id":1,"poly":[149.42420959472656,1430.8782958984375,818.9042358398438,1430.8782958984375,818.9042358398438,1672.7386474609375,149.42420959472656,1672.7386474609375],"score":0.9998599290847778},{"category_id":1,"poly":[149.18746948242188,172.10252380371094,818.5662231445312,172.10252380371094,818.5662231445312,230.4594268798828,149.18746948242188,230.4594268798828],"score":0.9997718334197998},{"category_id":0,"poly":[149.0175018310547,1732.1090087890625,702.1005859375,1732.1090087890625,702.1005859375,1763.6046142578125,149.0175018310547,1763.6046142578125],"score":0.9997085928916931},{"category_id":2,"poly":[1519.802490234375,98.59099578857422,1551.985107421875,98.59099578857422,1551.985107421875,119.48420715332031,1519.802490234375,119.48420715332031],"score":0.9995552897453308},{"category_id":8,"poly":[146.9109649658203,1100.156494140625,544.2803344726562,1100.156494140625,544.2803344726562,1184.929443359375,146.9109649658203,1184.929443359375],"score":0.9995207786560059},{"category_id":2,"poly":[148.11611938476562,99.87767791748047,318.926025390625,99.87767791748047,318.926025390625,120.70393371582031,148.11611938476562,120.70393371582031],"score":0.999351441860199},{"category_id":9,"poly":[791.7642211914062,1130.056396484375,818.6940307617188,1130.056396484375,818.6940307617188,1161.1080322265625,791.7642211914062,1161.1080322265625],"score":0.9908884763717651},{"category_id":9,"poly":[788.37060546875,1346.8450927734375,818.5010986328125,1346.8450927734375,818.5010986328125,1377.370361328125,788.37060546875,1377.370361328125],"score":0.9873985052108765},{"category_id":14,"poly":[146,1103,543,1103,543,1184,146,1184],"score":0.94,"latex":"E\\!\\left(W\\right)\\!=\\!\\frac{E\\!\\left[H^{2}\\right]}{2E\\!\\left[H\\right]}\\!=\\!\\frac{E\\!\\left[H\\right]}{2}\\!\\!\\left(1\\!+\\!\\operatorname{CV}\\!\\left(H\\right)^{2}\\right)"},{"category_id":13,"poly":[1196,354,1278,354,1278,384,1196,384],"score":0.91,"latex":"p(1-q)"},{"category_id":13,"poly":[881,415,1020,415,1020,444,881,444],"score":0.91,"latex":"(1-p)(1-q)"},{"category_id":14,"poly":[147,1333,318,1333,318,1400,147,1400],"score":0.91,"latex":"\\mathrm{CV}\\big(H\\big)\\!=\\!\\frac{\\sigma_{_H}}{E\\big[H\\big]}"},{"category_id":13,"poly":[1197,657,1263,657,1263,686,1197,686],"score":0.9,"latex":"(1-p)"},{"category_id":13,"poly":[213,1217,263,1217,263,1244,213,1244],"score":0.88,"latex":"E[X]"},{"category_id":13,"poly":[214,1434,245,1434,245,1459,214,1459],"score":0.87,"latex":"\\upsigma_{H}"},{"category_id":13,"poly":[324,2002,373,2002,373,2028,324,2028],"score":0.84,"latex":"30\\%"},{"category_id":13,"poly":[1209,693,1225,693,1225,717,1209,717],"score":0.83,"latex":"p"},{"category_id":13,"poly":[990,449,1007,449,1007,474,990,474],"score":0.81,"latex":"p"},{"category_id":13,"poly":[346,1277,369,1277,369,1301,346,1301],"score":0.81,"latex":"H"},{"category_id":13,"poly":[1137,661,1154,661,1154,686,1137,686],"score":0.81,"latex":"p"},{"category_id":13,"poly":[522,1432,579,1432,579,1459,522,1459],"score":0.81,"latex":"H\\left(4\\right)"},{"category_id":13,"poly":[944,540,962,540,962,565,944,565],"score":0.8,"latex":"p"},{"category_id":13,"poly":[1444,936,1461,936,1461,961,1444,961],"score":0.79,"latex":"p"},{"category_id":13,"poly":[602,1247,624,1247,624,1270,602,1270],"score":0.78,"latex":"H"},{"category_id":13,"poly":[147,1247,167,1247,167,1271,147,1271],"score":0.77,"latex":"X"},{"category_id":13,"poly":[210,1246,282,1246,282,1274,210,1274],"score":0.77,"latex":"\\mathrm{CV}(H)"},{"category_id":13,"poly":[1346,268,1361,268,1361,292,1346,292],"score":0.76,"latex":"q"},{"category_id":13,"poly":[215,957,238,957,238,981,215,981],"score":0.74,"latex":"H"},{"category_id":13,"poly":[149,956,173,956,173,981,149,981],"score":0.63,"latex":"W"},{"category_id":13,"poly":[924,841,1016,841,1016,868,924,868],"score":0.56,"latex":"8{\\mathrm{:}}00\\;\\mathrm{a.m}"},{"category_id":13,"poly":[956,871,1032,871,1032,898,956,898],"score":0.43,"latex":"20\\ \\mathrm{min}"},{"category_id":13,"poly":[1082,781,1112,781,1112,808,1082,808],"score":0.41,"latex":"(I)"},{"category_id":13,"poly":[697,1821,734,1821,734,1847,697,1847],"score":0.3,"latex":"1\\,\\mathrm{~h~}"},{"category_id":15,"poly":[881.0,174.0,1552.0,174.0,1552.0,204.0,881.0,204.0],"score":1.0,"text":"model. They also found that the empirical distributions of passenger"},{"category_id":15,"poly":[880.0,205.0,1552.0,205.0,1552.0,236.0,880.0,236.0],"score":0.99,"text":"incidence times (by time of day) had peaks just before the respec-"},{"category_id":15,"poly":[880.0,234.0,1553.0,234.0,1553.0,264.0,880.0,264.0],"score":0.99,"text":"tive average bus departure times. They hypothesized the existence"},{"category_id":15,"poly":[881.0,264.0,1345.0,264.0,1345.0,296.0,881.0,296.0],"score":0.98,"text":"of three classes of passengers: with proportion"},{"category_id":15,"poly":[1362.0,264.0,1552.0,264.0,1552.0,296.0,1362.0,296.0],"score":0.95,"text":"passengers whose"},{"category_id":15,"poly":[880.0,295.0,1552.0,295.0,1552.0,325.0,880.0,325.0],"score":1.0,"text":"time of incidence is causally coincident with that of a bus departure"},{"category_id":15,"poly":[880.0,326.0,1555.0,326.0,1555.0,355.0,880.0,355.0],"score":0.99,"text":"(e.g., because they saw the approaching bus from their home or a"},{"category_id":15,"poly":[881.0,356.0,1195.0,356.0,1195.0,388.0,881.0,388.0],"score":0.99,"text":"shop window); with proportion"},{"category_id":15,"poly":[1279.0,356.0,1553.0,356.0,1553.0,388.0,1279.0,388.0],"score":0.99,"text":", passengers who time their"},{"category_id":15,"poly":[882.0,388.0,1552.0,388.0,1552.0,416.0,882.0,416.0],"score":0.99,"text":"arrivals to minimize expected waiting time; and with proportion"},{"category_id":15,"poly":[1021.0,418.0,1553.0,418.0,1553.0,447.0,1021.0,447.0],"score":1.0,"text":", passengers who are randomly incident. The authors"},{"category_id":15,"poly":[881.0,448.0,989.0,448.0,989.0,477.0,881.0,477.0],"score":1.0,"text":"found that"},{"category_id":15,"poly":[1008.0,448.0,1553.0,448.0,1553.0,477.0,1008.0,477.0],"score":1.0,"text":"was positively correlated with the potential reduction"},{"category_id":15,"poly":[880.0,479.0,1552.0,479.0,1552.0,507.0,880.0,507.0],"score":1.0,"text":"in waiting time (compared with arriving randomly) that resulted"},{"category_id":15,"poly":[882.0,510.0,1551.0,510.0,1551.0,536.0,882.0,536.0],"score":0.97,"text":"from knowledge of the timetable and of service reliability. They also"},{"category_id":15,"poly":[881.0,539.0,943.0,539.0,943.0,568.0,881.0,568.0],"score":1.0,"text":"found"},{"category_id":15,"poly":[963.0,539.0,1553.0,539.0,1553.0,568.0,963.0,568.0],"score":0.99,"text":"to be higher in the peak commuting periods rather than in"},{"category_id":15,"poly":[881.0,568.0,1554.0,568.0,1554.0,599.0,881.0,599.0],"score":0.98,"text":"the off-peak periods, indicating more awareness of the timetable or"},{"category_id":15,"poly":[881.0,599.0,1323.0,599.0,1323.0,627.0,881.0,627.0],"score":0.98,"text":"historical reliability, or both, by commuters."},{"category_id":15,"poly":[905.0,1452.0,1551.0,1452.0,1551.0,1483.0,905.0,1483.0],"score":0.99,"text":"Furth and Muller study the issue in a theoretical context and gener-"},{"category_id":15,"poly":[883.0,1485.0,1553.0,1485.0,1553.0,1514.0,883.0,1514.0],"score":1.0,"text":"ally agree with the above findings (2). They are primarily concerned"},{"category_id":15,"poly":[882.0,1513.0,1553.0,1513.0,1553.0,1545.0,882.0,1545.0],"score":0.99,"text":"with the use of data from automatic vehicle-tracking systems to assess"},{"category_id":15,"poly":[880.0,1545.0,1553.0,1545.0,1553.0,1574.0,880.0,1574.0],"score":0.99,"text":"the impacts of reliability on passenger incidence behavior and wait-"},{"category_id":15,"poly":[881.0,1577.0,1551.0,1577.0,1551.0,1606.0,881.0,1606.0],"score":0.98,"text":"ing times. They propose that passengers will react to unreliability by"},{"category_id":15,"poly":[883.0,1608.0,1551.0,1608.0,1551.0,1637.0,883.0,1637.0],"score":1.0,"text":"departing earlier than they would with reliable services. Randomly"},{"category_id":15,"poly":[880.0,1636.0,1554.0,1636.0,1554.0,1669.0,880.0,1669.0],"score":1.0,"text":"incident unaware passengers will experience unreliability as a more"},{"category_id":15,"poly":[882.0,1669.0,1553.0,1669.0,1553.0,1697.0,882.0,1697.0],"score":0.99,"text":"dispersed distribution of headways and simply allocate additional"},{"category_id":15,"poly":[880.0,1699.0,1551.0,1699.0,1551.0,1726.0,880.0,1726.0],"score":0.97,"text":"time to their trip plan to improve the chance of arriving at their des-"},{"category_id":15,"poly":[881.0,1730.0,1551.0,1730.0,1551.0,1759.0,881.0,1759.0],"score":0.98,"text":"tination on time. Aware passengers, whose incidence is not entirely"},{"category_id":15,"poly":[880.0,1760.0,1552.0,1760.0,1552.0,1789.0,880.0,1789.0],"score":0.99,"text":"random, will react by timing their incidence somewhat earlier than"},{"category_id":15,"poly":[882.0,1792.0,1550.0,1792.0,1550.0,1818.0,882.0,1818.0],"score":0.99,"text":"the scheduled departure time to increase their chance of catching the"},{"category_id":15,"poly":[883.0,1823.0,1552.0,1823.0,1552.0,1849.0,883.0,1849.0],"score":0.99,"text":"desired service. The authors characterize these reactions as the costs"},{"category_id":15,"poly":[883.0,1853.0,1031.0,1853.0,1031.0,1880.0,883.0,1880.0],"score":0.95,"text":"of unreliability."},{"category_id":15,"poly":[907.0,630.0,1553.0,630.0,1553.0,658.0,907.0,658.0],"score":1.0,"text":"Bowman and Turnquist built on the concept of aware and unaware"},{"category_id":15,"poly":[881.0,662.0,1136.0,662.0,1136.0,690.0,881.0,690.0],"score":0.99,"text":"passengers of proportions"},{"category_id":15,"poly":[1155.0,662.0,1196.0,662.0,1196.0,690.0,1155.0,690.0],"score":1.0,"text":"and"},{"category_id":15,"poly":[1264.0,662.0,1553.0,662.0,1553.0,690.0,1264.0,690.0],"score":0.99,"text":",respectively. They proposed"},{"category_id":15,"poly":[881.0,692.0,1208.0,692.0,1208.0,719.0,881.0,719.0],"score":0.99,"text":"a utility-based model to estimate"},{"category_id":15,"poly":[1226.0,692.0,1552.0,692.0,1552.0,719.0,1226.0,719.0],"score":1.0,"text":"and the distribution of incidence"},{"category_id":15,"poly":[880.0,721.0,1554.0,721.0,1554.0,751.0,880.0,751.0],"score":0.99,"text":"times, and thus the mean waiting time, of aware passengers over"},{"category_id":15,"poly":[880.0,752.0,1553.0,752.0,1553.0,780.0,880.0,780.0],"score":0.98,"text":"a given headway as a function of the headway and reliability of"},{"category_id":15,"poly":[880.0,782.0,1081.0,782.0,1081.0,812.0,880.0,812.0],"score":0.99,"text":"bus departure times"},{"category_id":15,"poly":[1113.0,782.0,1552.0,782.0,1552.0,812.0,1113.0,812.0],"score":0.99,"text":". They observed seven bus stops in Chicago,"},{"category_id":15,"poly":[882.0,813.0,1553.0,813.0,1553.0,841.0,882.0,841.0],"score":0.98,"text":"Illinois, each served by a single (different) bus route, between 6:00"},{"category_id":15,"poly":[882.0,844.0,923.0,844.0,923.0,871.0,882.0,871.0],"score":1.0,"text":"and"},{"category_id":15,"poly":[1017.0,844.0,1550.0,844.0,1550.0,871.0,1017.0,871.0],"score":0.97,"text":".for 5 to 10 days each. The bus routes had headways"},{"category_id":15,"poly":[882.0,874.0,955.0,874.0,955.0,902.0,882.0,902.0],"score":0.95,"text":"of 5to"},{"category_id":15,"poly":[1033.0,874.0,1553.0,874.0,1553.0,902.0,1033.0,902.0],"score":0.98,"text":"and a range of reliabilities. The authors found that"},{"category_id":15,"poly":[882.0,906.0,1553.0,906.0,1553.0,933.0,882.0,933.0],"score":0.99,"text":"actual average waiting time was substantially less than predicted"},{"category_id":15,"poly":[881.0,935.0,1443.0,935.0,1443.0,963.0,881.0,963.0],"score":1.0,"text":"by the random incidence model. They estimated that"},{"category_id":15,"poly":[1462.0,935.0,1553.0,935.0,1553.0,963.0,1462.0,963.0],"score":0.96,"text":"was not"},{"category_id":15,"poly":[881.0,966.0,1552.0,966.0,1552.0,994.0,881.0,994.0],"score":0.98,"text":"statistically significantly different from 1.0, which they explain by"},{"category_id":15,"poly":[880.0,994.0,1552.0,994.0,1552.0,1025.0,880.0,1025.0],"score":0.99,"text":"the fact that all observations were taken during peak commuting"},{"category_id":15,"poly":[880.0,1027.0,1552.0,1027.0,1552.0,1054.0,880.0,1054.0],"score":0.99,"text":"times. Their model predicts that the longer the headway and the"},{"category_id":15,"poly":[881.0,1058.0,1554.0,1058.0,1554.0,1086.0,881.0,1086.0],"score":0.99,"text":"more reliable the departures, the more peaked the distribution of"},{"category_id":15,"poly":[881.0,1088.0,1553.0,1088.0,1553.0,1115.0,881.0,1115.0],"score":0.98,"text":"incidence times will be and the closer that peak will be to the next"},{"category_id":15,"poly":[882.0,1119.0,1552.0,1119.0,1552.0,1148.0,882.0,1148.0],"score":1.0,"text":"scheduled departure time. This prediction demonstrates what they"},{"category_id":15,"poly":[882.0,1149.0,1552.0,1149.0,1552.0,1176.0,882.0,1176.0],"score":0.99,"text":"refer to as a safety margin that passengers add to reduce the chance"},{"category_id":15,"poly":[883.0,1181.0,1552.0,1181.0,1552.0,1206.0,883.0,1206.0],"score":0.98,"text":"of missing their bus when the service is known to be somewhat"},{"category_id":15,"poly":[882.0,1210.0,1551.0,1210.0,1551.0,1238.0,882.0,1238.0],"score":0.98,"text":"unreliable. Such a safety margin can also result from unreliability in"},{"category_id":15,"poly":[881.0,1242.0,1553.0,1242.0,1553.0,1269.0,881.0,1269.0],"score":0.99,"text":"passengers' journeys to the public transport stop or station. Bowman"},{"category_id":15,"poly":[882.0,1271.0,1553.0,1271.0,1553.0,1299.0,882.0,1299.0],"score":0.99,"text":"and Turnquist conclude from their model that the random incidence"},{"category_id":15,"poly":[880.0,1301.0,1551.0,1301.0,1551.0,1331.0,880.0,1331.0],"score":0.99,"text":"model underestimates the waiting time benefits of improving reli-"},{"category_id":15,"poly":[882.0,1332.0,1552.0,1332.0,1552.0,1362.0,882.0,1362.0],"score":0.99,"text":"ability and overestimates the waiting time benefits of increasing ser-"},{"category_id":15,"poly":[883.0,1363.0,1552.0,1363.0,1552.0,1392.0,883.0,1392.0],"score":0.99,"text":"vice frequency. This is because as reliability increases passengers"},{"category_id":15,"poly":[882.0,1394.0,1552.0,1394.0,1552.0,1422.0,882.0,1422.0],"score":0.99,"text":"can better predict departure times and so can time their incidence to"},{"category_id":15,"poly":[882.0,1423.0,1159.0,1423.0,1159.0,1452.0,882.0,1452.0],"score":0.99,"text":"decrease their waiting time."},{"category_id":15,"poly":[175.0,235.0,819.0,235.0,819.0,264.0,175.0,264.0],"score":0.99,"text":"After briefly introducing the random incidence model, which is"},{"category_id":15,"poly":[149.0,265.0,818.0,265.0,818.0,295.0,149.0,295.0],"score":0.98,"text":"often assumed to hold at short headways, the balance of this section"},{"category_id":15,"poly":[148.0,298.0,818.0,298.0,818.0,324.0,148.0,324.0],"score":0.98,"text":"reviews six studies of passenger incidence behavior that are moti-"},{"category_id":15,"poly":[148.0,327.0,818.0,327.0,818.0,356.0,148.0,356.0],"score":1.0,"text":"vated by understanding the relationships between service headway,"},{"category_id":15,"poly":[146.0,355.0,820.0,355.0,820.0,388.0,146.0,388.0],"score":0.99,"text":"service reliability, passenger incidence behavior, and passenger"},{"category_id":15,"poly":[149.0,388.0,818.0,388.0,818.0,414.0,149.0,414.0],"score":1.0,"text":"waiting time in a more nuanced fashion than is embedded in the"},{"category_id":15,"poly":[149.0,419.0,818.0,419.0,818.0,445.0,149.0,445.0],"score":1.0,"text":"random incidence assumption (2). Three of these studies depend on"},{"category_id":15,"poly":[147.0,447.0,818.0,447.0,818.0,477.0,147.0,477.0],"score":0.99,"text":"manually collected data, two studies use data from AFC systems,"},{"category_id":15,"poly":[148.0,479.0,819.0,479.0,819.0,507.0,148.0,507.0],"score":0.99,"text":"and one study analyzes the issue purely theoretically. These studies"},{"category_id":15,"poly":[147.0,509.0,819.0,509.0,819.0,537.0,147.0,537.0],"score":0.99,"text":"reveal much about passenger incidence behavior, but all are found"},{"category_id":15,"poly":[147.0,538.0,820.0,538.0,820.0,567.0,147.0,567.0],"score":0.99,"text":"to be limited in their general applicability by the methods with"},{"category_id":15,"poly":[150.0,569.0,818.0,569.0,818.0,597.0,150.0,597.0],"score":0.99,"text":"which they collect information about passengers and the services"},{"category_id":15,"poly":[147.0,599.0,458.0,599.0,458.0,630.0,147.0,630.0],"score":1.0,"text":"those passengers intend to use."},{"category_id":15,"poly":[150.0,1219.0,212.0,1219.0,212.0,1247.0,150.0,1247.0],"score":1.0,"text":"where"},{"category_id":15,"poly":[264.0,1219.0,817.0,1219.0,817.0,1247.0,264.0,1247.0],"score":0.99,"text":"is the probabilistic expectation of some random variable"},{"category_id":15,"poly":[168.0,1248.0,209.0,1248.0,209.0,1275.0,168.0,1275.0],"score":1.0,"text":"and"},{"category_id":15,"poly":[283.0,1248.0,601.0,1248.0,601.0,1275.0,283.0,1275.0],"score":0.97,"text":"is the coefficient of variation of"},{"category_id":15,"poly":[625.0,1248.0,818.0,1248.0,818.0,1275.0,625.0,1275.0],"score":0.96,"text":".a unitless measure"},{"category_id":15,"poly":[148.0,1277.0,345.0,1277.0,345.0,1307.0,148.0,1307.0],"score":0.97,"text":"of the variability of"},{"category_id":15,"poly":[370.0,1277.0,477.0,1277.0,477.0,1307.0,370.0,1307.0],"score":0.99,"text":"defined as"},{"category_id":15,"poly":[906.0,1883.0,1552.0,1883.0,1552.0,1910.0,906.0,1910.0],"score":0.98,"text":"Luethi et al. continued with the analysis of manually collected"},{"category_id":15,"poly":[880.0,1909.0,1552.0,1909.0,1552.0,1945.0,880.0,1945.0],"score":0.99,"text":"data on actual passenger behavior (6). They use the language"},{"category_id":15,"poly":[883.0,1945.0,1552.0,1945.0,1552.0,1972.0,883.0,1972.0],"score":0.99,"text":"of probability to describe two classes of passengers. The first is"},{"category_id":15,"poly":[881.0,1973.0,1552.0,1973.0,1552.0,2003.0,881.0,2003.0],"score":1.0,"text":"timetable-dependent passengers (i.e., the aware passengers), whose"},{"category_id":15,"poly":[881.0,2006.0,1552.0,2006.0,1552.0,2033.0,881.0,2033.0],"score":1.0,"text":"incidence behavior is affected by awareness (possibly gained"},{"category_id":15,"poly":[149.0,748.0,817.0,748.0,817.0,774.0,149.0,774.0],"score":1.0,"text":"One characterization of passenger incidence behavior is that of ran-"},{"category_id":15,"poly":[148.0,777.0,818.0,777.0,818.0,806.0,148.0,806.0],"score":0.99,"text":"dom incidence (3). The key assumption underlying the random inci-"},{"category_id":15,"poly":[148.0,807.0,818.0,807.0,818.0,836.0,148.0,836.0],"score":0.99,"text":"dence model is that the process of passenger arrivals to the public"},{"category_id":15,"poly":[148.0,837.0,819.0,837.0,819.0,866.0,148.0,866.0],"score":0.99,"text":"transport service is independent from the vehicle departure process"},{"category_id":15,"poly":[148.0,868.0,818.0,868.0,818.0,897.0,148.0,897.0],"score":1.0,"text":"of the service. This implies that passengers become incident to the"},{"category_id":15,"poly":[149.0,899.0,817.0,899.0,817.0,925.0,149.0,925.0],"score":0.99,"text":"service at a random time, and thus the instantaneous rate of passen-"},{"category_id":15,"poly":[148.0,928.0,820.0,928.0,820.0,957.0,148.0,957.0],"score":1.0,"text":"ger arrivals to the service is uniform over a given period of time. Let"},{"category_id":15,"poly":[174.0,956.0,214.0,956.0,214.0,990.0,174.0,990.0],"score":1.0,"text":"and"},{"category_id":15,"poly":[239.0,956.0,818.0,956.0,818.0,990.0,239.0,990.0],"score":0.99,"text":"be random variables representing passenger waiting times"},{"category_id":15,"poly":[148.0,988.0,818.0,988.0,818.0,1016.0,148.0,1016.0],"score":1.0,"text":"and service headways, respectively. Under the random incidence"},{"category_id":15,"poly":[149.0,1019.0,818.0,1019.0,818.0,1048.0,149.0,1048.0],"score":0.98,"text":"assumption and the assumption that vehicle capacity is not a binding"},{"category_id":15,"poly":[149.0,1050.0,726.0,1050.0,726.0,1076.0,149.0,1076.0],"score":0.99,"text":"constraint, a classic result of transportation science is that"},{"category_id":15,"poly":[146.0,1793.0,818.0,1793.0,818.0,1822.0,146.0,1822.0],"score":0.98,"text":" Jolliffe and Hutchinson studied bus passenger incidence in South"},{"category_id":15,"poly":[147.0,1825.0,696.0,1825.0,696.0,1852.0,147.0,1852.0],"score":0.97,"text":"London suburbs (5). They observed 10 bus stops for"},{"category_id":15,"poly":[735.0,1825.0,817.0,1825.0,817.0,1852.0,735.0,1852.0],"score":1.0,"text":"perday"},{"category_id":15,"poly":[148.0,1855.0,819.0,1855.0,819.0,1881.0,148.0,1881.0],"score":1.0,"text":"over 8 days, recording the times of passenger incidence and actual"},{"category_id":15,"poly":[148.0,1884.0,819.0,1884.0,819.0,1912.0,148.0,1912.0],"score":0.98,"text":"and scheduled bus departures. They limited their stop selection to"},{"category_id":15,"poly":[146.0,1913.0,819.0,1913.0,819.0,1945.0,146.0,1945.0],"score":1.0,"text":"those served by only a single bus route with a single service pat-"},{"category_id":15,"poly":[147.0,1945.0,819.0,1945.0,819.0,1974.0,147.0,1974.0],"score":0.98,"text":"tern so as to avoid ambiguity about which service a passenger was"},{"category_id":15,"poly":[147.0,1972.0,820.0,1972.0,820.0,2006.0,147.0,2006.0],"score":0.98,"text":"waiting for. The authors found that the actual average passenger"},{"category_id":15,"poly":[149.0,2005.0,323.0,2005.0,323.0,2033.0,149.0,2033.0],"score":0.96,"text":"waitingtimewas"},{"category_id":15,"poly":[374.0,2005.0,819.0,2005.0,819.0,2033.0,374.0,2033.0],"score":1.0,"text":"less than predicted by the random incidence"},{"category_id":15,"poly":[148.0,686.0,625.0,686.0,625.0,721.0,148.0,721.0],"score":0.99,"text":"Random Passenger Incidence Behavior"},{"category_id":15,"poly":[151.0,1434.0,213.0,1434.0,213.0,1462.0,151.0,1462.0],"score":0.99,"text":"where"},{"category_id":15,"poly":[246.0,1434.0,521.0,1434.0,521.0,1462.0,246.0,1462.0],"score":0.98,"text":"is the standard deviation of"},{"category_id":15,"poly":[580.0,1434.0,816.0,1434.0,816.0,1462.0,580.0,1462.0],"score":0.96,"text":".The second expression"},{"category_id":15,"poly":[148.0,1466.0,819.0,1466.0,819.0,1493.0,148.0,1493.0],"score":0.99,"text":"in Equation 1 is particularly useful because it expresses the mean"},{"category_id":15,"poly":[146.0,1496.0,819.0,1496.0,819.0,1525.0,146.0,1525.0],"score":0.99,"text":"passenger waiting time as the sum of two components: the waiting"},{"category_id":15,"poly":[148.0,1526.0,818.0,1526.0,818.0,1553.0,148.0,1553.0],"score":0.98,"text":"time caused by the mean headway (i.e., the reciprocal of service fre-"},{"category_id":15,"poly":[147.0,1557.0,819.0,1557.0,819.0,1584.0,147.0,1584.0],"score":0.99,"text":"quency) and the waiting time caused by the variability of the head-"},{"category_id":15,"poly":[148.0,1588.0,818.0,1588.0,818.0,1612.0,148.0,1612.0],"score":0.97,"text":"ways (which is one measure of service reliability). When the service"},{"category_id":15,"poly":[148.0,1617.0,817.0,1617.0,817.0,1644.0,148.0,1644.0],"score":1.0,"text":"is perfectly reliable with constant headways, the mean waiting time"},{"category_id":15,"poly":[148.0,1646.0,472.0,1646.0,472.0,1677.0,148.0,1677.0],"score":0.99,"text":"will be simply half the headway."},{"category_id":15,"poly":[151.0,176.0,817.0,176.0,817.0,204.0,151.0,204.0],"score":0.99,"text":"dependent on the service headway and the reliability of the departure"},{"category_id":15,"poly":[147.0,205.0,652.0,205.0,652.0,236.0,147.0,236.0],"score":0.99,"text":"time of the service to which passengers are incident."},{"category_id":15,"poly":[149.0,1735.0,702.0,1735.0,702.0,1767.0,149.0,1767.0],"score":0.98,"text":"More Behaviorally Realistic Incidence Models"},{"category_id":15,"poly":[1519.0,98.0,1554.0,98.0,1554.0,125.0,1519.0,125.0],"score":1.0,"text":"53"},{"category_id":15,"poly":[148.0,98.0,322.0,98.0,322.0,123.0,148.0,123.0],"score":1.0,"text":"Frumin and Zhao"}],"page_info":{"page_no":0,"height":2200,"width":1700}}] \ No newline at end of file diff --git a/tests/unittest/test_tools/assets/cli_dev/cli_test_01.pdf b/tests/unittest/test_tools/assets/cli_dev/cli_test_01.pdf deleted file mode 100644 index 229be9ce..00000000 Binary files a/tests/unittest/test_tools/assets/cli_dev/cli_test_01.pdf and /dev/null differ diff --git a/tests/unittest/test_tools/assets/common/cli_test_01.pdf b/tests/unittest/test_tools/assets/common/cli_test_01.pdf deleted file mode 100644 index 229be9ce..00000000 Binary files a/tests/unittest/test_tools/assets/common/cli_test_01.pdf and /dev/null differ diff --git a/tests/unittest/test_tools/test_cli.py b/tests/unittest/test_tools/test_cli.py deleted file mode 100644 index 741a6ab3..00000000 --- a/tests/unittest/test_tools/test_cli.py +++ /dev/null @@ -1,126 +0,0 @@ -import os -import shutil -import tempfile - -from click.testing import CliRunner - -from magic_pdf.tools.cli import cli - - -def test_cli_pdf(): - # setup - unitest_dir = '/tmp/magic_pdf/unittest/tools' - filename = 'cli_test_01' - os.makedirs(unitest_dir, exist_ok=True) - temp_output_dir = tempfile.mkdtemp(dir='/tmp/magic_pdf/unittest/tools') - - # run - runner = CliRunner() - result = runner.invoke( - cli, - [ - '-p', - 'tests/unittest/test_tools/assets/cli/pdf/cli_test_01.pdf', - '-o', - temp_output_dir, - ], - ) - - # check - assert result.exit_code == 0 - - base_output_dir = os.path.join(temp_output_dir, 'cli_test_01/auto') - - r = os.stat(os.path.join(base_output_dir, f'{filename}.md')) - assert r.st_size > 7000 - - r = os.stat(os.path.join(base_output_dir, f'{filename}_middle.json')) - assert r.st_size > 200000 - - r = os.stat(os.path.join(base_output_dir, f'{filename}_model.json')) - assert r.st_size > 15000 - - r = os.stat(os.path.join(base_output_dir, f'{filename}_origin.pdf')) - assert r.st_size > 400000 - - r = os.stat(os.path.join(base_output_dir, f'{filename}_layout.pdf')) - assert r.st_size > 400000 - - r = os.stat(os.path.join(base_output_dir, f'{filename}_spans.pdf')) - assert r.st_size > 400000 - - assert os.path.exists(os.path.join(base_output_dir, 'images')) is True - assert os.path.isdir(os.path.join(base_output_dir, 'images')) is True - assert os.path.exists(os.path.join(base_output_dir, f'{filename}_content_list.json')) is True - - # teardown - shutil.rmtree(temp_output_dir) - - -def test_cli_path(): - # setup - unitest_dir = '/tmp/magic_pdf/unittest/tools' - os.makedirs(unitest_dir, exist_ok=True) - temp_output_dir = tempfile.mkdtemp(dir='/tmp/magic_pdf/unittest/tools') - - # run - runner = CliRunner() - result = runner.invoke( - cli, ['-p', 'tests/unittest/test_tools/assets/cli/path', '-o', temp_output_dir] - ) - - # check - assert result.exit_code == 0 - - filename = 'cli_test_01' - base_output_dir = os.path.join(temp_output_dir, 'cli_test_01/auto') - - r = os.stat(os.path.join(base_output_dir, f'{filename}.md')) - assert r.st_size > 7000 - - r = os.stat(os.path.join(base_output_dir, f'{filename}_middle.json')) - assert r.st_size > 200000 - - r = os.stat(os.path.join(base_output_dir, f'{filename}_model.json')) - assert r.st_size > 15000 - - r = os.stat(os.path.join(base_output_dir, f'{filename}_origin.pdf')) - assert r.st_size > 400000 - - r = os.stat(os.path.join(base_output_dir, f'{filename}_layout.pdf')) - assert r.st_size > 400000 - - r = os.stat(os.path.join(base_output_dir, f'{filename}_spans.pdf')) - assert r.st_size > 400000 - - assert os.path.exists(os.path.join(base_output_dir, 'images')) is True - assert os.path.isdir(os.path.join(base_output_dir, 'images')) is True - assert os.path.exists(os.path.join(base_output_dir, f'{filename}_content_list.json')) is True - - base_output_dir = os.path.join(temp_output_dir, 'cli_test_02/auto') - filename = 'cli_test_02' - - r = os.stat(os.path.join(base_output_dir, f'{filename}.md')) - assert r.st_size > 5000 - - r = os.stat(os.path.join(base_output_dir, f'{filename}_middle.json')) - assert r.st_size > 200000 - - r = os.stat(os.path.join(base_output_dir, f'{filename}_model.json')) - assert r.st_size > 15000 - - r = os.stat(os.path.join(base_output_dir, f'{filename}_origin.pdf')) - assert r.st_size > 400000 - - r = os.stat(os.path.join(base_output_dir, f'{filename}_layout.pdf')) - assert r.st_size > 400000 - - r = os.stat(os.path.join(base_output_dir, f'{filename}_spans.pdf')) - assert r.st_size > 400000 - - assert os.path.exists(os.path.join(base_output_dir, 'images')) is True - assert os.path.isdir(os.path.join(base_output_dir, 'images')) is True - assert os.path.exists(os.path.join(base_output_dir, f'{filename}_content_list.json')) is True - - # teardown - shutil.rmtree(temp_output_dir) diff --git a/tests/unittest/test_tools/test_cli_dev.py b/tests/unittest/test_tools/test_cli_dev.py deleted file mode 100644 index 33ccfba5..00000000 --- a/tests/unittest/test_tools/test_cli_dev.py +++ /dev/null @@ -1,120 +0,0 @@ -import os -import shutil -import tempfile - -from click.testing import CliRunner - -from magic_pdf.tools import cli_dev - - -def test_cli_pdf(): - # setup - unitest_dir = '/tmp/magic_pdf/unittest/tools' - filename = 'cli_test_01' - os.makedirs(unitest_dir, exist_ok=True) - temp_output_dir = tempfile.mkdtemp(dir='/tmp/magic_pdf/unittest/tools') - - # run - runner = CliRunner() - result = runner.invoke( - cli_dev.cli, - [ - 'pdf', - '-p', - 'tests/unittest/test_tools/assets/cli/pdf/cli_test_01.pdf', - '-j', - 'tests/unittest/test_tools/assets/cli_dev/cli_test_01.model.json', - '-o', - temp_output_dir, - ], - ) - - # check - assert result.exit_code == 0 - - base_output_dir = os.path.join(temp_output_dir, 'cli_test_01/auto') - - r = os.stat(os.path.join(base_output_dir, f'{filename}_content_list.json')) - assert r.st_size > 5000 - r = os.stat(os.path.join(base_output_dir, f'{filename}.md')) - assert r.st_size > 7000 - - r = os.stat(os.path.join(base_output_dir, f'{filename}_middle.json')) - assert r.st_size > 200000 - - r = os.stat(os.path.join(base_output_dir, f'{filename}_model.json')) - assert r.st_size > 15000 - - r = os.stat(os.path.join(base_output_dir, f'{filename}_origin.pdf')) - assert r.st_size > 400000 - - r = os.stat(os.path.join(base_output_dir, f'{filename}_layout.pdf')) - assert r.st_size > 400000 - - r = os.stat(os.path.join(base_output_dir, f'{filename}_spans.pdf')) - assert r.st_size > 400000 - - assert os.path.exists(os.path.join(base_output_dir, 'images')) is True - assert os.path.isdir(os.path.join(base_output_dir, 'images')) is True - - # teardown - shutil.rmtree(temp_output_dir) - - -def test_cli_jsonl(): - # setup - unitest_dir = '/tmp/magic_pdf/unittest/tools' - filename = 'cli_test_01' - os.makedirs(unitest_dir, exist_ok=True) - temp_output_dir = tempfile.mkdtemp(dir='/tmp/magic_pdf/unittest/tools') - - def mock_read_s3_path(s3path): - with open(s3path, 'rb') as f: - return f.read() - - cli_dev.read_s3_path = mock_read_s3_path # mock - - # run - runner = CliRunner() - result = runner.invoke( - cli_dev.cli, - [ - 'jsonl', - '-j', - 'tests/unittest/test_tools/assets/cli_dev/cli_test_01.jsonl', - '-o', - temp_output_dir, - ], - ) - - # check - assert result.exit_code == 0 - - base_output_dir = os.path.join(temp_output_dir, 'cli_test_01/auto') - - r = os.stat(os.path.join(base_output_dir, f'{filename}_content_list.json')) - assert r.st_size > 5000 - - r = os.stat(os.path.join(base_output_dir, f'{filename}.md')) - assert r.st_size > 7000 - - r = os.stat(os.path.join(base_output_dir, f'{filename}_middle.json')) - assert r.st_size > 200000 - - r = os.stat(os.path.join(base_output_dir, f'{filename}_model.json')) - assert r.st_size > 15000 - - r = os.stat(os.path.join(base_output_dir, f'{filename}_origin.pdf')) - assert r.st_size > 400000 - - r = os.stat(os.path.join(base_output_dir, f'{filename}_layout.pdf')) - assert r.st_size > 400000 - - r = os.stat(os.path.join(base_output_dir, f'{filename}_spans.pdf')) - assert r.st_size > 400000 - - assert os.path.exists(os.path.join(base_output_dir, 'images')) is True - assert os.path.isdir(os.path.join(base_output_dir, 'images')) is True - - # teardown - shutil.rmtree(temp_output_dir) diff --git a/tests/unittest/test_tools/test_common.py b/tests/unittest/test_tools/test_common.py deleted file mode 100644 index a6922540..00000000 --- a/tests/unittest/test_tools/test_common.py +++ /dev/null @@ -1,59 +0,0 @@ -import os -import shutil -import tempfile - -import pytest - -from magic_pdf.tools.common import do_parse - - -@pytest.mark.parametrize('method', ['auto', 'txt', 'ocr']) -def test_common_do_parse(method): - import magic_pdf.model as model_config - model_config.__use_inside_model__ = True - # setup - unitest_dir = '/tmp/magic_pdf/unittest/tools' - filename = 'fake' - os.makedirs(unitest_dir, exist_ok=True) - - temp_output_dir = tempfile.mkdtemp(dir='/tmp/magic_pdf/unittest/tools') - - # run - with open('tests/unittest/test_tools/assets/common/cli_test_01.pdf', 'rb') as f: - bits = f.read() - do_parse(temp_output_dir, - filename, - bits, [], - method, - False, - f_dump_content_list=True) - - # check - base_output_dir = os.path.join(temp_output_dir, f'fake/{method}') - - r = os.stat(os.path.join(base_output_dir, f'{filename}_content_list.json')) - assert r.st_size > 5000 - - r = os.stat(os.path.join(base_output_dir, f'{filename}.md')) - assert r.st_size > 7000 - - r = os.stat(os.path.join(base_output_dir, f'{filename}_middle.json')) - assert r.st_size > 200000 - - r = os.stat(os.path.join(base_output_dir, f'{filename}_model.json')) - assert r.st_size > 15000 - - r = os.stat(os.path.join(base_output_dir, f'{filename}_origin.pdf')) - assert r.st_size > 400000 - - r = os.stat(os.path.join(base_output_dir, f'{filename}_layout.pdf')) - assert r.st_size > 400000 - - r = os.stat(os.path.join(base_output_dir, f'{filename}_spans.pdf')) - assert r.st_size > 400000 - - os.path.exists(os.path.join(base_output_dir, 'images')) - os.path.isdir(os.path.join(base_output_dir, 'images')) - - # teardown - shutil.rmtree(temp_output_dir) diff --git a/tests/unittest/test_unit.py b/tests/unittest/test_unit.py deleted file mode 100644 index 37e6af5c..00000000 --- a/tests/unittest/test_unit.py +++ /dev/null @@ -1,542 +0,0 @@ -import os - -import pytest - -from magic_pdf.libs.boxbase import (__is_overlaps_y_exceeds_threshold, - _is_bottom_full_overlap, _is_in, - _is_in_or_part_overlap, - _is_in_or_part_overlap_with_area_ratio, - _is_left_overlap, _is_part_overlap, - _is_vertical_full_overlap, _left_intersect, - _right_intersect, bbox_distance, - bbox_relative_pos, calculate_iou, - calculate_overlap_area_2_minbox_area_ratio, - calculate_overlap_area_in_bbox1_area_ratio, - find_bottom_nearest_text_bbox, - find_left_nearest_text_bbox, - find_right_nearest_text_bbox, - find_top_nearest_text_bbox, - get_bbox_in_boundary, - get_minbox_if_overlap_by_ratio) -from magic_pdf.libs.commons import get_top_percent_list, join_path, mymax -from magic_pdf.libs.config_reader import get_s3_config -from magic_pdf.libs.path_utils import parse_s3path - - -# 输入一个列表,如果列表空返回0,否则返回最大元素 -@pytest.mark.parametrize('list_input, target_num', - [ - ([0, 0, 0, 0], 0), - ([0], 0), - ([1, 2, 5, 8, 4], 8), - ([], 0), - ([1.1, 7.6, 1.009, 9.9], 9.9), - ([1.0 * 10 ** 2, 3.5 * 10 ** 3, 0.9 * 10 ** 6], 0.9 * 10 ** 6), - ]) -def test_list_max(list_input: list, target_num) -> None: - """ - list_input: 输入列表元素,元素均为数字类型 - """ - assert target_num == mymax(list_input) - - -# 连接多个参数生成路径信息,使用"/"作为连接符,生成的结果需要是一个合法路径 -@pytest.mark.parametrize('path_input, target_path', [ - (['https:', '', 'www.baidu.com'], 'https://www.baidu.com'), - (['https:', 'www.baidu.com'], 'https:/www.baidu.com'), - (['D:', 'file', 'pythonProject', 'demo' + '.py'], 'D:/file/pythonProject/demo.py'), -]) -def test_join_path(path_input: list, target_path: str) -> None: - """ - path_input: 输入path的列表,列表元素均为字符串 - """ - assert target_path == join_path(*path_input) - - -# 获取列表中前百分之多少的元素 -@pytest.mark.parametrize('num_list, percent, target_num_list', [ - ([], 0.75, []), - ([-5, -10, 9, 3, 7, -7, 0, 23, -1, -11], 0.8, [23, 9, 7, 3, 0, -1, -5, -7]), - ([-5, -10, 9, 3, 7, -7, 0, 23, -1, -11], 0, []), - ([-5, -10, 9, 3, 7, -7, 0, 23, -1, -11, 28], 0.8, [28, 23, 9, 7, 3, 0, -1, -5]) -]) -def test_get_top_percent_list(num_list: list, percent: float, target_num_list: list) -> None: - """ - num_list: 数字列表,列表元素为数字 - percent: 占比,float, 向下取证 - """ - assert target_num_list == get_top_percent_list(num_list, percent) - - -# 输入一个s3路径,返回bucket名字和其余部分(key) -@pytest.mark.parametrize('s3_path, target_data', [ - ('s3://bucket/path/to/my/file.txt', 'bucket'), - ('s3a://bucket1/path/to/my/file2.txt', 'bucket1'), - # ("/path/to/my/file1.txt", "path"), - # ("bucket/path/to/my/file2.txt", "bucket"), -]) -def test_parse_s3path(s3_path: str, target_data: str): - """ - s3_path: s3路径 - 如果为无效路径,则返回对应的bucket名字和其余部分 - 如果为异常路径 例如:file2.txt,则报异常 - """ - bucket_name, key = parse_s3path(s3_path) - assert target_data == bucket_name - - -# 2个box是否处于包含或者部分重合关系。 -# 如果某边界重合算重合。 -# 部分边界重合,其他在内部也算包含 -@pytest.mark.parametrize('box1, box2, target_bool', [ - ((120, 133, 223, 248), (128, 168, 269, 295), True), - ((137, 53, 245, 157), (134, 11, 200, 147), True), # 部分重合 - ((137, 56, 211, 116), (140, 66, 202, 199), True), # 部分重合 - ((42, 34, 69, 65), (42, 34, 69, 65), True), # 部分重合 - ((39, 63, 87, 106), (37, 66, 85, 109), True), # 部分重合 - ((13, 37, 55, 66), (7, 46, 49, 75), True), # 部分重合 - ((56, 83, 85, 104), (64, 85, 93, 106), True), # 部分重合 - ((12, 53, 48, 94), (14, 53, 50, 94), True), # 部分重合 - ((43, 54, 93, 131), (55, 82, 77, 106), True), # 包含 - ((63, 2, 134, 71), (72, 43, 104, 78), True), # 包含 - ((25, 57, 109, 127), (26, 73, 49, 95), True), # 包含 - ((24, 47, 111, 115), (34, 81, 58, 106), True), # 包含 - ((34, 8, 105, 83), (76, 20, 116, 45), True), # 包含 -]) -def test_is_in_or_part_overlap(box1: tuple, box2: tuple, target_bool: bool) -> None: - """ - box1: 坐标数组 - box2: 坐标数组 - """ - assert target_bool == _is_in_or_part_overlap(box1, box2) - - -# 如果box1在box2内部,返回True -# 如果是部分重合的,则重合面积占box1的比例大于阈值时候返回True -@pytest.mark.parametrize('box1, box2, target_bool', [ - ((35, 28, 108, 90), (47, 60, 83, 96), False), # 包含 box1 up box2, box2 多半,box1少半 - ((65, 151, 92, 177), (49, 99, 105, 198), True), # 包含 box1 in box2 - ((80, 62, 112, 84), (74, 40, 144, 111), True), # 包含 box1 in box2 - ((65, 88, 127, 144), (92, 102, 131, 139), False), # 包含 box2 多半,box1约一半 - ((92, 102, 131, 139), (65, 88, 127, 144), True), # 包含 box1 多半 - ((100, 93, 199, 168), (169, 126, 198, 165), False), # 包含 box2 in box1 - ((26, 75, 106, 172), (65, 108, 90, 128), False), # 包含 box2 in box1 - ((28, 90, 77, 126), (35, 84, 84, 120), True), # 相交 box1多半,box2多半 - ((37, 6, 69, 52), (28, 3, 60, 49), True), # 相交 box1多半,box2多半 - ((94, 29, 133, 60), (84, 30, 123, 61), True), # 相交 box1多半,box2多半 -]) -def test_is_in_or_part_overlap_with_area_ratio(box1: tuple, box2: tuple, target_bool: bool) -> None: - out_bool = _is_in_or_part_overlap_with_area_ratio(box1, box2) - assert target_bool == out_bool - - -# box1在box2内部或者box2在box1内部返回True。如果部分边界重合也算作包含。 -@pytest.mark.parametrize('box1, box2, target_bool', [ - # ((), (), "Error"), # Error - ((65, 151, 92, 177), (49, 99, 105, 198), True), # 包含 box1 in box2 - ((80, 62, 112, 84), (74, 40, 144, 111), True), # 包含 box1 in box2 - ((76, 140, 154, 277), (121, 326, 192, 384), False), # 分离 - ((65, 88, 127, 144), (92, 102, 131, 139), False), # 包含 box2 多半,box1约一半 - ((92, 102, 131, 139), (65, 88, 127, 144), False), # 包含 box1 多半 - ((68, 94, 118, 120), (68, 90, 118, 122), True), # 包含,box1 in box2 两边x相切 - ((69, 94, 118, 120), (68, 90, 118, 122), True), # 包含,box1 in box2 一边x相切 - ((69, 114, 118, 122), (68, 90, 118, 122), True), # 包含,box1 in box2 一边y相切 - # ((100, 93, 199, 168), (169, 126, 198, 165), True), # 包含 box2 in box1 Error - # ((26, 75, 106, 172), (65, 108, 90, 128), True), # 包含 box2 in box1 Error - # ((38, 94, 122, 120), (68, 94, 118, 120), True), # 包含,box2 in box1 两边y相切 Error - # ((68, 34, 118, 158), (68, 94, 118, 120), True), # 包含,box2 in box1 两边x相切 Error - # ((68, 34, 118, 158), (68, 94, 84, 120), True), # 包含,box2 in box1 一边x相切 Error - # ((27, 94, 118, 158), (68, 94, 84, 120), True), # 包含,box2 in box1 一边y相切 Error -]) -def test_is_in(box1: tuple, box2: tuple, target_bool: bool) -> None: - assert target_bool == _is_in(box1, box2) - - -# 仅仅是部分包含关系,返回True,如果是完全包含关系则返回False -@pytest.mark.parametrize('box1, box2, target_bool', [ - ((65, 151, 92, 177), (49, 99, 105, 198), False), # 包含 box1 in box2 - ((80, 62, 112, 84), (74, 40, 144, 111), False), # 包含 box1 in box2 - # ((76, 140, 154, 277), (121, 326, 192, 384), False), # 分离 Error - ((76, 140, 154, 277), (121, 277, 192, 384), True), # 外相切 - ((65, 88, 127, 144), (92, 102, 131, 139), True), # 包含 box2 多半,box1约一半 - ((92, 102, 131, 139), (65, 88, 127, 144), True), # 包含 box1 多半 - ((68, 94, 118, 120), (68, 90, 118, 122), False), # 包含,box1 in box2 两边x相切 - ((69, 94, 118, 120), (68, 90, 118, 122), False), # 包含,box1 in box2 一边x相切 - ((69, 114, 118, 122), (68, 90, 118, 122), False), # 包含,box1 in box2 一边y相切 - # ((26, 75, 106, 172), (65, 108, 90, 128), False), # 包含 box2 in box1 Error - # ((38, 94, 122, 120), (68, 94, 118, 120), False), # 包含,box2 in box1 两边y相切 Error - # ((68, 34, 118, 158), (68, 94, 84, 120), False), # 包含,box2 in box1 一边x相切 Error - -]) -def test_is_part_overlap(box1: tuple, box2: tuple, target_bool: bool) -> None: - assert target_bool == _is_part_overlap(box1, box2) - - -# left_box右侧是否和right_box左侧有部分重叠 -@pytest.mark.parametrize('box1, box2, target_bool', [ - (None, None, False), - ((88, 81, 222, 173), (60, 221, 123, 358), False), # 分离 - ((121, 149, 184, 289), (172, 130, 230, 268), True), # box1 left bottom box2 相交 - ((172, 130, 230, 268), (121, 149, 184, 289), False), # box2 left bottom box1 相交 - ((109, 68, 182, 146), (215, 188, 277, 253), False), # box1 top left box2 分离 - ((117, 53, 222, 176), (174, 142, 298, 276), True), # box1 left top box2 相交 - ((174, 142, 298, 276), (117, 53, 222, 176), False), # box2 left top box1 相交 - ((65, 88, 127, 144), (92, 102, 131, 139), True), # box1 left box2 y:box2 in box1 - ((92, 102, 131, 139), (65, 88, 127, 144), False), # box2 left box1 y:box1 in box2 - ((182, 130, 230, 268), (121, 149, 174, 289), False), # box2 left box1 分离 - ((1, 10, 26, 45), (3, 4, 20, 39), True), # box1 bottom box2 x:box2 in box1 -]) -def test_left_intersect(box1: tuple, box2: tuple, target_bool: bool) -> None: - assert target_bool == _left_intersect(box1, box2) - - -# left_box左侧是否和right_box右侧部分重叠 -@pytest.mark.parametrize('box1, box2, target_bool', [ - (None, None, False), - ((88, 81, 222, 173), (60, 221, 123, 358), False), # 分离 - ((121, 149, 184, 289), (172, 130, 230, 268), False), # box1 left bottom box2 相交 - ((172, 130, 230, 268), (121, 149, 184, 289), True), # box2 left bottom box1 相交 - ((109, 68, 182, 146), (215, 188, 277, 253), False), # box1 top left box2 分离 - ((117, 53, 222, 176), (174, 142, 298, 276), False), # box1 left top box2 相交 - ((174, 142, 298, 276), (117, 53, 222, 176), True), # box2 left top box1 相交 - ((65, 88, 127, 144), (92, 102, 131, 139), False), # box1 left box2 y:box2 in box1 - # ((92, 102, 131, 139), (65, 88, 127, 144), True), # box2 left box1 y:box1 in box2 Error - ((182, 130, 230, 268), (121, 149, 174, 289), False), # box2 left box1 分离 - # ((1, 10, 26, 45), (3, 4, 20, 39), False), # box1 bottom box2 x:box2 in box1 Error -]) -def test_right_intersect(box1: tuple, box2: tuple, target_bool: bool) -> None: - assert target_bool == _right_intersect(box1, box2) - - -# x方向上:要么box1包含box2, 要么box2包含box1。不能部分包含 -# y方向上:box1和box2有重叠 -@pytest.mark.parametrize('box1, box2, target_bool', [ - # (None, None, False), # Error - ((35, 28, 108, 90), (47, 60, 83, 96), True), # box1 top box2, x:box2 in box1, y:有重叠 - ((35, 28, 98, 90), (27, 60, 103, 96), True), # box1 top box2, x:box1 in box2, y:有重叠 - ((57, 77, 130, 210), (59, 219, 119, 293), False), # box1 top box2, x: box2 in box1, y:无重叠 - ((47, 60, 83, 96), (35, 28, 108, 90), True), # box2 top box1, x:box1 in box2, y:有重叠 - ((27, 60, 103, 96), (35, 28, 98, 90), True), # box2 top box1, x:box2 in box1, y:有重叠 - ((59, 219, 119, 293), (57, 77, 130, 210), False), # box2 top box1, x: box1 in box2, y:无重叠 - ((35, 28, 55, 90), (57, 60, 83, 96), False), # box1 top box2, x:无重叠, y:有重叠 - ((47, 60, 63, 96), (65, 28, 108, 90), False), # box2 top box1, x:无重叠, y:有重叠 -]) -def test_is_vertical_full_overlap(box1: tuple, box2: tuple, target_bool: bool) -> None: - assert target_bool == _is_vertical_full_overlap(box1, box2) - - -# 检查box1下方和box2的上方有轻微的重叠,轻微程度收到y_tolerance的限制 -@pytest.mark.parametrize('box1, box2, target_bool', [ - (None, None, False), - ((35, 28, 108, 90), (47, 89, 83, 116), True), # box1 top box2, y:有重叠 - ((35, 28, 108, 90), (47, 60, 83, 96), False), # box1 top box2, y:有重叠且过多 - ((57, 77, 130, 210), (59, 219, 119, 293), False), # box1 top box2, y:无重叠 - ((47, 60, 83, 96), (35, 28, 108, 90), False), # box2 top box1, y:有重叠且过多 - ((27, 89, 103, 116), (35, 28, 98, 90), False), # box2 top box1, y:有重叠 - ((59, 219, 119, 293), (57, 77, 130, 210), False), # box2 top box1, y:无重叠 -]) -def test_is_bottom_full_overlap(box1: tuple, box2: tuple, target_bool: bool) -> None: - assert target_bool == _is_bottom_full_overlap(box1, box2) - - -# 检查box1的左侧是否和box2有重叠 -@pytest.mark.parametrize('box1, box2, target_bool', [ - (None, None, False), - ((88, 81, 222, 173), (60, 221, 123, 358), False), # 分离 - # ((121, 149, 184, 289), (172, 130, 230, 268), False), # box1 left bottom box2 相交 Error - # ((172, 130, 230, 268), (121, 149, 184, 289), True), # box2 left bottom box1 相交 Error - ((109, 68, 182, 146), (215, 188, 277, 253), False), # box1 top left box2 分离 - ((117, 53, 222, 176), (174, 142, 298, 276), False), # box1 left top box2 相交 - # ((174, 142, 298, 276), (117, 53, 222, 176), True), # box2 left top box1 相交 Error - # ((65, 88, 127, 144), (92, 102, 131, 139), False), # box1 left box2 y:box2 in box1 Error - ((1, 10, 26, 45), (3, 4, 20, 39), True), # box1 middle bottom box2 x:box2 in box1 - -]) -def test_is_left_overlap(box1: tuple, box2: tuple, target_bool: bool) -> None: - assert target_bool == _is_left_overlap(box1, box2) - - -# 查两个bbox在y轴上是否有重叠,并且该重叠区域的高度占两个bbox高度更低的那个超过阈值 -@pytest.mark.parametrize('box1, box2, target_bool', [ - # (None, None, "Error"), # Error - ((51, 69, 192, 147), (75, 48, 132, 187), True), # y: box1 in box2 - ((51, 39, 192, 197), (75, 48, 132, 187), True), # y: box2 in box1 - ((88, 81, 222, 173), (60, 221, 123, 358), False), # y: box1 top box2 - ((109, 68, 182, 196), (215, 188, 277, 253), False), # y: box1 top box2 little - ((109, 68, 182, 196), (215, 78, 277, 253), True), # y: box1 top box2 more - ((109, 68, 182, 196), (215, 138, 277, 213), False), # y: box1 top box2 more but lower overlap_ratio_threshold - ((109, 68, 182, 196), (215, 138, 277, 203), True), # y: box1 top box2 more and more overlap_ratio_threshold -]) -def test_is_overlaps_y_exceeds_threshold(box1: tuple, box2: tuple, target_bool: bool) -> None: - assert target_bool == __is_overlaps_y_exceeds_threshold(box1, box2) - - -# Determine the coordinates of the intersection rectangle -@pytest.mark.parametrize('box1, box2, target_num', [ - # (None, None, "Error"), # Error - ((88, 81, 222, 173), (60, 221, 123, 358), 0.0), # 分离 - ((76, 140, 154, 277), (121, 326, 192, 384), 0.0), # 分离 - ((142, 109, 238, 164), (134, 211, 224, 270), 0.0), # 分离 - ((109, 68, 182, 196), (175, 138, 277, 213), 0.024475524475524476), # 相交 - ((56, 90, 170, 219), (103, 212, 171, 304), 0.02288586346557361), # 相交 - ((109, 126, 204, 245), (130, 127, 232, 186), 0.33696071621517326), # 相交 - ((109, 126, 204, 245), (110, 127, 232, 206), 0.5493822593770807), # 相交 - ((76, 140, 154, 277), (121, 277, 192, 384), 0.0) # 相切 -]) -def test_calculate_iou(box1: tuple, box2: tuple, target_num: float) -> None: - assert target_num == calculate_iou(box1, box2) - - -# 计算box1和box2的重叠面积占最小面积的box的比例 -@pytest.mark.parametrize('box1, box2, target_num', [ - # (None, None, "Error"), # Error - ((142, 109, 238, 164), (134, 211, 224, 270), 0.0), # 分离 - ((88, 81, 222, 173), (60, 221, 123, 358), 0.0), # 分离 - ((76, 140, 154, 277), (121, 326, 192, 384), 0.0), # 分离 - ((76, 140, 154, 277), (121, 277, 192, 384), 0.0), # 相切 - ((109, 126, 204, 245), (110, 127, 232, 206), 0.7704918032786885), # 相交 - ((56, 90, 170, 219), (103, 212, 171, 304), 0.07496803069053709), # 相交 - ((121, 149, 184, 289), (172, 130, 230, 268), 0.17841079460269865), # 相交 - ((51, 69, 192, 147), (75, 48, 132, 187), 0.5611510791366906), # 相交 - ((117, 53, 222, 176), (174, 142, 298, 276), 0.12636469221835075), # 相交 - ((102, 60, 233, 203), (70, 190, 220, 319), 0.08188757807078417), # 相交 - ((109, 126, 204, 245), (130, 127, 232, 186), 0.7254901960784313), # 相交 -]) -def test_calculate_overlap_area_2_minbox_area_ratio(box1: tuple, box2: tuple, target_num: float) -> None: - assert target_num == calculate_overlap_area_2_minbox_area_ratio(box1, box2) - - -# 计算box1和box2的重叠面积占bbox1的比例 -@pytest.mark.parametrize('box1, box2, target_num', [ - # (None, None, "Error"), # Error - ((142, 109, 238, 164), (134, 211, 224, 270), 0.0), # 分离 - ((88, 81, 222, 173), (60, 221, 123, 358), 0.0), # 分离 - ((76, 140, 154, 277), (121, 326, 192, 384), 0.0), # 分离 - ((76, 140, 154, 277), (121, 277, 192, 384), 0.0), # 相切 - ((142, 109, 238, 164), (134, 164, 224, 270), 0.0), # 相切 - ((109, 126, 204, 245), (110, 127, 232, 206), 0.6568774878372402), # 相交 - ((56, 90, 170, 219), (103, 212, 171, 304), 0.03189174486604107), # 相交 - ((121, 149, 184, 289), (172, 130, 230, 268), 0.1619047619047619), # 相交 - ((51, 69, 192, 147), (75, 48, 132, 187), 0.40425531914893614), # 相交 - ((117, 53, 222, 176), (174, 142, 298, 276), 0.12636469221835075), # 相交 - ((102, 60, 233, 203), (70, 190, 220, 319), 0.08188757807078417), # 相交 - ((109, 126, 204, 245), (130, 127, 232, 186), 0.38620079610791685), # 相交 -]) -def test_calculate_overlap_area_in_bbox1_area_ratio(box1: tuple, box2: tuple, target_num: float) -> None: - assert target_num == calculate_overlap_area_in_bbox1_area_ratio(box1, box2) - - -# 计算两个bbox重叠的面积占最小面积的box的比例,如果比例大于ratio,则返回小的那个bbox,否则返回None -@pytest.mark.parametrize('box1, box2, ratio, target_box', [ - # (None, None, 0.8, "Error"), # Error - ((142, 109, 238, 164), (134, 211, 224, 270), 0.0, None), # 分离 - ((109, 126, 204, 245), (110, 127, 232, 206), 0.5, (110, 127, 232, 206)), - ((56, 90, 170, 219), (103, 212, 171, 304), 0.5, None), - ((121, 149, 184, 289), (172, 130, 230, 268), 0.5, None), - ((51, 69, 192, 147), (75, 48, 132, 187), 0.5, (75, 48, 132, 187)), - ((117, 53, 222, 176), (174, 142, 298, 276), 0.5, None), - ((102, 60, 233, 203), (70, 190, 220, 319), 0.5, None), - ((109, 126, 204, 245), (130, 127, 232, 186), 0.5, (130, 127, 232, 186)), -]) -def test_get_minbox_if_overlap_by_ratio(box1: tuple, box2: tuple, ratio: float, target_box: list) -> None: - assert target_box == get_minbox_if_overlap_by_ratio(box1, box2, ratio) - - -# 根据boundry获取在这个范围内的所有的box的列表,完全包含关系 -@pytest.mark.parametrize('boxes, boundary, target_boxs', [ - # ([], (), "Error"), # Error - ([], (110, 340, 209, 387), []), - ([(142, 109, 238, 164)], (134, 211, 224, 270), []), # 分离 - ([(109, 126, 204, 245), (110, 127, 232, 206)], (105, 116, 258, 300), [(109, 126, 204, 245), (110, 127, 232, 206)]), - ([(109, 126, 204, 245), (110, 127, 232, 206)], (105, 116, 258, 230), [(110, 127, 232, 206)]), - ([(81, 280, 123, 315), (282, 203, 342, 247), (183, 100, 300, 155), (46, 99, 133, 148), (33, 156, 97, 211), - (137, 29, 287, 87)], (80, 90, 249, 200), []), - ([(81, 280, 123, 315), (282, 203, 342, 247), (183, 100, 300, 155), (46, 99, 133, 148), (33, 156, 97, 211), - (137, 29, 287, 87)], (30, 20, 349, 320), - [(81, 280, 123, 315), (282, 203, 342, 247), (183, 100, 300, 155), (46, 99, 133, 148), (33, 156, 97, 211), - (137, 29, 287, 87)]), - ([(81, 280, 123, 315), (282, 203, 342, 247), (183, 100, 300, 155), (46, 99, 133, 148), (33, 156, 97, 211), - (137, 29, 287, 87)], (30, 20, 200, 320), - [(81, 280, 123, 315), (46, 99, 133, 148), (33, 156, 97, 211)]), -]) -def test_get_bbox_in_boundary(boxes: list, boundary: tuple, target_boxs: list) -> None: - assert target_boxs == get_bbox_in_boundary(boxes, boundary) - - -# 寻找上方距离最近的box,margin 4个单位, x方向有重合,y方向最近的 -@pytest.mark.parametrize('pymu_blocks, obj_box, target_boxs', [ - ([{'bbox': (81, 280, 123, 315)}, {'bbox': (282, 203, 342, 247)}, {'bbox': (183, 100, 300, 155)}, - {'bbox': (46, 99, 133, 148)}, {'bbox': (33, 156, 97, 211)}, - {'bbox': (137, 29, 287, 87)}], (81, 280, 123, 315), {'bbox': (33, 156, 97, 211)}), - # ([{"bbox": (168, 120, 263, 159)}, - # {"bbox": (231, 61, 279, 159)}, - # {"bbox": (35, 85, 136, 110)}, - # {"bbox": (228, 193, 347, 225)}, - # {"bbox": (144, 264, 188, 323)}, - # {"bbox": (62, 37, 126, 64)}], (228, 193, 347, 225), - # [{"bbox": (168, 120, 263, 159)}, {"bbox": (231, 61, 279, 159)}]), # y:方向最近的有两个,x: 两个均有重合 Error - ([{'bbox': (35, 85, 136, 159)}, - {'bbox': (168, 120, 263, 159)}, - {'bbox': (231, 61, 279, 118)}, - {'bbox': (228, 193, 347, 225)}, - {'bbox': (144, 264, 188, 323)}, - {'bbox': (62, 37, 126, 64)}], (228, 193, 347, 225), - {'bbox': (168, 120, 263, 159)},), # y:方向最近的有两个,x:只有一个有重合 - ([{'bbox': (239, 115, 379, 167)}, - {'bbox': (33, 237, 104, 262)}, - {'bbox': (124, 288, 168, 325)}, - {'bbox': (242, 291, 379, 340)}, - {'bbox': (55, 117, 121, 154)}, - {'bbox': (266, 183, 384, 217)}, ], (124, 288, 168, 325), {'bbox': (55, 117, 121, 154)}), - ([{'bbox': (239, 115, 379, 167)}, - {'bbox': (33, 237, 104, 262)}, - {'bbox': (124, 288, 168, 325)}, - {'bbox': (242, 291, 379, 340)}, - {'bbox': (55, 117, 119, 154)}, - {'bbox': (266, 183, 384, 217)}, ], (124, 288, 168, 325), None), # x没有重合 - ([{'bbox': (80, 90, 249, 200)}, - {'bbox': (183, 100, 240, 155)}, ], (183, 100, 240, 155), None), # 包含 -]) -def test_find_top_nearest_text_bbox(pymu_blocks: list, obj_box: tuple, target_boxs: dict) -> None: - assert target_boxs == find_top_nearest_text_bbox(pymu_blocks, obj_box) - - -# 寻找下方距离自己最近的box, x方向有重合,y方向最近的 -@pytest.mark.parametrize('pymu_blocks, obj_box, target_boxs', [ - ([{'bbox': (165, 96, 300, 114)}, - {'bbox': (11, 157, 139, 201)}, - {'bbox': (124, 208, 265, 262)}, - {'bbox': (124, 283, 248, 306)}, - {'bbox': (39, 267, 84, 301)}, - {'bbox': (36, 89, 114, 145)}, ], (165, 96, 300, 114), {'bbox': (124, 208, 265, 262)}), - ([{'bbox': (187, 37, 303, 49)}, - {'bbox': (2, 227, 90, 283)}, - {'bbox': (158, 174, 200, 212)}, - {'bbox': (259, 174, 324, 228)}, - {'bbox': (205, 61, 316, 97)}, - {'bbox': (295, 248, 374, 287)}, ], (205, 61, 316, 97), {'bbox': (259, 174, 324, 228)}), # y有两个最近的, x只有一个重合 - # ([{"bbox": (187, 37, 303, 49)}, - # {"bbox": (2, 227, 90, 283)}, - # {"bbox": (259, 174, 324, 228)}, - # {"bbox": (205, 61, 316, 97)}, - # {"bbox": (295, 248, 374, 287)}, - # {"bbox": (158, 174, 209, 212)}, ], (205, 61, 316, 97), - # [{"bbox": (259, 174, 324, 228)}, {"bbox": (158, 174, 209, 212)}]), # x有重合,y有两个最近的 Error - ([{'bbox': (287, 132, 398, 191)}, - {'bbox': (44, 141, 163, 188)}, - {'bbox': (132, 191, 240, 241)}, - {'bbox': (81, 25, 142, 67)}, - {'bbox': (74, 297, 116, 314)}, - {'bbox': (77, 84, 224, 107)}, ], (287, 132, 398, 191), None), # x没有重合 - ([{'bbox': (80, 90, 249, 200)}, - {'bbox': (183, 100, 240, 155)}, ], (183, 100, 240, 155), None), # 包含 -]) -def test_find_bottom_nearest_text_bbox(pymu_blocks: list, obj_box: tuple, target_boxs: dict) -> None: - assert target_boxs == find_bottom_nearest_text_bbox(pymu_blocks, obj_box) - - -# 寻找左侧距离自己最近的box, y方向有重叠,x方向最近 -@pytest.mark.parametrize('pymu_blocks, obj_box, target_boxs', [ - ([{'bbox': (80, 90, 249, 200)}, {'bbox': (183, 100, 240, 155)}], (183, 100, 240, 155), None), # 包含 - ([{'bbox': (28, 90, 77, 126)}, {'bbox': (35, 84, 84, 120)}], (35, 84, 84, 120), None), # y:重叠,x:重叠大于2 - ([{'bbox': (28, 90, 77, 126)}, {'bbox': (75, 84, 134, 120)}], (75, 84, 134, 120), {'bbox': (28, 90, 77, 126)}), - # y:重叠,x:重叠小于等于2 - ([{'bbox': (239, 115, 379, 167)}, - {'bbox': (33, 237, 104, 262)}, - {'bbox': (124, 288, 168, 325)}, - {'bbox': (242, 291, 379, 340)}, - {'bbox': (55, 113, 161, 154)}, - {'bbox': (266, 123, 384, 217)}], (266, 123, 384, 217), {'bbox': (55, 113, 161, 154)}), # y重叠,x left - # ([{"bbox": (136, 219, 268, 240)}, - # {"bbox": (169, 115, 268, 181)}, - # {"bbox": (33, 237, 104, 262)}, - # {"bbox": (124, 288, 168, 325)}, - # {"bbox": (55, 117, 161, 154)}, - # {"bbox": (266, 183, 384, 217)}], (266, 183, 384, 217), - # [{"bbox": (136, 219, 267, 240)}, {"bbox": (169, 115, 267, 181)}]), # y有重叠,x重叠小于2或者在left Error -]) -def test_find_left_nearest_text_bbox(pymu_blocks: list, obj_box: tuple, target_boxs: dict) -> None: - assert target_boxs == find_left_nearest_text_bbox(pymu_blocks, obj_box) - - -# 寻找右侧距离自己最近的box, y方向有重叠,x方向最近 -@pytest.mark.parametrize('pymu_blocks, obj_box, target_boxs', [ - ([{'bbox': (80, 90, 249, 200)}, {'bbox': (183, 100, 240, 155)}], (183, 100, 240, 155), None), # 包含 - ([{'bbox': (28, 90, 77, 126)}, {'bbox': (35, 84, 84, 120)}], (28, 90, 77, 126), None), # y:重叠,x:重叠大于2 - ([{'bbox': (28, 90, 77, 126)}, {'bbox': (75, 84, 134, 120)}], (28, 90, 77, 126), {'bbox': (75, 84, 134, 120)}), - # y:重叠,x:重叠小于等于2 - ([{'bbox': (239, 115, 379, 167)}, - {'bbox': (33, 237, 104, 262)}, - {'bbox': (124, 288, 168, 325)}, - {'bbox': (242, 291, 379, 340)}, - {'bbox': (55, 113, 161, 154)}, - {'bbox': (266, 123, 384, 217)}], (55, 113, 161, 154), {'bbox': (239, 115, 379, 167)}), # y重叠,x right - # ([{"bbox": (169, 115, 298, 181)}, - # {"bbox": (169, 219, 268, 240)}, - # {"bbox": (33, 177, 104, 262)}, - # {"bbox": (124, 288, 168, 325)}, - # {"bbox": (55, 117, 161, 154)}, - # {"bbox": (266, 183, 384, 217)}], (33, 177, 104, 262), - # [{"bbox": (169, 115, 298, 181)}, {"bbox": (169, 219, 268, 240)}]), # y有重叠,x重叠小于2或者在right Error -]) -def test_find_right_nearest_text_bbox(pymu_blocks: list, obj_box: tuple, target_boxs: dict) -> None: - assert target_boxs == find_right_nearest_text_bbox(pymu_blocks, obj_box) - - -# 判断两个矩形框的相对位置关系 (left, right, bottom, top) -@pytest.mark.parametrize('box1, box2, target_box', [ - # (None, None, "Error"), # Error - ((80, 90, 249, 200), (183, 100, 240, 155), (False, False, False, False)), # 包含 - # ((124, 81, 222, 173), (60, 221, 123, 358), (False, True, False, True)), # 分离,右上 Error - ((142, 109, 238, 164), (134, 211, 224, 270), (False, False, False, True)), # 分离,上 - # ((51, 69, 192, 147), (205, 198, 282, 297), (True, False, False, True)), # 分离,左上 Error - # ((101, 149, 164, 289), (172, 130, 230, 268), (True, False, False, False)), # 分离,左 Error - # ((69, 196, 124, 285), (130, 127, 232, 186), (True, False, True, False)), # 分离,左下 Error - ((103, 212, 171, 304), (56, 90, 170, 209), (False, False, True, False)), # 分离,下 - # ((124, 367, 222, 415), (60, 221, 123, 358), (False, True, True, False)), # 分离,右下 Error - # ((172, 130, 230, 268), (101, 149, 164, 289), (False, True, False, False)), # 分离,右 Error -]) -def test_bbox_relative_pos(box1: tuple, box2: tuple, target_box: tuple) -> None: - assert target_box == bbox_relative_pos(box1, box2) - - -# 计算两个矩形框的距离 -""" -受bbox_relative_pos方法的影响,左右相反,这里计算结果全部受影响,在错误的基础上计算出了正确的结果 -""" - - -@pytest.mark.parametrize('box1, box2, target_num', [ - # (None, None, "Error"), # Error - ((80, 90, 249, 200), (183, 100, 240, 155), 0.0), # 包含 - ((142, 109, 238, 164), (134, 211, 224, 270), 47.0), # 分离,上 - ((103, 212, 171, 304), (56, 90, 170, 209), 3.0), # 分离,下 - ((101, 149, 164, 289), (172, 130, 230, 268), 8.0), # 分离,左 - ((172, 130, 230, 268), (101, 149, 164, 289), 8.0), # 分离,右 - ((80.3, 90.8, 249.0, 200.5), (183.8, 100.6, 240.2, 155.1), 0.0), # 包含 - ((142.3, 109.5, 238.9, 164.2), (134.4, 211.2, 224.8, 270.1), 47.0), # 分离,上 - ((103.5, 212.6, 171.1, 304.8), (56.1, 90.9, 170.6, 209.2), 3.4), # 分离,下 - ((101.1, 149.3, 164.9, 289.0), (172.1, 130.1, 230.5, 268.5), 7.2), # 分离,左 - ((172.1, 130.3, 230.1, 268.1), (101.2, 149.9, 164.3, 289.1), 7.8), # 分离,右 - ((124.3, 81.1, 222.5, 173.8), (60.3, 221.5, 123.0, 358.9), 47.717711596429254), # 分离,右上 - ((51.2, 69.31, 192.5, 147.9), (205.0, 198.1, 282.98, 297.09), 51.73287156151299), # 分离,左上 - ((124.3, 367.1, 222.9, 415.7), (60.9, 221.4, 123.2, 358.6), 8.570880934886448), # 分离,右下 - ((69.9, 196.2, 124.1, 285.7), (130.0, 127.3, 232.6, 186.1), 11.69700816448377), # 分离,左下 -]) -def test_bbox_distance(box1: tuple, box2: tuple, target_num: float) -> None: - assert target_num - bbox_distance(box1, box2) < 1 - - -@pytest.mark.skip(reason='skip') -# 根据bucket_name获取s3配置ak,sk,endpoint -def test_get_s3_config() -> None: - bucket_name = os.getenv('bucket_name') - target_data = os.getenv('target_data') - assert convert_string_to_list(target_data) == list(get_s3_config(bucket_name)) - - -def convert_string_to_list(s): - cleaned_s = s.strip("'") - items = cleaned_s.split(',') - cleaned_items = [item.strip() for item in items] - return cleaned_items |