From b7ef8f7e056cbf811571f2e770104d1eb645989d Mon Sep 17 00:00:00 2001
From: myhloli <moe@myhloli.com>
Date: Fri, 12 Dec 2025 17:37:55 +0800
Subject: [PATCH 1/6] fix: add logging for content list in test_e2e.py

---
 tests/unittest/test_e2e.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/unittest/test_e2e.py b/tests/unittest/test_e2e.py
index d50e69a2..19f3b436 100644
--- a/tests/unittest/test_e2e.py
+++ b/tests/unittest/test_e2e.py
@@ -241,6 +241,7 @@ def assert_content(content_path, parse_method="txt"):
     content_list = []
     with open(content_path, "r", encoding="utf-8") as file:
         content_list = json.load(file)
+        logger.info(content_list)
     type_set = set()
     for content_dict in content_list:
         match content_dict["type"]:

From 63e45db40e772cc855ff529642d38c1e6785304d Mon Sep 17 00:00:00 2001
From: myhloli <moe@myhloli.com>
Date: Fri, 12 Dec 2025 18:17:12 +0800
Subject: [PATCH 2/6] comment out test_vlm_transformers_with_default_config
 function in test_e2e.py

---
 tests/unittest/test_e2e.py | 136 ++++++++++++++++++-------------------
 1 file changed, 68 insertions(+), 68 deletions(-)

diff --git a/tests/unittest/test_e2e.py b/tests/unittest/test_e2e.py
index 19f3b436..85c408a1 100644
--- a/tests/unittest/test_e2e.py
+++ b/tests/unittest/test_e2e.py
@@ -96,74 +96,74 @@ def test_pipeline_with_two_config():
     assert_content(res_json_path, parse_method="ocr")
 
 
-def test_vlm_transformers_with_default_config():
-    __dir__ = os.path.dirname(os.path.abspath(__file__))
-    pdf_files_dir = os.path.join(__dir__, "pdfs")
-    output_dir = os.path.join(__dir__, "output")
-    pdf_suffixes = [".pdf"]
-    image_suffixes = [".png", ".jpeg", ".jpg"]
-
-    doc_path_list = []
-    for doc_path in Path(pdf_files_dir).glob("*"):
-        if doc_path.suffix in pdf_suffixes + image_suffixes:
-            doc_path_list.append(doc_path)
-
-    # os.environ["MINERU_MODEL_SOURCE"] = "modelscope"
-
-    pdf_file_names = []
-    pdf_bytes_list = []
-    p_lang_list = []
-    for path in doc_path_list:
-        file_name = str(Path(path).stem)
-        pdf_bytes = read_fn(path)
-        pdf_file_names.append(file_name)
-        pdf_bytes_list.append(pdf_bytes)
-        p_lang_list.append("en")
-
-    for idx, pdf_bytes in enumerate(pdf_bytes_list):
-        pdf_file_name = pdf_file_names[idx]
-        pdf_bytes = convert_pdf_bytes_to_bytes_by_pypdfium2(pdf_bytes)
-        local_image_dir, local_md_dir = prepare_env(
-            output_dir, pdf_file_name, parse_method="vlm"
-        )
-        image_writer, md_writer = FileBasedDataWriter(
-            local_image_dir
-        ), FileBasedDataWriter(local_md_dir)
-        middle_json, infer_result = vlm_doc_analyze(
-            pdf_bytes, image_writer=image_writer, backend="transformers"
-        )
-
-        pdf_info = middle_json["pdf_info"]
-
-        image_dir = str(os.path.basename(local_image_dir))
-
-        md_content_str = vlm_union_make(pdf_info, MakeMode.MM_MD, image_dir)
-        md_writer.write_string(
-            f"{pdf_file_name}.md",
-            md_content_str,
-        )
-
-        content_list = vlm_union_make(pdf_info, MakeMode.CONTENT_LIST, image_dir)
-        md_writer.write_string(
-            f"{pdf_file_name}_content_list.json",
-            json.dumps(content_list, ensure_ascii=False, indent=4),
-        )
-
-        md_writer.write_string(
-            f"{pdf_file_name}_middle.json",
-            json.dumps(middle_json, ensure_ascii=False, indent=4),
-        )
-
-        md_writer.write_string(
-            f"{pdf_file_name}_model.json",
-            json.dumps(infer_result, ensure_ascii=False, indent=4),
-        )
-
-        logger.info(f"local output dir is {local_md_dir}")
-        res_json_path = (
-            Path(__file__).parent / "output" / "test" / "vlm" / "test_content_list.json"
-        ).as_posix()
-        assert_content(res_json_path, parse_method="vlm")
+# def test_vlm_transformers_with_default_config():
+#     __dir__ = os.path.dirname(os.path.abspath(__file__))
+#     pdf_files_dir = os.path.join(__dir__, "pdfs")
+#     output_dir = os.path.join(__dir__, "output")
+#     pdf_suffixes = [".pdf"]
+#     image_suffixes = [".png", ".jpeg", ".jpg"]
+#
+#     doc_path_list = []
+#     for doc_path in Path(pdf_files_dir).glob("*"):
+#         if doc_path.suffix in pdf_suffixes + image_suffixes:
+#             doc_path_list.append(doc_path)
+#
+#     # os.environ["MINERU_MODEL_SOURCE"] = "modelscope"
+#
+#     pdf_file_names = []
+#     pdf_bytes_list = []
+#     p_lang_list = []
+#     for path in doc_path_list:
+#         file_name = str(Path(path).stem)
+#         pdf_bytes = read_fn(path)
+#         pdf_file_names.append(file_name)
+#         pdf_bytes_list.append(pdf_bytes)
+#         p_lang_list.append("en")
+#
+#     for idx, pdf_bytes in enumerate(pdf_bytes_list):
+#         pdf_file_name = pdf_file_names[idx]
+#         pdf_bytes = convert_pdf_bytes_to_bytes_by_pypdfium2(pdf_bytes)
+#         local_image_dir, local_md_dir = prepare_env(
+#             output_dir, pdf_file_name, parse_method="vlm"
+#         )
+#         image_writer, md_writer = FileBasedDataWriter(
+#             local_image_dir
+#         ), FileBasedDataWriter(local_md_dir)
+#         middle_json, infer_result = vlm_doc_analyze(
+#             pdf_bytes, image_writer=image_writer, backend="transformers"
+#         )
+#
+#         pdf_info = middle_json["pdf_info"]
+#
+#         image_dir = str(os.path.basename(local_image_dir))
+#
+#         md_content_str = vlm_union_make(pdf_info, MakeMode.MM_MD, image_dir)
+#         md_writer.write_string(
+#             f"{pdf_file_name}.md",
+#             md_content_str,
+#         )
+#
+#         content_list = vlm_union_make(pdf_info, MakeMode.CONTENT_LIST, image_dir)
+#         md_writer.write_string(
+#             f"{pdf_file_name}_content_list.json",
+#             json.dumps(content_list, ensure_ascii=False, indent=4),
+#         )
+#
+#         md_writer.write_string(
+#             f"{pdf_file_name}_middle.json",
+#             json.dumps(middle_json, ensure_ascii=False, indent=4),
+#         )
+#
+#         md_writer.write_string(
+#             f"{pdf_file_name}_model.json",
+#             json.dumps(infer_result, ensure_ascii=False, indent=4),
+#         )
+#
+#         logger.info(f"local output dir is {local_md_dir}")
+#         res_json_path = (
+#             Path(__file__).parent / "output" / "test" / "vlm" / "test_content_list.json"
+#         ).as_posix()
+#         assert_content(res_json_path, parse_method="vlm")
 
 
 def write_infer_result(

From a2a25200bc35f49d18b4279d79e96ac552468543 Mon Sep 17 00:00:00 2001
From: myhloli <moe@myhloli.com>
Date: Fri, 12 Dec 2025 18:18:42 +0800
Subject: [PATCH 3/6] fix: comment out notify_to_feishu job in cli.yml

---
 .github/workflows/cli.yml | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/cli.yml b/.github/workflows/cli.yml
index 762e106c..996b2ba0 100644
--- a/.github/workflows/cli.yml
+++ b/.github/workflows/cli.yml
@@ -38,11 +38,11 @@ jobs:
           cd $GITHUB_WORKSPACE && coverage run
           cd $GITHUB_WORKSPACE && python tests/get_coverage.py
 
-  notify_to_feishu:
-    if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure')}}
-    needs: cli-test
-    runs-on: ubuntu-latest
-    steps:
-      - name: notify
-        run: |
-          curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"'${{ github.repository }}' GitHubAction Failed","content":[[{"tag":"text","text":""},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"}]]}}}}'  ${{ secrets.FEISHU_WEBHOOK_URL }}
+#  notify_to_feishu:
+#    if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure')}}
+#    needs: cli-test
+#    runs-on: ubuntu-latest
+#    steps:
+#      - name: notify
+#        run: |
+#          curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"'${{ github.repository }}' GitHubAction Failed","content":[[{"tag":"text","text":""},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"}]]}}}}'  ${{ secrets.FEISHU_WEBHOOK_URL }}

From 5f86767faebfec01474d909572da2b1298f01807 Mon Sep 17 00:00:00 2001
From: myhloli <moe@myhloli.com>
Date: Fri, 12 Dec 2025 20:21:59 +0800
Subject: [PATCH 4/6] refactor: update comments in compose.yaml for clarity and
 guidance on engine parameters

---
 docker/compose.yaml | 66 ++++++++-------------------------------------
 1 file changed, 11 insertions(+), 55 deletions(-)

diff --git a/docker/compose.yaml b/docker/compose.yaml
index ccaa3a1f..abd4b7d5 100644
--- a/docker/compose.yaml
+++ b/docker/compose.yaml
@@ -10,29 +10,11 @@ services:
       MINERU_MODEL_SOURCE: local
     entrypoint: mineru-openai-server
     command:
-      # ==================== Engine Selection ====================
-      # WARNING: Only ONE engine can be enabled at a time!
-      # Choose 'vllm' OR 'lmdeploy' (uncomment one line below)
       --engine vllm
-      # --engine lmdeploy
-      
-      # ==================== vLLM Engine Parameters ====================
-      # Uncomment if using --engine vllm
       --host 0.0.0.0
       --port 30000
-      # Multi-GPU configuration (increase throughput)
-      # --data-parallel-size 2
-      # Single GPU memory optimization (reduce if VRAM insufficient)
-      # --gpu-memory-utilization 0.5  # Try 0.4 or lower if issues persist
-
-      # ==================== LMDeploy Engine Parameters ====================
-      # Uncomment if using --engine lmdeploy
-      # --server-name 0.0.0.0
-      # --server-port 30000
-      # Multi-GPU configuration (increase throughput)
-      # --dp 2
-      # Single GPU memory optimization (reduce if VRAM insufficient)
-      # --cache-max-entry-count 0.5  # Try 0.4 or lower if issues persist
+      # --data-parallel-size 2  # If using multiple GPUs, increase throughput using vllm's multi-GPU parallel mode
+      # --gpu-memory-utilization 0.5  # If running on a single GPU and encountering VRAM shortage, use this parameter to reduce the KV cache size; if VRAM issues persist, try lowering it further to `0.4` or below.
     ulimits:
       memlock: -1
       stack: 67108864
@@ -58,21 +40,11 @@ services:
       MINERU_MODEL_SOURCE: local
     entrypoint: mineru-api
     command:
-      # ==================== Server Configuration ====================
       --host 0.0.0.0
       --port 8000
-
-      # ==================== vLLM Engine Parameters ====================
-      # Multi-GPU configuration
-      # --data-parallel-size 2
-      # Single GPU memory optimization
-      # --gpu-memory-utilization 0.5  # Try 0.4 or lower if VRAM insufficient
-
-      # ==================== LMDeploy Engine Parameters ====================
-      # Multi-GPU configuration
-      # --dp 2
-      # Single GPU memory optimization
-      # --cache-max-entry-count 0.5  # Try 0.4 or lower if VRAM insufficient
+      # parameters for vllm-engine
+      # --data-parallel-size 2  # If using multiple GPUs, increase throughput using vllm's multi-GPU parallel mode
+      # --gpu-memory-utilization 0.5  # If running on a single GPU and encountering VRAM shortage, use this parameter to reduce the KV cache size; if VRAM issues persist, try lowering it further to `0.4` or below.
     ulimits:
       memlock: -1
       stack: 67108864
@@ -96,30 +68,14 @@ services:
       MINERU_MODEL_SOURCE: local
     entrypoint: mineru-gradio
     command:
-      # ==================== Gradio Server Configuration ====================
       --server-name 0.0.0.0
       --server-port 7860
-      
-      # ==================== Gradio Feature Settings ====================
-      # --enable-api false  # Disable API endpoint
-      # --max-convert-pages 20  # Limit conversion page count
-      
-      # ==================== Engine Selection ====================
-      # WARNING: Only ONE engine can be enabled at a time!
-      
-      # Option 1: vLLM Engine (recommended for most users)
-      --enable-vllm-engine true
-      # Multi-GPU configuration
-      # --data-parallel-size 2
-      # Single GPU memory optimization
-      # --gpu-memory-utilization 0.5  # Try 0.4 or lower if VRAM insufficient
-
-      # Option 2: LMDeploy Engine
-      # --enable-lmdeploy-engine true
-      # Multi-GPU configuration
-      # --dp 2
-      # Single GPU memory optimization
-      # --cache-max-entry-count 0.5  # Try 0.4 or lower if VRAM insufficient
+      --enable-vllm-engine true  # Enable the vllm engine for Gradio
+      # --enable-api false  # If you want to disable the API, set this to false
+      # --max-convert-pages 20  # If you want to limit the number of pages for conversion, set this to a specific number
+      # parameters for vllm-engine
+      # --data-parallel-size 2  # If using multiple GPUs, increase throughput using vllm's multi-GPU parallel mode
+      # --gpu-memory-utilization 0.5  # If running on a single GPU and encountering VRAM shortage, use this parameter to reduce the KV cache size; if VRAM issues persist, try lowering it further to `0.4` or below.
     ulimits:
       memlock: -1
       stack: 67108864

From 107e17722337b0f5076fdf0faad651bebf8dd0aa Mon Sep 17 00:00:00 2001
From: myhloli <moe@myhloli.com>
Date: Mon, 15 Dec 2025 17:58:11 +0800
Subject: [PATCH 5/6] fix: improve content check and streamline content list
 generation in vlm_middle_json_mkcontent.py

---
 mineru/backend/vlm/vlm_middle_json_mkcontent.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/mineru/backend/vlm/vlm_middle_json_mkcontent.py b/mineru/backend/vlm/vlm_middle_json_mkcontent.py
index 196d6bd0..f05a38b5 100644
--- a/mineru/backend/vlm/vlm_middle_json_mkcontent.py
+++ b/mineru/backend/vlm/vlm_middle_json_mkcontent.py
@@ -484,7 +484,7 @@ def merge_para_with_text_v2(para_block):
     for line in para_block['lines']:
         for span in line['spans']:
             span_type = span['type']
-            if span['content']:
+            if span.get("content", '').strip():
                 if para_type == BlockType.PHONETIC and span_type == ContentTypeV2.SPAN_TEXT:
                     span_type = ContentTypeV2.SPAN_PHONETIC
                 if span_type == ContentType.INLINE_EQUATION:
@@ -534,13 +534,12 @@ def union_make(pdf_info_dict: list,
                 output_content.append(para_content)
         elif make_mode == MakeMode.CONTENT_LIST_V2:
             # https://github.com/drunkpig/llm-webkit-mirror/blob/dev6/docs/specification/output_format/content_list_spec.md
-            page_contents = []
             para_blocks = (paras_of_layout or []) + (paras_of_discarded or [])
-            if not para_blocks:
-                continue
-            for para_block in para_blocks:
-                para_content = make_blocks_to_content_list_v2(para_block, img_buket_path, page_size)
-                page_contents.append(para_content)
+            page_contents = []
+            if para_blocks:
+                for para_block in para_blocks:
+                    para_content = make_blocks_to_content_list_v2(para_block, img_buket_path, page_size)
+                    page_contents.append(para_content)
             output_content.append(page_contents)
 
     if make_mode in [MakeMode.MM_MD, MakeMode.NLP_MD]:

From eed479eb56bba93ee99c1a8c255d509bd2f837e5 Mon Sep 17 00:00:00 2001
From: myhloli <moe@myhloli.com>
Date: Mon, 15 Dec 2025 10:22:19 +0000
Subject: [PATCH 6/6] Update version.py with new version

---
 mineru/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mineru/version.py b/mineru/version.py
index 492f7d9a..763cefde 100644
--- a/mineru/version.py
+++ b/mineru/version.py
@@ -1 +1 @@
-__version__ = "2.6.7"
+__version__ = "2.6.8"