diff --git a/mineru/cli/client.py b/mineru/cli/client.py index 3c87c398..d220d99a 100644 --- a/mineru/cli/client.py +++ b/mineru/cli/client.py @@ -47,6 +47,7 @@ TASK_RESULT_TIMEOUT_SECONDS = 600 LOCAL_API_STARTUP_TIMEOUT_SECONDS = 30 LOCAL_API_CLEANUP_RETRIES = 8 LOCAL_API_CLEANUP_RETRY_INTERVAL_SECONDS = 0.25 +LOCAL_API_MAX_CONCURRENT_REQUESTS = 1 @dataclass(frozen=True) @@ -95,12 +96,9 @@ class LocalAPIServer: self.temp_dir = tempfile.TemporaryDirectory(prefix="mineru-api-client-") self.temp_root = Path(self.temp_dir.name) self.output_root = self.temp_root / "output" - self.log_path = self.temp_root / "mineru-api.log" self.base_url: str | None = None self.process: subprocess.Popen[bytes] | None = None - self._log_handle = None self._atexit_registered = False - self._cleanup_enabled = True def start(self) -> str: if self.process is not None: @@ -110,9 +108,12 @@ class LocalAPIServer: self.base_url = f"http://127.0.0.1:{port}" env = os.environ.copy() env["MINERU_API_OUTPUT_ROOT"] = str(self.output_root) + env["MINERU_API_MAX_CONCURRENT_REQUESTS"] = str( + LOCAL_API_MAX_CONCURRENT_REQUESTS + ) + env["MINERU_API_DISABLE_ACCESS_LOG"] = "1" self.output_root.mkdir(parents=True, exist_ok=True) - self._log_handle = open(self.log_path, "wb") self.process = subprocess.Popen( [ sys.executable, @@ -123,8 +124,6 @@ class LocalAPIServer: "--port", str(port), ], - stdout=self._log_handle, - stderr=subprocess.STDOUT, cwd=os.getcwd(), env=env, ) @@ -134,8 +133,7 @@ class LocalAPIServer: self._atexit_registered = True return self.base_url - def stop(self, preserve_logs: bool = False) -> None: - self._cleanup_enabled = not preserve_logs + def stop(self) -> None: process = self.process self.process = None try: @@ -147,17 +145,13 @@ class LocalAPIServer: process.kill() process.wait(timeout=5) finally: - if self._log_handle is not None: - self._log_handle.close() - self._log_handle = None if self._atexit_registered: try: atexit.unregister(self.stop) except Exception: pass self._atexit_registered = False - if self._cleanup_enabled: - self._cleanup_temp_dir() + self._cleanup_temp_dir() def _cleanup_temp_dir(self) -> None: last_error: Exception | None = None @@ -180,15 +174,6 @@ class LocalAPIServer: last_error, ) - def read_log_tail(self, max_chars: int = 4000) -> str: - if not self.log_path.exists(): - return "" - content = self.log_path.read_text(encoding="utf-8", errors="ignore") - if len(content) <= max_chars: - return content - return content[-max_chars:] - - def find_free_port() -> int: with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: sock.bind(("127.0.0.1", 0)) @@ -282,10 +267,8 @@ async def wait_for_local_api_ready( while asyncio.get_running_loop().time() < deadline: process = local_server.process if process is not None and process.poll() is not None: - log_tail = local_server.read_log_tail() raise click.ClickException( - f"Local mineru-api exited before becoming healthy. " - f"Log file: {local_server.log_path}\n{log_tail}" + "Local mineru-api exited before becoming healthy." ) try: return await fetch_server_health(client, local_server.base_url) @@ -295,11 +278,10 @@ async def wait_for_local_api_ready( last_error = str(exc) await asyncio.sleep(TASK_STATUS_POLL_INTERVAL_SECONDS) - log_tail = local_server.read_log_tail() - raise click.ClickException( - f"Timed out waiting for local mineru-api to become healthy. " - f"Log file: {local_server.log_path}\n{last_error or ''}\n{log_tail}" - ) + message = "Timed out waiting for local mineru-api to become healthy." + if last_error: + message = f"{message} {last_error}" + raise click.ClickException(message) def probe_pdf_effective_pages( @@ -780,8 +762,7 @@ async def run_orchestrated_cli( ) finally: if local_server is not None: - preserve_logs = sys.exc_info()[0] is not None - local_server.stop(preserve_logs=preserve_logs) + local_server.stop() @click.command() diff --git a/mineru/cli/common.py b/mineru/cli/common.py index a618fe27..302da4b5 100644 --- a/mineru/cli/common.py +++ b/mineru/cli/common.py @@ -173,7 +173,7 @@ def _process_output( json.dumps(model_output, ensure_ascii=False, indent=4), ) - logger.info(f"local output dir is {local_md_dir}") + logger.debug(f"local output dir is {local_md_dir}") def _process_pipeline( diff --git a/mineru/cli/fast_api.py b/mineru/cli/fast_api.py index afb2d7dd..3baf8358 100644 --- a/mineru/cli/fast_api.py +++ b/mineru/cli/fast_api.py @@ -64,7 +64,7 @@ DEFAULT_TASK_RETENTION_SECONDS = 24 * 60 * 60 DEFAULT_TASK_CLEANUP_INTERVAL_SECONDS = 5 * 60 DEFAULT_OUTPUT_ROOT = "./output" ALLOWED_PARSE_METHODS = {"auto", "txt", "ocr"} -DEFAULT_MAX_CONCURRENT_REQUESTS = 3 +DEFAULT_MAX_CONCURRENT_REQUESTS = 1 FILE_PARSE_TASK_ID_HEADER = "X-MinerU-Task-Id" FILE_PARSE_TASK_STATUS_HEADER = "X-MinerU-Task-Status" FILE_PARSE_TASK_STATUS_URL_HEADER = "X-MinerU-Task-Status-Url" @@ -76,6 +76,13 @@ _configured_max_concurrent_requests = 0 _mps_parse_lock = threading.Lock() +def env_flag_enabled(name: str, default: bool = False) -> bool: + value = os.getenv(name) + if value is None: + return default + return value.lower() in ("1", "true", "yes", "on") + + @dataclass class ParseRequestOptions: files: list[UploadFile] @@ -170,11 +177,7 @@ async def lifespan(app: FastAPI): def create_app(): # By default, the OpenAPI documentation endpoints (openapi_url, docs_url, redoc_url) are enabled. # To disable the FastAPI docs and schema endpoints, set the environment variable MINERU_API_ENABLE_FASTAPI_DOCS=0. - enable_docs = str(os.getenv("MINERU_API_ENABLE_FASTAPI_DOCS", "1")).lower() in ( - "1", - "true", - "yes", - ) + enable_docs = env_flag_enabled("MINERU_API_ENABLE_FASTAPI_DOCS", default=True) app = FastAPI( openapi_url="/openapi.json" if enable_docs else None, docs_url="/docs" if enable_docs else None, @@ -1338,26 +1341,21 @@ def main(ctx, host, port, reload, **kwargs): kwargs.update(arg_parse(ctx)) app.state.config = kwargs - - try: - mcr = int( - kwargs.get( - "mineru_api_max_concurrent_requests", - DEFAULT_MAX_CONCURRENT_REQUESTS, - ) - or DEFAULT_MAX_CONCURRENT_REQUESTS - ) - except ValueError: - mcr = DEFAULT_MAX_CONCURRENT_REQUESTS - os.environ["MINERU_API_MAX_CONCURRENT_REQUESTS"] = str(mcr) + access_log = not env_flag_enabled("MINERU_API_DISABLE_ACCESS_LOG") print(f"Start MinerU FastAPI Service: http://{host}:{port}") print(f"API documentation: http://{host}:{port}/docs") if reload: - uvicorn.run("mineru.cli.fast_api:app", host=host, port=port, reload=True) + uvicorn.run( + "mineru.cli.fast_api:app", + host=host, + port=port, + reload=True, + access_log=access_log, + ) else: - uvicorn.run(app, host=host, port=port, reload=False) + uvicorn.run(app, host=host, port=port, reload=False, access_log=access_log) if __name__ == "__main__":