mirror of
https://github.com/opendatalab/MinerU.git
synced 2026-03-27 02:58:54 +07:00
修复init错误
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -34,4 +34,4 @@ tmp
|
||||
ocr_demo
|
||||
|
||||
/app/common/__init__.py
|
||||
/magic_pdf/spark/__init__.py
|
||||
/magic_pdf/config/__init__.py
|
||||
|
||||
@@ -12,7 +12,6 @@ from magic_pdf.dict2md.ocr_mkcontent import (
|
||||
make_standard_format_with_para
|
||||
)
|
||||
from magic_pdf.libs.commons import join_path, read_file
|
||||
from magic_pdf.pdf_parse_by_ocr import parse_pdf_by_ocr
|
||||
|
||||
|
||||
def save_markdown(markdown_text, input_filepath):
|
||||
|
||||
@@ -19,9 +19,9 @@ from magic_pdf.filter.pdf_meta_scan import pdf_meta_scan
|
||||
from loguru import logger
|
||||
|
||||
from magic_pdf.pdf_parse_for_train import parse_pdf_for_train
|
||||
from magic_pdf.spark.base import exception_handler, get_data_source, get_bookname, get_pdf_bytes
|
||||
from magic_pdf.spark import exception_handler, get_data_source
|
||||
from magic_pdf.train_utils.convert_to_train_format import convert_to_train_format
|
||||
from magic_pdf.spark.s3 import get_s3_config, get_s3_client
|
||||
from magic_pdf.spark import get_s3_config, get_s3_client
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ from loguru import logger
|
||||
from magic_pdf.dict2md.mkcontent import mk_mm_markdown, mk_universal_format
|
||||
from magic_pdf.libs.commons import join_path
|
||||
from magic_pdf.libs.json_compressor import JsonCompressor
|
||||
from magic_pdf.spark.base import exception_handler, get_data_source
|
||||
from magic_pdf.spark import exception_handler, get_data_source
|
||||
|
||||
|
||||
def txt_pdf_to_standard_format(jso: dict, debug_mode=False) -> dict:
|
||||
|
||||
0
magic_pdf/spark/__init__.py
Normal file
0
magic_pdf/spark/__init__.py
Normal file
@@ -8,7 +8,7 @@ from typing import List, Union
|
||||
try:
|
||||
from app.config import s3_buckets, s3_clusters, get_cluster_name, s3_users
|
||||
except ImportError:
|
||||
from magic_pdf.spark import s3_buckets, s3_clusters, get_cluster_name, s3_users
|
||||
from magic_pdf.config import s3_buckets, s3_clusters, get_cluster_name, s3_users
|
||||
|
||||
__re_s3_path = re.compile("^s3a?://([^/]+)(?:/(.*))?$")
|
||||
def get_s3_config(path: Union[str, List[str]], outside=False):
|
||||
|
||||
@@ -3,7 +3,7 @@ import json
|
||||
import os
|
||||
from magic_pdf.libs.commons import fitz
|
||||
|
||||
from magic_pdf.spark.s3 import get_s3_config, get_s3_client
|
||||
from magic_pdf.spark import get_s3_config, get_s3_client
|
||||
from magic_pdf.libs.commons import join_path, json_dump_path, read_file, parse_bucket_key
|
||||
from loguru import logger
|
||||
|
||||
|
||||
Reference in New Issue
Block a user