mirror of
https://github.com/opendatalab/MinerU.git
synced 2026-03-27 19:18:34 +07:00
63 lines
2.1 KiB
Python
63 lines
2.1 KiB
Python
import os
|
|
|
|
from .base import DataReader, DataWriter
|
|
|
|
|
|
class FileBasedDataReader(DataReader):
|
|
def __init__(self, parent_dir: str = ''):
|
|
"""Initialized with parent_dir.
|
|
|
|
Args:
|
|
parent_dir (str, optional): the parent directory that may be used within methods. Defaults to ''.
|
|
"""
|
|
self._parent_dir = parent_dir
|
|
|
|
def read_at(self, path: str, offset: int = 0, limit: int = -1) -> bytes:
|
|
"""Read at offset and limit.
|
|
|
|
Args:
|
|
path (str): the path of file, if the path is relative path, it will be joined with parent_dir.
|
|
offset (int, optional): the number of bytes skipped. Defaults to 0.
|
|
limit (int, optional): the length of bytes want to read. Defaults to -1.
|
|
|
|
Returns:
|
|
bytes: the content of file
|
|
"""
|
|
fn_path = path
|
|
if not os.path.isabs(fn_path) and len(self._parent_dir) > 0:
|
|
fn_path = os.path.join(self._parent_dir, path)
|
|
|
|
with open(fn_path, 'rb') as f:
|
|
f.seek(offset)
|
|
if limit == -1:
|
|
return f.read()
|
|
else:
|
|
return f.read(limit)
|
|
|
|
|
|
class FileBasedDataWriter(DataWriter):
|
|
def __init__(self, parent_dir: str = '') -> None:
|
|
"""Initialized with parent_dir.
|
|
|
|
Args:
|
|
parent_dir (str, optional): the parent directory that may be used within methods. Defaults to ''.
|
|
"""
|
|
self._parent_dir = parent_dir
|
|
|
|
def write(self, path: str, data: bytes) -> None:
|
|
"""Write file with data.
|
|
|
|
Args:
|
|
path (str): the path of file, if the path is relative path, it will be joined with parent_dir.
|
|
data (bytes): the data want to write
|
|
"""
|
|
fn_path = path
|
|
if not os.path.isabs(fn_path) and len(self._parent_dir) > 0:
|
|
fn_path = os.path.join(self._parent_dir, path)
|
|
|
|
if not os.path.exists(os.path.dirname(fn_path)) and os.path.dirname(fn_path) != "":
|
|
os.makedirs(os.path.dirname(fn_path), exist_ok=True)
|
|
|
|
with open(fn_path, 'wb') as f:
|
|
f.write(data)
|