diff --git a/qim3d/io/loading.py b/qim3d/io/loading.py
index 4235d419cc7a7e66e2258d8ec707dc8b8d0317f3..1c1eee146cafefc39c5c467dbf61f676f26565c5 100644
--- a/qim3d/io/loading.py
+++ b/qim3d/io/loading.py
@@ -26,6 +26,7 @@ import qim3d
 from qim3d.io.logger import log
 from qim3d.utils.internal_tools import get_file_size, sizeof, stringify_path
 from qim3d.utils.system import Memory
+from qim3d.utils import ProgressBar
 
 dask.config.set(scheduler="processes")
 
@@ -770,6 +771,7 @@ def load(
     dataset_name=None,
     return_metadata=False,
     contains=None,
+    progress_bar: bool = True,
     force_load: bool = False,
     dim_order=(2, 1, 0),
     **kwargs,
@@ -796,6 +798,7 @@ def load(
         return_metadata (bool, optional): Specifies whether to return metadata or not. Default is False (only for HDF5 and TXRM/TXM/XRM files)
         contains (str, optional): Specifies a part of the name that is common for the TIFF file stack to be loaded (only for TIFF stacks).
             Default is None.
+        progress_bar (bool, optional): Displays a tqdm progress bar while loading. Useful for large files. Currently only shown on POSIX systems (os.name == 'posix'). Default is True.
         force_load (bool, optional): If the file size exceeds available memory, a MemoryError is raised.
             If force_load is True, the error is changed to warning and the loader tries to load it anyway. Default is False.
         dim_order (tuple, optional): The order of the dimensions in the volume for .vol files. Default is (2,1,0) which corresponds to (z,y,x)
@@ -829,7 +832,12 @@ def load(
         **kwargs,
     )
 
-    data = loader.load(path)
+    # ProgressBar polls the process I/O counters; it is only enabled on POSIX systems
+    if progress_bar and os.name == 'posix':
+        with ProgressBar(path):
+            data = loader.load(path)
+    else:
+        data = loader.load(path)
 
     def log_memory_info(data):
         mem = Memory()
diff --git a/qim3d/utils/__init__.py b/qim3d/utils/__init__.py
index 3d9e2150f8e949b4a00e31a2e88e340100be382c..4771b35f8f0b121d50cf77db783603dd66f49832 100644
--- a/qim3d/utils/__init__.py
+++ b/qim3d/utils/__init__.py
@@ -5,4 +5,5 @@ from .data import Dataset, prepare_dataloaders, prepare_datasets
 from .img import generate_volume, overlay_rgb_images
 from .models import inference, model_summary, train_model
 from .preview import image_preview
+from .loading_progress_bar import ProgressBar
 from .system import Memory
diff --git a/qim3d/utils/loading_progress_bar.py b/qim3d/utils/loading_progress_bar.py
new file mode 100644
index 0000000000000000000000000000000000000000..980349ea10389cb9efb1aea831aac1a7d70d7914
--- /dev/null
+++ b/qim3d/utils/loading_progress_bar.py
@@ -0,0 +1,119 @@
+from threading import Timer
+import psutil
+import sys
+
+from tqdm.auto import tqdm
+
+from qim3d.utils.internal_tools import get_file_size
+
+
+class RepeatTimer(Timer):
+    """
+    Timer that keeps calling its function once per interval until it is cancelled.
+
+    If the memory check is set as a normal thread, there is no guarantee it will switch
+    resulting in not enough memory checks to create smooth progress bar or to make it
+    work at all.
+    Thus we have to use timer, which runs the function at (approximately) the given time. With this subclass
+    from https://stackoverflow.com/a/48741004/11514359
+    we don't have to guess how many timers we will need and create multiple timers.
+    """
+
+    def run(self):
+        # `finished` is the Event set by `cancel()`: `wait` times out (False) between
+        # calls and returns True as soon as the timer is cancelled, ending the loop.
+        while not self.finished.wait(self.interval):
+            self.function(*self.args, **self.kwargs)
+
+
+class ProgressBar:
+    """
+    Context manager that shows a tqdm progress bar while a file is read into memory.
+
+    Progress is estimated by polling the I/O counters of the current process from a
+    background `RepeatTimer`, so it counts every byte the process reads, not only
+    bytes read from `filename`.
+
+    Example:
+        with ProgressBar(path):
+            data = loader.load(path)
+    """
+
+    def __init__(self, filename: str, repeat_time: float = 0.5, *args, **kwargs):
+        """
+        Creates class for 'with' statement to track progress during loading a file into memory
+
+        Parameters:
+        ------------
+        - filename (str): to get size of the file
+        - repeat_time (float, optional): How often the timer checks how many bytes were loaded. Even if very small,
+            it doesn't make the progress bar smoother as there are only few visible changes in number of read_chars.
+            Defaults to 0.5
+        - *args, **kwargs: Accepted but currently unused.
+        """
+        self.timer = RepeatTimer(repeat_time, self.memory_check)
+        self.pbar = tqdm(
+            total=get_file_size(filename),
+            desc="Loading: ",
+            unit="B",
+            file=sys.stdout,
+            unit_scale=True,
+            unit_divisor=1024,
+            bar_format="{l_bar}{bar}| {n_fmt}{unit}/{total_fmt}{unit} [{elapsed}<{remaining}, {rate_fmt}{postfix}]",
+        )
+        self.last_memory = 0
+        self.process = psutil.Process()
+
+    def _read_io_bytes(self):
+        """
+        Returns the cumulative number of bytes read by this process, or None when
+        the I/O counters cannot be obtained through psutil.
+        """
+        try:
+            counters = self.process.io_counters()
+        except (AttributeError, psutil.Error):
+            # io_counters() is missing on some platforms (e.g. macOS) and may be denied
+            return None
+        try:
+            return counters.read_chars
+        except AttributeError:
+            # read_chars is not reported everywhere, and neither is other_bytes
+            return counters.read_bytes + getattr(counters, "other_bytes", 0)
+
+    def memory_check(self):
+        """
+        Timer callback: advances the bar by the number of bytes read since the last check.
+        """
+        memory = self._read_io_bytes()
+        if memory is None:
+            return
+
+        delta = memory - self.last_memory
+        self.last_memory = memory
+
+        # The counters include unrelated reads, so never let the bar run past its total
+        if self.pbar.total is not None:
+            delta = min(delta, self.pbar.total - self.pbar.n)
+        if delta > 0:
+            self.pbar.update(delta)
+
+    def __enter__(self):
+        # The counters are cumulative since process start; take the baseline now so
+        # that bytes read before loading began are not reported as progress.
+        baseline = self._read_io_bytes()
+        if baseline is not None:
+            self.last_memory = baseline
+            self.timer.start()
+        return self
+
+    def __exit__(self, exception_type, exception_value, exception_traceback):
+        self.timer.cancel()
+        if self.timer.is_alive():
+            # Wait for a check that is still running so it cannot touch the bar afterwards
+            self.timer.join()
+
+        if exception_type is None:
+            # Loading finished: show the bar as complete
+            self.pbar.n = self.pbar.total
+            self.pbar.refresh()
+        self.pbar.close()