diff --git a/qim3d/io/loading.py b/qim3d/io/loading.py
index f0952269cf92871c911770b6154eab23af5406fe..046f65136416f2cb79f980bd3ca86c63d5a72725 100644
--- a/qim3d/io/loading.py
+++ b/qim3d/io/loading.py
@@ -27,7 +27,7 @@ from PIL import Image, UnidentifiedImageError
 
 import qim3d
 from qim3d.io.logger import log
-from qim3d.utils.internal_tools import sizeof, stringify_path
+from qim3d.utils.internal_tools import sizeof, stringify_path, get_file_size
 from qim3d.utils.system import Memory
 
 
@@ -42,7 +42,7 @@ class DataLoader:
             (only relevant for HDF5, TXRM/TXM/XRM and NIfTI files)
         contains (str): Specifies a part of the name that is common for the TIFF file stack to be loaded
             (only relevant for TIFF stacks)
-    
+
     Methods:
         load_tiff(path): Load a TIFF file from the specified path.
         load_h5(path): Load an HDF5 file from the specified path.
@@ -67,13 +67,16 @@ class DataLoader:
             return_metadata (bool, optional): Specifies whether to return metadata or not. Default is False (only for HDF5, TXRM/TXM/XRM and NIfTI files)
             contains (str, optional): Specifies a part of the name that is common for the TIFF file stack to be loaded (only for TIFF stacks). Default is None.
+            force_load (bool, optional): If False and the file size exceeds available memory, a MemoryError is raised. If True, the error is
+                downgraded to a warning and the loader attempts to load the file anyway. Default is False.
             dim_order (tuple, optional): The order of the dimensions in the volume. Default is (2,1,0) which corresponds to (z,y,x)
         """
         self.virtual_stack = kwargs.get("virtual_stack", False)
         self.dataset_name = kwargs.get("dataset_name", None)
         self.return_metadata = kwargs.get("return_metadata", False)
         self.contains = kwargs.get("contains", None)
-        self.dim_order = kwargs.get("dim_order", (2,1,0))
+        self.force_load = kwargs.get("force_load", False)
+        self.dim_order = kwargs.get("dim_order", (2, 1, 0))
 
     def load_tiff(self, path):
         """Load a TIFF file from the specified path.
@@ -93,7 +96,7 @@ class DataLoader:
         if self.virtual_stack:
             vol = tifffile.memmap(path)
         else:
-            vol = tifffile.imread(path, key=range(series) if series>1 else None)
+            vol = tifffile.imread(path, key=range(series) if series > 1 else None)
 
         log.info("Loaded shape: %s", vol.shape)
@@ -234,7 +237,7 @@ class DataLoader:
         log.info("Loaded shape: %s", vol.shape)
 
         return vol
-    
+
     def load_txrm(self, path):
         """Load a TXRM/XRM/TXM file from the specified path.
@@ -258,9 +261,11 @@ class DataLoader:
             )
 
         if self.virtual_stack:
-            if not path.endswith('.txm'):
-                log.warning("Virtual stack is only thoroughly tested for reconstructed volumes in TXM format and is thus not guaranteed to load TXRM and XRM files correctly")
-            
+            if not path.endswith(".txm"):
+                log.warning(
+                    "Virtual stack is only thoroughly tested for reconstructed volumes in TXM format and is thus not guaranteed to load TXRM and XRM files correctly"
+                )
+
             # Get metadata
             ole = olefile.OleFileIO(path)
             metadata = dxchange.reader.read_ole_metadata(ole)
@@ -268,23 +273,29 @@ class DataLoader:
 
             # Compute data offsets in bytes for each slice
             offsets = _get_ole_offsets(ole)
 
-            if len(offsets)!=metadata['number_of_images']:
-                raise ValueError(f'Metadata is erroneous: number of images {metadata["number_of_images"]} is different from number of data offsets {len(offsets)}')
+            if len(offsets) != metadata["number_of_images"]:
+                raise ValueError(
+                    f'Metadata is erroneous: number of images {metadata["number_of_images"]} is different from number of data offsets {len(offsets)}'
+                )
 
             slices = []
-            for _,offset in offsets.items():
+            for _, offset in offsets.items():
                 slices.append(
                     np.memmap(
                         path,
-                        dtype=dxchange.reader._get_ole_data_type(metadata).newbyteorder('<'),
-                        mode='r',
+                        dtype=dxchange.reader._get_ole_data_type(metadata).newbyteorder(
+                            "<"
+                        ),
+                        mode="r",
                         offset=offset,
-                        shape = (1,metadata['image_height'],metadata['image_width'])
-                    )
+                        shape=(1, metadata["image_height"], metadata["image_width"]),
                     )
-
-            vol = da.concatenate(slices,axis=0)
-            log.warning('Virtual stack volume will be returned as a dask array. To load certain slices into memory, use normal indexing followed by the compute() method, e.g. vol[:,0,:].compute()')
+                )
+
+            vol = da.concatenate(slices, axis=0)
+            log.warning(
+                "Virtual stack volume will be returned as a dask array. To load certain slices into memory, use normal indexing followed by the compute() method, e.g. vol[:,0,:].compute()"
+            )
 
         else:
             vol, metadata = dxchange.read_txrm(path)
@@ -296,8 +307,8 @@ class DataLoader:
             return vol, metadata
         else:
             return vol
-    
-    def load_nifti(self,path):
+
+    def load_nifti(self, path):
         """Load a NIfTI file from the specified path.
 
         Args:
@@ -308,27 +319,27 @@ class DataLoader:
             If 'self.virtual_stack' is True, returns a nibabel.arrayproxy.ArrayProxy object
             If 'self.return_metadata' is True, returns a tuple (volume, metadata).
         """
-        
+
         data = nib.load(path)
 
         # Get image array proxy
         vol = data.dataobj
 
         if not self.virtual_stack:
-            vol = np.asarray(vol,dtype=data.get_data_dtype())
+            vol = np.asarray(vol, dtype=data.get_data_dtype())
 
         if self.return_metadata:
             metadata = {}
             for key in data.header:
-                metadata[key]=data.header[key]
-            
+                metadata[key] = data.header[key]
+
             return vol, metadata
         else:
             return vol
-    
+
     def load_pil(self, path):
         """Load a PIL image from the specified path
-        
+
         Args:
             path (str): The path to the image supported by PIL.
 
@@ -338,11 +349,11 @@ class DataLoader:
         return np.array(Image.open(path))
 
     def _load_vgi_metadata(self, path):
-        """ Helper functions that loads metadata from a VGI file
+        """Helper function that loads metadata from a VGI file
 
         Args:
             path (str): The path to the VGI file.
-        
+
         returns:
             dict: The loaded metadata.
""" @@ -352,11 +363,11 @@ class DataLoader: should_indent = True - with open(path, 'r') as f: + with open(path, "r") as f: for line in f: line = line.strip() # {NAME} is start of a new object, so should indent - if line.startswith('{') and line.endswith('}'): + if line.startswith("{") and line.endswith("}"): section_name = line[1:-1] current_section[section_name] = {} section_stack.append(current_section) @@ -364,7 +375,7 @@ class DataLoader: should_indent = True # [NAME] is start of a section, so should not indent - elif line.startswith('[') and line.endswith(']'): + elif line.startswith("[") and line.endswith("]"): section_name = line[1:-1] if not should_indent: @@ -377,62 +388,72 @@ class DataLoader: should_indent = False # = is a key value pair - elif '=' in line: - key, value = line.split('=', 1) + elif "=" in line: + key, value = line.split("=", 1) current_section[key.strip()] = value.strip() - elif line == '': + elif line == "": if len(section_stack) > 1: current_section = section_stack.pop() return meta_data def load_vol(self, path): - """ Load a VOL filed based on the VGI metadata file + """Load a VOL filed based on the VGI metadata file Args: path (str): The path to the VGI file. - + Raises: ValueError: If path points to a .vol file and not a .vgi file - + returns: numpy.ndarray, numpy.memmap or tuple: The loaded volume. If 'self.return_metadata' is True, returns a tuple (volume, metadata). """ # makes sure path point to .VGI metadata file and not the .VOL file - if path.endswith(".vol") and os.path.isfile(path.replace(".vol",".vgi")): - path = path.replace(".vol",".vgi") + if path.endswith(".vol") and os.path.isfile(path.replace(".vol", ".vgi")): + path = path.replace(".vol", ".vgi") log.warning("Corrected path to .vgi metadata file from .vol file") - elif path.endswith(".vol") and not os.path.isfile(path.replace(".vol",".vgi")): - raise ValueError(f"Unsupported file format, should point to .vgi metadata file assumed to be in same folder as .vol file: {path}") + elif path.endswith(".vol") and not os.path.isfile(path.replace(".vol", ".vgi")): + raise ValueError( + f"Unsupported file format, should point to .vgi metadata file assumed to be in same folder as .vol file: {path}" + ) meta_data = self._load_vgi_metadata(path) # Extracts relevant information from the metadata - file_name = meta_data['volume1']["file1"]["Name"] - path = path.rsplit('/', 1)[0] # Remove characters after the last "/" to be replaced with .vol filename - vol_path = os.path.join(path, file_name) # .vol and .vgi files are assumed to be in the same directory - dims = meta_data['volume1']['file1']['Size'] + file_name = meta_data["volume1"]["file1"]["Name"] + path = path.rsplit("/", 1)[ + 0 + ] # Remove characters after the last "/" to be replaced with .vol filename + vol_path = os.path.join( + path, file_name + ) # .vol and .vgi files are assumed to be in the same directory + dims = meta_data["volume1"]["file1"]["Size"] dims = [int(n) for n in dims.split() if n.isdigit()] - - dt = meta_data['volume1']['file1']['Datatype'] + + dt = meta_data["volume1"]["file1"]["Datatype"] match dt: - case 'float': + case "float": dt = np.float32 - case 'float32': + case "float32": dt = np.float32 - case 'uint8': + case "uint8": dt = np.uint8 - case 'unsigned integer': + case "unsigned integer": dt = np.uint16 - case 'uint16': + case "uint16": dt = np.uint16 case _: raise ValueError(f"Unsupported data type: {dt}") - - dims_order = (dims[self.dim_order[0]], dims[self.dim_order[1]], dims[self.dim_order[2]]) + + dims_order = ( + 
dims[self.dim_order[0]], + dims[self.dim_order[1]], + dims[self.dim_order[2]], + ) if self.virtual_stack: - vol = np.memmap(vol_path, dtype=dt, mode='r', shape=dims_order) + vol = np.memmap(vol_path, dtype=dt, mode="r", shape=dims_order) else: vol = np.fromfile(vol_path, dtype=dt, count=np.prod(dims)) vol = np.reshape(vol, dims_order) @@ -441,36 +462,32 @@ class DataLoader: return vol, meta_data else: return vol - + def load_dicom(self, path): - """ Load a DICOM file + """Load a DICOM file Args: path (str): Path to file """ dcm_data = pydicom.dcmread(path) - + if self.return_metadata: return dcm_data.pixel_array, dcm_data else: return dcm_data.pixel_array - + def load_dicom_dir(self, path): - """ Load a directory of DICOM files into a numpy 3d array + """Load a directory of DICOM files into a numpy 3d array Args: path (str): Directory path - """ + """ if not self.contains: raise ValueError( "Please specify a part of the name that is common for the DICOM file stack with the argument 'contains'" ) - dicom_stack = [ - file - for file in os.listdir(path) - if self.contains in file - ] + dicom_stack = [file for file in os.listdir(path) if self.contains in file] dicom_stack.sort() # Ensure proper ordering # Check that only one DICOM stack in the directory contains the provided string in its name @@ -492,12 +509,40 @@ class DataLoader: dicom_list = [pydicom.dcmread(os.path.join(path, f)) for f in dicom_stack] # vol contains the pixel data vol = np.stack([dicom.pixel_array for dicom in dicom_list], axis=0) - + if self.return_metadata: return vol, dicom_list else: return vol - + + def check_file_size(self, filename: str): + """ + Checks if there is enough memory where the file can be loaded. + Args: + ------------ + filename: (str) Specifies path to file + force_load: (bool, optional) If true, the memory error will not be raised. Warning will be printed insted and + the loader will attempt to load the file. + + Raises: + ----------- + MemoryError: If filesize is greater then available memory + """ + + if ( + self.virtual_stack + ): # If virtual_stack is True, then data is loaded from the disk, no need for loading into memory + return + file_size = get_file_size(filename) + available_memory = Memory().free + if file_size > available_memory: + message = f"The file {filename} has {sizeof(file_size)} but only {sizeof(available_memory)} of memory is available." + if self.force_load: + log.warning(message) + else: + raise MemoryError( + message + " Set 'force_load=True' to ignore this error." + ) def load(self, path): """ @@ -508,13 +553,14 @@ class DataLoader: Returns: vol (numpy.ndarray, numpy.memmap, h5py._hl.dataset.Dataset, nibabel.arrayproxy.ArrayProxy or tuple): The loaded volume - + If `virtual_stack=True`, returns `numpy.memmap`, `h5py._hl.dataset.Dataset` or `nibabel.arrayproxy.ArrayProxy` depending on file format If `return_metadata=True` and file format is either HDF5, NIfTI or TXRM/TXM/XRM, returns `tuple` (volume, metadata). Raises: ValueError: If the format is not supported ValueError: If the file or directory does not exist. + MemoryError: If file size exceeds available memory and force_load is not set to True. In check_size function. 
 
         Example:
             loader = qim3d.io.DataLoader()
@@ -527,17 +573,18 @@ class DataLoader:
         # Load a file
         if os.path.isfile(path):
             # Choose the loader based on the file extension
+            self.check_file_size(path)
             if path.endswith(".tif") or path.endswith(".tiff"):
                 return self.load_tiff(path)
             elif path.endswith(".h5"):
                 return self.load_h5(path)
             elif path.endswith((".txrm", ".txm", ".xrm")):
                 return self.load_txrm(path)
-            elif path.endswith((".nii",".nii.gz")):
+            elif path.endswith((".nii", ".nii.gz")):
                 return self.load_nifti(path)
-            elif path.endswith((".vol",".vgi")):
+            elif path.endswith((".vol", ".vgi")):
                 return self.load_vol(path)
-            elif path.endswith((".dcm",".DCM")):
+            elif path.endswith((".dcm", ".DCM")):
                 return self.load_dicom(path)
             else:
                 try:
@@ -548,7 +595,9 @@ class DataLoader:
 
         # Load a directory
         elif os.path.isdir(path):
             # load tiff stack if folder contains tiff files else load dicom directory
-            if any([f.endswith('.tif') or f.endswith('.tiff') for f in os.listdir(path)]):
+            if any(
+                [f.endswith(".tif") or f.endswith(".tiff") for f in os.listdir(path)]
+            ):
                 return self.load_tiff_stack(path)
             else:
                 return self.load_dicom_dir(path)
@@ -556,8 +605,8 @@ class DataLoader:
         # Fails
         else:
             # Find the closest matching path to warn the user
-            parent_dir = os.path.dirname(path) or '.'
-            parent_files = os.listdir(parent_dir) if os.path.isdir(parent_dir) else ''
+            parent_dir = os.path.dirname(path) or "."
+            parent_files = os.listdir(parent_dir) if os.path.isdir(parent_dir) else ""
             valid_paths = [os.path.join(parent_dir, file) for file in parent_files]
             similar_paths = difflib.get_close_matches(path, valid_paths)
             if similar_paths:
@@ -573,18 +622,24 @@ def _get_h5_dataset_keys(f):
     f.visit(lambda key: keys.append(key) if isinstance(f[key], h5py.Dataset) else None)
     return keys
 
+
 def _get_ole_offsets(ole):
     slice_offset = {}
     for stream in ole.listdir():
-        if stream[0].startswith('ImageData'):
+        if stream[0].startswith("ImageData"):
             sid = ole._find(stream)
             direntry = ole.direntries[sid]
             sect_start = direntry.isectStart
-            offset = ole.sectorsize * (sect_start+1)
-            slice_offset[f'{stream[0]}/{stream[1]}']=offset
+            offset = ole.sectorsize * (sect_start + 1)
+            slice_offset[f"{stream[0]}/{stream[1]}"] = offset
 
     # sort dictionary after natural sorting (https://blog.codinghorror.com/sorting-for-humans-natural-sort-order/)
-    sorted_keys = sorted(slice_offset.keys(),key=lambda string_: [int(s) if s.isdigit() else s for s in re.split(r'(\d+)', string_)])
+    sorted_keys = sorted(
+        slice_offset.keys(),
+        key=lambda string_: [
+            int(s) if s.isdigit() else s for s in re.split(r"(\d+)", string_)
+        ],
+    )
     slice_offset_sorted = {key: slice_offset[key] for key in sorted_keys}
 
     return slice_offset_sorted
@@ -596,7 +651,8 @@ def load(
     dataset_name=None,
     return_metadata=False,
     contains=None,
-    dim_order=(2,1,0),
+    force_load: bool = False,
+    dim_order=(2, 1, 0),
     **kwargs,
 ):
     """
@@ -611,16 +667,21 @@ def load(
         return_metadata (bool, optional): Specifies whether to return metadata or not. Default is False (only for HDF5 and TXRM/TXM/XRM files)
         contains (str, optional): Specifies a part of the name that is common for the TIFF file stack to be loaded (only for TIFF stacks). Default is None.
+        force_load (bool, optional): By default, a MemoryError is raised if the file size exceeds available memory.
+            If force_load is True, the error is downgraded to a warning and the loader attempts to load the file anyway. Default is False.
         dim_order (tuple, optional): The order of the dimensions in the volume for .vol files.
            Default is (2,1,0) which corresponds to (z,y,x)
         **kwargs: Additional keyword arguments to be passed to the DataLoader constructor.
 
     Returns:
         vol (numpy.ndarray, numpy.memmap, h5py._hl.dataset.Dataset, nibabel.arrayproxy.ArrayProxy or tuple): The loaded volume
-        
+
             If `virtual_stack=True`, returns `numpy.memmap`, `h5py._hl.dataset.Dataset` or `nibabel.arrayproxy.ArrayProxy` depending on file format
             If `return_metadata=True` and file format is either HDF5, NIfTI or TXRM/TXM/XRM, returns `tuple` (volume, metadata).
 
+    Raises:
+        MemoryError: If the given file size exceeds available memory and force_load is not set to True
+
     Example:
         ```python
         import qim3d
@@ -634,13 +695,13 @@ def load(
         dataset_name=dataset_name,
         return_metadata=return_metadata,
         contains=contains,
+        force_load=force_load,
         dim_order=dim_order,
         **kwargs,
     )
 
     data = loader.load(path)
 
-
     def log_memory_info(data):
         mem = Memory()
         log.info(
@@ -649,17 +710,19 @@ def load(
         )
         mem.report()
 
-    if return_metadata and not isinstance(data,tuple):
-        log.warning('The file format does not contain metadata')
+    if return_metadata and not isinstance(data, tuple):
+        log.warning("The file format does not contain metadata")
 
     if not virtual_stack:
         log_memory_info(data)
     else:
         # Only log if file type is not a np.ndarray, i.e., it is some kind of memmap object
-        if not isinstance( type(data[0]) if isinstance(data,tuple) else type(data), np.ndarray ):
+        # np.memmap subclasses np.ndarray, so compare the exact type
+        data_obj = data[0] if isinstance(data, tuple) else data
+        if type(data_obj) is not np.ndarray:
             log.info("Using virtual stack")
         else:
-            log.warning('Virtual stack is not supported for this file format')
+            log.warning("Virtual stack is not supported for this file format")
             log_memory_info(data)
 
     return data
@@ -677,10 +740,10 @@ class ImgExamples:
         NT_10x200x100 (numpy.ndarray): A 3D volume of a neuron.
         NT_128x128x128 (numpy.ndarray): A 3D volume of a neuron.
         shell_225x128x128 (numpy.ndarray): A 3D volume of a shell.
-    
+
     Tip:
         Call `qim3d.examples.<name>` to access the image examples easily as this class is instantiated when importing `qim3d`
-    
+
     Example:
         ```python
         import qim3d
@@ -694,11 +757,7 @@ class ImgExamples:
     def __init__(self):
         img_examples_path = Path(qim3d.__file__).parents[0] / "img_examples"
         img_paths = list(img_examples_path.glob("*.tif"))
-
-        img_names = []
-        for path in img_paths:
-            img_names.append(path.stem)
-
-        # Generate loader for each image found
-        for idx, name in enumerate(img_names):
-            exec(f"self.{name} = load(path = img_paths[idx])")
\ No newline at end of file
+
+
+        update_dict = {path.stem: load(path) for path in img_paths}
+        self.__dict__.update(update_dict)
diff --git a/qim3d/io/logger.py b/qim3d/io/logger.py
index 3dfb4aa2b61209a4400454e90bb1f7617409ba73..afe4393d1cae80d7c35d88c32a94e3677afdec73 100644
--- a/qim3d/io/logger.py
+++ b/qim3d/io/logger.py
@@ -132,5 +132,5 @@ def level(log_level):
 
 # create the logger
 log = logging.getLogger("qim3d")
-set_simple_output()
+# set_simple_output()  # TODO: This used to work, but now it gives duplicated messages. Needs to be investigated.
 set_level_warning()
diff --git a/qim3d/utils/internal_tools.py b/qim3d/utils/internal_tools.py
index e434c5c2af99c238f99e89de40830070fcf79c89..58b97a37a857199863cddd056c4ec531f5c59abe 100644
--- a/qim3d/utils/internal_tools.py
+++ b/qim3d/utils/internal_tools.py
@@ -179,6 +179,18 @@ def sizeof(num, suffix="B"):
         num /= 1024.0
     return f"{num:.1f} Y{suffix}"
 
+def get_file_size(filename: str) -> int:
+    """Returns the size of a file in bytes.
+
+    Args:
+        filename (str): Specifies the full path to the file
+
+    Returns:
+        size (int): The size of the file in bytes
+    """
+
+    return os.path.getsize(filename)
+
 
 def is_server_running(ip, port):
     s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
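
The virtual-stack TXRM path above builds a dask array from per-slice memmaps, so nothing is read from disk until `compute()` is called. A minimal usage sketch, assuming `qim3d.io.load` is the public entry point and `scan.txm` is a hypothetical reconstructed volume:

```python
import qim3d

# Hypothetical file path; substitute a real reconstructed TXM volume
vol = qim3d.io.load("scan.txm", virtual_stack=True)

# vol is a dask array; no pixel data has been read yet. Normal indexing
# followed by compute() pulls just the requested slices into memory:
middle = vol[vol.shape[0] // 2, :, :].compute()
```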
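The `dims_order` tuple in `load_vol` reorders the sizes parsed from the VGI `Size` field before the raw .vol buffer is reshaped. A standalone sketch of that reindexing, using made-up dimensions:

```python
import numpy as np

dims = [100, 200, 300]  # sizes as listed in the .vgi "Size" field
dim_order = (2, 1, 0)   # default ordering, corresponding to (z, y, x)

# Reorder the parsed sizes exactly as load_vol does
dims_order = (dims[dim_order[0]], dims[dim_order[1]], dims[dim_order[2]])
print(dims_order)  # (300, 200, 100)

# The flat .vol buffer is then reshaped to this order
vol = np.zeros(np.prod(dims), dtype=np.uint8).reshape(dims_order)
print(vol.shape)  # (300, 200, 100)
```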
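A sketch of the new memory guard from the caller's side, assuming `qim3d.io.load` as the entry point and a hypothetical file larger than free memory:

```python
import qim3d

path = "huge_volume.tif"  # hypothetical path; substitute a real file

try:
    # Default behaviour: check_file_size raises MemoryError if the file
    # is larger than the currently available memory
    vol = qim3d.io.load(path)
except MemoryError:
    # Opt in explicitly: the error is downgraded to a warning and the
    # loader attempts to load the file anyway
    vol = qim3d.io.load(path, force_load=True)
```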
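`_get_ole_offsets` sorts the OLE stream names with a natural-sort key so that slice 10 comes after slice 9 rather than after slice 1. The same key in isolation, runnable with only the standard library:

```python
import re

def natural_key(string_):
    # Split into digit and non-digit runs; digit runs compare numerically
    return [int(s) if s.isdigit() else s for s in re.split(r"(\d+)", string_)]

streams = ["ImageData1/Image10", "ImageData1/Image2", "ImageData1/Image1"]
print(sorted(streams))                   # lexicographic: Image1, Image10, Image2
print(sorted(streams, key=natural_key))  # natural: Image1, Image2, Image10
```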
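The rewritten `ImgExamples.__init__` replaces the per-name `exec()` calls with a single `__dict__.update`, which avoids building code strings and keeps the generated attributes visible to introspection. The same pattern on a toy class, with a hypothetical folder name:

```python
from pathlib import Path

class Catalog:
    """Toy version of the pattern: one attribute per file found."""

    def __init__(self, folder="img_examples"):  # hypothetical folder
        paths = list(Path(folder).glob("*.tif"))
        # Equivalent to setattr(self, p.stem, ...) for each file,
        # without exec(); attribute values here are just path strings
        self.__dict__.update({p.stem: str(p) for p in paths})

catalog = Catalog()
print(vars(catalog))  # all generated attributes, discoverable at runtime
```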