import difflib
import os
from itertools import product

import numpy as np
import tifffile as tiff
import zarr
from tqdm import tqdm

from qim3d.utils.internal_tools import stringify_path


class Convert:
    """Utility class to convert files to other formats without loading the entire file into memory."""

    # Extensions treated as TIFF files when dispatching on the file suffix.
    _TIF_EXTENSIONS = (".tif", ".tiff")

    def __init__(self, **kwargs):
        """ Store the conversion settings

        Args:
            chunk_shape (tuple, optional): chunk size for the zarr file. Defaults to (64, 64, 64).
        """
        self.chunk_shape = kwargs.get("chunk_shape", (64, 64, 64))

    @staticmethod
    def _extension(path):
        """ Return the lower-cased extension of `path`, ignoring any trailing path separator """
        # normpath drops a trailing separator, so "vol.zarr/" still yields ".zarr"
        return os.path.splitext(os.path.normpath(path))[1].lower()

    @staticmethod
    def _invalid_path_error(input_path):
        """ Build the ValueError raised for a non-existing input path

        Looks for a similarly named entry next to `input_path` so the message
        can suggest what the user probably meant.
        """
        parent_dir = os.path.dirname(input_path) or "."
        parent_files = os.listdir(parent_dir) if os.path.isdir(parent_dir) else []
        valid_paths = [os.path.join(parent_dir, file) for file in parent_files]
        similar_paths = difflib.get_close_matches(input_path, valid_paths)
        if similar_paths:
            suggestion = similar_paths[0]  # Get the closest match
            message = f"Invalid path. Did you mean '{suggestion}'?"
            return ValueError(repr(message))
        return ValueError("Invalid path")

    def convert(self, input_path, output_path):
        """ Convert a file to another format, selecting the converter from the file extensions

        Supported conversions are tif -> zarr, npy -> zarr and zarr -> tif.

        Args:
            input_path (str or path-like): path to the existing input file (or zarr store)
            output_path (str or path-like): path to the output file (or zarr store); it does not need to exist

        Returns:
            zarr.core.Array or None: the zarr array when converting to zarr, None when converting to tif

        Raises:
            ValueError: if `input_path` does not exist or the pair of extensions is not supported
        """
        # Stringify paths in case they are not already strings
        input_path = stringify_path(input_path)
        output_path = stringify_path(output_path)

        # Only the input has to exist: the output is created by the conversion,
        # and a zarr store is usually a directory rather than a regular file.
        if not os.path.exists(input_path):
            raise self._invalid_path_error(input_path)

        # Dispatch on the extensions, not on the full paths
        input_ext = self._extension(input_path)
        output_ext = self._extension(output_path)

        if input_ext in self._TIF_EXTENSIONS and output_ext == ".zarr":
            return self.convert_tif_to_zarr(input_path, output_path, chunks=self.chunk_shape)
        if input_ext == ".npy" and output_ext == ".zarr":
            # shape and dtype are read from the .npy header
            return self.convert_npy_to_zarr(input_path, output_path, chunks=self.chunk_shape)
        if input_ext == ".zarr" and output_ext in self._TIF_EXTENSIONS:
            return self.convert_zarr_to_tif(input_path, output_path)

        raise ValueError("Unsupported file format")

    def convert_tif_to_zarr(self, tif_path, zarr_path, chunks=(64, 64, 64)):
        """ Convert a tiff file to a zarr file

        Args:
            tif_path (str): path to the tiff file
            zarr_path (str): path to the zarr file
            chunks (tuple, optional): chunk size for the zarr file. Defaults to (64, 64, 64).

        Returns:
            zarr.core.Array: zarr array containing the data from the tiff file
        """
        vol = tiff.memmap(tif_path)
        z = zarr.open(zarr_path, mode='w', shape=vol.shape, chunks=chunks, dtype=vol.dtype)
        # Number of chunks along each axis (ceiling division)
        chunk_shape = tuple((s + c - 1) // c for s, c in zip(z.shape, z.chunks))
        # ! Fastest way is z[:] = vol[:], but does not have a progress bar
        for chunk_indices in tqdm(product(*[range(n) for n in chunk_shape]), total=np.prod(chunk_shape)):
            # Clamp the last chunk of every axis to the volume boundary
            slices = tuple(slice(c * i, min(c * (i + 1), s))
                           for s, c, i in zip(z.shape, z.chunks, chunk_indices))
            temp_data = vol[slices]
            # The assignment takes 99% of the cpu-time
            z.blocks[chunk_indices] = temp_data

        return z

    def convert_npy_to_zarr(self, npy_path, zarr_path, shape=None, dtype=np.float32, chunks=(64, 64, 64)):
        """ Convert a numpy file to a zarr file

        Args:
            npy_path (str): path to the numpy file
            zarr_path (str): path to the zarr file
            shape (tuple, optional): shape of the data when the file is raw binary without
                a .npy header. If None, shape and dtype are read from the .npy header.
                Defaults to None.
            dtype (numpy dtype, optional): data type of the raw binary data; only used
                when `shape` is given. Defaults to np.float32.
            chunks (tuple, optional): chunk size for the zarr file. Defaults to (64, 64, 64).

        Returns:
            zarr.core.Array: zarr array containing the data from the numpy file
        """
        if shape is None:
            # Memory-map through np.load so the .npy header is parsed, not read as data
            vol = np.load(npy_path, mmap_mode='r')
        else:
            # Explicit shape: interpret the file as raw binary data
            vol = np.memmap(npy_path, dtype=dtype, mode='r', shape=shape)
        z = zarr.open(zarr_path, mode='w', shape=vol.shape, chunks=chunks, dtype=vol.dtype)
        z[:] = vol[:]

        return z

    def convert_zarr_to_tif(self, zarr_path, tif_path):
        """ Convert a zarr file to a tiff file

        Args:
            zarr_path (str): path to the zarr file
            tif_path (str): path to the tiff file

        Returns:
            None
        """
        z = zarr.open(zarr_path)
        tiff.imwrite(tif_path, z)


def convert(input_path: str, output_path: str, chunk_shape: tuple = (64, 64, 64)):
    """ Convert a file to another format without loading the entire file into memory

    Args:
        input_path (str): path to the input file
        output_path (str): path to the output file
        chunk_shape (tuple, optional): chunk size for the zarr file. Defaults to (64, 64, 64).

    Returns:
        zarr.core.Array or None: the zarr array when converting to zarr, None when converting to tif
    """
    converter = Convert(chunk_shape=chunk_shape)
    return converter.convert(input_path, output_path)