Skip to content
Snippets Groups Projects

Conv zarr tiff folders

1 file
+ 100
41
Compare changes
  • Side-by-side
  • Inline
+ 100
41
import difflib
import os
from itertools import product
import numpy as np
@@ -5,56 +7,113 @@ import tifffile as tiff
import zarr
from tqdm import tqdm
from qim3d.utils.internal_tools import stringify_path
def convert_tif_to_zarr(tif_path, zarr_path, chunks=(64, 64, 64)):
""" Convert a tiff file to a zarr file
Args:
tif_path (str): path to the tiff file
zarr_path (str): path to the zarr file
chunks (tuple, optional): chunk size for the zarr file. Defaults to (64, 64, 64).
class Convert:
Returns:
zarr.core.Array: zarr array containing the data from the tiff file
"""
vol = tiff.memmap(tif_path)
z = zarr.open(zarr_path, mode='w', shape=vol.shape, chunks=chunks, dtype=vol.dtype)
chunk_shape = tuple((s + c - 1) // c for s, c in zip(z.shape, z.chunks))
for chunk_indices in tqdm(product(*[range(n) for n in chunk_shape]), total=np.prod(chunk_shape)):
slices = tuple(slice(c * i, min(c * (i + 1), s))
for s, c, i in zip(z.shape, z.chunks, chunk_indices))
temp_data = vol[slices]
# The assignment takes 99% of the cpu-time
z.blocks[chunk_indices] = temp_data
def __init__(self,**kwargs):
""" Utility class to convert files to other formats without loading the entire file into memory
return z
Args:
chunk_shape (tuple, optional): chunk size for the zarr file. Defaults to (64, 64, 64).
"""
self.chunk_shape = kwargs.get("chunk_shape", (64, 64, 64))
def convert_npy_to_zarr(npy_path, zarr_path, shape, dtype=np.float32, chunks=(64, 64, 64)):
""" Convert a numpy file to a zarr file
def convert(self, input_path, output_path):
# Stringify path in case it is not already a string
input_path = stringify_path(input_path)
Args:
npy_path (str): path to the numpy file
zarr_path (str): path to the zarr file
chunks (tuple, optional): chunk size for the zarr file. Defaults to (64, 64, 64).
if os.path.isfile(input_path) and os.path.isfile(output_path):
match input_path, output_path:
case (".tif", ".zarr"):
return self.convert_tif_to_zarr(input_path, output_path)
case (".npy", ".zarr"):
return self.convert_npy_to_zarr(input_path, output_path, shape=(64, 64, 64))
case (".zarr", ".tif"):
return self.convert_zarr_to_tif(input_path, output_path)
case _:
raise ValueError("Unsupported file format")
# Load a directory
elif os.path.isdir(input_path):
raise ValueError("Unsupported file format")
# Fail
else:
# Find the closest matching path to warn the user
parent_dir = os.path.dirname(input_path) or "."
parent_files = os.listdir(parent_dir) if os.path.isdir(parent_dir) else ""
valid_paths = [os.path.join(parent_dir, file) for file in parent_files]
similar_paths = difflib.get_close_matches(input_path, valid_paths)
if similar_paths:
suggestion = similar_paths[0] # Get the closest match
message = f"Invalid path. Did you mean '{suggestion}'?"
raise ValueError(repr(message))
else:
raise ValueError("Invalid path")
Returns:
zarr.core.Array: zarr array containing the data from the numpy file
"""
vol = np.memmap(npy_path, dtype=dtype, mode='r', shape=shape)
z = zarr.open(zarr_path, mode='w', shape=vol.shape, chunks=chunks, dtype=vol.dtype)
z[:] = vol[:]
def convert_tif_to_zarr(self, tif_path, zarr_path, chunks=(64, 64, 64)):
""" Convert a tiff file to a zarr file
return z
Args:
tif_path (str): path to the tiff file
zarr_path (str): path to the zarr file
chunks (tuple, optional): chunk size for the zarr file. Defaults to (64, 64, 64).
def convert_zarr_to_tif(zarr_path, tif_path):
""" Convert a zarr file to a tiff file
Returns:
zarr.core.Array: zarr array containing the data from the tiff file
"""
vol = tiff.memmap(tif_path)
z = zarr.open(zarr_path, mode='w', shape=vol.shape, chunks=chunks, dtype=vol.dtype)
chunk_shape = tuple((s + c - 1) // c for s, c in zip(z.shape, z.chunks))
# ! Fastest way is z[:] = vol[:], but does not have a progress bar
for chunk_indices in tqdm(product(*[range(n) for n in chunk_shape]), total=np.prod(chunk_shape)):
slices = tuple(slice(c * i, min(c * (i + 1), s))
for s, c, i in zip(z.shape, z.chunks, chunk_indices))
temp_data = vol[slices]
# The assignment takes 99% of the cpu-time
z.blocks[chunk_indices] = temp_data
Args:
zarr_path (str): path to the zarr file
tif_path (str): path to the tiff file
return z
def convert_npy_to_zarr(self, npy_path, zarr_path, shape, dtype=np.float32, chunks=(64, 64, 64)):
""" Convert a numpy file to a zarr file
Args:
npy_path (str): path to the numpy file
zarr_path (str): path to the zarr file
chunks (tuple, optional): chunk size for the zarr file. Defaults to (64, 64, 64).
Returns:
zarr.core.Array: zarr array containing the data from the numpy file
"""
vol = np.memmap(npy_path, dtype=dtype, mode='r', shape=shape)
z = zarr.open(zarr_path, mode='w', shape=vol.shape, chunks=chunks, dtype=vol.dtype)
z[:] = vol[:]
returns:
None
return z
def convert_zarr_to_tif(self, zarr_path, tif_path):
""" Convert a zarr file to a tiff file
Args:
zarr_path (str): path to the zarr file
tif_path (str): path to the tiff file
returns:
None
"""
z = zarr.open(zarr_path)
tiff.imwrite(tif_path, z)
def convert(input_path: str, output_path: str, chunk_shape: tuple = (64, 64, 64)):
""" Convert a file to another format without loading the entire file into memory
Args:
input_path (str): path to the input file
output_path (str): path to the output file
chunk_shape (tuple, optional): chunk size for the zarr file. Defaults to (64, 64, 64).
"""
z = zarr.open(zarr_path)
tiff.imwrite(tif_path, z)
\ No newline at end of file
converter = Convert(chunk_shape=chunk_shape)
converter.convert(input_path, output_path)
Loading