Skip to content
Snippets Groups Projects
Commit 5ab7a51f authored by monj's avatar monj
Browse files

added code for sharing

parent 02babe28
No related branches found
No related tags found
No related merge requests found
#%% #!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Preprocessing of the images
1. Compute maximum projections.
2. Explore the intensities of the maximum projection images.
3. Correct the intensities. Normalisation across channels and images,
to equalise the standard deviation for intensity values > th (just considering
the pixels that contain protein, and not those represinting air, so as to avoid
enhancing the background noise)
Created on Wed Oct 7 10:34:52 2020
@author: monj
"""
import os
import microscopy_analysis as ma
import matplotlib.pyplot as plt
from datetime import datetime
import numpy as np
import skimage.io as io
import copy
# Turn interactive plotting off
plt.ioff()
plt.close('all')
start_time_global = datetime.now()
#%% I/O directories
diseases = ['emphysema','uip','sarcoidosis','covid'] #I don't include sarcoidosis and uip, I manually create the group later, just so those don't count twice for the std estimation
conditions = diseases[:]
conditions.insert(0,'control')
# Input directories - images are stored in folders starting with the name 'frame'
base_dir = '../input_data/'
dir_in = base_dir + '/raw/'
base_name = 'frame'
# Output directories: max projections and postprocessed
dir_maxProj_raw = base_dir + 'maximum_projections/'
ma.make_output_dirs(dir_maxProj_raw,conditions)
dir_postprocessed = base_dir + 'postprocessed_maxProjs/'
dir_maxProj_processed = dir_postprocessed + 'rgb/'
ma.make_output_dirs(dir_maxProj_processed,conditions)
dir_maxProj_processed_cmy = dir_postprocessed + 'cmy/'
ma.make_output_dirs(dir_maxProj_processed_cmy,conditions)
dir_results_intinspect = dir_postprocessed + 'intensity_inspection/'
ma.make_output_dirs(dir_results_intinspect,conditions)
#%% Get maximum projection images
# Automatically decide whether to recompute or read the max. projections images
read_flag = True
for disease in diseases:
disease_flag = len(os.listdir(dir_maxProj_raw + disease + '/'))>2
#print(disease)
#print(disease_flag)
read_flag = disease_flag&read_flag
# Get maximum projection images
if not(read_flag):
# Compute maximum projection images (runtime ~= 135 s)
print('Computing maximum projection images')
startTime = datetime.now()
max_img_list = []
for condition in conditions:
max_img_list += [ma.comp_max_imgs(dir_in + condition, base_name, dir_maxProj_raw + condition)]
endTime = datetime.now() - startTime
print(endTime)
else:
# Read maximum projection images (runtime ~= 20 s )
print('Reading maximum projection images')
startTime = datetime.now()
max_img_list = []
for condition in conditions:
max_img_list += [ma.read_max_imgs(dir_maxProj_raw + condition, base_name)]
endTime = datetime.now() - startTime
print(endTime)
#%% Plot histograms of raw images (approx. 29s per condition)
for condition, max_img_list_condition in zip(conditions, max_img_list):
ma.plotHist_perChannel_imgset_list(max_img_list_condition, dir_in + condition, dir_results_intinspect + condition, name_tag = 'original')
#%% Measure the spread of intensities in each channel
"""The spread of intensities is quantified by the standard deviation of the
intensities that are over a threshold. The threshold value is set manually
and it defines the minimum intensity associated with the presence of protein.
Image regions with intensity values under the threshold are considered to
image air, i.e. absence of protein."""
# Protein presence/absence (air) intensity threshold
th_int = 15 #could it be set automatically based on the data?
# Compute stds per image channel and, for each sample, plot intensity histograms per channel.
std_list = []
for condition, max_img_list_condition in zip(conditions, max_img_list):
std_list += [ma.comp_std_perChannel(max_img_list_condition, th_int, dir_in + condition)]
# Histogram of standard deviations (std) per image channel
fig, axs = plt.subplots(len(conditions),4, figsize = (4*2,len(conditions)*2), sharex = True, sharey = True)
plt.suptitle('Distribution of intensity spread across images\n Rows: ' + ', '.join(conditions))
for ind_c,std_list_condition in enumerate(std_list):
for ind,channel_list in enumerate(std_list_condition):
axs[0][ind+1].title.set_text('Channel ' + str(ind+1))
axs[ind_c][ind+1].hist(channel_list, bins = 50, density = True)
# Histogram of standard deviations per image
axs[0][0].title.set_text('All channels')
std_list_flat = []
for ind_c,std_list_condition in enumerate(std_list):
std_list_condition_flat = ma.flatten_list(std_list_condition)
axs[ind_c][0].hist(std_list_condition_flat, bins = 50, density = True)
std_list_flat += std_list_condition_flat
plt.show()
# Average intesity-spread (std) across all image channels
mean_std = sum(std_list_flat)/len(std_list_flat)
print('The average spread of an image channel is ' + str(round(mean_std)))
#%% Correct intensities (34s per condition)
max_img_corr_list = []
for condition, max_img_list_condition in zip(conditions, max_img_list):
max_img_corr_list += [ma.intensity_spread_normalisation(copy.deepcopy(max_img_list_condition), th_int, mean_std, dir_in + condition, base_name, dir_maxProj_processed + condition)]
#%% Plot histograms of corrected images (29s per condition)
for condition, max_img_corr_list_condition in zip(conditions, max_img_corr_list):
ma.plotHist_perChannel_imgset_list(max_img_corr_list_condition, dir_in + condition, dir_results_intinspect + condition, name_tag = 'corrected')
#%% Visualise raw vs. corrected images
for max_img_list_condition, max_img_corr_list_condition, condition in zip(max_img_list, max_img_corr_list, conditions):
ma.compare_imgpairs_list(max_img_list_condition, max_img_corr_list_condition , dir_in + condition, dir_results_intinspect + condition)
#%% Exchange channels: fibrillar collagen with elastin
for max_img_list_condition in max_img_corr_list:
print(len(max_img_list_condition))
for max_img_list_sample in max_img_list_condition:
print(len(max_img_list_sample))
for nr_frame,max_img in enumerate(max_img_list_sample):
max_img_list_sample[nr_frame]= max_img[:,:,[0,2,1]]
#%% Save images in cmy colourspace
for max_img_corr_list_condition, condition in zip(max_img_corr_list, conditions):
ma.rgb2cmy_list(max_img_corr_list_condition, dir_in + condition, base_name, dir_maxProj_processed_cmy + condition)
#%%% Print total computational time
end_time_global = datetime.now() - startTime
print(end_time_global)
\ No newline at end of file
"""
Helping functions for extracting features from images
"""
import skimage.io
import numpy as np
import matplotlib.pyplot as plt
import scipy.ndimage
def get_gauss_feat_im(im, sigma=1, normalize=False):
"""Gauss derivative feaures for every image pixel.
Arguments:
image: a 2D image, shape (r,c).
sigma: standard deviation for Gaussian derivatives.
normalize: flag indicating normalization of features.
Returns:
imfeat: a 3D array of size (r,c,15) with a 15-dimentional feature
vector for every image pixel.
Author: vand@dtu.dk, 2020
"""
r,c = im.shape
imfeat = np.zeros((r,c,15))
imfeat[:,:,0] = scipy.ndimage.gaussian_filter(im,sigma,order=0)
imfeat[:,:,1] = scipy.ndimage.gaussian_filter(im,sigma,order=[0,1])
imfeat[:,:,2] = scipy.ndimage.gaussian_filter(im,sigma,order=[1,0])
imfeat[:,:,3] = scipy.ndimage.gaussian_filter(im,sigma,order=[0,2])
imfeat[:,:,4] = scipy.ndimage.gaussian_filter(im,sigma,order=[1,1])
imfeat[:,:,5] = scipy.ndimage.gaussian_filter(im,sigma,order=[2,0])
imfeat[:,:,6] = scipy.ndimage.gaussian_filter(im,sigma,order=[0,3])
imfeat[:,:,7] = scipy.ndimage.gaussian_filter(im,sigma,order=[1,2])
imfeat[:,:,8] = scipy.ndimage.gaussian_filter(im,sigma,order=[2,1])
imfeat[:,:,9] = scipy.ndimage.gaussian_filter(im,sigma,order=[3,0])
imfeat[:,:,10] = scipy.ndimage.gaussian_filter(im,sigma,order=[0,4])
imfeat[:,:,11] = scipy.ndimage.gaussian_filter(im,sigma,order=[1,3])
imfeat[:,:,12] = scipy.ndimage.gaussian_filter(im,sigma,order=[2,2])
imfeat[:,:,13] = scipy.ndimage.gaussian_filter(im,sigma,order=[3,1])
imfeat[:,:,14] = scipy.ndimage.gaussian_filter(im,sigma,order=[4,0])
if normalize:
imfeat -= np.mean(imfeat, axis=(0,1))
imfeat *= (1/np.std(imfeat, axis=(0,1)))
return imfeat
def im2col(im, patch_size=[3,3], stepsize=1):
"""Rearrange image patches into columns
Arguments:
image: a 2D image, shape (r,c).
patch size: size of extracted paches.
stepsize: patch step size.
Returns:
patches: a 2D array which in every column has a patch associated
with one image pixel. For stepsize 1, number of returned column
is (r-patch_size[0]+1)*(c-patch_size[0]+1) due to bounary. The
length of columns is pathc_size[0]*patch_size[1].
"""
r,c = im.shape
s0, s1 = im.strides
nrows =r-patch_size[0]+1
ncols = c-patch_size[1]+1
shp = patch_size[0],patch_size[1],nrows,ncols
strd = s0,s1,s0,s1
out_view = np.lib.stride_tricks.as_strided(im, shape=shp, strides=strd)
return out_view.reshape(patch_size[0]*patch_size[1],-1)[:,::stepsize]
def ndim2col(im, block_size=[3,3], stepsize=1):
"""Rearrange image blocks into columns for N-D image (e.g. RGB image)"""""
if(im.ndim == 2):
return im2col(im, block_size, stepsize)
else:
r,c,l = im.shape
patches = np.zeros((l*block_size[0]*block_size[1],
(r-block_size[0]+1)*(c-block_size[1]+1)))
for i in range(l):
patches[i*block_size[0]*block_size[1]:(i+1)*block_size[0]*block_size[1],
:] = im2col(im[:,:,i],block_size,stepsize)
return patches
#%%
if __name__ == '__main__':
filename = '../example_data/training_image.png'
I = skimage.io.imread(filename)
I = I.astype(np.float)
# features based on gaussian derivatives
sigma = 1;
gf = get_gauss_feat_im(I, sigma)
fig,ax = plt.subplots(3,5)
for j in range(5):
for i in range(3):
ax[i][j].imshow(gf[:,:,5*i+j])
ax[i][j].set_title(f'layer {5*i+j}')
# features based on image patches
I = I[I.shape[0]//2:I.shape[0]//2+20,I.shape[1]//2:I.shape[1]//2+20] # smaller image such that we can see the difference in layers
pf = im2col(I,[3,3])
pf = pf.reshape((9,I.shape[0]-2,I.shape[1]-2))
fig, ax = plt.subplots()
ax.imshow(I)
fig,ax = plt.subplots(3,3)
for j in range(3):
for i in range(3):
ax[i][j].imshow(pf[3*i+j])
ax[i][j].set_title(f'layer {3*i+j}')
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Transform lif files to png for easy access and visualization
"""
import numpy as np
from readlif.reader import LifFile
import skimage.io
import os
import microscopy_analysis as ma
#%%
# folder containing lif files
in_dir = '../../../covidPatients/scans/' #data101220/'# data_October2020/sarcoidosis/' #'../../5April2020_control/' #healthy #'../../24December19_emphysema/' #sick
# output folder
out_dir = '../data_corrected_bis/covid/' #control/ #/emphysema/
file_names = ma.listdir_custom(in_dir, string_start_pos = -7, ext = 'lif')
# make output directory if it does not exist
if not os.path.exists(out_dir):
os.mkdir(out_dir)
# run through all lif files and save as png for easy access and visualization
for file in file_names:
print(file)
lif_in = LifFile(in_dir + file)
n_im = lif_in.num_images
dir_name_out = (out_dir + (file.split('.')[0]).replace(' ', '_') + '/')
if not os.path.exists(dir_name_out):
os.mkdir(dir_name_out)
for n in range(0,n_im):
print(n)
im_in = lif_in.get_image(n)
r,c,n_frames = im_in.dims[:3]
n_ch = im_in.channels
if (n_ch == 3)|(n_ch==4): #TODO: Fix support for removing channel 1 in covids. Could be generalised to select channels of interest, but we don't need this functionality now
dir_name_out_im = dir_name_out + ('frame_%02d' % n)
if not os.path.exists(dir_name_out_im):
os.mkdir(dir_name_out_im)
im_out = np.zeros((r,c,3), dtype='uint8')
for i in range(0,n_frames):
im_out[:,:,0] = np.array(im_in.get_frame(i,0,0)).transpose()
for j in range(1,3):
if n_ch == 4: #support for removing channel 1 in covids.
im_out[:,:,j] = np.array(im_in.get_frame(i,0,j+1)).transpose()
else:
im_out[:,:,j] = np.array(im_in.get_frame(i,0,j)).transpose()
skimage.io.imsave(dir_name_out_im + '/image_%02d.png' % i, im_out)
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment