added code for sharing

5ab7a51f · monj · 02babe28 · 5ab7a51f · 5ab7a51f · 5ab7a51f
Commit 5ab7a51f authored 3 years ago by monj
--- a/code/image_preprocessing.py
+++ b/code/image_preprocessing.py
+#%% #!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Preprocessing of the images
+
+1. Compute maximum projections.
+2. Explore the intensities of the maximum projection images.
+3. Correct the intensities. Normalisation across channels and images, 
+to equalise the standard deviation for intensity values > th (just considering 
+the pixels that contain protein, and not those represinting air, so as to avoid
+enhancing the background noise)
+
+Created on Wed Oct  7 10:34:52 2020
+
+@author: monj
+"""
+import os
+import microscopy_analysis as ma
+import matplotlib.pyplot as plt
+from datetime import datetime
+import numpy as np
+import skimage.io as io
+import copy
+
+# Turn interactive plotting off
+plt.ioff()
+
+plt.close('all')
+
+start_time_global = datetime.now()
+
+#%% I/O directories
+
+diseases = ['emphysema','uip','sarcoidosis','covid'] #I don't include sarcoidosis and uip, I manually create the group later, just so those don't count twice for the std estimation
+conditions = diseases[:]
+conditions.insert(0,'control')
+
+# Input directories - images are stored in folders starting with the name 'frame'
+base_dir = '../input_data/'
+dir_in = base_dir + '/raw/'
+base_name = 'frame'
+
+# Output directories: max projections and postprocessed
+dir_maxProj_raw = base_dir + 'maximum_projections/' 
+ma.make_output_dirs(dir_maxProj_raw,conditions)
+
+dir_postprocessed = base_dir + 'postprocessed_maxProjs/'
+
+dir_maxProj_processed = dir_postprocessed + 'rgb/' 
+ma.make_output_dirs(dir_maxProj_processed,conditions)
+    
+dir_maxProj_processed_cmy = dir_postprocessed + 'cmy/' 
+ma.make_output_dirs(dir_maxProj_processed_cmy,conditions)
+
+dir_results_intinspect = dir_postprocessed + 'intensity_inspection/' 
+ma.make_output_dirs(dir_results_intinspect,conditions)
+
+#%% Get maximum projection images
+
+# Automatically decide whether to recompute or read the max. projections images
+read_flag = True
+for disease in diseases:
+    disease_flag = len(os.listdir(dir_maxProj_raw + disease + '/'))>2
+    #print(disease)
+    #print(disease_flag)
+    read_flag = disease_flag&read_flag
+     
+
+# Get maximum projection images
+if not(read_flag):
+    # Compute maximum projection images (runtime ~= 135 s)
+    print('Computing maximum projection images')
+    startTime = datetime.now()
+    max_img_list = []
+    for condition in conditions:
+        max_img_list += [ma.comp_max_imgs(dir_in + condition, base_name, dir_maxProj_raw + condition)]
+    endTime = datetime.now() - startTime
+    print(endTime)
+else:
+    # Read maximum projection images (runtime ~= 20 s )
+    print('Reading maximum projection images')
+    startTime = datetime.now()
+    max_img_list = []
+    for condition in conditions:
+        max_img_list += [ma.read_max_imgs(dir_maxProj_raw + condition, base_name)]
+    endTime = datetime.now() - startTime
+    print(endTime)
+
+#%% Plot histograms of raw images (approx. 29s per condition)
+for condition, max_img_list_condition in zip(conditions, max_img_list):
+    ma.plotHist_perChannel_imgset_list(max_img_list_condition, dir_in + condition, dir_results_intinspect + condition, name_tag = 'original')
+
+#%% Measure the spread of intensities in each channel
+"""The spread of intensities is quantified by the standard deviation of the 
+intensities that are over a threshold. The threshold value is set manually 
+and it defines the minimum intensity associated with the presence of protein.
+Image regions with intensity values under the threshold are considered to 
+image air, i.e. absence of protein."""
+
+# Protein presence/absence (air) intensity threshold
+th_int = 15 #could it be set automatically based on the data?
+
+# Compute stds per image channel and, for each sample, plot intensity histograms per channel.
+std_list = []
+for condition, max_img_list_condition in zip(conditions, max_img_list):
+    std_list += [ma.comp_std_perChannel(max_img_list_condition, th_int, dir_in + condition)] 
+
+# Histogram of standard deviations (std) per image channel 
+fig, axs = plt.subplots(len(conditions),4, figsize = (4*2,len(conditions)*2), sharex = True, sharey = True)
+plt.suptitle('Distribution of intensity spread across images\n Rows: ' +  ', '.join(conditions))
+for ind_c,std_list_condition in enumerate(std_list):  
+    for ind,channel_list in enumerate(std_list_condition):
+        axs[0][ind+1].title.set_text('Channel ' + str(ind+1))
+        axs[ind_c][ind+1].hist(channel_list, bins = 50, density = True)
+  
+# Histogram of standard deviations per image  
+axs[0][0].title.set_text('All channels')
+std_list_flat = []
+for ind_c,std_list_condition in enumerate(std_list):
+    std_list_condition_flat = ma.flatten_list(std_list_condition) 
+    axs[ind_c][0].hist(std_list_condition_flat, bins = 50, density = True)    
+    std_list_flat += std_list_condition_flat
+plt.show()    
+    
+# Average intesity-spread (std) across all image channels
+mean_std = sum(std_list_flat)/len(std_list_flat)
+print('The average spread of an image channel is ' + str(round(mean_std)))
+ 
+#%% Correct intensities (34s per condition)
+max_img_corr_list = []
+for condition, max_img_list_condition in zip(conditions, max_img_list):
+    max_img_corr_list += [ma.intensity_spread_normalisation(copy.deepcopy(max_img_list_condition), th_int, mean_std, dir_in + condition, base_name, dir_maxProj_processed + condition)]
+
+#%% Plot histograms of corrected images (29s per condition)
+for condition, max_img_corr_list_condition in zip(conditions, max_img_corr_list):
+   ma.plotHist_perChannel_imgset_list(max_img_corr_list_condition, dir_in + condition, dir_results_intinspect + condition, name_tag = 'corrected')
+
+#%% Visualise raw vs. corrected images
+for max_img_list_condition, max_img_corr_list_condition, condition in zip(max_img_list, max_img_corr_list, conditions):
+    ma.compare_imgpairs_list(max_img_list_condition, max_img_corr_list_condition , dir_in + condition, dir_results_intinspect + condition)  
+
+ #%% Exchange channels: fibrillar collagen with elastin
+for max_img_list_condition in max_img_corr_list:
+    print(len(max_img_list_condition))
+    for max_img_list_sample in max_img_list_condition:
+        print(len(max_img_list_sample))
+        for nr_frame,max_img in enumerate(max_img_list_sample):
+            max_img_list_sample[nr_frame]= max_img[:,:,[0,2,1]]
+            
+#%% Save images in cmy colourspace
+for max_img_corr_list_condition, condition in zip(max_img_corr_list, conditions):
+    ma.rgb2cmy_list(max_img_corr_list_condition, dir_in + condition, base_name, dir_maxProj_processed_cmy + condition)  
+
+#%%% Print total computational time
+end_time_global = datetime.now() - startTime
+print(end_time_global)
\ No newline at end of file
--- a/code/local_features.py
+++ b/code/local_features.py
+"""
+Helping functions for extracting features from images
+"""
+
+import skimage.io
+import numpy as np
+import matplotlib.pyplot as plt
+import scipy.ndimage
+
+def get_gauss_feat_im(im, sigma=1, normalize=False):
+    """Gauss derivative feaures for every image pixel.
+    Arguments:
+        image: a 2D image, shape (r,c).
+        sigma: standard deviation for Gaussian derivatives.
+        normalize: flag indicating normalization of features.
+    Returns:
+        imfeat: a 3D array of size (r,c,15) with a 15-dimentional feature
+            vector for every image pixel.
+    Author: vand@dtu.dk, 2020
+    """
+      
+    r,c = im.shape
+    imfeat = np.zeros((r,c,15))
+    imfeat[:,:,0] = scipy.ndimage.gaussian_filter(im,sigma,order=0)
+    imfeat[:,:,1] = scipy.ndimage.gaussian_filter(im,sigma,order=[0,1])
+    imfeat[:,:,2] = scipy.ndimage.gaussian_filter(im,sigma,order=[1,0])
+    imfeat[:,:,3] = scipy.ndimage.gaussian_filter(im,sigma,order=[0,2])
+    imfeat[:,:,4] = scipy.ndimage.gaussian_filter(im,sigma,order=[1,1])
+    imfeat[:,:,5] = scipy.ndimage.gaussian_filter(im,sigma,order=[2,0])
+    imfeat[:,:,6] = scipy.ndimage.gaussian_filter(im,sigma,order=[0,3])
+    imfeat[:,:,7] = scipy.ndimage.gaussian_filter(im,sigma,order=[1,2])
+    imfeat[:,:,8] = scipy.ndimage.gaussian_filter(im,sigma,order=[2,1])
+    imfeat[:,:,9] = scipy.ndimage.gaussian_filter(im,sigma,order=[3,0])
+    imfeat[:,:,10] = scipy.ndimage.gaussian_filter(im,sigma,order=[0,4])
+    imfeat[:,:,11] = scipy.ndimage.gaussian_filter(im,sigma,order=[1,3])
+    imfeat[:,:,12] = scipy.ndimage.gaussian_filter(im,sigma,order=[2,2])
+    imfeat[:,:,13] = scipy.ndimage.gaussian_filter(im,sigma,order=[3,1])
+    imfeat[:,:,14] = scipy.ndimage.gaussian_filter(im,sigma,order=[4,0])
+    
+    if normalize:
+        imfeat -= np.mean(imfeat, axis=(0,1))
+        imfeat *= (1/np.std(imfeat, axis=(0,1)))
+    
+    return imfeat
+
+def im2col(im, patch_size=[3,3], stepsize=1):
+    """Rearrange image patches into columns
+    Arguments:
+        image: a 2D image, shape (r,c).
+        patch size: size of extracted paches.
+        stepsize: patch step size.
+    Returns:
+        patches: a 2D array which in every column has a patch associated 
+            with one image pixel. For stepsize 1, number of returned column 
+            is (r-patch_size[0]+1)*(c-patch_size[0]+1) due to bounary. The 
+            length of columns is pathc_size[0]*patch_size[1].
+    """
+    
+    r,c = im.shape
+    s0, s1 = im.strides    
+    nrows =r-patch_size[0]+1
+    ncols = c-patch_size[1]+1
+    shp = patch_size[0],patch_size[1],nrows,ncols
+    strd = s0,s1,s0,s1
+
+    out_view = np.lib.stride_tricks.as_strided(im, shape=shp, strides=strd)
+    return out_view.reshape(patch_size[0]*patch_size[1],-1)[:,::stepsize]
+
+
+def ndim2col(im, block_size=[3,3], stepsize=1):
+    """Rearrange image blocks into columns for N-D image (e.g. RGB image)"""""
+    if(im.ndim == 2):
+        return im2col(im, block_size, stepsize)
+    else:
+        r,c,l = im.shape
+        patches = np.zeros((l*block_size[0]*block_size[1],
+                            (r-block_size[0]+1)*(c-block_size[1]+1)))
+        for i in range(l):
+            patches[i*block_size[0]*block_size[1]:(i+1)*block_size[0]*block_size[1],
+                    :] = im2col(im[:,:,i],block_size,stepsize)
+        return patches
+
+#%%
+
+if __name__ == '__main__':
+    
+    filename = '../example_data/training_image.png'
+    I = skimage.io.imread(filename)
+    I = I.astype(np.float)
+    
+    
+    # features based on gaussian derivatives
+    sigma = 1;
+    gf = get_gauss_feat_im(I, sigma)
+    
+    fig,ax = plt.subplots(3,5)
+    for j in range(5):
+        for i in range(3):
+            ax[i][j].imshow(gf[:,:,5*i+j])
+            ax[i][j].set_title(f'layer {5*i+j}')
+            
+            
+    # features based on image patches
+    I = I[I.shape[0]//2:I.shape[0]//2+20,I.shape[1]//2:I.shape[1]//2+20] # smaller image such that we can see the difference in layers
+    pf = im2col(I,[3,3])
+    pf = pf.reshape((9,I.shape[0]-2,I.shape[1]-2))
+    
+    fig, ax = plt.subplots()
+    ax.imshow(I)
+            
+    fig,ax = plt.subplots(3,3)
+    for j in range(3):
+        for i in range(3):
+            ax[i][j].imshow(pf[3*i+j])
+            ax[i][j].set_title(f'layer {3*i+j}')
+   
+    
--- a/code/lung_feature_patch_allPatients_singleProtein.py
+++ b/code/lung_feature_patch_allPatients_singleProtein.py
--- a/code/lung_feature_patch_allPatients_threeProtein.py
+++ b/code/lung_feature_patch_allPatients_threeProtein.py
--- a/code/lung_feature_patch_allPatients_twoProtein.py
+++ b/code/lung_feature_patch_allPatients_twoProtein.py
--- a/code/lung_lif_to_png.py
+++ b/code/lung_lif_to_png.py
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Transform lif files to png for easy access and visualization
+"""
+
+
+import numpy as np
+from readlif.reader import LifFile
+import skimage.io
+import os
+import microscopy_analysis as ma
+
+#%%
+# folder containing lif files
+in_dir = '../../../covidPatients/scans/' #data101220/'# data_October2020/sarcoidosis/' #'../../5April2020_control/' #healthy #'../../24December19_emphysema/' #sick 
+# output folder
+out_dir = '../data_corrected_bis/covid/' #control/ #/emphysema/
+file_names = ma.listdir_custom(in_dir, string_start_pos = -7, ext = 'lif')
+
+# make output directory if it does not exist
+if not os.path.exists(out_dir):
+    os.mkdir(out_dir)
+
+# run through all lif files and save as png for easy access and visualization
+for file in file_names:
+    print(file)
+    lif_in = LifFile(in_dir + file)
+    n_im = lif_in.num_images
+    dir_name_out = (out_dir + (file.split('.')[0]).replace(' ', '_') + '/')
+    if not os.path.exists(dir_name_out):
+        os.mkdir(dir_name_out)
+    for n in range(0,n_im):
+        print(n)
+        im_in = lif_in.get_image(n)
+        r,c,n_frames = im_in.dims[:3]
+        n_ch = im_in.channels
+        if (n_ch == 3)|(n_ch==4): #TODO: Fix support for removing channel 1 in covids. Could be generalised to select channels of interest, but we don't need this functionality now
+            dir_name_out_im = dir_name_out + ('frame_%02d' % n)
+            if not os.path.exists(dir_name_out_im):
+                os.mkdir(dir_name_out_im)
+            im_out = np.zeros((r,c,3), dtype='uint8')
+            for i in range(0,n_frames):
+                im_out[:,:,0] = np.array(im_in.get_frame(i,0,0)).transpose()
+                for j in range(1,3):
+                    if n_ch == 4: #support for removing channel 1 in covids.
+                        im_out[:,:,j] = np.array(im_in.get_frame(i,0,j+1)).transpose()
+                    else:
+                        im_out[:,:,j] = np.array(im_in.get_frame(i,0,j)).transpose()
+                skimage.io.imsave(dir_name_out_im + '/image_%02d.png' % i, im_out)
+        
--- a/code/microscopy_analysis.py
+++ b/code/microscopy_analysis.py