diff --git a/code/image_preprocessing.py b/code/image_preprocessing.py deleted file mode 100755 index 266b43113d8a20c8e52a62ca5bd64fab052b6348..0000000000000000000000000000000000000000 --- a/code/image_preprocessing.py +++ /dev/null @@ -1,143 +0,0 @@ -#%% #!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Preprocessing of the images - -1. Compute maximum projections. -2. Explore the intensities of the maximum projection images. -3. (optional) Correct the intensities. Normalisation across channels and images. -Equalise the standard deviation for intensity values that contain structures. -The air in the backround is removed with a manual threshold which is constant across -images and channels. - -Created on Wed Oct 7 10:34:52 2020 - -@author: monj -""" -import os -import microscopy_analysis as ma -import matplotlib.pyplot as plt -from datetime import datetime -import numpy as np -import skimage.io as io -import copy - -# Turn interactive plotting off -plt.ioff() - -plt.close('all') - -start_time_global = datetime.now() - -#%% I/O directories - -diseases = ['emphysema','diseased', 'sarcoidosis'] -conditions = diseases[:] -conditions.insert(0,'control') - -version = 'corrected_bis' #data identifier: My versions were called: blank, corrected or corrected_bis -preprocessing = 'preprocessed_ignback/' - -# input directories - images are stored in folders starting with the name 'frame' -dir_in = '../data_' + version+'/' -base_name = 'frame' - -#output directories: max projections and intensity inspection -dir_maxProj = '../maxProjImages_'+version+'/' - -dir_maxProj_raw = dir_maxProj + 'raw/' -ma.make_output_dirs(dir_maxProj_raw,conditions) - -dir_maxProj_processed = dir_maxProj + preprocessing #rerun just to make sure not to muck up the results -ma.make_output_dirs(dir_maxProj_processed,conditions) - -dir_results_intinspect = dir_maxProj + preprocessing + 'intensity_inspection/' #rerun just to make sure not to muck up the results -ma.make_output_dirs(dir_results_intinspect,conditions) - -#%% Get maximum projection images - -if len(os.listdir(dir_maxProj_raw + 'control/'))<2: - #Compute maximum projection images (runtime ~= 135 s) - print('Computing maximum projection images') - startTime = datetime.now() - max_img_list = [] - for condition in conditions: - max_img_list += [ma.comp_max_imgs(dir_in + condition, base_name, dir_maxProj_raw + condition)] - endTime = datetime.now() - startTime - print(endTime) -else: - #Read maximum projection images (runtime ~= 20 s ) - print('Reading maximum projection images') - startTime = datetime.now() - max_img_list = [] - for condition in conditions: - max_img_list += [ma.read_max_imgs(dir_maxProj_raw + condition, base_name)] - endTime = datetime.now() - startTime - print(endTime) - -#%% Plot histograms of raw images - -#Takes approx. 29s per condition -for condition, max_img_list_condition in zip(conditions, max_img_list): - ma.plotHist_perChannel_imgset_list(max_img_list_condition, dir_in + condition, dir_results_intinspect + condition, name_tag = 'original') - -#%% Measure the spread of intensities in each channel -"""The spread of intensities is quantified by the standard deviation of the -intensities that are over a threshold. The threshold value is set manually -and it defines the minimum intensity value that is associated with the -presence of protein. Image regions with intensity values under the threshold -are considered as air, i.e. absence of protein.""" - -#Protein presence/absence (air) intensity threshold -th_int = 5 #could it be set automatically based on the data? - -#Compute stds per image channel and, -#for each sample, make a figure with the intensity histograms per image channel. -std_list = [] -for condition, max_img_list_condition in zip(conditions, max_img_list): - std_list += [ma.comp_std_perChannel(max_img_list_condition, th_int, dir_in + condition)] - -# histogram of the intensity-spread per image channel -fig, axs = plt.subplots(len(conditions),4, figsize = (4*2,len(conditions)*2), sharex = True, sharey = True) -plt.suptitle('Distribution of intensity spread across images\n Rows: ' + ', '.join(conditions)) -for ind_c,std_list_condition in enumerate(std_list): - for ind,channel_list in enumerate(std_list_condition): - axs[0][ind+1].title.set_text('Channel ' + str(ind+1)) - axs[ind_c][ind+1].hist(channel_list, bins = 50, density = True) - -# histogram of the intensity-spread per image -axs[0][0].title.set_text('All channels') -std_list_flat = [] -for ind_c,std_list_condition in enumerate(std_list): - std_list_condition_flat = ma.flatten_list(std_list_condition) - axs[ind_c][0].hist(std_list_condition_flat, bins = 50, density = True) - std_list_flat += std_list_condition_flat -plt.show() - -#Average intesity-spread across al image channels -mean_std = sum(std_list_flat)/len(std_list_flat) -print('The average spread of an image channel is ' + str(round(mean_std))) - -#%% Correct intensities -#Consider not using th_int when correcting, only when computing the spread - -#Takes 34s per conditoin -max_img_corr_list = [] -for condition, max_img_list_condition in zip(conditions, max_img_list): - max_img_corr_list += [ma.intensity_spread_normalisation(copy.deepcopy(max_img_list_condition), th_int, mean_std, dir_in + condition, base_name, dir_maxProj_processed + condition)] - -#%% Plot histograms of corrected images - -#Takes 29s -for condition, max_img_corr_list_condition in zip(conditions, max_img_corr_list): - ma.plotHist_perChannel_imgset_list(max_img_corr_list_condition, dir_in + condition, dir_results_intinspect + condition, name_tag = 'corrected') - -#%% Visualise raw vs. corrected images - -for max_img_list_condition, max_img_corr_list_condition, condition in zip(max_img_list, max_img_corr_list, conditions): - ma.compare_imgpairs_list(max_img_list_condition, max_img_corr_list_condition , dir_in + condition, dir_results_intinspect + condition) - -#%%% Print total computational time - -end_time_global = datetime.now() - startTime -print(end_time_global) \ No newline at end of file diff --git a/code/local_features.py b/code/local_features.py deleted file mode 100644 index 11b5b3b8f8e1ecdb0cd2941d0dc5b050d040a4d6..0000000000000000000000000000000000000000 --- a/code/local_features.py +++ /dev/null @@ -1,117 +0,0 @@ -""" -Helping functions for extracting features from images -""" - -import skimage.io -import numpy as np -import matplotlib.pyplot as plt -import scipy.ndimage - -def get_gauss_feat_im(im, sigma=1, normalize=False): - """Gauss derivative feaures for every image pixel. - Arguments: - image: a 2D image, shape (r,c). - sigma: standard deviation for Gaussian derivatives. - normalize: flag indicating normalization of features. - Returns: - imfeat: a 3D array of size (r,c,15) with a 15-dimentional feature - vector for every image pixel. - Author: vand@dtu.dk, 2020 - """ - - r,c = im.shape - imfeat = np.zeros((r,c,15)) - imfeat[:,:,0] = scipy.ndimage.gaussian_filter(im,sigma,order=0) - imfeat[:,:,1] = scipy.ndimage.gaussian_filter(im,sigma,order=[0,1]) - imfeat[:,:,2] = scipy.ndimage.gaussian_filter(im,sigma,order=[1,0]) - imfeat[:,:,3] = scipy.ndimage.gaussian_filter(im,sigma,order=[0,2]) - imfeat[:,:,4] = scipy.ndimage.gaussian_filter(im,sigma,order=[1,1]) - imfeat[:,:,5] = scipy.ndimage.gaussian_filter(im,sigma,order=[2,0]) - imfeat[:,:,6] = scipy.ndimage.gaussian_filter(im,sigma,order=[0,3]) - imfeat[:,:,7] = scipy.ndimage.gaussian_filter(im,sigma,order=[1,2]) - imfeat[:,:,8] = scipy.ndimage.gaussian_filter(im,sigma,order=[2,1]) - imfeat[:,:,9] = scipy.ndimage.gaussian_filter(im,sigma,order=[3,0]) - imfeat[:,:,10] = scipy.ndimage.gaussian_filter(im,sigma,order=[0,4]) - imfeat[:,:,11] = scipy.ndimage.gaussian_filter(im,sigma,order=[1,3]) - imfeat[:,:,12] = scipy.ndimage.gaussian_filter(im,sigma,order=[2,2]) - imfeat[:,:,13] = scipy.ndimage.gaussian_filter(im,sigma,order=[3,1]) - imfeat[:,:,14] = scipy.ndimage.gaussian_filter(im,sigma,order=[4,0]) - - if normalize: - imfeat -= np.mean(imfeat, axis=(0,1)) - imfeat *= (1/np.std(imfeat, axis=(0,1))) - - return imfeat - -def im2col(im, patch_size=[3,3], stepsize=1): - """Rearrange image patches into columns - Arguments: - image: a 2D image, shape (r,c). - patch size: size of extracted paches. - stepsize: patch step size. - Returns: - patches: a 2D array which in every column has a patch associated - with one image pixel. For stepsize 1, number of returned column - is (r-patch_size[0]+1)*(c-patch_size[0]+1) due to bounary. The - length of columns is pathc_size[0]*patch_size[1]. - """ - - r,c = im.shape - s0, s1 = im.strides - nrows =r-patch_size[0]+1 - ncols = c-patch_size[1]+1 - shp = patch_size[0],patch_size[1],nrows,ncols - strd = s0,s1,s0,s1 - - out_view = np.lib.stride_tricks.as_strided(im, shape=shp, strides=strd) - return out_view.reshape(patch_size[0]*patch_size[1],-1)[:,::stepsize] - - -def ndim2col(im, block_size=[3,3], stepsize=1): - """Rearrange image blocks into columns for N-D image (e.g. RGB image)""""" - if(im.ndim == 2): - return im2col(im, block_size, stepsize) - else: - r,c,l = im.shape - patches = np.zeros((l*block_size[0]*block_size[1], - (r-block_size[0]+1)*(c-block_size[1]+1))) - for i in range(l): - patches[i*block_size[0]*block_size[1]:(i+1)*block_size[0]*block_size[1], - :] = im2col(im[:,:,i],block_size,stepsize) - return patches - -#%% - -if __name__ == '__main__': - - filename = '../example_data/training_image.png' - I = skimage.io.imread(filename) - I = I.astype(np.float) - - - # features based on gaussian derivatives - sigma = 1; - gf = get_gauss_feat_im(I, sigma) - - fig,ax = plt.subplots(3,5) - for j in range(5): - for i in range(3): - ax[i][j].imshow(gf[:,:,5*i+j]) - ax[i][j].set_title(f'layer {5*i+j}') - - - # features based on image patches - I = I[I.shape[0]//2:I.shape[0]//2+20,I.shape[1]//2:I.shape[1]//2+20] # smaller image such that we can see the difference in layers - pf = im2col(I,[3,3]) - pf = pf.reshape((9,I.shape[0]-2,I.shape[1]-2)) - - fig, ax = plt.subplots() - ax.imshow(I) - - fig,ax = plt.subplots(3,3) - for j in range(3): - for i in range(3): - ax[i][j].imshow(pf[3*i+j]) - ax[i][j].set_title(f'layer {3*i+j}') - - diff --git a/code/lung_feature_patch_allPatients.py b/code/lung_feature_patch_allPatients.py deleted file mode 100755 index 8b8e9d42e634d5b23c110bbcc33ef2abbcfc9158..0000000000000000000000000000000000000000 --- a/code/lung_feature_patch_allPatients.py +++ /dev/null @@ -1,775 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Script for extracting patches from max-projection images and visualizing their clustering - -Basic functionality by Anders B. Dahl. -- Compute maximum projection images -- Extract patches -- Cluster patches -- Build assignment histograms -- Build probability images from the ratio between the histogram for healthy and diseased. - -Extended functionality by Monica J. Emerson. -- Adapted the above to integrate image normalisation, e.g. modified max proj.image functions. -- Created a pipeline to analyse multiple samples/patients. -- Study of several diseases. -- Added functionality for supporting the analysis of different data versions. -- Computation of health scores per image, sample and patient. -- Visualisation of cluster centers as a grid. -- Boxplots to compare probabilities across samples and to clinical values. -- Normalisation of int_sum_list across images and channels. -- Possibility to ignore the background (air phase). -- Visualisation of assignment images to inspect results and support the development of the approach to ignore background. -- Implementation and investigation of feature variations (colour, bnw, bnw+colour). -- Study of the parameters (nr clusters and scale - relative patch/image size) -- Extended visualisation of cluster centers to support the comparison across diseases and parameters. - a) Visualisation of cluster centers split into channels. - b) Compute a population (p) and condition probability (c) value for each cluster. - c) Identify the presence of "weak" clusters. If they exist, rerun kmeans. - d) Select and visualise characteristic clusters for the conditions based on p and c. - e) Plot all clusters in the population/condition probability space. - f) Order cluster centers according to condition probability -""" - -import numpy as np -import matplotlib.pyplot as plt -import sklearn.cluster -import microscopy_analysis as ma -import skimage.io -import skimage.transform - -import os -from datetime import datetime -import sys -from matplotlib.offsetbox import OffsetImage, AnnotationBbox -from math import ceil - -startTime = datetime.now() - -plt.close('all') - -sc_fac = 0.25 #25 #25 #0.5 # scaling factor of the image -patch_size = 17 -nr_clusters = 100 # number of clusters -colour_mode = 'colour' #'bnw' #colour - -#%% Directories - -version = 'corrected_bis' -preprocessing = '/preprocessed_ignback/' #''(none) -disease = 'diseased' #diseased (mix, 2 of each condition)' #''emphysema' 'sarcoidosis' -conditions = [disease] -conditions.insert(0,'control') - -# input directories - images start with the name 'frame' -dir_in = '../maxProjImages_'+version + preprocessing -# dir_control = dir_in + 'control/' # 200401_109a/' -# dir_sick = dir_in + disease + '/' #191216_100a/' -base_name = 'frame' - -#output directories -dir_results = '../results_monj/patches/data_' + version+'/' #rerun just to make sure not to muck up the results -os.makedirs(dir_results, exist_ok = True) - -dir_probs = dir_results + disease + '_' + colour_mode + '_%dclusters_%ddownscale_%dpatchsize/'%(nr_clusters,1/sc_fac,patch_size) -ma.make_output_dirs(dir_probs, conditions) - -dir_probs_noBack = dir_probs + 'noBackground/' -ma.make_output_dirs(dir_probs_noBack, conditions) - -# if not os.path.exists(dir_probs): -# os.mkdir(dir_probs) -# os.mkdir(dir_probs + 'control/') -# os.mkdir(dir_probs + disease + '/') -# os.mkdir(dir_probs + 'control_withBackground/') -# os.mkdir(dir_probs + disease + '_withBackground/') - -#%% Read (preprocessed) maximum corrected images -#Read maximum projection images (runtime ~= 20 s ) -print('Reading maximum projection images') - -max_img_list_control = ma.read_max_imgs(dir_in + 'control', base_name) -max_img_list_sick = ma.read_max_imgs(dir_in + disease, base_name) - -#TO DO: Consider removing colour mode and rescaling from the read function. - -#%% Prepare maximum projection images for feature extraction - -#Flatten and merge conditions -max_img_list_control_flat = ma.flatten_list(max_img_list_control) -max_img_list_sick_flat = ma.flatten_list(max_img_list_sick) - -#Rescale -max_img_list_control_processed = [] -for max_img in ma.flatten_list(max_img_list_control) : - # if colour_mode == 'bnw': - # max_img_list_flat_processed += [skimage.transform.rescale(skimage.color.rgb2gray(max_img), sc_fac, preserve_range = True).astype('uint8')] - # else: - max_img_list_control_processed += [skimage.transform.rescale(max_img, sc_fac, preserve_range = True, multichannel=True).astype('uint8')] - -max_img_list_sick_processed = [] -for max_img in ma.flatten_list(max_img_list_sick) : - max_img_list_sick_processed += [skimage.transform.rescale(max_img, sc_fac, preserve_range = True, multichannel=True).astype('uint8')] - -#%% Compute patches -patch_feat_list_control = [] -for max_img in max_img_list_control_processed: - patch_feat_list_control += [ma.ndim2col_pad(max_img, (patch_size, patch_size),norm=False).transpose()] - -patch_feat_list_sick = [] -for max_img in max_img_list_sick_processed: - patch_feat_list_sick += [ma.ndim2col_pad(max_img, (patch_size, patch_size),norm=False).transpose()] - -patch_feat_total = patch_feat_list_control + patch_feat_list_sick - -#TO DO: Consider turning patches to bnw here instead -#so we can read the protein content for each patch as the sum of the patch int_sum_list - -#%% features for clustering -nr_keep = 10000 # number of features randomly picked for clustering -n_im = len(patch_feat_total) -feat_dim = patch_feat_total[0].shape[1] -patch_feat_to_cluster = np.zeros((nr_keep*n_im,feat_dim)) -f = 0 -for patch_feat in patch_feat_total: - keep_indices = np.random.permutation(np.arange(patch_feat.shape[0]))[:nr_keep] - patch_feat_to_cluster[f:f+nr_keep,:] = patch_feat[keep_indices,:] - f += nr_keep - -#%% k-means clustering -batch_size = 1000 -th_nr_pathesINcluster = 10 - -#If cluster centers were already computed, use those -if os.path.exists(dir_probs + 'array_cluster_centers_'+colour_mode+'.npy'): - cluster_centers = np.load(dir_probs + 'array_cluster_centers_'+colour_mode+'.npy') - #kmeans = sklearn.cluster.MiniBatchKMeans(n_clusters=nr_clusters, init = cluster_centers, batch_size = batch_size) - #kmeans.fit(patch_feat_to_cluster) - kmeans = sklearn.cluster.MiniBatchKMeans(n_clusters=nr_clusters, batch_size = batch_size) - kmeans.cluster_centers_ = cluster_centers - -#Compute cluster centers and save if there is no weak clusters -else: - kmeans = sklearn.cluster.MiniBatchKMeans(n_clusters=nr_clusters, batch_size = batch_size) - kmeans.fit(patch_feat_to_cluster) - - #Nr. patches that have contributed to each cluster - features_in_cluster = [] - for cluster in range(0,nr_clusters): - features_in_cluster += [[ind for ind,i in enumerate(kmeans.labels_) if i==cluster]] - nr_feat_in_cluster = [len(i) for i in features_in_cluster] - - #Weak clusters are those composed by very few patches - nr_weakClusters = len([1 for i in nr_feat_in_cluster if i<th_nr_pathesINcluster]) - - #If there are weak clusters, the clustering should be recomputed - if nr_weakClusters!=0: - sys.exit(str(nr_weakClusters)+ " clusters composed of less than "+str(th_nr_pathesINcluster)+" images") - else: - np.save(dir_probs + 'array_cluster_centers_'+colour_mode+'.npy', kmeans.cluster_centers_) # .npy extension is added if not given - -#%% Plot all cluster centers (grid view) - -#grid dimensions -size_x = round(nr_clusters**(1/2)) -size_y = ceil(nr_clusters/size_x) - -#figure format -w, h = plt.figaspect(size_x/size_y) -fig, axs = plt.subplots(size_x,size_y, figsize=(w,h), sharex=True, sharey=True) - -print('Grid size: ', size_x, size_y, 'Figure size: ', w, h) - -#plot cluster centers -int_sum_list = [] -ax_list = axs.ravel() -for ind in np.arange(0,nr_clusters): - if colour_mode == 'bnw': #in bnw + colour give the clusters a uniform colour - cluster_centre = np.reshape(kmeans.cluster_centers_[ind,:],(patch_size,patch_size)) - int_sum_list += [sum((cluster_centre).ravel())] - ax_list[ind].imshow(cluster_centre.astype('uint8'),cmap='gray') - else: - cluster_centre = np.transpose((np.reshape(kmeans.cluster_centers_[ind,:],(3,patch_size,patch_size))),(1,2,0)) - int_sum_list += [sum((np.max(cluster_centre,2)).ravel())] - ax_list[ind].imshow(cluster_centre.astype('uint8')) - -plt.setp(axs, xticks=[], yticks=[]) -plt.savefig(dir_probs + 'clustercenters_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) - -#%% Histograms for background, healthy and sick -hist_control, assignment_list_control= ma.compute_assignment_hist(patch_feat_list_control, kmeans) -hist_sick, assignment_list_sick = ma.compute_assignment_hist(patch_feat_list_sick, kmeans) - -#%% show bar plot of healthy and sick - -fig, ax = plt.subplots(1,1) -ax.bar(np.array(range(0,nr_clusters))-0.25, hist_control, width = 0.5, label='Control', color = 'r') -ax.bar(np.array(range(0,nr_clusters))+0.25, hist_sick, width = 0.5, label='Sick', color = 'b') -ax.legend() -plt.savefig(dir_probs + 'assignmentHistograms_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) - -#%% Compute populated, occurrence and importance cluster measures from the histogram - -#OCCURRENCE RATIO: How more often does it occur in one of the conditions -occurrence_ratio = np.empty((nr_clusters)) -ind_control = hist_control>hist_sick -ind_sick = ~ind_control -control_cond = (hist_sick!=0) & ind_control -occurrence_ratio[control_cond] = hist_control[control_cond]/hist_sick[control_cond] -sick_cond = (hist_control!=0) & ind_sick -occurrence_ratio[sick_cond] = -hist_sick[sick_cond]/hist_control[sick_cond] - -#POPULATED How trustworthy is the cluster? -populated = hist_control+hist_sick - -weights = populated/populated.max() - -#Cluster IMPORTANCE: occurrence weighed by POPULATED -importance = ((2*np.exp(populated/populated.max())-1)*occurrence_ratio).reshape((size_x,size_y)) -importance[importance>2*importance.std()] = 2*importance.std() -importance[importance<-2*importance.std()] = -2*importance.std() - - -#%% Plot measures as images - -#OCCURRENCE RATIO -fig, ax = plt.subplots(figsize=(5,5)) -plt.imshow(occurrence_ratio.reshape((size_x,size_y)),cmap = 'PiYG') -plt.colorbar() -plt.setp(axs, xticks=[], yticks=[]) -plt.title('Condition dominance \n Dominant condition (+ control, - disease)') - -#POPULATED -fig, ax = plt.subplots(figsize=(5,5)) -plt.imshow(populated.reshape((size_x,size_y)),cmap = 'PiYG') -plt.colorbar() -plt.setp(axs, xticks=[], yticks=[]) -plt.title('Cluster population ratio \n Distribution of patches across clusters') - -#Cluster IMPORTANCE: occurrence times POPULATED -fig, ax = plt.subplots(figsize=(5,5)) -plt.imshow((populated*occurrence_ratio).reshape((size_x,size_y)),cmap = 'PiYG') -plt.colorbar() -plt.setp(ax, xticks=[], yticks=[]) -plt.title('Cluster importance \n Occurrence ratio*population') - -#Saturated cluster IMPORTANCE: occurrence times POPULATED Saturated -fig, ax = plt.subplots(figsize=(5,5)) - -plt.imshow(importance, cmap = 'PiYG', vmin=-2*importance.std(), vmax=2*importance.std()) -colorbar = plt.colorbar() -limit_cmap = max(occurrence_ratio) -ticks = np.linspace(-limit_cmap,limit_cmap,len(colorbar.get_ticks())) -tick_labels = [str(i) for i in ticks.tolist()] -colorbar.set_ticklabels(np.linspace(-limit_cmap,limit_cmap,len(colorbar.get_ticks()))) -plt.setp(ax, xticks=[], yticks=[]) -plt.title('Cluster importance for the condition \n Healthy (green), ' + str(disease) + ' (pink)') - -plt.savefig(dir_probs + 'clusterCentreImportance_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) - -#%% Plot grid of ordered clusters - -#Order the clusters according to importance -clusters_control_importance_sorted = [np.arange(0,100)[i] for i in np.argsort(importance.flatten()) if (importance.flatten())[i]>0] -clusters_control_importance_sorted.reverse() - -clusters_sick_importance_sorted = [np.arange(0,100)[i] for i in np.argsort(importance.flatten()) if (importance.flatten())[i]<0] - -clusters_importance_sorted = [np.arange(0,100)[i] for i in np.argsort(importance.flatten())] -clusters_importance_sorted.reverse() - -occurrence_ratio_sorted = [occurrence_ratio[i] for i in np.argsort(importance.flatten()) ] -occurrence_ratio_sorted.reverse() - -ma.plot_grid_cluster_centers(kmeans.cluster_centers_, clusters_importance_sorted, patch_size, colour_mode = 'colour', occurrence = occurrence_ratio_sorted) - -plt.suptitle('Cluster centers \n from more characteristic of control (0, ' + str(len(clusters_control_importance_sorted)) + ') to more characteristic of sick') -plt.savefig(dir_probs + 'clustercenters_sorted_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) - -#%% Plot cluster centers in the 2d populated/occurrence space - -fig_control, ax_control = plt.subplots(figsize=(15,15)) -ax_control.scatter(populated[occurrence_ratio>1], occurrence_ratio[occurrence_ratio>1]) -ax_control.set_ylim(0.8,7) -ax_control.set_xlim(0,0.08) - -fig_sick, ax_sick = plt.subplots(figsize=(15,15)) -ax_sick.scatter(populated[occurrence_ratio<1], -occurrence_ratio[occurrence_ratio<1]) -ax_sick.set_ylim(0.8,7) -ax_sick.set_xlim(0,0.08) - -for x0, y0, cluster_nr in zip(populated, occurrence_ratio, np.arange(0,nr_clusters)): - if colour_mode == 'bnw': - cluster_centre = np.reshape(kmeans.cluster_centers_[cluster_nr,:],(patch_size,patch_size)) - else: - cluster_centre = np.transpose((np.reshape(kmeans.cluster_centers_[cluster_nr,:],(3,patch_size,patch_size))),(1,2,0)) - if y0>0: - if colour_mode == 'bnw': - ab = AnnotationBbox(OffsetImage(cluster_centre.astype('uint8'),cmap='gray'), (x0, y0), frameon=False) - else: - ab = AnnotationBbox(OffsetImage(cluster_centre.astype('uint8')), (x0, y0), frameon=False) - ax_control.add_artist(ab) - plt.savefig(dir_probs + 'controlClustercenters_2Dspace_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) - else: - if colour_mode == 'bnw': - ab = AnnotationBbox(OffsetImage(cluster_centre.astype('uint8'),cmap='gray'), (x0, -y0), frameon=False) - else: - ab = AnnotationBbox(OffsetImage(cluster_centre.astype('uint8')), (x0, -y0), frameon=False) - ax_sick.add_artist(ab) - plt.savefig(dir_probs + 'sickClustercenters_2Dspace_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) - - -#%% Find background clusters -th_back = 10000 -#background clusters -clusters_background_intBased = [i for i in range(len(int_sum_list)) if int_sum_list[i] < 8000] -#clusters_background_annotBased = [ind for ind, value in enumerate(hist_background) if value>0] - -clusters_background = clusters_background_intBased -#clusters_background = list(set(clusters_background_intBased) | set(clusters_background_annotBased)) -print('Background clusters'+str(clusters_background)) - -#%% Plot grid of ordered, non-background clusters that are populated over a thresh -th_populated = 0.4/nr_clusters -th_occurr = 1.25 - -#control -clusters_control_importance_sorted_noBackground = [i for i in clusters_control_importance_sorted if (i not in clusters_background)&(populated[i]>th_populated)&(occurrence_ratio[i]>th_occurr)] - -ma.plot_grid_cluster_centers(kmeans.cluster_centers_, clusters_control_importance_sorted_noBackground, patch_size, colour_mode = 'colour') - -plt.suptitle('Characteristic clusters for control, excluding background, \n from most important to least') -plt.savefig(dir_probs_noBack + 'clustercenters_control_sorted_noBack%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) - -#disease -clusters_sick_importance_sorted_noBackground = [i for i in clusters_sick_importance_sorted if (i not in clusters_background)&(populated[i]>th_populated)&(abs(occurrence_ratio[i])>th_occurr)] - -ma.plot_grid_cluster_centers(kmeans.cluster_centers_, clusters_sick_importance_sorted_noBackground, patch_size, colour_mode = 'colour') - -plt.suptitle('Charactertistic clusters for ' + disease + ', excluding background, \n from most important to least') -plt.savefig(dir_probs_noBack + 'clustercenters_sick_sorted_noBack%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) - -# #%% Find characteristic clusters -# th_proportion = 2#2 #2.4 -# th_populated_condition = 0.01#0.005#0.015 - -# clusters_sick = (hist_sick>th_populated_condition)&(hist_sick>th_proportion*hist_control) -# clusters_sick = [ind for ind,value in enumerate(clusters_sick) if value == True] -# clusters_control = (hist_control>th_populated_condition)&(hist_control>th_proportion*hist_sick) -# clusters_control = [ind for ind,value in enumerate(clusters_control) if value == True] - -# #eliminate background clusters if contained here -# clusters_control = [i for i in clusters_control if i not in clusters_background] -# clusters_sick = [i for i in clusters_sick if i not in clusters_background] - -# print('Clusters characteristic of the ' + disease + ' tissue',clusters_sick) -# print('Clusters characteristic of the control tissue',clusters_control) - - -#%% Plot centers of the characteristic clusters -nr_clusters_display = 10 -clusters_control = clusters_control_importance_sorted_noBackground[1:10] - -#control clusters and contrast enhanced -cluster_centers_control = kmeans.cluster_centers_[clusters_control] -fig, axs = plt.subplots(1,len(clusters_control), figsize=(len(clusters_control)*3,3), sharex=True, sharey=True) -fig.suptitle('Cluster centers for control') -if colour_mode!='bnw': - fig_split, axs_split = plt.subplots(3,len(clusters_control), figsize=(len(clusters_control)*3,9), sharex=True, sharey=True) - fig_split.suptitle('Control centers split channels (contrast enhanced)') -for l in np.arange(0,len(clusters_control)): - if colour_mode == 'bnw': - cluster_centre = np.reshape(cluster_centers_control[l,:],(patch_size,patch_size)) - axs[l].imshow(cluster_centre.astype(np.uint8),cmap='gray') - axs[l].axis('off') - axs[l].set_title(clusters_control[l]) - else: - cluster_centre = np.transpose((np.reshape(cluster_centers_control[l,:],(3,patch_size,patch_size))),(1,2,0)) - axs_split[0][l].imshow(cluster_centre[...,0].astype(np.uint8),cmap='gray') - axs_split[0][l].axis('off') - axs_split[0][l].set_title(clusters_control[l]) - axs_split[1][l].imshow(cluster_centre[...,1].astype(np.uint8),cmap='gray') - axs_split[1][l].axis('off') - axs_split[2][l].imshow(cluster_centre[...,2].astype(np.uint8),cmap='gray') - axs_split[2][l].axis('off') - plt.figure(fig_split.number), - plt.savefig(dir_probs + 'clustercenters_control_splitContrastEnhanced_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) - axs[l].imshow(cluster_centre.astype(np.uint8)) - axs[l].axis('off') - axs[l].set_title(clusters_control[l]) - plt.figure(fig.number), - plt.savefig(dir_probs + 'clustercenters_control_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) - -#sick clusters and contrast enhanced -clusters_sick = clusters_sick_importance_sorted_noBackground[1:10] - -cluster_centers_sick = kmeans.cluster_centers_[clusters_sick] -fig, axs = plt.subplots(1,len(clusters_sick), figsize=(len(clusters_sick)*3,3), sharex=True, sharey=True) -fig.suptitle('Cluster centers for sick') -if colour_mode!='bnw': - fig_split, axs_split = plt.subplots(3,len(clusters_sick), figsize=(len(clusters_sick)*3,9), sharex=True, sharey=True) - fig_split.suptitle('sick centers split channels (contrast enhanced)') -for l in np.arange(0,len(clusters_sick)): - if colour_mode == 'bnw': - cluster_centre = np.reshape(cluster_centers_sick[l,:],(patch_size,patch_size)) - axs[l].imshow(cluster_centre.astype(np.uint8),cmap='gray') - axs[l].axis('off') - axs[l].set_title(clusters_sick[l]) - else: - cluster_centre = np.transpose((np.reshape(cluster_centers_sick[l,:],(3,patch_size,patch_size))),(1,2,0)) - axs_split[0][l].imshow(cluster_centre[...,0].astype(np.uint8),cmap='gray') - axs_split[0][l].axis('off') - axs_split[0][l].set_title(clusters_sick[l]) - axs_split[1][l].imshow(cluster_centre[...,1].astype(np.uint8),cmap='gray') - axs_split[1][l].axis('off') - axs_split[2][l].imshow(cluster_centre[...,2].astype(np.uint8),cmap='gray') - axs_split[2][l].axis('off') - plt.figure(fig_split.number), - plt.savefig(dir_probs + 'clustercenters_'+disease+'_splitContrastEnhanced_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) - axs[l].imshow(cluster_centre.astype(np.uint8)) - axs[l].axis('off') - axs[l].set_title(clusters_sick[l]) - plt.figure(fig.number), - plt.savefig(dir_probs + 'clustercenters_'+disease+'_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) - -if colour_mode!='bnw': - #plot sick and control clusters with the same intensity range - max_value = [] - min_value = [] - for l in np.arange(0,len(clusters_control)): - cluster_centre = np.transpose((np.reshape(cluster_centers_control[l,:],(3,patch_size,patch_size))),(1,2,0)) - max_value += [cluster_centre.max()] - min_value += [cluster_centre.min()] - - for l in np.arange(0,len(clusters_sick)): - cluster_centre = np.transpose((np.reshape(cluster_centers_sick[l,:],(3,patch_size,patch_size))),(1,2,0)) - max_value += [cluster_centre.max()] - min_value += [cluster_centre.min()] - - range_max = max(max_value) - range_min = min(min_value) - - fig_split, axs_split = plt.subplots(3,len(clusters_control), figsize=(len(clusters_control)*3,9),sharex=True, sharey=True) - fig_split.suptitle('Control centers split channels (fixed intensity range)') - for l in np.arange(0,len(clusters_control)): - cluster_centre = np.transpose((np.reshape(cluster_centers_control[l,:],(3,patch_size,patch_size))),(1,2,0)) - im = np.zeros(cluster_centre.shape) - im[...,0] = cluster_centre[...,0] - axs_split[0][l].imshow(im.astype(np.uint8)) - axs_split[0][l].axis('off') - axs_split[0][l].set_title(clusters_control[l]) - im = np.zeros(cluster_centre.shape) - im[...,1] = cluster_centre[...,1] - axs_split[1][l].imshow(im.astype(np.uint8)) - axs_split[1][l].axis('off') - im = np.zeros(cluster_centre.shape) - im[...,2] = cluster_centre[...,2] - axs_split[2][l].imshow(im.astype(np.uint8)) - axs_split[2][l].axis('off') - plt.savefig(dir_probs + 'clustercenters_control_split_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) - - - fig_split, axs_split = plt.subplots(3,len(clusters_sick), figsize=(len(clusters_sick)*3,9),sharex=True, sharey=True) - fig_split.suptitle('sick centers split channels (fixed intensity range)') - for l in np.arange(0,len(clusters_sick)): - cluster_centre = np.transpose((np.reshape(cluster_centers_sick[l,:],(3,patch_size,patch_size))),(1,2,0)) - im = np.zeros(cluster_centre.shape) - im[...,0] = cluster_centre[...,0] - axs_split[0][l].imshow(im.astype(np.uint8),vmin = range_min, vmax = range_max) - axs_split[0][l].axis('off') - im = np.zeros(cluster_centre.shape) - im[...,1] = cluster_centre[...,1] - axs_split[0][l].set_title(clusters_sick[l]) - axs_split[1][l].imshow(im.astype(np.uint8),vmin = range_min, vmax = range_max) - axs_split[1][l].axis('off') - im = np.zeros(cluster_centre.shape) - im[...,2] = cluster_centre[...,2] - axs_split[2][l].imshow(im.astype(np.uint8),vmin = range_min, vmax = range_max) - axs_split[2][l].axis('off') - plt.savefig(dir_probs + 'clustercenters_'+disease+'_split_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) - - - -#%% display - -startTime_probs = datetime.now() - -r,c = max_im_list_control[0].shape[:2] - -version = '_withBackground' #''#'_withBackground' #'' - -# prob_control_back = hist_control/(hist_control+hist_sick) -# prob_sick_back = hist_sick/(hist_control+hist_sick) -prob_control= hist_control/(hist_control+hist_sick) -prob_sick = hist_sick/(hist_control+hist_sick) - -#Assign equal probability to the background clusters -# prob_sick = prob_sick_back -# prob_control = prob_control_back -if version == '': - prob_sick[clusters_background] = 0.5 - prob_control[clusters_background] = 0.5 -else: - if not os.path.exists(dir_probs + 'control'+version+'/'): - os.mkdir(dir_probs + 'control'+version+'/') - os.mkdir(dir_probs + disease +version+'/') - - -#Pixel-wise probabilities for control patients -prob_im_control = [] -for assignment in assignment_list_control: - prob = prob_control[assignment.astype(int)] - prob_im_control += [prob.reshape(r,c)] - -# prob_im_control_back = [] -# for assignment in assignment_list_control: -# prob_back = prob_control_back[assignment.astype(int)] -# prob_im_control_back += [prob_back.reshape(r,c)] - -prob_im_list = [] -prob_px_list = [] -nr_list = 0 -for directory in dir_list_control: - in_dir = dir_control + directory + '/' - dir_list = [dI for dI in os.listdir(in_dir) if dI[0:len(base_name)]==base_name] - print(dir_list) - im_all_control = [] - im_all_control += prob_im_control[nr_list:nr_list+len(dir_list)] - im_all_control += max_im_list_control[nr_list:nr_list+len(dir_list)] - fig, axs = plt.subplots(2,len(dir_list), figsize = (30,10), sharex=True, sharey=True) - nr = 0 - prob_sample = np.zeros((len(dir_list),1)) - for ax, im in zip(axs.ravel(), im_all_control): - print(nr) - if nr<len(dir_list): - if version == '': - prob_im = sum(im[im!=0.5])/(im[im!=0.5]).size - else: - prob_im = sum(sum(im))/(r*c) - prob_sample[nr] = prob_im - - ax.set_title('Pc '+str(round(prob_im,2))) - #print(prob_sample) - # elif nr<2*len(dir_list): - # prob_im_back = sum(im)/(r*c) - - nr += 1 - if nr<=len(im_all_control)/2: - #ax.imshow(skimage.transform.resize(im, (1024,1024)), cmap=plt.cm.bwr, vmin = 0, vmax = 1) - ax.imshow(im, cmap=plt.cm.bwr, vmin = 0, vmax = 1) - else: - #ax.imshow(skimage.transform.resize(im.astype(np.uint8), (1024,1024))) - if colour_mode == 'bnw': - ax.imshow(im.astype(np.uint8),cmap='gray') - else: - ax.imshow(im.astype(np.uint8)) - prob_im_list += [prob_sample] - #prob_px_list += [(np.array(im_all_control[:len(dir_list)])).ravel()] - plt.suptitle('Probability control of sample '+str(directory)+', avg:' + str(round(prob_sample.mean(),2))+ ' std:' + str(round(prob_sample.std(),2))) - plt.savefig(dir_probs + 'control'+version+'/' + 'probImControl_'+str(directory)+'_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) - plt.show() - nr_list += len(dir_list) - -#Pixel-wise cluster assignments for control patients -if True: #sc_fac == 1: - assignment_im_control = [] - for assignment in assignment_list_control: - cluster_assignment = assignment.astype(int) - assignment_im_control += [cluster_assignment.reshape(r,c)] - - nr_list = 0 - for directory in dir_list_control: - in_dir = dir_control + directory + '/' - dir_list = [dI for dI in os.listdir(in_dir) if dI[0:len(base_name)]==base_name] - print(dir_list) - im_all_control = [] - im_all_control += assignment_im_control[nr_list:nr_list+len(dir_list)] - im_all_control += max_im_list_control[nr_list:nr_list+len(dir_list)] - fig, axs = plt.subplots(2,len(dir_list), figsize = (30,10), sharex=True, sharey=True) - for ax, im in zip(axs.ravel(), im_all_control): - if ( im.ndim == 2 ): - #ax.imshow(skimage.transform.resize(im, (1024,1024), order=0, preserve_range = True),cmap=plt.cm.gist_ncar,vmin = 0,vmax = nr_clusters-1) - ax.imshow(im,cmap=plt.cm.gist_ncar,vmin = 0,vmax = nr_clusters-1) - - else: - #ax.imshow(skimage.transform.resize(im.astype(np.uint8), (1024,1024))) - ax.imshow(im.astype(np.uint8)) - plt.suptitle('Sample '+str(directory)) - plt.savefig(dir_probs + 'control/' + 'assignmentImControl_'+str(directory)+'_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) - plt.show() - nr_list += len(dir_list) - -#Pixel-wise probabilities for sick patients -prob_im_sick = [] -for assignment in assignment_list_sick: - prob = prob_control[assignment.astype(int)] - prob_im_sick += [prob.reshape(r,c)] - -nr_list = 0 -for directory in dir_list_sick: - in_dir = dir_sick + directory + '/' - dir_list = [dI for dI in os.listdir(in_dir) if dI[0:len(base_name)]==base_name] - print(dir_list) - im_all_sick = [] - im_all_sick += prob_im_sick[nr_list:nr_list+len(dir_list)] - im_all_sick += max_im_list_sick[nr_list:nr_list+len(dir_list)] - fig, axs = plt.subplots(2,len(dir_list), figsize = (30,10), sharex=True, sharey=True) - nr = 0 - prob_sample = np.zeros((len(dir_list),1)) - for ax, im in zip(axs.ravel(), im_all_sick): - print(nr) - if nr<len(dir_list): - if version == '': - prob_im = sum(im[im!=0.5])/(im[im!=0.5]).size - else: - prob_im = sum(sum(im))/(r*c) - #print(prob_im) - prob_sample[nr] = prob_im - ax.set_title('Pc '+str(round(prob_im,2))) - #print(prob_sample) - nr += 1 - if ( im.ndim == 2 ): - #ax.imshow(skimage.transform.resize(im, (1024,1024)), cmap=plt.cm.bwr, vmin = 0, vmax = 1) - ax.imshow(im, cmap=plt.cm.bwr, vmin = 0, vmax = 1) - else: - #ax.imshow(skimage.transform.resize(im.astype(np.uint8), (1024,1024))) - ax.imshow(im.astype(np.uint8)) - prob_im_list += [prob_sample] - prob_px_list += [(np.array(im_all_sick[:len(dir_list)])).ravel()] - plt.suptitle('Probability control of sample '+str(directory)+', avg:' + str(round(prob_sample.mean(),2))+ ' std:' + str(round(prob_sample.std(),2))) - plt.savefig(dir_probs + disease +version+'/' + 'probImSick_'+str(directory)+'_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) - plt.show() - nr_list += len(dir_list) - - -#Pixel-wise cluster assignments for sick patients -if True: #sc_fac == 1: - assignment_im_sick = [] - for assignment in assignment_list_sick: - cluster_assignment = assignment.astype(int) - assignment_im_sick += [cluster_assignment.reshape(r,c)] - - nr_list = 0 - for directory in dir_list_sick: - in_dir = dir_sick + directory + '/' - dir_list = [dI for dI in os.listdir(in_dir) if dI[0:len(base_name)]==base_name] - print(dir_list) - im_all_sick = [] - im_all_sick += assignment_im_sick[nr_list:nr_list+len(dir_list)] - im_all_sick += max_im_list_sick[nr_list:nr_list+len(dir_list)] - fig, axs = plt.subplots(2,len(dir_list), figsize = (30,10), sharex=True, sharey=True) - for ax, im in zip(axs.ravel(), im_all_sick): - if ( im.ndim == 2 ): - #ax.imshow(skimage.transform.resize(im, (1024,1024), order=0, preserve_range = True),cmap=plt.cm.gist_ncar,vmin = 0,vmax = nr_clusters-1) - ax.imshow(im,cmap=plt.cm.gist_ncar,vmin = 0,vmax = nr_clusters-1) - else: - #ax.imshow(skimage.transform.resize(im.astype(np.uint8), (1024,1024))) - ax.imshow(im.astype(np.uint8)) - plt.suptitle('Sample '+str(directory)) - plt.savefig(dir_probs + disease + '/' + 'assignmentImSick_'+str(directory)+'_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) - plt.show() - nr_list += len(dir_list) - -endTime_probs = datetime.now() - startTime_probs -print(endTime_probs) - -#%% Print computational time -endTime = datetime.now() - startTime -print(endTime) - -#%% Box plots -fig, ax = plt.subplots(figsize = (15,5)) -ax.set_title('Probability of scans capturing healthy tissue') -array_probs = np.squeeze(np.asarray(prob_im_list)) -bp_healthy = ax.boxplot((array_probs[:12,...]).T, positions = range(12), patch_artist = True) -bp_sick = ax.boxplot(array_probs[12:,...].T, positions = range(13,25), patch_artist = True) - -for element in ['boxes', 'whiskers', 'fliers', 'means', 'medians', 'caps']: - plt.setp(bp_healthy[element], color='black') - -for patch in bp_healthy['boxes']: - patch.set(facecolor='red') - -for element in ['boxes', 'whiskers', 'fliers', 'means', 'medians', 'caps']: - plt.setp(bp_sick[element], color='black') - -for patch in bp_sick['boxes']: - patch.set(facecolor='blue') - -ax.set_xticklabels([i[-4:] for i in dir_list_control]+[i[-4:] for i in dir_list_sick]) -ax.set_ylim(0.35,0.65) -ax.set_ylabel('Probability health') -#ax.set_ylim(0,1) - -ax.tick_params( - axis='both', # changes apply to the x-axis - which='both', # both major and minor ticks are affected - #labelleft = False, # ticks along the top edge are off - #labelbottom=False - ) - -if disease == 'emphysema': - #overlay FEV1 values - ax2 = ax.twinx() - ax2.set_ylabel('FEV1') - FEV1 = np.array([99.00, 98.90,np.nan,116.00,118.70,99.70, 26, 27.20, 31.00, 25.00, 22.00, 33.70]) - patients = np.concatenate((np.arange(0.5, 12.5, 2), np.arange(13.5,25.5,2))) - ax2.plot(patients,FEV1,'kD') - ax2.set_ylim(-20,160) - -plt.savefig(dir_probs + 'boxPlots'+version+'_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) - -if disease == 'sarcoidosis': - #overlay FEV1 values - ax2 = ax.twinx() - ax2.set_ylabel('FVC') - FVC = np.array([99.00, 115.60,np.nan,121.00,118.70,119.50,60.20,54.00,75.30,27.00,48.50,40.30]) - patients = np.concatenate((np.arange(0.5, 12.5, 2), np.arange(13.5,25.5,2))) - ax2.plot(patients,FVC,'kD') - ax2.set_ylim(-20,160) - -plt.savefig(dir_probs + 'boxPlots'+version+'_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) - -#%% Plot lung functions from the clinic - -# cmap = plt.cm.bwr -# it = 0 -# for im in prob_im_control: -# im_tmp = skimage.transform.resize(im,(1024,1024),order=3) -# norm = plt.Normalize(vmin=0, vmax=1) -# im_out = cmap(norm(im_tmp)) -# out_name = '../results/prob_map/control_%02d.png' % int(it) -# skimage.io.imsave(out_name, (255*im_out).astype(np.uint8)) -# it += 1 - -# it = 0 -# for im in prob_im_sick: -# im_tmp = skimage.transform.resize(im,(1024,1024),order=3) -# norm = plt.Normalize(vmin=0, vmax=1) -# im_out = cmap(norm(im_tmp)) -# out_name = '../results/prob_map/sick_%02d.png' % int(it) -# skimage.io.imsave(out_name, (255*im_out).astype(np.uint8)) -# it += 1 - -# #%% -# max_im_list_control = ma.compute_max_im_list(dir_control, '.png', 'frame', 1) -# max_im_list_sick = ma.compute_max_im_list(dir_sick, '.png', 'frame', 1) -# #%% -# it = 0 -# for im in max_im_list_control: -# out_name = '../results/prob_map/control_max_im_%02d.png' % int(it) -# skimage.io.imsave(out_name, (im).astype(np.uint8)) -# it += 1 - -# it = 0 -# for im in max_im_list_sick: -# out_name = '../results/prob_map/sick_max_im_%02d.png' % int(it) -# skimage.io.imsave(out_name, (im).astype(np.uint8)) -# it += 1 - - - - - - diff --git a/code/lung_lif_to_png.py b/code/lung_lif_to_png.py deleted file mode 100644 index 76275fd304d966b5e68ebf9926cab492566f0b33..0000000000000000000000000000000000000000 --- a/code/lung_lif_to_png.py +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Transform lif files to png for easy access and visualization -""" - - -import numpy as np -from readlif.reader import LifFile -import skimage.io -import os -import microscopy_analysis as ma - -#%% -# folder containing lif files -in_dir = '../../data101220/'# data_October2020/sarcoidosis/' #'../../5April2020_control/' #healthy #'../../24December19_emphysema/' #sick -# output folder -out_dir = '../data_corrected_bis/sarcoidosis/' #control/ #/emphysema/ -file_names = [f for f in os.listdir(in_dir) if f.endswith('lif')] -file_names = ma.sort_dirlist_patient(file_names) - -# make output directory if it does not exist -if not os.path.exists(out_dir): - os.mkdir(out_dir) - -# run through all lif files and save as png for easy access and visualization -for file in file_names: - lif_in = LifFile(in_dir + file) - n_im = lif_in.num_images - dir_name_out = (out_dir + (file.split('.')[0]).replace(' ', '_') + '/') - if not os.path.exists(dir_name_out): - os.mkdir(dir_name_out) - for n in range(0,n_im): - im_in = lif_in.get_image(n) - r,c,n_frames = im_in.dims[:3] - n_ch = im_in.channels - if n_ch == 3: - dir_name_out_im = dir_name_out + ('/frame_%02d/' % n) - if not os.path.exists(dir_name_out_im): - os.mkdir(dir_name_out_im) - im_out = np.zeros((r,c,n_ch), dtype='uint8') - for i in range(0,n_frames): - for j in range(0,n_ch): - im_out[:,:,j] = np.array(im_in.get_frame(i,0,j)).transpose() - skimage.io.imsave(dir_name_out_im + '/image_%02d.png' % i, im_out) - diff --git a/code/microscopy_analysis.py b/code/microscopy_analysis.py deleted file mode 100644 index da70b732465a1294461a375f0756dfa73e44b23d..0000000000000000000000000000000000000000 --- a/code/microscopy_analysis.py +++ /dev/null @@ -1,439 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Helping functions for analysis of lif microscopy images -""" - -import os -import skimage.io as io -import skimage.transform -import numpy as np -import local_features as lf - -import matplotlib.pyplot as plt -from math import ceil - -#%% General functions - -# sort list of directories from selected string position -def sort_strings_customStart(strings, string_start_pos = 0): - """Sorts list of strings - - Input(s) - strings: list of strings - string_start_pos (default 0): defines the section of the string on which the ordering is based. - - Output(s) - strings_sortered: list of sorted strings - - Author: Monica Jane Emerson (monj@dtu.dk)""" - - strings_roi = [i[string_start_pos:] for i in strings] - #key indicates that the indices (k) are sorted according to subjects_list[k] - ind = np.array(sorted(range(len(strings_roi)), key=lambda k: strings_roi[k])) - strings_sorted = [strings[i] for i in ind] - - return strings_sorted - -#Provide the contents of a directory sorted -def listdir_custom(directory, string_start_pos = 0, dir_flag = False, base_name = False): - 'monj@dtu.dk' - - if dir_flag: - if base_name: - list_dirs = [dI for dI in os.listdir(directory) if (os.path.isdir(os.path.join(directory,dI)) & dI[0:len(base_name)]==base_name)] - else: - list_dirs = [dI for dI in os.listdir(directory) if os.path.isdir(os.path.join(directory,dI))] - else: - if base_name: - list_dirs = [dI for dI in os.listdir(directory)if dI[0:len(base_name)]==base_name] - else: - list_dirs = [dI for dI in os.listdir(directory)] - - listdirs_sorted = sort_strings_customStart(list_dirs,string_start_pos) - - return listdirs_sorted - -def flatten_list(list): - 'monj@dtu.dk' - - list_flat = [item for sublist in list for item in sublist] - - return list_flat - -#%% IO functions - -#make directory, and subdirectories within, if they don't exist -def make_output_dirs(directory,subdirectories = False): - 'monj@dtu.dk' - - os.makedirs(directory, exist_ok = True) - - if subdirectories: - for subdir in subdirectories: - os.makedirs(directory + subdir + '/', exist_ok = True) - - # def make_output_dirs(directory,subdirectories): - # 'monj@dtu.dk' - - # os.makedirs(directory, exist_ok = True) - # os.makedirs(directory + 'control/', exist_ok = True) - - # for disease in diseases: - # os.makedirs(directory + disease + '/', exist_ok = True) - - -#Reads images starting with base_name from the subdirectories of the input directory. -#Option for reading scaled down versions and in bnw or colour -def read_max_imgs(dir_condition, base_name, sc_fac = 1, colour_mode = 'colour'): - 'monj@dtu.dk' - - sample_list = listdir_custom(dir_condition, string_start_pos = -4, dir_flag = True) - #print(sample_list) - - max_img_list = [] - for sample in sample_list: - sample_dir = dir_condition + '/' + sample + '/' - frame_list = listdir_custom(sample_dir, base_name = base_name) - #print(frame_list) - - frame_img_list = [] - for frame in frame_list: - frame_path = sample_dir + frame - - #Option to load in bnw or colour - if colour_mode == 'bnw': - img = io.imread(frame_path, as_gray = True).astype('uint8') - if sc_fac ==1: - frame_img_list += [img] - else: - frame_img_list += [skimage.transform.rescale(img, sc_fac, preserve_range = True).astype('uint8')] - else: - img = io.imread(frame_path).astype('uint8') - if sc_fac == 1: - frame_img_list += [img] - else: - frame_img_list += [skimage.transform.rescale(img, sc_fac, preserve_range = True, multichannel=True).astype('uint8')] - - max_img_list += [frame_img_list] - #print(frame_img_list[0].dtype) - - return max_img_list - -#%% Functions for intensity inspection and image preprocessing - -# computes the maximum projection image from a directory of images -def get_max_img(in_dir, ext = '.png', n_img = 0): - file_names = [f for f in os.listdir(in_dir) if f.endswith(ext)] - file_names.sort() - if ( n_img < 1 ): - n_img = len(file_names) - img_in = io.imread(in_dir + file_names[0]) - for i in range(1,n_img): - img_in = np.maximum(img_in, io.imread(in_dir + file_names[i])) - return img_in - -# computes a list of maximum projection images from a list of directories -def compute_max_img_list(in_dir, ext, base_name, dir_out = ''): - """by abda - Modified by monj""" - - dir_list = [dI for dI in os.listdir(in_dir) if (os.path.isdir(os.path.join(in_dir,dI)) and dI[0:len(base_name)]==base_name)] - dir_list.sort() - - max_img_list = [] - for d in dir_list: - image_dir_in = in_dir + d + '/' - max_img = get_max_img(image_dir_in, ext) - if dir_out!='': - os.makedirs(dir_out, exist_ok = True) - io.imsave(dir_out + d + '.png', max_img.astype('uint8')) - - max_img_list += [max_img] - - return max_img_list - -# One more level up from compute_max_img_list. Computes a list of lists of maximum -#projection images from a list of directories, each containing a list of directories -#with the set of images that should be combined into a maximum projection. -def comp_max_imgs(dir_condition, base_name, dir_out = ''): - 'monj@dtu.dk' - - dir_list_condition = listdir_custom(dir_condition, string_start_pos = -4, dir_flag = True) - - max_img_list_condition = [] - for directory in dir_list_condition: - dir_in = dir_condition + '/' + directory + '/' - - if dir_out!= '': - subdir_out = dir_in.replace(dir_condition,dir_out) - else: - subdir_out = '' - - max_img_condition = compute_max_img_list(dir_in, '.png', base_name, subdir_out) - max_img_list_condition+= [max_img_condition] - - return max_img_list_condition - - -#TO DO: Eliminate histogram part from this function -def comp_std_perChannel(max_im_list, th_int, dir_condition): - 'monj@dtu.dk' - - sample_list = listdir_custom(dir_condition, string_start_pos = -4, dir_flag = True) - - std_list = [[],[],[]] - for sample, frame_img_list in zip(sample_list, max_im_list): - - for ind,img in enumerate(frame_img_list): - h, w, channels = img.shape - - for channel in range(0,channels): - intensities = img[:,:,channel].ravel() - std_list[channel] += [(intensities[intensities>th_int]).std()] - - return std_list - -def intensity_spread_normalisation(img_list, th_int, mean_std, dir_condition, base_name, dir_results): - 'monj@dtu.dk' - sample_list = listdir_custom(dir_condition, string_start_pos = -4, dir_flag = True) - #print(sample_list) - - img_corr_list = [] - for sample, sample_imgs in zip(sample_list, img_list): - sample_dir = dir_condition + '/' + sample + '/' - #print(sample_dir) - frame_list = listdir_custom(sample_dir, base_name = base_name) - #print(frame_list) - frame_img_corr_list = [] - for frame,img in zip(frame_list,sample_imgs): - h, w, channels = img.shape - - #img_corr = np.empty(img.shape,dtype = 'uint8') - img_corr = img - for channel in range(0,channels): - img_channel = img_corr[:,:,channel] - img_channel[img_channel>th_int] = img_channel[img_channel>th_int]*(mean_std/img_channel.std()) - img_corr[:,:,channel] = img_channel - - frame_img_corr_list += [img_corr] - os.makedirs(dir_results + '/' + sample, exist_ok = True) - io.imsave(dir_results + '/' + sample + '/' + frame +'.png', img_corr) - - img_corr_list += [frame_img_corr_list] - - return img_corr_list - -#def plotHist_perChannel_imgset -def plotHist_perChannel_imgset_list(max_img_list, dir_condition, dir_results = '', name_tag = 'original'): - 'monj@dtu.dk' - - sample_list = listdir_custom(dir_condition, string_start_pos = -4, dir_flag = True) - - for sample, frame_img_list in zip(sample_list, max_img_list): - fig, axs = plt.subplots(4,len(frame_img_list), figsize = (len(frame_img_list)*2,4*2)) - plt.suptitle('Sample '+sample[-4:] + ', acq. date: ' + sample[:6]) - - for ind,img in enumerate(frame_img_list): - h, w, channels = img.shape - axs[0][ind].imshow(img) - - for channel in range(0,channels): - intensities = img[:,:,channel].ravel() - axs[channel+1][ind].hist(intensities, bins = 50) - axs[channel+1][ind].set_aspect(1.0/axs[channel+1][ind].get_data_ratio()) - - if dir_results!= '': - plt.savefig(dir_results + '/' + sample[-4:] + '_' + name_tag + '_perChannelHistograms.png', dpi = 300) - plt.close(fig) - -#def compare_imgpairs -def compare_imgpairs_list(list1_imgsets, list2_imgsets, dir_condition, dir_results = ''): - 'monj@dtu.dk' - - sample_list = listdir_custom(dir_condition, string_start_pos = -4, dir_flag = True) - - for imgset1, imgset2, sample in zip(list1_imgsets, list2_imgsets, sample_list): - fig, axs = plt.subplots(2,len(imgset1), figsize = (2*len(imgset1),2*2),sharex=True,sharey=True) - plt.suptitle('Sample '+sample[-4:] + ', acq. date: ' + sample[:6]) - - for ind, img1, img2 in zip(range(0,len(imgset1)), imgset1, imgset2): - axs[0][ind].imshow(img1) - axs[1][ind].imshow(img2) - - if dir_results!= '': - plt.savefig(dir_results + '/' + sample[-4:] + '_originalVScorrected.png', dpi=300) - plt.close(fig) - -#%%Functions for Feature analysis - -# computes the max projection image and the features into a list -def compute_max_img_feat(in_dir, ext, base_name, sigma, sc_fac, save = False, abs_intensity = True): - dir_list = [dI for dI in os.listdir(in_dir) if (os.path.isdir(os.path.join(in_dir,dI)) and dI[0:len(base_name)]==base_name)] - dir_list.sort() - max_img_list = [] - for d in dir_list: - image_dir = in_dir + d + '/' - max_img = get_max_img(image_dir, ext) - - if save: - dir_name_out_img = in_dir.replace('data','maxProjImages') - os.makedirs(dir_name_out_img, exist_ok = True) - - io.imsave( dir_name_out_img + d + '.png', max_img.astype('uint8')) - - max_img_list += [skimage.transform.rescale(max_img, sc_fac, multichannel=True)] - - feat_list = [] - for max_img in max_img_list: - r,c = max_img.shape[:2] - feat = np.zeros((r*c,45)) - for i in range(0,3): - feat_tmp = lf.get_gauss_feat_im(max_img[:,:,i], sigma) - feat[:,i*15:(i+1)*15] = feat_tmp.reshape(-1,feat_tmp.shape[2]) - if not(abs_intensity): - feat_list += [np.concatenate((feat[:,1:15],feat[:,16:30],feat[:,31:45]),axis = 1)] - else: - feat_list += [feat] - return max_img_list, feat_list - - -# computes a histogram of features from a kmeans object -def compute_assignment_hist(feat_list, kmeans, background_feat = None): - assignment_list = [] - for feat in feat_list: - assignment_list += [kmeans.predict(feat)] - edges = np.arange(kmeans.n_clusters+1)-0.5 # histogram edges halfway between integers - hist = np.zeros(kmeans.n_clusters) - for a in assignment_list: - hist += np.histogram(a,bins=edges)[0] - - sum_hist = np.sum(hist) - hist/= sum_hist - - if background_feat is not None: - assignment_back = kmeans.predict(background_feat) - hist_back = np.histogram(assignment_back,bins=edges)[0] - return hist, assignment_list, hist_back, assignment_back - else: - return hist, assignment_list - - -# image to array of patches -def im2col(A, BSZ, stepsize=1, norm=False): - # Parameters - m,n = A.shape - s0, s1 = A.strides - nrows = m-BSZ[0]+1 - ncols = n-BSZ[1]+1 - shp = BSZ[0],BSZ[1],nrows,ncols - strd = s0,s1,s0,s1 - - out_view = np.lib.stride_tricks.as_strided(A, shape=shp, strides=strd) - out_view_shaped = out_view.reshape(BSZ[0]*BSZ[1],-1)[:,::stepsize] - if norm: - one_norm = np.sum(out_view_shaped,axis=0) - ind_zero_norm = np.where(one_norm !=0) - out_view_shaped[:,ind_zero_norm] = 255*out_view_shaped[:,ind_zero_norm]/one_norm[ind_zero_norm] - return out_view_shaped - -# nd image to array of patches -def ndim2col(A, BSZ, stepsize=1, norm=False): - if(A.ndim == 2): - return im2col(A, BSZ, stepsize, norm) - else: - r,c,l = A.shape - patches = np.zeros((l*BSZ[0]*BSZ[1],(r-BSZ[0]+1)*(c-BSZ[1]+1))) - for i in range(l): - patches[i*BSZ[0]*BSZ[1]:(i+1)*BSZ[0]*BSZ[1],:] = im2col(A[:,:,i],BSZ,stepsize,norm) - return patches - -# nd image to array of patches with mirror padding along boundaries -def ndim2col_pad(A, BSZ, stepsize=1, norm=False): - r,c = A.shape[:2] - if (A.ndim == 2): - l = 1 - else: - l = A.shape[2] - tmp = np.zeros((r+BSZ[0]-1,c+BSZ[1]-1,l)) - fhr = int(np.floor(BSZ[0]/2)) - fhc = int(np.floor(BSZ[1]/2)) - thr = int(BSZ[0]-fhr-1) - thc = int(BSZ[1]-fhc-1) - - tmp[fhr:fhr+r,fhc:fhc+c,:] = A.reshape((r,c,l)) - tmp[:fhr,:] = np.flip(tmp[fhr:fhr*2,:], axis=0) - tmp[fhr+r:,:] = np.flip(tmp[r:r+thr,:], axis=0) - tmp[:,:fhc] = np.flip(tmp[:,fhc:fhc*2], axis=1) - tmp[:,fhc+c:] = np.flip(tmp[:,c:c+thc], axis=1) - tmp = np.squeeze(tmp) - return ndim2col(tmp,BSZ,stepsize,norm) - -#%% Functions for visualisation of learnt features - -def plot_grid_cluster_centers(cluster_centers, cluster_order, patch_size, colour_mode = 'colour', occurrence = ''): - #grid dimensions - size_x = round(len(cluster_order)**(1/2)) - size_y = ceil(len(cluster_order)/size_x) - - #figure format - overhead = 1 - w, h = plt.figaspect(size_x/size_y) - fig, axs = plt.subplots(size_x,size_y, figsize=(1.3*w,1.3*h*(1+overhead/2)), sharex=True, sharey=True) - #print('Grid size: ', grid_size[1], grid_size[2], 'Figure size: ', w, h) - - ax_list = axs.ravel() - for ind, cluster in enumerate(cluster_order): - #print(ind) - if colour_mode == 'bnw': #in bnw + colour give the clusters a uniform colour - cluster_centre = np.reshape(cluster_centers[cluster,:],(patch_size,patch_size)) - ax_list[ind].imshow(cluster_centre.astype('uint8'),cmap='gray') - else: - cluster_centre = np.transpose((np.reshape(cluster_centers[cluster,:],(3,patch_size,patch_size))),(1,2,0)) - ax_list[ind].imshow(cluster_centre.astype('uint8')) - if occurrence !='': - ax_list[ind].set_title(round(occurrence[ind],2)) - else: - ax_list[ind].set_title(cluster) - plt.setp(axs, xticks=[], yticks=[]) - - -# def plot_mapsAndimages(dir_condition, directory_list, map_img, max_img_list, base_name = 'frame', r = 1024, c = 1024): -# nr_list = 0 -# for directory in directory_list: -# in_dir = dir_condition + directory + '/' -# dir_list = [dI for dI in os.listdir(in_dir) if (os.path.isdir(os.path.join(in_dir ,dI)) and dI[0:len(base_name)]==base_name)] -# print(dir_list) -# img_all_control = [] -# img_all_control += map_img[nr_list:nr_list+len(dir_list)] -# img_all_control += max_img_list[nr_list:nr_list+len(dir_list)] -# fig, axs = plt.subplots(2,len(dir_list), sharex=True, sharey=True) -# nr = 0 -# prob_sample = np.zeros((len(dir_list),1)) -# for ax, img in zip(axs.ravel(), img_all_control): -# print(nr) -# if nr<len(dir_list): -# prob_img = sum(sum(img))/(r*c) -# ax.set_title('Pc '+str(round(prob_img,2))) -# prob_sample[nr] = prob_img -# #print(prob_sample) -# nr += 1 -# if ( img.ndimg == 2 ): -# ax.imshow(skimage.transform.resize(img, (1024,1024)), cmap=plt.cm.bwr, vmin = 0, vmax = 1) -# else: -# ax.imshow(skimage.transform.resize(img.astype(np.uint8), (1024,1024))) -# plt.suptitle('Probability control of sample '+str(directory)+', avg:' + str(round(prob_sample.mean(),2))+ ' std:' + str(round(prob_sample.std(),2))) -# plt.savefig(dir_probs + '/control/' + '/probImControl_'+str(directory)+'_%dclusters_%dsigma%ddownscale_absInt%s.png'%(nr_clusters,sigma,1/sc_fac,abs_intensity), dpi=1000) -# plt.show() -# nr_list += len(dir_list) - - - - - - - - - - -