diff --git a/code/image_preprocessing.py b/code/image_preprocessing.py new file mode 100755 index 0000000000000000000000000000000000000000..5dadfec9e2d2137fa3cccf8bed56af562ca77e3f --- /dev/null +++ b/code/image_preprocessing.py @@ -0,0 +1,143 @@ +#%% #!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Preprocessing of the images + +1. Compute maximum projections. +2. Explore the intensities of the maximum projection images. +3. (optional) Correct the intensities. Normalisation across channels and images. +Equalise the standard deviation for intensity values that contain structures. +The air in the backround is removed with a manual threshold which is constant across +images and channels. + +Created on Wed Oct 7 10:34:52 2020 + +@author: monj +""" +import os +import microscopy_analysis as ma +import matplotlib.pyplot as plt +from datetime import datetime +import numpy as np +import skimage.io as io +import copy + +# Turn interactive plotting off +plt.ioff() + +plt.close('all') + +start_time_global = datetime.now() + +#%% I/O directories + +diseases = ['emphysema','diseased', 'sarcoidosis'] +conditions = diseases[:] +conditions.insert(0,'control') + +version = 'corrected_bis' #data identifier: My versions were called: blank, corrected or corrected_bis +preprocessing = 'preprocessed_ignback/' + +# input directories - images are stored in folders starting with the name 'frame' +dir_in = '../data_' + version+'/' +base_name = 'frame' + +#output directories: max projections and intensity inspection +dir_maxProj = '../maxProjImages_'+version+'/' + +dir_maxProj_raw = dir_maxProj + 'raw/' +ma.make_output_dirs(dir_maxProj_raw,conditions) + +dir_maxProj_processed = dir_maxProj + preprocessing #rerun just to make sure not to muck up the results +ma.make_output_dirs(dir_maxProj_processed,conditions) + +dir_results_intinspect = dir_maxProj + preprocessing + 'intensity_inspection/' #rerun just to make sure not to muck up the results +ma.make_output_dirs(dir_results_intinspect,conditions) + +#%% Get maximum projection images + +if len(os.listdir(dir_maxProj_raw + 'control/'))<2: + #Compute maximum projection images (runtime ~= 135 s) + print('Computing maximum projection images') + startTime = datetime.now() + max_img_list = [] + for condition in conditions: + max_img_list += [ma.comp_max_imgs(dir_in + condition, base_name, dir_maxProj_raw + condition)] + endTime = datetime.now() - startTime + print(endTime) +else: + #Read maximum projection images (runtime ~= 20 s ) + print('Reading maximum projection images') + startTime = datetime.now() + max_img_list = [] + for condition in conditions: + max_img_list += [ma.read_max_imgs(dir_maxProj_raw + condition, base_name)] + endTime = datetime.now() - startTime + print(endTime) + +#%% Plot histograms of raw images + +#Takes approx. 29s per condition +for condition, max_img_list_condition in zip(conditions, max_img_list): + ma.plotHist_perChannel_imgset_list(max_img_list_condition, dir_in + condition, dir_results_intinspect + condition, name_tag = 'original') + +#%% Measure the spread of intensities in each channel +"""The spread of intensities is quantified by the standard deviation of the +intensities that are over a threshold. The threshold value is set manually +and it defines the minimum intensity value that is associated with the +presence of protein. Image regions with intensity values under the threshold +are considered as air, i.e. absence of protein.""" + +#Protein presence/absence (air) intensity threshold +th_int = 5 #could it be set automatically based on the data? + +#Compute stds per image channel and, +#for each sample, make a figure with the intensity histograms per image channel. +std_list = [] +for condition, max_img_list_condition in zip(conditions, max_img_list): + std_list += [ma.comp_std_perChannel(max_img_list_condition, th_int, dir_in + condition)] + +# histogram of the intensity-spread per image channel +fig, axs = plt.subplots(len(conditions),4, figsize = (4*2,len(conditions)*2), sharex = True, sharey = True) +plt.suptitle('Distribution of intensity spread across images\n Rows: ' + ', '.join(conditions)) +for ind_c,std_list_condition in enumerate(std_list): + for ind,channel_list in enumerate(std_list_condition): + axs[0][ind+1].title.set_text('Channel ' + str(ind+1)) + axs[ind_c][ind+1].hist(channel_list, bins = 50, density = True) + +# histogram of the intensity-spread per image +axs[0][0].title.set_text('All channels') +std_list_flat = [] +for ind_c,std_list_condition in enumerate(std_list): + std_list_condition_flat = ma.flatten_list(std_list_condition) + axs[ind_c][0].hist(std_list_condition_flat, bins = 50, density = True) + std_list_flat += std_list_condition_flat +plt.show() + +#Average intesity-spread across al image channels +mean_std = sum(std_list)/len(std_list) +print('The average spread of an image channel is ' + str(round(mean_std))) + +#%% Correct intensities +#Consider not using th_int when correcting, only when computing the spread + +#Takes 34s per conditoin +max_img_corr_list = [] +for condition, max_img_list_condition in zip(conditions, max_img_list): + max_img_corr_list += [ma.intensity_spread_normalisation(copy.deepcopy(max_img_list_condition), th_int, mean_std, dir_in + condition, base_name, dir_maxProj_processed + condition)] + +#%% Plot histograms of corrected images + +#Takes 29s +for condition, max_img_corr_list_condition in zip(conditions, max_img_corr_list): + ma.plotHist_perChannel_imgset_list(max_img_corr_list_condition, dir_in + condition, dir_results_intinspect + condition, name_tag = 'corrected') + +#%% Visualise raw vs. corrected images + +for max_img_list_condition, max_img_corr_list_condition, condition in zip(max_img_list, max_img_corr_list, conditions): + ma.compare_imgpairs_list(max_img_list_condition, max_img_corr_list_condition , dir_in + condition, dir_results_intinspect + condition) + +#%%% Print total computational time + +end_time_global = datetime.now() - startTime +print(end_time_global) \ No newline at end of file diff --git a/code/local_features.py b/code/local_features.py new file mode 100644 index 0000000000000000000000000000000000000000..11b5b3b8f8e1ecdb0cd2941d0dc5b050d040a4d6 --- /dev/null +++ b/code/local_features.py @@ -0,0 +1,117 @@ +""" +Helping functions for extracting features from images +""" + +import skimage.io +import numpy as np +import matplotlib.pyplot as plt +import scipy.ndimage + +def get_gauss_feat_im(im, sigma=1, normalize=False): + """Gauss derivative feaures for every image pixel. + Arguments: + image: a 2D image, shape (r,c). + sigma: standard deviation for Gaussian derivatives. + normalize: flag indicating normalization of features. + Returns: + imfeat: a 3D array of size (r,c,15) with a 15-dimentional feature + vector for every image pixel. + Author: vand@dtu.dk, 2020 + """ + + r,c = im.shape + imfeat = np.zeros((r,c,15)) + imfeat[:,:,0] = scipy.ndimage.gaussian_filter(im,sigma,order=0) + imfeat[:,:,1] = scipy.ndimage.gaussian_filter(im,sigma,order=[0,1]) + imfeat[:,:,2] = scipy.ndimage.gaussian_filter(im,sigma,order=[1,0]) + imfeat[:,:,3] = scipy.ndimage.gaussian_filter(im,sigma,order=[0,2]) + imfeat[:,:,4] = scipy.ndimage.gaussian_filter(im,sigma,order=[1,1]) + imfeat[:,:,5] = scipy.ndimage.gaussian_filter(im,sigma,order=[2,0]) + imfeat[:,:,6] = scipy.ndimage.gaussian_filter(im,sigma,order=[0,3]) + imfeat[:,:,7] = scipy.ndimage.gaussian_filter(im,sigma,order=[1,2]) + imfeat[:,:,8] = scipy.ndimage.gaussian_filter(im,sigma,order=[2,1]) + imfeat[:,:,9] = scipy.ndimage.gaussian_filter(im,sigma,order=[3,0]) + imfeat[:,:,10] = scipy.ndimage.gaussian_filter(im,sigma,order=[0,4]) + imfeat[:,:,11] = scipy.ndimage.gaussian_filter(im,sigma,order=[1,3]) + imfeat[:,:,12] = scipy.ndimage.gaussian_filter(im,sigma,order=[2,2]) + imfeat[:,:,13] = scipy.ndimage.gaussian_filter(im,sigma,order=[3,1]) + imfeat[:,:,14] = scipy.ndimage.gaussian_filter(im,sigma,order=[4,0]) + + if normalize: + imfeat -= np.mean(imfeat, axis=(0,1)) + imfeat *= (1/np.std(imfeat, axis=(0,1))) + + return imfeat + +def im2col(im, patch_size=[3,3], stepsize=1): + """Rearrange image patches into columns + Arguments: + image: a 2D image, shape (r,c). + patch size: size of extracted paches. + stepsize: patch step size. + Returns: + patches: a 2D array which in every column has a patch associated + with one image pixel. For stepsize 1, number of returned column + is (r-patch_size[0]+1)*(c-patch_size[0]+1) due to bounary. The + length of columns is pathc_size[0]*patch_size[1]. + """ + + r,c = im.shape + s0, s1 = im.strides + nrows =r-patch_size[0]+1 + ncols = c-patch_size[1]+1 + shp = patch_size[0],patch_size[1],nrows,ncols + strd = s0,s1,s0,s1 + + out_view = np.lib.stride_tricks.as_strided(im, shape=shp, strides=strd) + return out_view.reshape(patch_size[0]*patch_size[1],-1)[:,::stepsize] + + +def ndim2col(im, block_size=[3,3], stepsize=1): + """Rearrange image blocks into columns for N-D image (e.g. RGB image)""""" + if(im.ndim == 2): + return im2col(im, block_size, stepsize) + else: + r,c,l = im.shape + patches = np.zeros((l*block_size[0]*block_size[1], + (r-block_size[0]+1)*(c-block_size[1]+1))) + for i in range(l): + patches[i*block_size[0]*block_size[1]:(i+1)*block_size[0]*block_size[1], + :] = im2col(im[:,:,i],block_size,stepsize) + return patches + +#%% + +if __name__ == '__main__': + + filename = '../example_data/training_image.png' + I = skimage.io.imread(filename) + I = I.astype(np.float) + + + # features based on gaussian derivatives + sigma = 1; + gf = get_gauss_feat_im(I, sigma) + + fig,ax = plt.subplots(3,5) + for j in range(5): + for i in range(3): + ax[i][j].imshow(gf[:,:,5*i+j]) + ax[i][j].set_title(f'layer {5*i+j}') + + + # features based on image patches + I = I[I.shape[0]//2:I.shape[0]//2+20,I.shape[1]//2:I.shape[1]//2+20] # smaller image such that we can see the difference in layers + pf = im2col(I,[3,3]) + pf = pf.reshape((9,I.shape[0]-2,I.shape[1]-2)) + + fig, ax = plt.subplots() + ax.imshow(I) + + fig,ax = plt.subplots(3,3) + for j in range(3): + for i in range(3): + ax[i][j].imshow(pf[3*i+j]) + ax[i][j].set_title(f'layer {3*i+j}') + + diff --git a/code/lung_feature_patch_allPatients.py b/code/lung_feature_patch_allPatients.py new file mode 100755 index 0000000000000000000000000000000000000000..f138775b265b2fe0ae44672d73d305039ba4d3a9 --- /dev/null +++ b/code/lung_feature_patch_allPatients.py @@ -0,0 +1,709 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Script for extracting patches from max-projection images and visualizing their clustering + +Basic functionality by Anders B. Dahl. +- Compute maximum projection images +- Extract patches +- Cluster patches +- Build assignment histograms +- Build probability images from the ratio between the histogram for healthy and diseased. + +Extended functionality by Monica J. Emerson. +- Adapted the above to integrate image normalisation, e.g. modified max proj.image functions. +- Created a pipeline to analyse multiple samples/patients. +- Study of several diseases. +- Added functionality for supporting the analysis of different data versions. +- Computation of health scores per image, sample and patient. +- Visualisation of cluster centres as a grid. +- Boxplots to compare probabilities across samples and to clinical values. +- Normalisation of intensities across images and channels. +- Possibility to ignore the background (air phase). +- Visualisation of assignment images to inspect results and support the development of the approach to ignore background. +- Implementation and investigation of feature variations (colour, bnw, bnw+colour). +- Study of the parameters (nr clusters and scale - relative patch/image size) +- Extended visualisation of cluster centres to support the comparison across diseases and parameters. + a) Visualisation of cluster centres split into channels. + b) Compute a population (p) and condition probability (c) value for each cluster. + c) Identify the presence of "weak" clusters. If they exist, rerun kmeans. + d) Select and visualise characteristic clusters for the conditions based on p and c. + e) Plot all clusters in the population/condition probability space. + f) Order cluster centres according to condition probability +""" + +import numpy as np +import matplotlib.pyplot as plt +import sklearn.cluster +import microscopy_analysis as ma +import skimage.io +import skimage.transform + +import os +from datetime import datetime +import sys +from matplotlib.offsetbox import OffsetImage, AnnotationBbox + +startTime = datetime.now() + +plt.close('all') + +sc_fac = 0.25 #25 #25 #0.5 # scaling factor of the image +patch_size = 17 +nr_clusters = 100 # number of clusters + +#%% Directories + +version = 'corrected_bis' +preprocessing = '' #'/preprocessed_ignback/' #''(none) +colour_mode = 'colour' #'bnw' #colour +disease = ['sarcoidosis']#diseased (mix, 2 of each condition)' #''emphysema' 'sarcoidosis' + + +# input directories - images start with the name 'frame' +dir_in = '../maxProjImages_'+version + preprocessing +# dir_control = dir_in + 'control/' # 200401_109a/' +# dir_sick = dir_in + disease + '/' #191216_100a/' +base_name = 'frame' + + +#output directories +dir_results = '../results_monj/patches/data_' + version+'/' #rerun just to make sure not to muck up the results +os.makedirs(dir_results, exist_ok = True) + +dir_probs = dir_results + disease + '_' + colour_mode + '_%dclusters_%ddownscale_%dpatchsize/'%(nr_clusters,1/sc_fac,patch_size) +ma.make_output_dirs(dir_probs, disease) + +dir_probs_withBack = dir_probs + 'withBackground' +ma.make_output_dirs(dir_probs_withBack, disease) + +# if not os.path.exists(dir_probs): +# os.mkdir(dir_probs) +# os.mkdir(dir_probs + 'control/') +# os.mkdir(dir_probs + disease + '/') +# os.mkdir(dir_probs + 'control_withBackground/') +# os.mkdir(dir_probs + disease + '_withBackground/') + +#%% Read (preprocessed) maximum corrected images +#Read maximum projection images (runtime ~= 20 s ) +print('Reading maximum projection images') + +max_img_list_control = ma.read_max_imgs(dir_in+ 'control', base_name) +max_img_list_sick = ma.read_max_imgs(dir_in+ disease, base_name) + + +# max_im_list_control = ma.read_max_ims(dir_in_max + 'control/' ,base_name,sc_fac,colour_mode) + +# max_im_list_sick = [] +# for sample in dir_list_sick: +# in_dir = dir_sick + sample + '/' +# dir_list = [dI for dI in sorted(os.listdir(in_dir)) if dI[0:len(base_name)]==base_name] +# frames_list = [] +# for ind, frame in enumerate(dir_list): +# frame_path = in_dir + frame +# if colour_mode == 'bnw': +# img = skimage.color.rgb2gray(skimage.io.imread(frame_path).astype(float)) +# max_im_list_sick += [skimage.transform.rescale(img, sc_fac)] +# else: +# img = skimage.io.imread(frame_path).astype(float) +# max_im_list_sick += [skimage.transform.rescale(img, sc_fac, multichannel=True)] + +#TO DO: Rescale max proj. images, overwrite original variables +#TO DO: Compute bnw version, but keep the colour one for displaying it at the end + +#%% Compute patches +patch_feat_list_control = [] +for max_im in max_im_list_control: + patch_feat_list_control += [ma.ndim2col_pad(max_im, (patch_size, patch_size),norm=False).transpose()] + +patch_feat_list_sick = [] +for max_im in max_im_list_sick: + patch_feat_list_sick += [ma.ndim2col_pad(max_im, (patch_size, patch_size),norm=False).transpose()] + +patch_feat_total = [] +patch_feat_total += patch_feat_list_control +patch_feat_total += patch_feat_list_sick + +#%% features for clustering +nr_keep = 10000 # number of features randomly picked for clustering +n_im = len(patch_feat_total) +feat_dim = patch_feat_total[0].shape[1] +patch_feat_to_cluster = np.zeros((nr_keep*n_im,feat_dim)) +f = 0 +for patch_feat in patch_feat_total: + keep_indices = np.random.permutation(np.arange(patch_feat.shape[0]))[:nr_keep] + patch_feat_to_cluster[f:f+nr_keep,:] = patch_feat[keep_indices,:] + f += nr_keep + +#%% k-means clustering +batch_size = 1000 +th_nr_pathesINcluster = 5 + +if os.path.exists(dir_probs + 'array_cluster_centres'+colour_mode+'.npy'): + cluster_centres = np.load(dir_probs + 'array_cluster_centres'+colour_mode+'.npy') + #kmeans = sklearn.cluster.MiniBatchKMeans(n_clusters=nr_clusters, init = cluster_centres, batch_size = batch_size) + #kmeans.fit(patch_feat_to_cluster) + kmeans = sklearn.cluster.MiniBatchKMeans(n_clusters=nr_clusters, batch_size = batch_size) + kmeans.cluster_centers_=cluster_centres + reusing_clusters = True +else: + kmeans = sklearn.cluster.MiniBatchKMeans(n_clusters=nr_clusters, batch_size = batch_size) + kmeans.fit(patch_feat_to_cluster) + all_cluster_centres = kmeans.cluster_centers_ + + #Cluster statistics + features_in_cluster = [] + for cluster in range(0,nr_clusters): + features_in_cluster += [[ind for ind,i in enumerate(kmeans.labels_) if i==cluster]] + nr_feat_in_cluster = [len(i) for i in features_in_cluster] + + nr_weakClusters = len([1 for i in nr_feat_in_cluster if i<th_nr_pathesINcluster]) + + if nr_weakClusters!=0: + sys.exit(str(nr_weakClusters)+ " clusters composed of less than "+str(th_nr_pathesINcluster)+" images") + else: + np.save(dir_probs + 'array_cluster_centres'+colour_mode+'.npy', all_cluster_centres) # .npy extension is added if not given + + +#%% Read background pixels +dir_background = dir_in + 'background/' +dir_background_list = [dI for dI in os.listdir(dir_background) if os.path.isdir(os.path.join(dir_background,dI))] + +fig, axs = plt.subplots(2,len(dir_background_list), sharex=True, sharey=True) +patch_feat_back = [] +for ind, directory in enumerate(dir_background_list): + #load images and corresponding background labels + file_names = [f for f in os.listdir(dir_background +directory) if f.endswith('.png')] + im_file = [f for f in file_names if not f.startswith('back')].pop() + label_file = [f for f in file_names if f.startswith('back')].pop() + im_back = skimage.io.imread(dir_background + directory + '/' + im_file).astype('uint8') + label_back = skimage.color.rgb2gray(skimage.io.imread(dir_background + directory + '/' + label_file).astype('float')) + label_back += -np.min(label_back) + label_back = label_back.astype('bool') + #plot imagesand corresonding labels + axs[0][ind].imshow(im_back) + axs[1][ind].imshow(label_back,'gray') + plt.show() + #compute features + if colour_mode == 'bnw': + im_back = skimage.transform.rescale(skimage.color.rgb2gray(im_back.astype(float)), sc_fac, multichannel=False) + else : + im_back = skimage.transform.rescale(im_back.astype(float), sc_fac, multichannel=True) + im_feat = ma.ndim2col_pad(im_back, (patch_size, patch_size)).transpose() + patch_feat_back += [im_feat[(skimage.transform.rescale(label_back, sc_fac, multichannel=False)==True).ravel(),:]] + +#%% Plot all cluster centres + +plot_grid_cluster_centres(kmeans.cluster_centers_) + +if nr_clusters==100: + fig, axs = plt.subplots(10,10, figsize=(5,5), sharex=True, sharey=True) +if nr_clusters==200: + w, h = plt.figaspect(2.) + fig, axs = plt.subplots(20,10, figsize=(w,h), sharex=True, sharey=True) +if nr_clusters==1000: + fig, axs = plt.subplots(100,100, figsize=(5,5), sharex=True, sharey=True) + +intensities = [] +for ax, cluster_nr in zip(axs.ravel(), np.arange(0,nr_clusters)): + if colour_mode == 'bnw': + cluster_centre = np.reshape(kmeans.cluster_centers_[cluster_nr,:],(patch_size,patch_size)) + intensities += [sum((cluster_centre).ravel())] + ax.imshow(cluster_centre.astype('uint8'),cmap='gray') + else: + cluster_centre = np.transpose((np.reshape(kmeans.cluster_centers_[cluster_nr,:],(3,patch_size,patch_size))),(1,2,0)) + intensities += [sum((np.max(cluster_centre,2)).ravel())] + ax.imshow(cluster_centre.astype('uint8')) + +plt.setp(axs, xticks=[], yticks=[]) +plt.savefig(dir_probs + 'clusterCentres_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) + + +#%% Histograms for background, healthy and sick +#hist_control, assignment_list_control, hist_back, assignment_back = ma.compute_assignment_hist(patch_feat_list_control, kmeans, background_feat=im_feat_back) + +hist_background, assignment_list_background = ma.compute_assignment_hist(patch_feat_back, kmeans) +hist_control, assignment_list_control= ma.compute_assignment_hist(patch_feat_list_control, kmeans) +hist_sick, assignment_list_sick = ma.compute_assignment_hist(patch_feat_list_sick, kmeans) + +#%% Cluster centres in the 2d space determined by the relationshop between histogram +occurrence_ratio = hist_control/hist_sick +occurrence_ratio[occurrence_ratio<1] = -1/occurrence_ratio[occurrence_ratio<1] +populated = hist_control+hist_sick + +plt.hist2d(occurrence_ratio,populated) + +fig_control, ax_control = plt.subplots(figsize=(15,15)) +ax_control.scatter(populated[occurrence_ratio>1], occurrence_ratio[occurrence_ratio>1]) +ax_control.set_ylim(0.8,7) +ax_control.set_xlim(0,0.08) + +fig_sick, ax_sick = plt.subplots(figsize=(15,15)) +ax_sick.scatter(populated[occurrence_ratio<1], -occurrence_ratio[occurrence_ratio<1]) +ax_sick.set_ylim(0.8,7) +ax_sick.set_xlim(0,0.08) + +for x0, y0, cluster_nr in zip(populated, occurrence_ratio, np.arange(0,nr_clusters)): + if colour_mode == 'bnw': + cluster_centre = np.reshape(kmeans.cluster_centers_[cluster_nr,:],(patch_size,patch_size)) + else: + cluster_centre = np.transpose((np.reshape(kmeans.cluster_centers_[cluster_nr,:],(3,patch_size,patch_size))),(1,2,0)) + if y0>0: + if colour_mode == 'bnw': + ab = AnnotationBbox(OffsetImage(cluster_centre.astype('uint8'),cmap='gray'), (x0, y0), frameon=False) + else: + ab = AnnotationBbox(OffsetImage(cluster_centre.astype('uint8')), (x0, y0), frameon=False) + ax_control.add_artist(ab) + plt.savefig(dir_probs + 'controlClusterCentres_2Dspace_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) + else: + if colour_mode == 'bnw': + ab = AnnotationBbox(OffsetImage(cluster_centre.astype('uint8'),cmap='gray'), (x0, -y0), frameon=False) + else: + ab = AnnotationBbox(OffsetImage(cluster_centre.astype('uint8')), (x0, -y0), frameon=False) + ax_sick.add_artist(ab) + plt.savefig(dir_probs + 'sickClusterCentres_2Dspace_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) + + + +#%% show bar plot of healthy and sick + +fig, ax = plt.subplots(1,1) +ax.bar(np.array(range(0,nr_clusters)), hist_background, width = 1) +plt.show() +plt.savefig(dir_probs + 'backgroundHistogram_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) + +fig, ax = plt.subplots(1,1) +ax.bar(np.array(range(0,nr_clusters))-0.25, hist_control, width = 0.5, label='Control', color = 'r') +ax.bar(np.array(range(0,nr_clusters))+0.25, hist_sick, width = 0.5, label='Sick', color = 'b') +ax.legend() +plt.savefig(dir_probs + 'assignmentHistograms_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) + + +#%% Find background and characteristic clusters + +#background clusters +clusters_background_intBased = [i for i in range(len(intensities)) if intensities[i] < 7000] +clusters_background_annotBased = [ind for ind, value in enumerate(hist_background) if value>0] + +clusters_background = list(set(clusters_background_intBased) | set(clusters_background_annotBased)) +print('Background clusters'+str(clusters_background)) + +#characteristic clusters +th_proportion = 2#2 #2.4 +th_populated = 0.01#0.005#0.015 + +clusters_sick = (hist_sick>th_populated)&(hist_sick>th_proportion*hist_control) +clusters_sick = [ind for ind,value in enumerate(clusters_sick) if value == True] +clusters_control = (hist_control>th_populated)&(hist_control>th_proportion*hist_sick) +clusters_control = [ind for ind,value in enumerate(clusters_control) if value == True] + +#eliminate backgorund clusters if contained here +clusters_control = [i for i in clusters_control if i not in clusters_background] +clusters_sick = [i for i in clusters_sick if i not in clusters_background] + +print('Clusters characteristic of the ' + disease + ' tissue',clusters_sick) +print('Clusters characteristic of the control tissue',clusters_control) + +#%% Plot centres of the characteristic clusters +cluster_centres_control = kmeans.cluster_centers_[clusters_control] + +#control clusters and contrast enhanced +cluster_centres_control = kmeans.cluster_centers_[clusters_control] +fig, axs = plt.subplots(1,len(clusters_control), figsize=(len(clusters_control)*3,3), sharex=True, sharey=True) +fig.suptitle('Cluster centres for control') +if colour_mode!='bnw': + fig_split, axs_split = plt.subplots(3,len(clusters_control), figsize=(len(clusters_control)*3,9), sharex=True, sharey=True) + fig_split.suptitle('Control centres split channels (contrast enhanced)') +for l in np.arange(0,len(clusters_control)): + if colour_mode == 'bnw': + cluster_centre = np.reshape(cluster_centres_control[l,:],(patch_size,patch_size)) + axs[l].imshow(cluster_centre.astype(np.uint8),cmap='gray') + axs[l].axis('off') + axs[l].set_title(clusters_control[l]) + else: + cluster_centre = np.transpose((np.reshape(cluster_centres_control[l,:],(3,patch_size,patch_size))),(1,2,0)) + axs_split[0][l].imshow(cluster_centre[...,0].astype(np.uint8),cmap='gray') + axs_split[0][l].axis('off') + axs_split[0][l].set_title(clusters_control[l]) + axs_split[1][l].imshow(cluster_centre[...,1].astype(np.uint8),cmap='gray') + axs_split[1][l].axis('off') + axs_split[2][l].imshow(cluster_centre[...,2].astype(np.uint8),cmap='gray') + axs_split[2][l].axis('off') + plt.figure(fig_split.number), + plt.savefig(dir_probs + 'clusterCentres_control_splitContrastEnhanced_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) + axs[l].imshow(cluster_centre.astype(np.uint8)) + axs[l].axis('off') + axs[l].set_title(clusters_control[l]) + plt.figure(fig.number), + plt.savefig(dir_probs + 'clusterCentres_control_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) + +#sick clusters and contrast enhanced +cluster_centres_sick = kmeans.cluster_centers_[clusters_sick] +fig, axs = plt.subplots(1,len(clusters_sick), figsize=(len(clusters_sick)*3,3), sharex=True, sharey=True) +fig.suptitle('Cluster centres for sick') +if colour_mode!='bnw': + fig_split, axs_split = plt.subplots(3,len(clusters_sick), figsize=(len(clusters_sick)*3,9), sharex=True, sharey=True) + fig_split.suptitle('sick centres split channels (contrast enhanced)') +for l in np.arange(0,len(clusters_sick)): + if colour_mode == 'bnw': + cluster_centre = np.reshape(cluster_centres_sick[l,:],(patch_size,patch_size)) + axs[l].imshow(cluster_centre.astype(np.uint8),cmap='gray') + axs[l].axis('off') + axs[l].set_title(clusters_sick[l]) + else: + cluster_centre = np.transpose((np.reshape(cluster_centres_sick[l,:],(3,patch_size,patch_size))),(1,2,0)) + axs_split[0][l].imshow(cluster_centre[...,0].astype(np.uint8),cmap='gray') + axs_split[0][l].axis('off') + axs_split[0][l].set_title(clusters_sick[l]) + axs_split[1][l].imshow(cluster_centre[...,1].astype(np.uint8),cmap='gray') + axs_split[1][l].axis('off') + axs_split[2][l].imshow(cluster_centre[...,2].astype(np.uint8),cmap='gray') + axs_split[2][l].axis('off') + plt.figure(fig_split.number), + plt.savefig(dir_probs + 'clusterCentres_'+disease+'_splitContrastEnhanced_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) + axs[l].imshow(cluster_centre.astype(np.uint8)) + axs[l].axis('off') + axs[l].set_title(clusters_sick[l]) + plt.figure(fig.number), + plt.savefig(dir_probs + 'clusterCentres_'+disease+'_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) + +if colour_mode!='bnw': + #plot sick and control clusters with the same intensity range + max_value = [] + min_value = [] + for l in np.arange(0,len(clusters_control)): + cluster_centre = np.transpose((np.reshape(cluster_centres_control[l,:],(3,patch_size,patch_size))),(1,2,0)) + max_value += [cluster_centre.max()] + min_value += [cluster_centre.min()] + + for l in np.arange(0,len(clusters_sick)): + cluster_centre = np.transpose((np.reshape(cluster_centres_sick[l,:],(3,patch_size,patch_size))),(1,2,0)) + max_value += [cluster_centre.max()] + min_value += [cluster_centre.min()] + + range_max = max(max_value) + range_min = min(min_value) + + fig_split, axs_split = plt.subplots(3,len(clusters_control), figsize=(len(clusters_control)*3,9),sharex=True, sharey=True) + fig_split.suptitle('Control centres split channels (fixed intensity range)') + for l in np.arange(0,len(clusters_control)): + cluster_centre = np.transpose((np.reshape(cluster_centres_control[l,:],(3,patch_size,patch_size))),(1,2,0)) + im = np.zeros(cluster_centre.shape) + im[...,0] = cluster_centre[...,0] + axs_split[0][l].imshow(im.astype(np.uint8)) + axs_split[0][l].axis('off') + axs_split[0][l].set_title(clusters_control[l]) + im = np.zeros(cluster_centre.shape) + im[...,1] = cluster_centre[...,1] + axs_split[1][l].imshow(im.astype(np.uint8)) + axs_split[1][l].axis('off') + im = np.zeros(cluster_centre.shape) + im[...,2] = cluster_centre[...,2] + axs_split[2][l].imshow(im.astype(np.uint8)) + axs_split[2][l].axis('off') + plt.savefig(dir_probs + 'clusterCentres_control_split_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) + + + fig_split, axs_split = plt.subplots(3,len(clusters_sick), figsize=(len(clusters_sick)*3,9),sharex=True, sharey=True) + fig_split.suptitle('sick centres split channels (fixed intensity range)') + for l in np.arange(0,len(clusters_sick)): + cluster_centre = np.transpose((np.reshape(cluster_centres_sick[l,:],(3,patch_size,patch_size))),(1,2,0)) + im = np.zeros(cluster_centre.shape) + im[...,0] = cluster_centre[...,0] + axs_split[0][l].imshow(im.astype(np.uint8),vmin = range_min, vmax = range_max) + axs_split[0][l].axis('off') + im = np.zeros(cluster_centre.shape) + im[...,1] = cluster_centre[...,1] + axs_split[0][l].set_title(clusters_sick[l]) + axs_split[1][l].imshow(im.astype(np.uint8),vmin = range_min, vmax = range_max) + axs_split[1][l].axis('off') + im = np.zeros(cluster_centre.shape) + im[...,2] = cluster_centre[...,2] + axs_split[2][l].imshow(im.astype(np.uint8),vmin = range_min, vmax = range_max) + axs_split[2][l].axis('off') + plt.savefig(dir_probs + 'clusterCentres_'+disease+'_split_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) + + + +#%% display + +startTime_probs = datetime.now() + +r,c = max_im_list_control[0].shape[:2] + +version = '_withBackground' #''#'_withBackground' #'' + +# prob_control_back = hist_control/(hist_control+hist_sick) +# prob_sick_back = hist_sick/(hist_control+hist_sick) +prob_control= hist_control/(hist_control+hist_sick) +prob_sick = hist_sick/(hist_control+hist_sick) + +#Assign equal probability to the background clusters +# prob_sick = prob_sick_back +# prob_control = prob_control_back +if version == '': + prob_sick[clusters_background] = 0.5 + prob_control[clusters_background] = 0.5 +else: + if not os.path.exists(dir_probs + 'control'+version+'/'): + os.mkdir(dir_probs + 'control'+version+'/') + os.mkdir(dir_probs + disease +version+'/') + + +#Pixel-wise probabilities for control patients +prob_im_control = [] +for assignment in assignment_list_control: + prob = prob_control[assignment.astype(int)] + prob_im_control += [prob.reshape(r,c)] + +# prob_im_control_back = [] +# for assignment in assignment_list_control: +# prob_back = prob_control_back[assignment.astype(int)] +# prob_im_control_back += [prob_back.reshape(r,c)] + +prob_im_list = [] +prob_px_list = [] +nr_list = 0 +for directory in dir_list_control: + in_dir = dir_control + directory + '/' + dir_list = [dI for dI in os.listdir(in_dir) if dI[0:len(base_name)]==base_name] + print(dir_list) + im_all_control = [] + im_all_control += prob_im_control[nr_list:nr_list+len(dir_list)] + im_all_control += max_im_list_control[nr_list:nr_list+len(dir_list)] + fig, axs = plt.subplots(2,len(dir_list), figsize = (30,10), sharex=True, sharey=True) + nr = 0 + prob_sample = np.zeros((len(dir_list),1)) + for ax, im in zip(axs.ravel(), im_all_control): + print(nr) + if nr<len(dir_list): + if version == '': + prob_im = sum(im[im!=0.5])/(im[im!=0.5]).size + else: + prob_im = sum(sum(im))/(r*c) + prob_sample[nr] = prob_im + + ax.set_title('Pc '+str(round(prob_im,2))) + #print(prob_sample) + # elif nr<2*len(dir_list): + # prob_im_back = sum(im)/(r*c) + + nr += 1 + if nr<=len(im_all_control)/2: + #ax.imshow(skimage.transform.resize(im, (1024,1024)), cmap=plt.cm.bwr, vmin = 0, vmax = 1) + ax.imshow(im, cmap=plt.cm.bwr, vmin = 0, vmax = 1) + else: + #ax.imshow(skimage.transform.resize(im.astype(np.uint8), (1024,1024))) + if colour_mode == 'bnw': + ax.imshow(im.astype(np.uint8),cmap='gray') + else: + ax.imshow(im.astype(np.uint8)) + prob_im_list += [prob_sample] + #prob_px_list += [(np.array(im_all_control[:len(dir_list)])).ravel()] + plt.suptitle('Probability control of sample '+str(directory)+', avg:' + str(round(prob_sample.mean(),2))+ ' std:' + str(round(prob_sample.std(),2))) + plt.savefig(dir_probs + 'control'+version+'/' + 'probImControl_'+str(directory)+'_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) + plt.show() + nr_list += len(dir_list) + +#Pixel-wise cluster assignments for control patients +if True: #sc_fac == 1: + assignment_im_control = [] + for assignment in assignment_list_control: + cluster_assignment = assignment.astype(int) + assignment_im_control += [cluster_assignment.reshape(r,c)] + + nr_list = 0 + for directory in dir_list_control: + in_dir = dir_control + directory + '/' + dir_list = [dI for dI in os.listdir(in_dir) if dI[0:len(base_name)]==base_name] + print(dir_list) + im_all_control = [] + im_all_control += assignment_im_control[nr_list:nr_list+len(dir_list)] + im_all_control += max_im_list_control[nr_list:nr_list+len(dir_list)] + fig, axs = plt.subplots(2,len(dir_list), figsize = (30,10), sharex=True, sharey=True) + for ax, im in zip(axs.ravel(), im_all_control): + if ( im.ndim == 2 ): + #ax.imshow(skimage.transform.resize(im, (1024,1024), order=0, preserve_range = True),cmap=plt.cm.gist_ncar,vmin = 0,vmax = nr_clusters-1) + ax.imshow(im,cmap=plt.cm.gist_ncar,vmin = 0,vmax = nr_clusters-1) + + else: + #ax.imshow(skimage.transform.resize(im.astype(np.uint8), (1024,1024))) + ax.imshow(im.astype(np.uint8)) + plt.suptitle('Sample '+str(directory)) + plt.savefig(dir_probs + 'control/' + 'assignmentImControl_'+str(directory)+'_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) + plt.show() + nr_list += len(dir_list) + +#Pixel-wise probabilities for sick patients +prob_im_sick = [] +for assignment in assignment_list_sick: + prob = prob_control[assignment.astype(int)] + prob_im_sick += [prob.reshape(r,c)] + +nr_list = 0 +for directory in dir_list_sick: + in_dir = dir_sick + directory + '/' + dir_list = [dI for dI in os.listdir(in_dir) if dI[0:len(base_name)]==base_name] + print(dir_list) + im_all_sick = [] + im_all_sick += prob_im_sick[nr_list:nr_list+len(dir_list)] + im_all_sick += max_im_list_sick[nr_list:nr_list+len(dir_list)] + fig, axs = plt.subplots(2,len(dir_list), figsize = (30,10), sharex=True, sharey=True) + nr = 0 + prob_sample = np.zeros((len(dir_list),1)) + for ax, im in zip(axs.ravel(), im_all_sick): + print(nr) + if nr<len(dir_list): + if version == '': + prob_im = sum(im[im!=0.5])/(im[im!=0.5]).size + else: + prob_im = sum(sum(im))/(r*c) + #print(prob_im) + prob_sample[nr] = prob_im + ax.set_title('Pc '+str(round(prob_im,2))) + #print(prob_sample) + nr += 1 + if ( im.ndim == 2 ): + #ax.imshow(skimage.transform.resize(im, (1024,1024)), cmap=plt.cm.bwr, vmin = 0, vmax = 1) + ax.imshow(im, cmap=plt.cm.bwr, vmin = 0, vmax = 1) + else: + #ax.imshow(skimage.transform.resize(im.astype(np.uint8), (1024,1024))) + ax.imshow(im.astype(np.uint8)) + prob_im_list += [prob_sample] + prob_px_list += [(np.array(im_all_sick[:len(dir_list)])).ravel()] + plt.suptitle('Probability control of sample '+str(directory)+', avg:' + str(round(prob_sample.mean(),2))+ ' std:' + str(round(prob_sample.std(),2))) + plt.savefig(dir_probs + disease +version+'/' + 'probImSick_'+str(directory)+'_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) + plt.show() + nr_list += len(dir_list) + + +#Pixel-wise cluster assignments for sick patients +if True: #sc_fac == 1: + assignment_im_sick = [] + for assignment in assignment_list_sick: + cluster_assignment = assignment.astype(int) + assignment_im_sick += [cluster_assignment.reshape(r,c)] + + nr_list = 0 + for directory in dir_list_sick: + in_dir = dir_sick + directory + '/' + dir_list = [dI for dI in os.listdir(in_dir) if dI[0:len(base_name)]==base_name] + print(dir_list) + im_all_sick = [] + im_all_sick += assignment_im_sick[nr_list:nr_list+len(dir_list)] + im_all_sick += max_im_list_sick[nr_list:nr_list+len(dir_list)] + fig, axs = plt.subplots(2,len(dir_list), figsize = (30,10), sharex=True, sharey=True) + for ax, im in zip(axs.ravel(), im_all_sick): + if ( im.ndim == 2 ): + #ax.imshow(skimage.transform.resize(im, (1024,1024), order=0, preserve_range = True),cmap=plt.cm.gist_ncar,vmin = 0,vmax = nr_clusters-1) + ax.imshow(im,cmap=plt.cm.gist_ncar,vmin = 0,vmax = nr_clusters-1) + else: + #ax.imshow(skimage.transform.resize(im.astype(np.uint8), (1024,1024))) + ax.imshow(im.astype(np.uint8)) + plt.suptitle('Sample '+str(directory)) + plt.savefig(dir_probs + disease + '/' + 'assignmentImSick_'+str(directory)+'_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) + plt.show() + nr_list += len(dir_list) + +endTime_probs = datetime.now() - startTime_probs +print(endTime_probs) + +#%% Print computational time +endTime = datetime.now() - startTime +print(endTime) + +#%% Box plots +fig, ax = plt.subplots(figsize = (15,5)) +ax.set_title('Probability of scans capturing healthy tissue') +array_probs = np.squeeze(np.asarray(prob_im_list)) +bp_healthy = ax.boxplot((array_probs[:12,...]).T, positions = range(12), patch_artist = True) +bp_sick = ax.boxplot(array_probs[12:,...].T, positions = range(13,25), patch_artist = True) + +for element in ['boxes', 'whiskers', 'fliers', 'means', 'medians', 'caps']: + plt.setp(bp_healthy[element], color='black') + +for patch in bp_healthy['boxes']: + patch.set(facecolor='red') + +for element in ['boxes', 'whiskers', 'fliers', 'means', 'medians', 'caps']: + plt.setp(bp_sick[element], color='black') + +for patch in bp_sick['boxes']: + patch.set(facecolor='blue') + +ax.set_xticklabels([i[-4:] for i in dir_list_control]+[i[-4:] for i in dir_list_sick]) +ax.set_ylim(0.35,0.65) +ax.set_ylabel('Probability health') +#ax.set_ylim(0,1) + +ax.tick_params( + axis='both', # changes apply to the x-axis + which='both', # both major and minor ticks are affected + #labelleft = False, # ticks along the top edge are off + #labelbottom=False + ) + +if disease == 'emphysema': + #overlay FEV1 values + ax2 = ax.twinx() + ax2.set_ylabel('FEV1') + FEV1 = np.array([99.00, 98.90,np.nan,116.00,118.70,99.70, 26, 27.20, 31.00, 25.00, 22.00, 33.70]) + patients = np.concatenate((np.arange(0.5, 12.5, 2), np.arange(13.5,25.5,2))) + ax2.plot(patients,FEV1,'kD') + ax2.set_ylim(-20,160) + +plt.savefig(dir_probs + 'boxPlots'+version+'_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) + +if disease == 'sarcoidosis': + #overlay FEV1 values + ax2 = ax.twinx() + ax2.set_ylabel('FVC') + FVC = np.array([99.00, 115.60,np.nan,121.00,118.70,119.50,60.20,54.00,75.30,27.00,48.50,40.30]) + patients = np.concatenate((np.arange(0.5, 12.5, 2), np.arange(13.5,25.5,2))) + ax2.plot(patients,FVC,'kD') + ax2.set_ylim(-20,160) + +plt.savefig(dir_probs + 'boxPlots'+version+'_%dclusters_%ddownscale_%dpatchsize.png'%(nr_clusters,1/sc_fac,patch_size), dpi=300) + +#%% Plot lung functions from the clinic + +# cmap = plt.cm.bwr +# it = 0 +# for im in prob_im_control: +# im_tmp = skimage.transform.resize(im,(1024,1024),order=3) +# norm = plt.Normalize(vmin=0, vmax=1) +# im_out = cmap(norm(im_tmp)) +# out_name = '../results/prob_map/control_%02d.png' % int(it) +# skimage.io.imsave(out_name, (255*im_out).astype(np.uint8)) +# it += 1 + +# it = 0 +# for im in prob_im_sick: +# im_tmp = skimage.transform.resize(im,(1024,1024),order=3) +# norm = plt.Normalize(vmin=0, vmax=1) +# im_out = cmap(norm(im_tmp)) +# out_name = '../results/prob_map/sick_%02d.png' % int(it) +# skimage.io.imsave(out_name, (255*im_out).astype(np.uint8)) +# it += 1 + +# #%% +# max_im_list_control = ma.compute_max_im_list(dir_control, '.png', 'frame', 1) +# max_im_list_sick = ma.compute_max_im_list(dir_sick, '.png', 'frame', 1) +# #%% +# it = 0 +# for im in max_im_list_control: +# out_name = '../results/prob_map/control_max_im_%02d.png' % int(it) +# skimage.io.imsave(out_name, (im).astype(np.uint8)) +# it += 1 + +# it = 0 +# for im in max_im_list_sick: +# out_name = '../results/prob_map/sick_max_im_%02d.png' % int(it) +# skimage.io.imsave(out_name, (im).astype(np.uint8)) +# it += 1 + + + + + + diff --git a/code/lung_lif_to_png.py b/code/lung_lif_to_png.py new file mode 100644 index 0000000000000000000000000000000000000000..76275fd304d966b5e68ebf9926cab492566f0b33 --- /dev/null +++ b/code/lung_lif_to_png.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Transform lif files to png for easy access and visualization +""" + + +import numpy as np +from readlif.reader import LifFile +import skimage.io +import os +import microscopy_analysis as ma + +#%% +# folder containing lif files +in_dir = '../../data101220/'# data_October2020/sarcoidosis/' #'../../5April2020_control/' #healthy #'../../24December19_emphysema/' #sick +# output folder +out_dir = '../data_corrected_bis/sarcoidosis/' #control/ #/emphysema/ +file_names = [f for f in os.listdir(in_dir) if f.endswith('lif')] +file_names = ma.sort_dirlist_patient(file_names) + +# make output directory if it does not exist +if not os.path.exists(out_dir): + os.mkdir(out_dir) + +# run through all lif files and save as png for easy access and visualization +for file in file_names: + lif_in = LifFile(in_dir + file) + n_im = lif_in.num_images + dir_name_out = (out_dir + (file.split('.')[0]).replace(' ', '_') + '/') + if not os.path.exists(dir_name_out): + os.mkdir(dir_name_out) + for n in range(0,n_im): + im_in = lif_in.get_image(n) + r,c,n_frames = im_in.dims[:3] + n_ch = im_in.channels + if n_ch == 3: + dir_name_out_im = dir_name_out + ('/frame_%02d/' % n) + if not os.path.exists(dir_name_out_im): + os.mkdir(dir_name_out_im) + im_out = np.zeros((r,c,n_ch), dtype='uint8') + for i in range(0,n_frames): + for j in range(0,n_ch): + im_out[:,:,j] = np.array(im_in.get_frame(i,0,j)).transpose() + skimage.io.imsave(dir_name_out_im + '/image_%02d.png' % i, im_out) + diff --git a/code/microscopy_analysis.py b/code/microscopy_analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..90acc0bb5aeb05aea76b603e5e43a95c5105fecd --- /dev/null +++ b/code/microscopy_analysis.py @@ -0,0 +1,406 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Helping functions for analysis of lif microscopy images +""" + +import os +import skimage.io as io +import skimage.transform +import numpy as np +import local_features as lf + +import matplotlib.pyplot as plt + + +#%% General functions + +# sort list of directories from selected string position +def sort_strings_customStart(strings, string_start_pos = 0): + """Sorts list of strings + + Input(s) + strings: list of strings + string_start_pos (default 0): defines the section of the string on which the ordering is based. + + Output(s) + strings_sortered: list of sorted strings + + Author: Monica Jane Emerson (monj@dtu.dk)""" + + strings_roi = [i[string_start_pos:] for i in strings] + #key indicates that the indices (k) are sorted according to subjects_list[k] + ind = np.array(sorted(range(len(strings_roi)), key=lambda k: strings_roi[k])) + strings_sorted = [strings[i] for i in ind] + + return strings_sorted + +#Provide the contents of a directory sorted +def listdir_custom(directory, string_start_pos = 0, dir_flag = False, base_name = False): + 'monj@dtu.dk' + + if dir_flag: + if base_name: + list_dirs = [dI for dI in os.listdir(directory) if (os.path.isdir(os.path.join(directory,dI)) & dI[0:len(base_name)]==base_name)] + else: + list_dirs = [dI for dI in os.listdir(directory) if os.path.isdir(os.path.join(directory,dI))] + else: + if base_name: + list_dirs = [dI for dI in os.listdir(directory)if dI[0:len(base_name)]==base_name] + else: + list_dirs = [dI for dI in os.listdir(directory)] + + listdirs_sorted = sort_strings_customStart(list_dirs,string_start_pos) + + return listdirs_sorted + +def flatten_list(list): + 'monj@dtu.dk' + + list_flat = [item for sublist in list for item in sublist] + + return list_flat + +#%% IO functions + +#make directory, and subdirectories within, if they don't exist +def make_output_dirs(directory,subdirectories = False): + 'monj@dtu.dk' + + os.makedirs(directory, exist_ok = True) + + if subdirectories: + for subdir in subdirectories: + os.makedirs(directory + subdir + '/', exist_ok = True) + + # def make_output_dirs(directory,subdirectories): + # 'monj@dtu.dk' + + # os.makedirs(directory, exist_ok = True) + # os.makedirs(directory + 'control/', exist_ok = True) + + # for disease in diseases: + # os.makedirs(directory + disease + '/', exist_ok = True) + + +#Reads images starting with base_name from the subdirectories of the input directory +def read_max_imgs(dir_condition, base_name, sc_fac = 1, colour_mode = 'colour'): + 'monj@dtu.dk' + + sample_list = listdir_custom(dir_condition, string_start_pos = -4, dir_flag = True) + #print(sample_list) + + max_img_list = [] + for sample in sample_list: + sample_dir = dir_condition + '/' + sample + '/' + frame_list = listdir_custom(sample_dir, base_name = base_name) + #print(frame_list) + + frame_img_list = [] + for frame in frame_list: + frame_path = sample_dir + frame + + #Option to load in bnw or colour + if colour_mode == 'bnw': + img = io.imread(frame_path, as_gray = True).astype('uint8') + frame_img_list += [skimage.transform.rescale(img, sc_fac, preserve_range = True).astype('uint8')] + else: + img = io.imread(frame_path).astype('uint8') + #print(img.dtype) + frame_img_list += [skimage.transform.rescale(img, sc_fac, preserve_range = True, multichannel=True).astype('uint8')] + + max_img_list += [frame_img_list] + #print(frame_img_list[0].dtype) + + return max_img_list + +#%% Functions for intensity inspection and image preprocessing + +# computes the maximum projection image from a directory of images +def get_max_img(in_dir, ext = '.png', n_img = 0): + file_names = [f for f in os.listdir(in_dir) if f.endswith(ext)] + file_names.sort() + if ( n_img < 1 ): + n_img = len(file_names) + img_in = io.imread(in_dir + file_names[0]) + for i in range(1,n_img): + img_in = np.maximum(img_in, io.imread(in_dir + file_names[i])) + return img_in + +# computes a list of maximum projection images from a list of directories +def compute_max_img_list(in_dir, ext, base_name, dir_out = ''): + """by abda + Modified by monj""" + + dir_list = [dI for dI in os.listdir(in_dir) if (os.path.isdir(os.path.join(in_dir,dI)) and dI[0:len(base_name)]==base_name)] + dir_list.sort() + + max_img_list = [] + for d in dir_list: + image_dir_in = in_dir + d + '/' + max_img = get_max_img(image_dir_in, ext) + if dir_out!='': + os.makedirs(dir_out, exist_ok = True) + io.imsave(dir_out + d + '.png', max_img.astype('uint8')) + + max_img_list += [max_img] + + return max_img_list + +# One more level up from compute_max_img_list. Computes a list of lists of maximum +#projection images from a list of directories, each containing a list of directories +#with the set of images that should be combined into a maximum projection. +def comp_max_imgs(dir_condition, base_name, dir_out = ''): + 'monj@dtu.dk' + + dir_list_condition = listdir_custom(dir_condition, string_start_pos = -4, dir_flag = True) + + max_img_list_condition = [] + for directory in dir_list_condition: + dir_in = dir_condition + '/' + directory + '/' + + if dir_out!= '': + subdir_out = dir_in.replace(dir_condition,dir_out) + else: + subdir_out = '' + + max_img_condition = compute_max_img_list(dir_in, '.png', base_name, subdir_out) + max_img_list_condition+= [max_img_condition] + + return max_img_list_condition + + +#TO DO: Eliminate histogram part from this function +def comp_std_perChannel(max_im_list, th_int, dir_condition): + 'monj@dtu.dk' + + sample_list = listdir_custom(dir_condition, string_start_pos = -4, dir_flag = True) + + std_list = [[],[],[]] + for sample, frame_img_list in zip(sample_list, max_im_list): + + for ind,img in enumerate(frame_img_list): + h, w, channels = img.shape + + for channel in range(0,channels): + intensities = img[:,:,channel].ravel() + std_list[channel] += [(intensities[intensities>th_int]).std()] + + return std_list + +def intensity_spread_normalisation(img_list, th_int, mean_std, dir_condition, base_name, dir_results): + 'monj@dtu.dk' + sample_list = listdir_custom(dir_condition, string_start_pos = -4, dir_flag = True) + #print(sample_list) + + img_corr_list = [] + for sample, sample_imgs in zip(sample_list, img_list): + sample_dir = dir_condition + '/' + sample + '/' + #print(sample_dir) + frame_list = listdir_custom(sample_dir, base_name = base_name) + #print(frame_list) + frame_img_corr_list = [] + for frame,img in zip(frame_list,sample_imgs): + h, w, channels = img.shape + + #img_corr = np.empty(img.shape,dtype = 'uint8') + img_corr = img + for channel in range(0,channels): + img_channel = img_corr[:,:,channel] + img_channel[img_channel>th_int] = img_channel[img_channel>th_int]*(mean_std/img_channel.std()) + img_corr[:,:,channel] = img_channel + + frame_img_corr_list += [img_corr] + os.makedirs(dir_results + '/' + sample, exist_ok = True) + io.imsave(dir_results + '/' + sample + '/' + frame +'.png', img_corr) + + img_corr_list += [frame_img_corr_list] + + return img_corr_list + +#def plotHist_perChannel_imgset +def plotHist_perChannel_imgset_list(max_img_list, dir_condition, dir_results = '', name_tag = 'original'): + 'monj@dtu.dk' + + sample_list = listdir_custom(dir_condition, string_start_pos = -4, dir_flag = True) + + for sample, frame_img_list in zip(sample_list, max_img_list): + fig, axs = plt.subplots(4,len(frame_img_list), figsize = (len(frame_img_list)*2,4*2)) + plt.suptitle('Sample '+sample[-4:] + ', acq. date: ' + sample[:6]) + + for ind,img in enumerate(frame_img_list): + h, w, channels = img.shape + axs[0][ind].imshow(img) + + for channel in range(0,channels): + intensities = img[:,:,channel].ravel() + axs[channel+1][ind].hist(intensities, bins = 50) + axs[channel+1][ind].set_aspect(1.0/axs[channel+1][ind].get_data_ratio()) + + if dir_results!= '': + plt.savefig(dir_results + '/' + sample[-4:] + '_' + name_tag + '_perChannelHistograms.png', dpi = 300) + plt.close(fig) + +#def compare_imgpairs +def compare_imgpairs_list(list1_imgsets, list2_imgsets, dir_condition, dir_results = ''): + 'monj@dtu.dk' + + sample_list = listdir_custom(dir_condition, string_start_pos = -4, dir_flag = True) + + for imgset1, imgset2, sample in zip(list1_imgsets, list2_imgsets, sample_list): + fig, axs = plt.subplots(2,len(imgset1), figsize = (2*len(imgset1),2*2),sharex=True,sharey=True) + plt.suptitle('Sample '+sample[-4:] + ', acq. date: ' + sample[:6]) + + for ind, img1, img2 in zip(range(0,len(imgset1)), imgset1, imgset2): + axs[0][ind].imshow(img1) + axs[1][ind].imshow(img2) + + if dir_results!= '': + plt.savefig(dir_results + '/' + sample[-4:] + '_originalVScorrected.png', dpi=300) + plt.close(fig) + +#%%Functions for Feature analysis + +# computes the max projection image and the features into a list +def compute_max_img_feat(in_dir, ext, base_name, sigma, sc_fac, save = False, abs_intensity = True): + dir_list = [dI for dI in os.listdir(in_dir) if (os.path.isdir(os.path.join(in_dir,dI)) and dI[0:len(base_name)]==base_name)] + dir_list.sort() + max_img_list = [] + for d in dir_list: + image_dir = in_dir + d + '/' + max_img = get_max_img(image_dir, ext) + + if save: + dir_name_out_img = in_dir.replace('data','maxProjImages') + os.makedirs(dir_name_out_img, exist_ok = True) + + io.imsave( dir_name_out_img + d + '.png', max_img.astype('uint8')) + + max_img_list += [skimage.transform.rescale(max_img, sc_fac, multichannel=True)] + + feat_list = [] + for max_img in max_img_list: + r,c = max_img.shape[:2] + feat = np.zeros((r*c,45)) + for i in range(0,3): + feat_tmp = lf.get_gauss_feat_im(max_img[:,:,i], sigma) + feat[:,i*15:(i+1)*15] = feat_tmp.reshape(-1,feat_tmp.shape[2]) + if not(abs_intensity): + feat_list += [np.concatenate((feat[:,1:15],feat[:,16:30],feat[:,31:45]),axis = 1)] + else: + feat_list += [feat] + return max_img_list, feat_list + + +# computes a histogram of features from a kmeans object +def compute_assignment_hist(feat_list, kmeans, background_feat = None): + assignment_list = [] + for feat in feat_list: + assignment_list += [kmeans.predict(feat)] + edges = np.arange(kmeans.n_clusters+1)-0.5 # histogram edges halfway between integers + hist = np.zeros(kmeans.n_clusters) + for a in assignment_list: + hist += np.histogram(a,bins=edges)[0] + + sum_hist = np.sum(hist) + hist/= sum_hist + + if background_feat is not None: + assignment_back = kmeans.predict(background_feat) + hist_back = np.histogram(assignment_back,bins=edges)[0] + return hist, assignment_list, hist_back, assignment_back + else: + return hist, assignment_list + + +# image to array of patches +def im2col(A, BSZ, stepsize=1, norm=False): + # Parameters + m,n = A.shape + s0, s1 = A.strides + nrows = m-BSZ[0]+1 + ncols = n-BSZ[1]+1 + shp = BSZ[0],BSZ[1],nrows,ncols + strd = s0,s1,s0,s1 + + out_view = np.lib.stride_tricks.as_strided(A, shape=shp, strides=strd) + out_view_shaped = out_view.reshape(BSZ[0]*BSZ[1],-1)[:,::stepsize] + if norm: + one_norm = np.sum(out_view_shaped,axis=0) + ind_zero_norm = np.where(one_norm !=0) + out_view_shaped[:,ind_zero_norm] = 255*out_view_shaped[:,ind_zero_norm]/one_norm[ind_zero_norm] + return out_view_shaped + +# nd image to array of patches +def ndim2col(A, BSZ, stepsize=1, norm=False): + if(A.ndim == 2): + return im2col(A, BSZ, stepsize, norm) + else: + r,c,l = A.shape + patches = np.zeros((l*BSZ[0]*BSZ[1],(r-BSZ[0]+1)*(c-BSZ[1]+1))) + for i in range(l): + patches[i*BSZ[0]*BSZ[1]:(i+1)*BSZ[0]*BSZ[1],:] = im2col(A[:,:,i],BSZ,stepsize,norm) + return patches + +# nd image to array of patches with mirror padding along boundaries +def ndim2col_pad(A, BSZ, stepsize=1, norm=False): + r,c = A.shape[:2] + if (A.ndim == 2): + l = 1 + else: + l = A.shape[2] + tmp = np.zeros((r+BSZ[0]-1,c+BSZ[1]-1,l)) + fhr = int(np.floor(BSZ[0]/2)) + fhc = int(np.floor(BSZ[1]/2)) + thr = int(BSZ[0]-fhr-1) + thc = int(BSZ[1]-fhc-1) + + tmp[fhr:fhr+r,fhc:fhc+c,:] = A.reshape((r,c,l)) + tmp[:fhr,:] = np.flip(tmp[fhr:fhr*2,:], axis=0) + tmp[fhr+r:,:] = np.flip(tmp[r:r+thr,:], axis=0) + tmp[:,:fhc] = np.flip(tmp[:,fhc:fhc*2], axis=1) + tmp[:,fhc+c:] = np.flip(tmp[:,c:c+thc], axis=1) + tmp = np.squeeze(tmp) + return ndim2col(tmp,BSZ,stepsize,norm) + + + +# def plot_mapsAndimages(dir_condition, directory_list, map_img, max_img_list, base_name = 'frame', r = 1024, c = 1024): +# nr_list = 0 +# for directory in directory_list: +# in_dir = dir_condition + directory + '/' +# dir_list = [dI for dI in os.listdir(in_dir) if (os.path.isdir(os.path.join(in_dir ,dI)) and dI[0:len(base_name)]==base_name)] +# print(dir_list) +# img_all_control = [] +# img_all_control += map_img[nr_list:nr_list+len(dir_list)] +# img_all_control += max_img_list[nr_list:nr_list+len(dir_list)] +# fig, axs = plt.subplots(2,len(dir_list), sharex=True, sharey=True) +# nr = 0 +# prob_sample = np.zeros((len(dir_list),1)) +# for ax, img in zip(axs.ravel(), img_all_control): +# print(nr) +# if nr<len(dir_list): +# prob_img = sum(sum(img))/(r*c) +# ax.set_title('Pc '+str(round(prob_img,2))) +# prob_sample[nr] = prob_img +# #print(prob_sample) +# nr += 1 +# if ( img.ndimg == 2 ): +# ax.imshow(skimage.transform.resize(img, (1024,1024)), cmap=plt.cm.bwr, vmin = 0, vmax = 1) +# else: +# ax.imshow(skimage.transform.resize(img.astype(np.uint8), (1024,1024))) +# plt.suptitle('Probability control of sample '+str(directory)+', avg:' + str(round(prob_sample.mean(),2))+ ' std:' + str(round(prob_sample.std(),2))) +# plt.savefig(dir_probs + '/control/' + '/probImControl_'+str(directory)+'_%dclusters_%dsigma%ddownscale_absInt%s.png'%(nr_clusters,sigma,1/sc_fac,abs_intensity), dpi=1000) +# plt.show() +# nr_list += len(dir_list) + + + + + + + + + + +