Skip to content
Snippets Groups Projects
image_preprocessing.py 5.61 KiB
Newer Older
  • Learn to ignore specific revisions
  • monj's avatar
    monj committed
    #%% #!/usr/bin/env python3
    # -*- coding: utf-8 -*-
    """
    Preprocessing of the images
    
    1. Compute maximum projections.
    2. Explore the intensities of the maximum projection images.
    3. (optional) Correct the intensities. Normalisation across channels and images.
    Equalise the standard deviation for intensity values that contain structures.
    The air in the backround is removed with a manual threshold which is constant across
    images and channels.
    
    Created on Wed Oct  7 10:34:52 2020
    
    @author: monj
    """
    import os
    import microscopy_analysis as ma
    import matplotlib.pyplot as plt
    from datetime import datetime
    import numpy as np
    import skimage.io as io
    import copy
    
    # Turn interactive plotting off
    plt.ioff()
    
    plt.close('all')
    
    start_time_global = datetime.now()
    
    #%% I/O directories
    
    diseases = ['emphysema','diseased', 'sarcoidosis']
    conditions = diseases[:]
    conditions.insert(0,'control')
    
    version = 'corrected_bis' #data identifier: My versions were called: blank, corrected or corrected_bis
    preprocessing = 'preprocessed_ignback/'
    
    # input directories - images are stored in folders starting with the name 'frame'
    dir_in = '../data_' + version+'/'
    base_name = 'frame'
    
    #output directories: max projections and intensity inspection
    dir_maxProj = '../maxProjImages_'+version+'/'
    
    dir_maxProj_raw = dir_maxProj + 'raw/'
    ma.make_output_dirs(dir_maxProj_raw,conditions)
    
    dir_maxProj_processed = dir_maxProj + preprocessing #rerun just to make sure not to muck up the results
    ma.make_output_dirs(dir_maxProj_processed,conditions)
        
    dir_results_intinspect = dir_maxProj + preprocessing + 'intensity_inspection/' #rerun just to make sure not to muck up the results
    ma.make_output_dirs(dir_results_intinspect,conditions)
    
    #%% Get maximum projection images
    
    if len(os.listdir(dir_maxProj_raw + 'control/'))<2:
        #Compute maximum projection images (runtime ~= 135 s)
        print('Computing maximum projection images')
        startTime = datetime.now()
        max_img_list = []
        for condition in conditions:
            max_img_list += [ma.comp_max_imgs(dir_in + condition, base_name, dir_maxProj_raw + condition)]
        endTime = datetime.now() - startTime
        print(endTime)
    else:
        #Read maximum projection images (runtime ~= 20 s )
        print('Reading maximum projection images')
        startTime = datetime.now()
        max_img_list = []
        for condition in conditions:
            max_img_list += [ma.read_max_imgs(dir_maxProj_raw + condition, base_name)]
        endTime = datetime.now() - startTime
        print(endTime)
    
    #%% Plot histograms of raw images
    
    #Takes approx. 29s per condition
    for condition, max_img_list_condition in zip(conditions, max_img_list):
        ma.plotHist_perChannel_imgset_list(max_img_list_condition, dir_in + condition, dir_results_intinspect + condition, name_tag = 'original')
    
    #%% Measure the spread of intensities in each channel
    """The spread of intensities is quantified by the standard deviation of the 
    intensities that are over a threshold. The threshold value is set manually 
    and it defines the minimum intensity value that is associated with the
    presence of protein. Image regions with intensity values under the threshold
    are considered as air, i.e. absence of protein."""
    
    #Protein presence/absence (air) intensity threshold
    th_int = 5 #could it be set automatically based on the data?
    
    #Compute stds per image channel and,
    #for each sample, make a figure with the intensity histograms per image channel.
    std_list = []
    for condition, max_img_list_condition in zip(conditions, max_img_list):
        std_list += [ma.comp_std_perChannel(max_img_list_condition, th_int, dir_in + condition)] 
    
    # histogram of the intensity-spread per image channel 
    fig, axs = plt.subplots(len(conditions),4, figsize = (4*2,len(conditions)*2), sharex = True, sharey = True)
    plt.suptitle('Distribution of intensity spread across images\n Rows: ' +  ', '.join(conditions))
    for ind_c,std_list_condition in enumerate(std_list):  
        for ind,channel_list in enumerate(std_list_condition):
            axs[0][ind+1].title.set_text('Channel ' + str(ind+1))
            axs[ind_c][ind+1].hist(channel_list, bins = 50, density = True)
      
    # histogram of the intensity-spread per image  
    axs[0][0].title.set_text('All channels')
    std_list_flat = []
    for ind_c,std_list_condition in enumerate(std_list):
        std_list_condition_flat = ma.flatten_list(std_list_condition) 
        axs[ind_c][0].hist(std_list_condition_flat, bins = 50, density = True)    
        std_list_flat += std_list_condition_flat
    plt.show()    
        
    #Average intesity-spread across al image channels
    mean_std = sum(std_list)/len(std_list)
    print('The average spread of an image channel is ' + str(round(mean_std)))
     
    #%% Correct intensities
    #Consider not using th_int when correcting, only when computing the spread
    
    #Takes 34s per conditoin
    max_img_corr_list = []
    for condition, max_img_list_condition in zip(conditions, max_img_list):
        max_img_corr_list += [ma.intensity_spread_normalisation(copy.deepcopy(max_img_list_condition), th_int, mean_std, dir_in + condition, base_name, dir_maxProj_processed + condition)]
    
    #%% Plot histograms of corrected images
    
    #Takes 29s
    for condition, max_img_corr_list_condition in zip(conditions, max_img_corr_list):
       ma.plotHist_perChannel_imgset_list(max_img_corr_list_condition, dir_in + condition, dir_results_intinspect + condition, name_tag = 'corrected')
    
    #%% Visualise raw vs. corrected images
    
    for max_img_list_condition, max_img_corr_list_condition, condition in zip(max_img_list, max_img_corr_list, conditions):
        ma.compare_imgpairs_list(max_img_list_condition, max_img_corr_list_condition , dir_in + condition, dir_results_intinspect + condition)  
    
    #%%% Print total computational time
    
    end_time_global = datetime.now() - startTime
    print(end_time_global)