diff --git a/FeatureEstimation.py b/FeatureEstimation.py
new file mode 100644
index 0000000000000000000000000000000000000000..7ea34d005991cd0361cf91f9b51c4c9865d8a165
--- /dev/null
+++ b/FeatureEstimation.py
@@ -0,0 +1,2733 @@
+# -*- coding: utf-8 -*-
+"""
+Updated Oct 18 2022
+
+@author: Qianliang Li (glia@dtu.dk)
+
+This script contains the code to estimate the following EEG features:
+    1. Power Spectral Density
+    2. Frontal Theta/Beta Ratio
+    3. Asymmetry
+    4. Peak Alpha Frequency
+    5. 1/f Exponents
+    6. Microstates
+    7. Long-Range Temporal Correlation (DFA Exponent)
+Source localization and functional connectivity
+    8. Imaginary part of Coherence
+    9. Weighted Phase Lag Index
+    10. (Orthogonalized) Power Envelope Correlations
+    11. Granger Causality
+
+It should be run after Preprocessing.py
+
+All features are saved in pandas DataFrame format for the machine learning
+pipeline
+
+Note that the code has not been changed to fit the demonstration dataset,
+thus just running it might introduce some errors.
+The code is provided to show how we performed the feature estimation
+and if you are adapting the code, you should check if it fits your dataset
+e.g. the questionnaire data, sensors and source parcellation etc
+
+The script was written in Spyder. The outline panel can be used to navigate
+the different parts easier (Default shortcut: Ctrl + Shift + O)
+"""
+
+# Set working directory
+import os
+wkdir = "/home/glia/EEG"
+os.chdir(wkdir)
+
+# Load all libraries from the Preamble
+from Preamble import *
+
+# %% Load preprocessed epochs and questionnaire data
+load_path = "./PreprocessedData"
+
+# Get filenames
+files = []
+for r, d, f in os.walk(load_path):
+    for file in f:
+        if ".fif" in file:
+            files.append(os.path.join(r, file))
+files.sort()
+
+# Subject IDs
+Subject_id = [0] * len(files)
+for i in range(len(files)):
+    temp = files[i].split("\\")
+    temp = temp[-1].split("_")
+    Subject_id[i] = int(temp[0])
+
+n_subjects = len(Subject_id)
+
+# Load Final epochs
+final_epochs = [0]*n_subjects
+for n in range(n_subjects):
+    final_epochs[n] = mne.read_epochs(fname = os.path.join(files[n]),
+                    verbose=0)
+
+# Load dropped epochs - used for gap idx in microstates
+bad_subjects = [12345] # list with subjects that were excluded due to too many dropped epochs/chs
+Drop_epochs_df = pd.read_pickle("./Preprocessing/dropped_epochs.pkl")
+
+good_subject_df_idx = [not i in bad_subjects for i in Drop_epochs_df["Subject_ID"]]
+Drop_epochs_df = Drop_epochs_df.loc[good_subject_df_idx,:]
+Drop_epochs_df = Drop_epochs_df.sort_values(by=["Subject_ID"]).reset_index(drop=True)
+
+### Load questionnaire data
+# For the purposes of this demonstration I will make a dummy dataframe
+# The original code imported csv files with questionnaire data and group status
+final_qdf = pd.DataFrame({"Subject_ID":Subject_id,
+                          "Age":[23,26],
+                          "Gender":[0,0],
+                          "Group_status":[0,1],
+                          "PCL_total":[33,56],
+                          "Q1":[1.2, 2.3],
+                          "Q2":[1.7, 1.5],
+                          "Qn":[2.1,1.0]})
+
+# Define cases as >= 44 total PCL
+# Type: numpy array with subject id
+cases = np.array(final_qdf["Subject_ID"][final_qdf["PCL_total"]>=44])
+n_groups = 2
+Groups = ["CTRL", "PTSD"]
+
+# Define folder for saving features
+Feature_savepath = "./Features/"
+Stat_savepath = "./Statistics/"
+Model_savepath = "./Model/"
+
+# %% Power spectrum features
+Freq_Bands = {"delta": [1.25, 4.0],
+              "theta": [4.0, 8.0],
+              "alpha": [8.0, 13.0],
+              "beta": [13.0, 30.0],
+              "gamma": [30.0, 49.0]}
+ch_names = final_epochs[0].info["ch_names"]
+n_channels = final_epochs[0].info["nchan"]
+
+# Pre-allocate memory
+power_bands = [0]*len(final_epochs)
+
+def power_band_estimation(n):
+    # Get index for eyes open and eyes closed
+    EC_index = final_epochs[n].events[:,2] == 1
+    EO_index = final_epochs[n].events[:,2] == 2
+    
+    # Calculate the power spectral density
+    psds, freqs = psd_multitaper(final_epochs[n], fmin = 1, fmax = 50) # output (epochs, channels, freqs)
+    
+    temp_power_band = []
+    
+    for fmin, fmax in Freq_Bands.values():
+        # Calculate the power each frequency band
+        psds_band = psds[:, :, (freqs >= fmin) & (freqs < fmax)].sum(axis=-1)
+        # Calculate the mean for each eye status
+        psds_band_eye = np.array([psds_band[EC_index,:].mean(axis=0),
+                                      psds_band[EO_index,:].mean(axis=0)])
+        # Append for each freq band
+        temp_power_band.append(psds_band_eye)
+        # Output: List with the 5 bands, and each element is a 2D array with eye status as 1st dimension and channels as 2nd dimension
+    
+    # The list is reshaped and absolute and relative power are calculated
+    abs_power_band = np.reshape(temp_power_band, (5, 2, n_channels))
+    abs_power_band = 10.*np.log10(abs_power_band) # Convert to decibel scale
+    
+    rel_power_band = np.reshape(temp_power_band, (5, 2, n_channels))
+    rel_power_band = rel_power_band/np.sum(rel_power_band, axis=0, keepdims=True)
+    # each eye condition and channel have been normalized to power in all 5 frequencies for that given eye condition and channel
+    
+    # Make one list in 1 dimension
+    abs_power_values = np.concatenate(np.concatenate(abs_power_band, axis=0), axis=0)
+    rel_power_values = np.concatenate(np.concatenate(rel_power_band, axis=0), axis=0)
+    ## Output: First the channels, then the eye status and finally the frequency bands are concatenated
+    ## E.g. element 26 is 3rd channel, eyes open, first frequency band
+    #assert abs_power_values[26] == abs_power_band[0,1,2]
+    #assert abs_power_values[47] == abs_power_band[0,1,23] # +21 channels to last
+    #assert abs_power_values[50] == abs_power_band[1,0,2] # once all channels have been changed then the freq is changed and eye status
+    
+    # Get result
+    res = np.concatenate([abs_power_values,rel_power_values],axis=0)
+    return n, res
+
+with concurrent.futures.ProcessPoolExecutor() as executor:
+    for n, result in executor.map(power_band_estimation, range(len(final_epochs))): # Function and arguments
+        power_bands[n] = result
+
+# Combine all data into one dataframe
+# First the columns are prepared
+n_subjects = len(Subject_id)
+
+# The group status (PTSD/CTRL) is made using the information about the cases
+Group_status = np.array(["CTRL"]*n_subjects)
+Group_status[np.array([i in cases for i in Subject_id])] = "PTSD"
+
+# A variable that codes the channels based on A/P localization is also made
+Frontal_chs = ["Fp1", "Fpz", "Fp2", "AFz", "Fz", "F3", "F4", "F7", "F8"]
+Central_chs = ["Cz", "C3", "C4", "T7", "T8", "FT7", "FC3", "FCz", "FC4", "FT8", "TP7", "CP3", "CPz", "CP4", "TP8"]
+Posterior_chs = ["Pz", "P3", "P4", "P7", "P8", "POz", "O1", "O2", "Oz"]
+
+Brain_region_labels = ["Frontal","Central","Posterior"]
+Brain_region = np.array(ch_names, dtype = "<U9")
+Brain_region[np.array([i in Frontal_chs for i in ch_names])] = Brain_region_labels[0]
+Brain_region[np.array([i in Central_chs for i in ch_names])] = Brain_region_labels[1]
+Brain_region[np.array([i in Posterior_chs for i in ch_names])] = Brain_region_labels[2]
+
+# A variable that codes the channels based on M/L localization
+Left_chs = ["Fp1", "F3", "F7", "FC3", "FT7", "C3", "T7", "CP3", "TP7", "P3", "P7", "O1"]
+Right_chs = ["Fp2", "F4", "F8", "FC4", "FT8", "C4", "T8", "CP4", "TP8", "P4", "P8", "O2"]
+Mid_chs = ["Fpz", "AFz", "Fz", "FCz", "Cz", "CPz", "Pz", "POz", "Oz"]
+
+Brain_side = np.array(ch_names, dtype = "<U5")
+Brain_side[np.array([i in Left_chs for i in ch_names])] = "Left"
+Brain_side[np.array([i in Right_chs for i in ch_names])] = "Right"
+Brain_side[np.array([i in Mid_chs for i in ch_names])] = "Mid"
+
+# Eye status is added
+eye_status = list(final_epochs[0].event_id.keys())
+n_eye_status = len(eye_status)
+
+# Frequency bands
+freq_bands_name = list(Freq_Bands.keys())
+n_freq_bands = len(freq_bands_name)
+
+# Quantification (Abs/Rel)
+quant_status = ["Absolute", "Relative"]
+n_quant_status = len(quant_status)
+
+# The dataframe is made by combining all the unlisted pds values
+# Each row correspond to a different channel. It is reset after all channel names have been used
+# Each eye status element is repeated n_channel times, before it is reset
+# Each freq_band element is repeated n_channel * n_eye_status times, before it is reset
+# Each quantification status element is repeated n_channel * n_eye_status * n_freq_bands times, before it is reset
+power_df = pd.DataFrame(data = {"Subject_ID": [ele for ele in Subject_id for i in range(n_eye_status*n_quant_status*n_freq_bands*n_channels)],
+                                "Group_status": [ele for ele in Group_status for i in range(n_eye_status*n_quant_status*n_freq_bands*n_channels)],
+                                "Channel": ch_names*(n_eye_status*n_quant_status*n_freq_bands*n_subjects),
+                                "Brain_region": list(Brain_region)*(n_eye_status*n_quant_status*n_freq_bands*n_subjects),
+                                "Brain_side": list(Brain_side)*(n_eye_status*n_quant_status*n_freq_bands*n_subjects),
+                                "Eye_status": [ele for ele in eye_status for i in range(n_channels)]*n_quant_status*n_freq_bands*n_subjects,
+                                "Freq_band": [ele for ele in freq_bands_name for i in range(n_channels*n_eye_status)]*n_quant_status*n_subjects,
+                                "Quant_status": [ele for ele in quant_status for i in range(n_channels*n_eye_status*n_freq_bands)]*n_subjects,
+                                "PSD": list(np.concatenate(power_bands, axis=0))
+                                })
+# Absolute power is in decibels (10*log10(power))
+
+# Fix Freq_band categorical order
+power_df["Freq_band"] = power_df["Freq_band"].astype("category").\
+            cat.reorder_categories(list(Freq_Bands.keys()), ordered=True)
+# Fix Brain_region categorical order
+power_df["Brain_region"] = power_df["Brain_region"].astype("category").\
+            cat.reorder_categories(Brain_region_labels, ordered=True)
+
+# Save the dataframe
+power_df.to_pickle(os.path.join(Feature_savepath,"Power_df.pkl"))
+
+
+# %% Theta-beta ratio
+# Frontal theta/beta ratio has been implicated in cognitive control of attention
+power_df = pd.read_pickle(os.path.join(Feature_savepath,"Power_df.pkl"))
+
+eye_status = list(final_epochs[0].event_id)
+n_eye_status = len(eye_status)
+
+# Subset frontal absolute power
+power_df_sub1 = power_df[(power_df["Quant_status"] == "Absolute")&
+                         (power_df["Brain_region"] == "Frontal")]
+
+# Calculate average frontal power
+frontal_theta_mean_subject = power_df_sub1[power_df_sub1["Freq_band"] == "theta"].\
+    groupby(["Subject_ID","Group_status","Eye_status"]).mean().reset_index()
+frontal_beta_mean_subject = power_df_sub1[power_df_sub1["Freq_band"] == "beta"].\
+    groupby(["Subject_ID","Group_status","Eye_status"]).mean().reset_index()
+
+# Convert from dB to raw power
+frontal_theta_mean_subject["PSD"] = 10**(frontal_theta_mean_subject["PSD"]/10)
+frontal_beta_mean_subject["PSD"] = 10**(frontal_beta_mean_subject["PSD"]/10)
+
+# Calculate mean for each group and take ratio for whole group
+# To confirm trend observed in PSD plots
+mean_group_f_theta = frontal_theta_mean_subject.iloc[:,1:].groupby(["Group_status","Eye_status"]).mean()
+mean_group_f_beta = frontal_beta_mean_subject.iloc[:,1:].groupby(["Group_status","Eye_status"]).mean()
+mean_group_f_theta_beta_ratio = mean_group_f_theta/mean_group_f_beta
+
+# Calculate ratio for each subject
+frontal_theta_beta_ratio = frontal_theta_mean_subject.copy()
+frontal_theta_beta_ratio["PSD"] = frontal_theta_mean_subject["PSD"]/frontal_beta_mean_subject["PSD"]
+
+# Take the natural log of ratio 
+frontal_theta_beta_ratio["PSD"] = np.log(frontal_theta_beta_ratio["PSD"])
+
+# Rename and save feature
+frontal_theta_beta_ratio.rename(columns={"PSD":"TBR"},inplace=True)
+# Add dummy variable for re-using plot code
+dummy_variable = ["Frontal Theta Beta Ratio"]*frontal_theta_beta_ratio.shape[0]
+frontal_theta_beta_ratio.insert(3, "Measurement", dummy_variable )
+
+frontal_theta_beta_ratio.to_pickle(os.path.join(Feature_savepath,"fTBR_df.pkl"))
+
+# %% Frequency bands asymmetry
+# Defined as ln(right) - ln(left)
+# Thus we should only work with the absolute values and undo the dB transformation
+# Here I avg over all areas. I.e. mean((ln(F4)-ln(F3),(ln(F8)-ln(F7),(ln(Fp2)-ln(Fp1))) for frontal
+ROI = ["Frontal", "Central", "Posterior"]
+qq = "Absolute" # only calculate asymmetry for absolute
+# Pre-allocate memory
+asymmetry = np.zeros(shape=(len(np.unique(power_df["Subject_ID"])),
+                             len(np.unique(power_df["Eye_status"])),
+                             len(list(Freq_Bands.keys())),
+                             len(ROI)))
+
+def calculate_asymmetry(i):
+    ii = np.unique(power_df["Subject_ID"])[i]
+    temp_asymmetry = np.zeros(shape=(len(np.unique(power_df["Eye_status"])),
+                             len(list(Freq_Bands.keys())),
+                             len(ROI)))
+    for e in range(len(np.unique(power_df["Eye_status"]))):
+        ee = np.unique(power_df["Eye_status"])[e]
+        for f in range(len(list(Freq_Bands.keys()))):
+            ff = list(Freq_Bands.keys())[f]
+            
+            # Get the specific part of the df
+            temp_power_df = power_df[(power_df["Quant_status"] == qq) &
+                                     (power_df["Eye_status"] == ee) &
+                                     (power_df["Subject_ID"] == ii) &
+                                     (power_df["Freq_band"] == ff)].copy()
+            
+            # Convert from dB to raw power
+            temp_power_df.loc[:,"PSD"] = np.array(10**(temp_power_df["PSD"]/10))
+            
+            # Calculate the power asymmetry
+            for r in range(len(ROI)):
+                rr = ROI[r]
+                temp_power_roi_df = temp_power_df[(temp_power_df["Brain_region"] == rr)&
+                                                  ~(temp_power_df["Brain_side"] == "Mid")]
+                # Sort using channel names to make sure F8-F7 and not F4-F7 etc.
+                temp_power_roi_df = temp_power_roi_df.sort_values("Channel").reset_index(drop=True)
+                # Get the log power
+                R_power = temp_power_roi_df[(temp_power_roi_df["Brain_side"] == "Right")]["PSD"]
+                ln_R_power = np.log(R_power) # get log power
+                L_power = temp_power_roi_df[(temp_power_roi_df["Brain_side"] == "Left")]["PSD"]
+                ln_L_power = np.log(L_power) # get log power
+                # Pairwise subtraction followed by averaging
+                asymmetry_value = np.mean(np.array(ln_R_power) - np.array(ln_L_power))
+                # Save it to the array
+                temp_asymmetry[e,f,r] = asymmetry_value
+    # Print progress
+    print("{} out of {} finished testing".format(i+1,n_subjects))
+    return i, temp_asymmetry
+
+with concurrent.futures.ProcessPoolExecutor() as executor:
+    for i, res in executor.map(calculate_asymmetry, range(len(np.unique(power_df["Subject_ID"])))): # Function and arguments
+        asymmetry[i,:,:,:] = res
+
+# Prepare conversion of array to df using flatten
+n_subjects = len(Subject_id)
+
+# The group status (PTSD/CTRL) is made using the information about the cases
+Group_status = np.array(["CTRL"]*n_subjects)
+Group_status[np.array([i in cases for i in Subject_id])] = "PTSD"
+
+# Eye status is added
+eye_status = list(final_epochs[0].event_id.keys())
+n_eye_status = len(eye_status)
+
+# Frequency bands
+freq_bands_name = list(Freq_Bands.keys())
+n_freq_bands = len(freq_bands_name)
+
+# ROIs
+n_ROI = len(ROI)
+
+# Make the dataframe                
+asymmetry_df = pd.DataFrame(data = {"Subject_ID": [ele for ele in Subject_id for i in range(n_eye_status*n_freq_bands*n_ROI)],
+                                     "Group_status": [ele for ele in Group_status for i in range(n_eye_status*n_freq_bands*n_ROI)],
+                                     "Eye_status": [ele for ele in eye_status for i in range(n_freq_bands*n_ROI)]*(n_subjects),
+                                     "Freq_band": [ele for ele in freq_bands_name for i in range(n_ROI)]*(n_subjects*n_eye_status),
+                                     "ROI": list(ROI)*(n_subjects*n_eye_status*n_freq_bands),
+                                     "Asymmetry_score": asymmetry.flatten(order="C")
+                                     })
+# Flatten with order=C means that it first goes through last axis,
+# then repeat along 2nd last axis, and then repeat along 3rd last etc
+
+# Asymmetry numpy to pandas conversion check
+random_point=321
+asymmetry_df.iloc[random_point]
+
+i = np.where(np.unique(power_df["Subject_ID"]) == asymmetry_df.iloc[random_point]["Subject_ID"])[0]
+e = np.where(np.unique(power_df["Eye_status"]) == asymmetry_df.iloc[random_point]["Eye_status"])[0]
+f = np.where(np.array(list(Freq_Bands.keys())) == asymmetry_df.iloc[random_point]["Freq_band"])[0]
+r = np.where(np.array(ROI) == asymmetry_df.iloc[random_point]["ROI"])[0]
+
+assert asymmetry[i,e,f,r] == asymmetry_df.iloc[random_point]["Asymmetry_score"]
+
+# Save the dataframe
+asymmetry_df.to_pickle(os.path.join(Feature_savepath,"asymmetry_df.pkl"))
+
+# %% Using FOOOF
+# Peak alpha frequency (PAF) and 1/f exponent (OOF)
+# Using the FOOOF algorithm (Fitting oscillations and one over f)
+# Published by Donoghue et al, 2020 in Nature Neuroscience
+# To start, FOOOF takes the freqs and power spectra as input
+n_channels = final_epochs[0].info["nchan"]
+ch_names = final_epochs[0].info["ch_names"]
+sfreq = final_epochs[0].info["sfreq"]
+Freq_Bands = {"delta": [1.25, 4.0],
+              "theta": [4.0, 8.0],
+              "alpha": [8.0, 13.0],
+              "beta": [13.0, 30.0],
+              "gamma": [30.0, 49.0]}
+n_freq_bands = len(Freq_Bands)
+
+# From visual inspection there seems to be problem if PSD is too steep at the start
+# To overcome this problem, we try multiple start freq
+OOF_r2_thres = 0.95 # a high threshold as we allow for overfitting
+PAF_r2_thres = 0.90 # a more lenient threshold for PAF, as it is usually still captured even if fit for 1/f is not perfect
+freq_start_it_range = [2,3,4,5,6]
+freq_end = 40 # Stop freq at 40Hz to not be influenced by the Notch Filter
+
+eye_status = list(final_epochs[0].event_id.keys())
+n_eye_status = len(eye_status)
+
+PAF_data = np.zeros((n_subjects,n_eye_status,n_channels,3)) # CF, power, band_width
+OOF_data = np.zeros((n_subjects,n_eye_status,n_channels,2)) # offset and exponent
+
+def FOOOF_estimation(i):
+    PAF_data0 = np.zeros((n_eye_status,n_channels,3)) # CF, power, band_width
+    OOF_data0 = np.zeros((n_eye_status,n_channels,2)) # offset and exponent
+    # Get Eye status
+    eye_idx = [final_epochs[i].events[:,2] == 1, final_epochs[i].events[:,2] == 2] # EC and EO
+    # Calculate the power spectral density
+    psd, freqs = psd_multitaper(final_epochs[i], fmin = 1, fmax = 50) # output (epochs, channels, freqs)
+    # Retrieve psds for the 2 conditions and calculate mean across epochs
+    psds = []
+    for e in range(n_eye_status):
+        # Get the epochs for specific eye condition
+        temp_psd = psd[eye_idx[e],:,:]
+        # Calculate the mean across epochs
+        temp_psd = np.mean(temp_psd, axis=0)
+        # Save
+        psds.append(temp_psd)
+    # Try multiple start freq
+    PAF_data00 = np.zeros((n_eye_status,n_channels,len(freq_start_it_range),3)) # CF, power, band_width
+    OOF_data00 = np.zeros((n_eye_status,n_channels,len(freq_start_it_range),2)) # offset and exponent
+    r2s00 = np.zeros((n_eye_status,n_channels,len(freq_start_it_range)))
+    for e in range(n_eye_status):
+        psds_avg = psds[e]
+        for f in range(len(freq_start_it_range)):
+            # Initiate FOOOF group for analysis of multiple PSD
+            fg = fooof.FOOOFGroup()
+            # Set the frequency range to fit the model
+            freq_range = [freq_start_it_range[f], freq_end] # variable freq start to 49Hz
+            # Fit to each source PSD separately, but in parallel
+            fg.fit(freqs,psds_avg,freq_range,n_jobs=1)
+            # Extract aperiodic parameters
+            aps = fg.get_params('aperiodic_params')
+            # Extract peak parameters
+            peaks = fg.get_params('peak_params')
+            # Extract goodness-of-fit metrics
+            r2s = fg.get_params('r_squared')
+            # Save OOF and r2s
+            OOF_data00[e,:,f] = aps
+            r2s00[e,:,f] = r2s
+            # Find the alpha peak with greatest power
+            for c in range(n_channels):
+                peaks0 = peaks[peaks[:,3] == c]
+                # Subset the peaks within the alpha band
+                in_alpha_band = (peaks0[:,0] >= Freq_Bands["alpha"][0]) & (peaks0[:,0] <= Freq_Bands["alpha"][1])
+                if sum(in_alpha_band) > 0: # Any alpha peaks?
+                    # Choose the peak with the highest power
+                    max_alpha_idx = np.argmax(peaks0[in_alpha_band,1])
+                    # Save results
+                    PAF_data00[e,c,f] = peaks0[in_alpha_band][max_alpha_idx,:-1]
+                else:
+                    # No alpha peaks
+                    PAF_data00[e,c,f] = [np.nan]*3
+    # Check criterias
+    good_fits_OOF = (r2s00 > OOF_r2_thres) & (OOF_data00[:,:,:,1] > 0) # r^2 > 0.95 and exponent > 0
+    good_fits_PAF = (r2s00 > PAF_r2_thres) & (np.isfinite(PAF_data00[:,:,:,0])) # r^2 > 0.90 and detected peak in alpha band
+    # Save the data or NaN if criterias were not fulfilled
+    for e in range(n_eye_status):
+        for c in range(n_channels):
+            if sum(good_fits_OOF[e,c]) == 0: # no good OOF estimation
+                OOF_data0[e,c] = [np.nan]*2
+            else: # Save OOF associated with greatest r^2 that fulfilled criterias
+                OOF_data0[e,c] = OOF_data00[e,c,np.argmax(r2s00[e,c,good_fits_OOF[e,c]])]
+            if sum(good_fits_PAF[e,c]) == 0: # no good PAF estimation
+                PAF_data0[e,c] = [np.nan]*3
+            else: # Save PAF associated with greatest r^2 that fulfilled criterias
+                PAF_data0[e,c] = PAF_data00[e,c,np.argmax(r2s00[e,c,good_fits_PAF[e,c]])]
+    print("Finished {} out of {} subjects".format(i+1,n_subjects))
+    return i, PAF_data0, OOF_data0
+
+# Get current time
+c_time1 = time.localtime()
+c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1)
+print(c_time1)
+
+with concurrent.futures.ProcessPoolExecutor() as executor:
+    for i, PAF_result, OOF_result in executor.map(FOOOF_estimation, range(n_subjects)): # Function and arguments
+        PAF_data[i] = PAF_result
+        OOF_data[i] = OOF_result
+
+# Save data
+with open(Feature_savepath+"PAF_data_arr.pkl", "wb") as file:
+    pickle.dump(PAF_data, file)
+with open(Feature_savepath+"OOF_data_arr.pkl", "wb") as file:
+    pickle.dump(OOF_data, file)
+
+# Get current time
+c_time2 = time.localtime()
+c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2)
+print("Started", c_time1, "\nFinished",c_time2)
+
+# Convert to Pandas dataframe (only keep mean parameter for PAF)
+# The dimensions will each be a column with numbers and the last column will be the actual values
+ori = PAF_data[:,:,:,0]
+arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, ori.shape), indexing="ij"))) + [ori.ravel()])
+PAF_data_df = pd.DataFrame(arr, columns = ["Subject_ID", "Eye_status", "Channel", "Value"])
+# Change from numerical coding to actual values
+
+index_values = [Subject_id,eye_status,ch_names]
+temp_df = PAF_data_df.copy() # make temp df to not sequential overwrite what is changed
+for col in range(len(index_values)):
+    col_name = PAF_data_df.columns[col]
+    for shape in range(ori.shape[col]):
+        temp_df.loc[PAF_data_df.iloc[:,col] == shape,col_name]\
+        = index_values[col][shape]
+PAF_data_df = temp_df # replace original df 
+
+# Add group status
+Group_status = np.array(["CTRL"]*len(PAF_data_df["Subject_ID"]))
+Group_status[np.array([i in cases for i in PAF_data_df["Subject_ID"]])] = "PTSD"
+# Add to dataframe
+PAF_data_df.insert(3, "Group_status", Group_status)
+
+# Global peak alpha
+PAF_data_df_global = PAF_data_df.groupby(["Subject_ID", "Group_status", "Eye_status"]).mean().reset_index() # by default pandas mean skip nan
+# Add dummy variable for re-using plot code
+dummy_variable = ["Global Peak Alpha Frequency"]*PAF_data_df_global.shape[0]
+PAF_data_df_global.insert(3, "Measurement", dummy_variable )
+
+# Regional peak alpha
+# A variable that codes the channels based on A/P localization is also made
+Frontal_chs = ["Fp1", "Fpz", "Fp2", "AFz", "Fz", "F3", "F4", "F7", "F8"]
+Central_chs = ["Cz", "C3", "C4", "T7", "T8", "FT7", "FC3", "FCz", "FC4", "FT8", "TP7", "CP3", "CPz", "CP4", "TP8"]
+Posterior_chs = ["Pz", "P3", "P4", "P7", "P8", "POz", "O1", "O2", "Oz"]
+
+Brain_region = np.array(ch_names, dtype = "<U9")
+Brain_region[np.array([i in Frontal_chs for i in ch_names])] = "Frontal"
+Brain_region[np.array([i in Central_chs for i in ch_names])] = "Central"
+Brain_region[np.array([i in Posterior_chs for i in ch_names])] = "Posterior"
+
+PAF_data_df.insert(4, "Brain_region", list(Brain_region)*int(PAF_data_df.shape[0]/len(Brain_region)))
+
+# Save the dataframes
+PAF_data_df.to_pickle(os.path.join(Feature_savepath,"PAF_data_FOOOF_df.pkl"))
+PAF_data_df_global.to_pickle(os.path.join(Feature_savepath,"PAF_data_FOOOF_global_df.pkl"))
+
+# Convert to Pandas dataframe (only keep exponent parameter for OOF)
+# The dimensions will each be a column with numbers and the last column will be the actual values
+ori = OOF_data[:,:,:,1]
+arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, ori.shape), indexing="ij"))) + [ori.ravel()])
+PAF_data_df = pd.DataFrame(arr, columns = ["Subject_ID", "Eye_status", "Channel", "Value"])
+# Change from numerical coding to actual values
+
+index_values = [Subject_id,eye_status,ch_names]
+temp_df = PAF_data_df.copy() # make temp df to not sequential overwrite what is changed
+for col in range(len(index_values)):
+    col_name = PAF_data_df.columns[col]
+    for shape in range(ori.shape[col]):
+        temp_df.loc[PAF_data_df.iloc[:,col] == shape,col_name]\
+        = index_values[col][shape]
+OOF_data_df = temp_df # replace original df 
+
+# Add group status
+Group_status = np.array(["CTRL"]*len(OOF_data_df["Subject_ID"]))
+Group_status[np.array([i in cases for i in OOF_data_df["Subject_ID"]])] = "PTSD"
+# Add to dataframe
+OOF_data_df.insert(3, "Group_status", Group_status)
+
+# Regional OOF
+OOF_data_df.insert(4, "Brain_region", list(Brain_region)*int(PAF_data_df.shape[0]/len(Brain_region)))
+
+# Save the dataframes
+OOF_data_df.to_pickle(os.path.join(Feature_savepath,"OOF_data_FOOOF_df.pkl"))
+
+# %% Microstate analysis
+# The function takes the data as a numpy array (n_t, n_ch)
+# The data is already re-referenced to common average
+# Variables for the clustering function are extracted
+sfreq = final_epochs[0].info["sfreq"]
+eye_status = list(final_epochs[0].event_id.keys())
+n_eye_status = len(eye_status)
+ch_names = final_epochs[0].info["ch_names"]
+n_channels = len(ch_names)
+locs = np.zeros((n_channels,2)) # xy coordinates of the electrodes
+for c in range(n_channels):
+    locs[c] = final_epochs[0].info["chs"][c]["loc"][0:2]
+
+# The epochs are transformed to numpy arrays
+micro_data = []
+EC_micro_data = []
+EO_micro_data = []
+for i in range(n_subjects):
+    # Transform data to correct shape
+    micro_data.append(final_epochs[i].get_data()) # get data
+    arr_shape = micro_data[i].shape # get shape
+    micro_data[i] = micro_data[i].swapaxes(1,2) # swap ch and time axis
+    micro_data[i] = micro_data[i].reshape(arr_shape[0]*arr_shape[2],arr_shape[1]) # reshape by combining epochs and times
+    # Get indices for eyes open and closed
+    EC_index = final_epochs[i].events[:,2] == 1
+    EO_index = final_epochs[i].events[:,2] == 2
+    # Repeat with 4s * sample frequency to correct for concatenation of times and epochs
+    EC_index = np.repeat(EC_index,4*sfreq)
+    EO_index = np.repeat(EO_index,4*sfreq)
+    # Save data where it is divided into eye status
+    EC_micro_data.append(micro_data[i][EC_index])
+    EO_micro_data.append(micro_data[i][EO_index])
+
+# Global explained variance and Cross-validation criterion is used to determine number of microstates
+# First all data is concatenated to find the optimal number of maps for all data
+micro_data_all = np.vstack(micro_data)
+
+# Determine the number of clusters
+# I use a slightly modified kmeans function which returns the cv_min
+global_gev = []
+cv_criterion = []
+for n_maps in range(2,7):
+    maps, L, gfp_peaks, gev, cv_min = kmeans_return_all(micro_data_all, n_maps)
+    global_gev.append(np.sum(gev))
+    cv_criterion.append(cv_min)
+# Save run results
+cluster_results = np.array([global_gev,cv_criterion])
+np.save("Microstate_n_cluster_test_results.npy", cluster_results) # (gev/cv_crit, n_maps from 2 to 6)
+
+#cluster_results = np.load("Microstate_n_cluster_test_results.npy")
+#global_gev = cluster_results[0,:]
+#cv_criterion = cluster_results[1,:]
+
+# Evaluate best n_maps
+plt.figure()
+plt.plot(np.linspace(2,6,len(cv_criterion)),(cv_criterion/np.sum(cv_criterion)), label="CV Criterion")
+plt.plot(np.linspace(2,6,len(cv_criterion)),(global_gev/np.sum(global_gev)), label="GEV")
+plt.legend()
+plt.ylabel("Normalized to total")
+# The lower CV the better.
+# But the higher GEV the better.
+# Based on the plots and the recommendation by vong Wegner & Laufs 2018
+# we used 4 microstates
+
+# In order to compare between groups, I fix the microstates by clustering on data from both groups
+# Due to instability of maps when running multiple times, I increased n_maps from 4 to 6
+n_maps = 4
+mode = ["aahc", "kmeans", "kmedoids", "pca", "ica"][1]
+
+# K-means is stochastic, thus I run it multiple times in order to find the maps with highest GEV
+# Each K-means is run 5 times and best map is chosen. But I do this 10 times more, so in total 50 times!
+n_run = 10
+# Pre-allocate memory
+microstate_cluster_results = []
+
+# Parallel processing can only be implemented by ensuring different seeds
+# Otherwise the iteration would be the same.
+# However the k-means already use parallel processing so making outer loop with
+# concurrent processes make it use too many processors
+# Get current time
+c_time1 = time.localtime()
+c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1)
+print(c_time1)
+
+for r in range(n_run):
+    maps = [0]*2
+    m_labels = [0]*2
+    gfp_peaks = [0]*2
+    gev = [0]*2
+    # Eyes closed
+    counter = 0
+    maps_, x_, gfp_peaks_, gev_ = clustering(
+        np.vstack(np.array(EC_micro_data)), sfreq, ch_names, locs, mode, n_maps, doplot=False) # doplot=True is bugged
+    maps[counter] = maps_
+    m_labels[counter] = x_
+    gfp_peaks[counter] = gfp_peaks_
+    gev[counter] = gev_
+    counter += 1
+    # Eyes open
+    maps_, x_, gfp_peaks_, gev_ = clustering(
+        np.vstack(np.array(EO_micro_data)), sfreq, ch_names, locs, mode, n_maps, doplot=False) # doplot=True is bugged
+    maps[counter] = maps_
+    m_labels[counter] = x_
+    gfp_peaks[counter] = gfp_peaks_
+    gev[counter] = gev_
+    counter += 1
+    
+    microstate_cluster_results.append([maps, m_labels, gfp_peaks, gev])
+    print("Finished {} out of {}".format(r+1, n_run))
+
+# Get current time
+c_time2 = time.localtime()
+c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2)
+print("Started", c_time1, "\nFinished",c_time2)
+
+# Save the results
+with open(Feature_savepath+"Microstate_4_maps_10x5_k_means_results.pkl", "wb") as file:
+    pickle.dump(microstate_cluster_results, file)
+
+# # Load
+# with open(Feature_savepath+"Microstate_4_maps_10x5_k_means_results.pkl", "rb") as file:
+#     microstate_cluster_results = pickle.load(file)
+
+# Find the best maps (Highest GEV across all the K-means clusters)
+EC_total_gevs = np.sum(np.vstack(np.array(microstate_cluster_results)[:,3,0]), axis=1) # (runs, maps/labels/gfp/gev, ec/eo)
+EO_total_gevs = np.sum(np.vstack(np.array(microstate_cluster_results)[:,3,1]), axis=1)
+Best_EC_idx = np.argmax(EC_total_gevs)
+Best_EO_idx = np.argmax(EO_total_gevs)
+# Update the variables for the best maps
+maps = [microstate_cluster_results[Best_EC_idx][0][0],microstate_cluster_results[Best_EO_idx][0][1]]
+m_labels = [microstate_cluster_results[Best_EC_idx][1][0],microstate_cluster_results[Best_EO_idx][1][1]]
+gfp_peaks = [microstate_cluster_results[Best_EC_idx][2][0],microstate_cluster_results[Best_EO_idx][2][1]]
+gev = [microstate_cluster_results[Best_EC_idx][3][0],microstate_cluster_results[Best_EO_idx][3][1]]
+
+# Plot the maps
+plt.style.use('default')
+labels = ["EC", "EO"]
+for i in range(len(labels)):    
+    fig, axarr = plt.subplots(1, n_maps, figsize=(20,5))
+    fig.patch.set_facecolor('white')
+    for imap in range(n_maps):
+        mne.viz.plot_topomap(maps[i][imap,:], pos = final_epochs[0].info, axes = axarr[imap]) # plot
+        axarr[imap].set_title("GEV: {:.2f}".format(gev[i][imap]), fontsize=16, fontweight="bold") # title
+    fig.suptitle("Microstates: {}".format(labels[i]), fontsize=20, fontweight="bold")
+
+# Manual re-order the maps
+# Due the random initiation of K-means this have to be modified every time clusters are made!
+# Assign map labels (e.g. 0, 2, 1, 3)
+order = [0]*2
+order[0] = [3,0,1,2] # EC
+order[1] = [3,1,0,2] # EO
+for i in range(len(order)):
+    maps[i] = maps[i][order[i],:] # re-order maps
+    gev[i] = gev[i][order[i]] # re-order GEV
+    # Make directory to find and replace map labels
+    dic0 = {value:key for key, value in enumerate(order[i])}
+    m_labels[i][:] = [dic0.get(n, n) for n in m_labels[i]] # re-order labels
+
+# The maps seems to be correlated both negatively and positively (see spatial correlation plots)
+# Thus the sign of the map does not really reflect which areas are positive or negative (absolute)
+# But more which areas are different during each state (relatively)
+# I can therefore change the sign of the map for the visualizaiton
+sign_swap = [[1,-1,1,1],[1,1,1,-1]]
+for i in range(len(order)):
+    for m in range(n_maps):
+        maps[i][m] *= sign_swap[i][m]
+
+# Plot the maps and save
+save_path = "/home/glia/Analysis/Figures/Microstates/"
+labels = ["EC", "EO"]
+for i in range(len(labels)):    
+    fig, axarr = plt.subplots(1, n_maps, figsize=(20,5))
+    fig.patch.set_facecolor('white')
+    for imap in range(n_maps):
+        mne.viz.plot_topomap(maps[i][imap,:], pos = final_epochs[0].info, axes = axarr[imap]) # plot
+        axarr[imap].set_title("GEV: {:.2f}".format(gev[i][imap]), fontsize=16, fontweight="bold") # title
+    fig.suptitle("Microstates: {} - Total GEV: {:.2f}".format(labels[i],sum(gev[i])), fontsize=20, fontweight="bold")
+    # Save the figure
+    fig.savefig(os.path.join(save_path,str("Microstates_{}".format(labels[i]) + ".png")))
+
+# Calculate spatial correlation between maps and actual data points (topography)
+# The sign of the map is changed so the correlation is positive
+# By default the code looks for highest spatial correlation (regardless of sign)
+# Thus depending on random initiation point the map might be opposite
+plt.style.use('ggplot')
+def spatial_correlation(data, maps):
+    n_t = data.shape[0]
+    n_ch = data.shape[1]
+    data = data - data.mean(axis=1, keepdims=True)
+
+    # GFP peaks
+    gfp = np.std(data, axis=1)
+    gfp_peaks = locmax(gfp)
+    gfp_values = gfp[gfp_peaks]
+    gfp2 = np.sum(gfp_values**2) # normalizing constant in GEV
+    n_gfp = gfp_peaks.shape[0]
+
+    # Spatial correlation
+    C = np.dot(data, maps.T)
+    C /= (n_ch*np.outer(gfp, np.std(maps, axis=1)))
+    L = np.argmax(C**2, axis=1) # C is squared here which means the maps do no retain information about the sign of the correlation
+    
+    return C
+
+C_EC = spatial_correlation(np.vstack(np.array(EC_micro_data)), maps[0])
+C_EO = spatial_correlation(np.vstack(np.array(EO_micro_data)), maps[1])
+C = [C_EC, C_EO]
+
+# Plot the distribution of spatial correlation for each label and each map
+labels = ["EC", "EO"]
+for i in range(len(labels)):
+    fig, axarr = plt.subplots(n_maps, n_maps, figsize=(16,16))
+    for Lmap in range(n_maps):
+        for Mmap in range(n_maps):
+            sns.distplot(C[i][m_labels[i] == Lmap,Mmap], ax = axarr[Lmap,Mmap])
+            axarr[Lmap,Mmap].set_xlabel("Spatial correlation")
+    plt.suptitle("Distribution of spatial correlation_{}".format(labels[i]), fontsize=20, fontweight="bold")
+    # Add common x and y axis labels by making one big axis
+    fig.add_subplot(111, frameon=False)
+    plt.tick_params(labelcolor="none", top="off", bottom="off", left="off", right="off") # hide tick labels and ticks
+    plt.grid(False) # remove global grid
+    plt.xlabel("Microstate number", labelpad=20)
+    plt.ylabel("Label number", labelpad=10)
+    fig.savefig(os.path.join(save_path,str("Microstates_Spatial_Correlation_Label_State_{}".format(labels[i]) + ".png")))
+
+# Plot the distribution of spatial correlation for all data and each map
+labels = ["EC", "EO"]
+for i in range(len(labels)):
+    fig, axarr = plt.subplots(1,n_maps, figsize=(20,5))
+    for imap in range(n_maps):
+        sns.distplot(C[i][:,imap], ax = axarr[imap])
+        plt.xlabel("Spatial correlation")
+    plt.suptitle("Distribution of spatial correlation", fontsize=20, fontweight="bold")
+    # Add common x and y axis labels by making one big axis
+    fig.add_subplot(111, frameon=False)
+    plt.tick_params(labelcolor="none", top="off", bottom="off", left="off", right="off") # hide tick labels and ticks
+    plt.grid(False) # remove global grid
+    plt.xlabel("Microstate number", labelpad=20)
+    plt.ylabel("Label number")
+
+# Prepare for calculation of transition matrix
+# I modified the function, so it takes the list argument gap_index
+# gap_index should have the indices right before gaps in data
+
+# Gaps: Between dropped epochs, trials (eo/ec) and subjects
+# The between subjects gaps is removed by dividing the data into subjects
+n_trials = 5
+n_epoch_length = final_epochs[0].get_data().shape[2]
+
+micro_labels = []
+micro_subject_EC_idx = [0]
+micro_subject_EO_idx = [0]
+gaps_idx = []
+gaps_trials_idx = []
+for i in range(n_subjects):
+    # Get indices for subject
+    micro_subject_EC_idx.append(micro_subject_EC_idx[i]+EC_micro_data[i].shape[0])
+    temp_EC = m_labels[0][micro_subject_EC_idx[i]:micro_subject_EC_idx[i+1]]
+    # Get labels for subject i EO
+    micro_subject_EO_idx.append(micro_subject_EO_idx[i]+EO_micro_data[i].shape[0])
+    temp_EO = m_labels[1][micro_subject_EO_idx[i]:micro_subject_EO_idx[i+1]]
+    # Save
+    micro_labels.append([temp_EC,temp_EO]) # (subject, eye)
+    
+    # Get indices with gaps
+    # Dropped epochs are first considered
+    # Each epoch last 4s, which correspond to 2000 samples and a trial is 15 epochs - dropped epochs
+    # Get epochs for each condition
+    EC_drop_epochs = Drop_epochs_df.iloc[i,1:][Drop_epochs_df.iloc[i,1:] <= 75].to_numpy()
+    EO_drop_epochs = Drop_epochs_df.iloc[i,1:][(Drop_epochs_df.iloc[i,1:] >= 75)&
+                                            (Drop_epochs_df.iloc[i,1:] <= 150)].to_numpy()
+    # Get indices for the epochs for EC that were dropped and correct for changing index due to drop
+    EC_drop_epochs_gaps_idx = []
+    counter = 0
+    for d in range(len(EC_drop_epochs)):
+        drop_epoch_number = EC_drop_epochs[d]
+        Drop_epoch_idx = (drop_epoch_number-counter)*n_epoch_length # counter subtracted as the drop index is before dropped
+        EC_drop_epochs_gaps_idx.append(Drop_epoch_idx-1) # -1 for point just before gap
+        counter += 1
+    # Negative index might occur if the first epochs were removed. This index is not needed for transition matrix
+    if len(EC_drop_epochs_gaps_idx) > 0:
+        for d in range(len(EC_drop_epochs_gaps_idx)): # check all, e.g. if epoch 0,1,2,3 are dropped then all should be caught
+            if EC_drop_epochs_gaps_idx[0] == -1:
+                EC_drop_epochs_gaps_idx = EC_drop_epochs_gaps_idx[1:len(EC_drop_epochs)]
+    
+    # Get indices for the epochs for EO that were dropped and correct for changing index due to drop
+    EO_drop_epochs_gaps_idx = []
+    counter = 0
+    for d in range(len(EO_drop_epochs)):
+        drop_epoch_number = EO_drop_epochs[d]-75
+        Drop_epoch_idx = (drop_epoch_number-counter)*n_epoch_length # counter subtracted as the drop index is before dropped
+        EO_drop_epochs_gaps_idx.append(Drop_epoch_idx-1) # -1 for point just before gap
+        counter += 1
+    # Negative index might occur if the first epoch was removed. This index is not needed for transition matrix
+    if len(EO_drop_epochs_gaps_idx) > 0:
+        for d in range(len(EO_drop_epochs_gaps_idx)): # check all, e.g. if epoch 0,1,2,3 are dropped then all should be caught
+            if EO_drop_epochs_gaps_idx[0] == -1:
+                EO_drop_epochs_gaps_idx = EO_drop_epochs_gaps_idx[1:len(EO_drop_epochs)]
+    
+    # Gaps between trials
+    Trial_indices = [0, 15, 30, 45, 60, 75] # all the indices for start and end of the 5 trials
+    EC_trial_gaps_idx = []
+    EO_trial_gaps_idx = []
+    counter_EC = 0
+    counter_EO = 0
+    for t in range(len(Trial_indices)-2): # -2 as start and end is not used in transition matrix
+        temp_drop = EC_drop_epochs[(EC_drop_epochs >= Trial_indices[t])&
+                            (EC_drop_epochs < Trial_indices[t+1])]
+        # Correct the trial id for any potential drops within that trial
+        counter_EC += len(temp_drop)
+        trial_idx_corrected_for_drops = 15*(t+1)-counter_EC
+        EC_trial_gaps_idx.append((trial_idx_corrected_for_drops*n_epoch_length)-1) # multiply id with length of epoch and subtract 1
+        
+        temp_drop = EO_drop_epochs[(EO_drop_epochs >= Trial_indices[t]+75)&
+                            (EO_drop_epochs < Trial_indices[t+1]+75)]
+        # Correct the trial id for any potential drops within that trial
+        counter_EO += len(temp_drop)
+        trial_idx_corrected_for_drops = 15*(t+1)-counter_EO
+        EO_trial_gaps_idx.append((trial_idx_corrected_for_drops*n_epoch_length)-1) # multiply id with length of epoch and subtract 1
+    
+    # Concatenate all drop indices
+    gaps_idx.append([np.unique(np.sort(EC_drop_epochs_gaps_idx+EC_trial_gaps_idx)),
+                    np.unique(np.sort(EO_drop_epochs_gaps_idx+EO_trial_gaps_idx))])
+    # Make on with trial gaps only for use in LRTC analysis
+    gaps_trials_idx.append([EC_trial_gaps_idx,EO_trial_gaps_idx])
+
+# Save the gap idx files
+np.save("Gaps_idx.npy",np.array(gaps_idx))
+np.save("Gaps_trials_idx.npy",np.array(gaps_trials_idx))
+
+# %% Calculate microstate features
+# Symbol distribution (also called ratio of time covered RTT)
+# Transition matrix
+# Shannon entropy
+EC_p_hat = p_empirical(m_labels[0], n_maps)
+EO_p_hat = p_empirical(m_labels[1], n_maps)
+# Sanity check: Overall between EC and EO
+
+microstate_time_data = np.zeros((n_subjects,n_eye_status,n_maps))
+microstate_transition_data = np.zeros((n_subjects,n_eye_status,n_maps,n_maps))
+microstate_entropy_data = np.zeros((n_subjects,n_eye_status))
+for i in range(n_subjects):
+    # Calculate ratio of time covered
+    temp_EC_p_hat = p_empirical(micro_labels[i][0], n_maps)
+    temp_EO_p_hat = p_empirical(micro_labels[i][1], n_maps)
+    # Calculate transition matrix
+    temp_EC_T_hat = T_empirical(micro_labels[i][0], n_maps, gaps_idx[i][0])
+    temp_EO_T_hat = T_empirical(micro_labels[i][1], n_maps, gaps_idx[i][1])
+    # Calculate Shannon entropy
+    temp_EC_h_hat = H_1(micro_labels[i][0], n_maps)
+    temp_EO_h_hat = H_1(micro_labels[i][1], n_maps)
+    
+    # Save the data
+    microstate_time_data[i,0,:] = temp_EC_p_hat
+    microstate_time_data[i,1,:] = temp_EO_p_hat
+    microstate_transition_data[i,0,:,:] = temp_EC_T_hat
+    microstate_transition_data[i,1,:,:] = temp_EO_T_hat
+    microstate_entropy_data[i,0] = temp_EC_h_hat/max_entropy(n_maps) # ratio of max entropy
+    microstate_entropy_data[i,1] = temp_EO_h_hat/max_entropy(n_maps) # ratio of max entropy
+
+# Save transition data
+np.save(Feature_savepath+"microstate_transition_data.npy", microstate_transition_data)
+# Convert transition data to dataframe for further processing with other features
+# Transition matrix should be read as probability of row to column
+microstate_transition_data_arr =\
+     microstate_transition_data.reshape((n_subjects,n_eye_status,n_maps*n_maps)) # flatten 4 x 4 matrix to 1D
+transition_info = ["M1->M1", "M1->M2", "M1->M3", "M1->M4",
+                   "M2->M1", "M2->M2", "M2->M3", "M2->M4",
+                   "M3->M1", "M3->M2", "M3->M3", "M3->M4",
+                   "M4->M1", "M4->M2", "M4->M3", "M4->M4"]
+
+arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, microstate_transition_data_arr.shape), indexing="ij"))) + [microstate_transition_data_arr.ravel()])
+microstate_transition_data_df = pd.DataFrame(arr, columns = ["Subject_ID", "Eye_status", "Transition", "Value"])
+# Change from numerical coding to actual values
+eye_status = list(final_epochs[0].event_id.keys())
+
+index_values = [Subject_id,eye_status,transition_info]
+for col in range(len(index_values)):
+    col_name = microstate_transition_data_df.columns[col]
+    for shape in reversed(range(microstate_transition_data_arr.shape[col])): # notice this is the shape of original numpy array. Not shape of DF
+        microstate_transition_data_df.loc[microstate_transition_data_df.iloc[:,col] == shape,col_name]\
+        = index_values[col][shape]
+
+# Add group status
+Group_status = np.array(["CTRL"]*len(microstate_transition_data_df["Subject_ID"]))
+Group_status[np.array([i in cases for i in microstate_transition_data_df["Subject_ID"]])] = "PTSD"
+# Add to dataframe
+microstate_transition_data_df.insert(2, "Group_status", Group_status)
+
+# Save df
+microstate_transition_data_df.to_pickle(os.path.join(Feature_savepath,"microstate_transition_data_df.pkl"))
+
+# Convert time covered data to Pandas dataframe
+# The dimensions will each be a column with numbers and the last column will be the actual values
+arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, microstate_time_data.shape), indexing="ij"))) + [microstate_time_data.ravel()])
+microstate_time_df = pd.DataFrame(arr, columns = ["Subject_ID", "Eye_status", "Microstate", "Value"])
+# Change from numerical coding to actual values
+eye_status = list(final_epochs[0].event_id.keys())
+microstates = [1,2,3,4]
+
+index_values = [Subject_id,eye_status,microstates]
+for col in range(len(index_values)):
+    col_name = microstate_time_df.columns[col]
+    for shape in reversed(range(microstate_time_data.shape[col])): # notice this is the shape of original numpy array. Not shape of DF
+        microstate_time_df.loc[microstate_time_df.iloc[:,col] == shape,col_name]\
+        = index_values[col][shape]
+# Reversed in inner loop is used to avoid sequencial data being overwritten.
+# E.g. if 0 is renamed to 1, then the next loop all 1's will be renamed to 2
+
+# Add group status
+Group_status = np.array(["CTRL"]*len(microstate_time_df["Subject_ID"]))
+Group_status[np.array([i in cases for i in microstate_time_df["Subject_ID"]])] = "PTSD"
+# Add to dataframe
+microstate_time_df.insert(2, "Group_status", Group_status)
+
+# Save df
+microstate_time_df.to_pickle(os.path.join(Feature_savepath,"microstate_time_df.pkl"))
+
+# Transition data - mean
+# Get index for groups
+PTSD_idx = np.array([i in cases for i in Subject_id])
+CTRL_idx = np.array([not i in cases for i in Subject_id])
+n_groups = 2
+
+microstate_transition_data_mean = np.zeros((n_groups,n_eye_status,n_maps,n_maps))
+microstate_transition_data_mean[0,:,:,:] = np.mean(microstate_transition_data[PTSD_idx,:,:,:], axis=0)
+microstate_transition_data_mean[1,:,:,:] = np.mean(microstate_transition_data[CTRL_idx,:,:,:], axis=0)
+
+# Convert entropy data to Pandas dataframe
+# The dimensions will each be a column with numbers and the last column will be the actual values
+arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, microstate_entropy_data.shape), indexing="ij"))) + [microstate_entropy_data.ravel()])
+microstate_entropy_df = pd.DataFrame(arr, columns = ["Subject_ID", "Eye_status", "Value"])
+# Change from numerical coding to actual values
+eye_status = list(final_epochs[0].event_id.keys())
+
+index_values = [Subject_id,eye_status]
+for col in range(len(index_values)):
+    col_name = microstate_entropy_df.columns[col]
+    for shape in reversed(range(microstate_entropy_data.shape[col])): # notice this is the shape of original numpy array. Not shape of DF
+        microstate_entropy_df.loc[microstate_entropy_df.iloc[:,col] == shape,col_name]\
+        = index_values[col][shape]
+# Reversed in inner loop is used to avoid sequencial data being overwritten.
+# E.g. if 0 is renamed to 1, then the next loop all 1's will be renamed to 2
+
+# Add group status
+Group_status = np.array(["CTRL"]*len(microstate_entropy_df["Subject_ID"]))
+Group_status[np.array([i in cases for i in microstate_entropy_df["Subject_ID"]])] = "PTSD"
+# Add to dataframe
+microstate_entropy_df.insert(2, "Group_status", Group_status)
+# Add dummy variable for re-using plot code
+dummy_variable = ["Entropy"]*len(Group_status)
+microstate_entropy_df.insert(3, "Measurement", dummy_variable)
+
+# Save df
+microstate_entropy_df.to_pickle(os.path.join(Feature_savepath,"microstate_entropy_df.pkl"))
+
+# %% Long-range temporal correlations (LRTC)
+"""
+See Hardstone et al, 2012
+Hurst exponent estimation steps:
+    1. Preprocess
+    2. Band-pass filter for frequency band of interest
+    3. Hilbert transform to obtain amplitude envelope
+    4. Perform DFA
+        4.1 Compute cumulative sum of time series to create signal profile
+        4.2 Define set of window sizes (see below)
+        4.3 Remove the linear trend using least-squares for each window
+        4.4 Calculate standard deviation for each window and take the mean
+        4.5 Plot fluctuation function (Standard deviation) as function
+            for all window sizes, on double logarithmic scale
+        4.6 The DFA exponent alpha correspond to Hurst exponent
+            f(L) = sd = L^alpha (with alpha as linear coefficient in log plot)
+
+If 0 < alpha < 0.5: The process exhibits anti-correlations
+If 0.5 < alpha < 1: The process exhibits positive correlations
+If alpha = 0.5: The process is indistinguishable from a random process
+If 1.0 < alpha < 2.0: The process is non-stationary. H = alpha - 1
+
+Window sizes should be equally spaced on a logarithmic scale
+Sizes should be at least 4 samples and up to 10% of total signal length
+Filters can influence neighboring samples, thus filters should be tested
+on white noise to estimate window sizes that are unaffected by filters
+
+filter_length=str(2*1/fmin)+"s" # cannot be used with default transition bandwidth
+
+"""
+# From simulations with white noise I determined window size thresholds for the 5 frequency bands:
+thresholds = [7,7,7,6.5,6.5]
+# And their corresponding log step sizes
+with open("LRTC_log_win_sizes.pkl", "rb") as filehandle:
+    log_win_sizes = pickle.load(filehandle)
+
+# Variables for the the different conditions
+# Sampling frequency
+sfreq = final_epochs[0].info["sfreq"]
+# Channels
+ch_names = final_epochs[0].info["ch_names"]
+n_channels = len(ch_names)
+# Frequency
+Freq_Bands = {"delta": [1.25, 4.0],
+              "theta": [4.0, 8.0],
+              "alpha": [8.0, 13.0],
+              "beta": [13.0, 30.0],
+              "gamma": [30.0, 49.0]}
+n_freq_bands = len(Freq_Bands)
+# Eye status
+eye_status = list(final_epochs[0].event_id.keys())
+n_eye_status = len(eye_status)
+
+### Estimating Hurst exponent for the data
+# The data should be re-referenced to common average (Already done)
+
+# Data are transformed to numpy arrays
+# Then divided into EO and EC and further into each of the 5 trials
+# So DFA is estimated for each trial separately, which was concluded from simulations
+gaps_trials_idx = np.load("Gaps_trials_idx.npy") # re-used from microstate analysis
+n_trials = 5
+
+H_data = []
+for i in range(n_subjects):
+    # Transform data to correct shape
+    temp_arr = final_epochs[i].get_data() # get data
+    arr_shape = temp_arr.shape # get shape
+    temp_arr = temp_arr.swapaxes(1,2) # swap ch and time axis
+    temp_arr = temp_arr.reshape(arr_shape[0]*arr_shape[2],arr_shape[1]) # reshape by combining epochs and times
+    # Get indices for eyes open and closed
+    EC_index = final_epochs[i].events[:,2] == 1
+    EO_index = final_epochs[i].events[:,2] == 2
+    # Repeat with 4s * sample frequency to correct for concatenation of times and epochs
+    EC_index = np.repeat(EC_index,4*sfreq)
+    EO_index = np.repeat(EO_index,4*sfreq)
+    # Divide into eye status
+    EC_data = temp_arr[EC_index]
+    EO_data = temp_arr[EO_index]
+    # Divide into trials
+    EC_gap_idx = np.array([0]+list(gaps_trials_idx[i,0])+[len(EC_data)])
+    EO_gap_idx = np.array([0]+list(gaps_trials_idx[i,1])+[len(EO_data)])
+    
+    EC_trial_data = []
+    EO_trial_data = []
+    for t in range(n_trials):
+        EC_trial_data.append(EC_data[EC_gap_idx[t]:EC_gap_idx[t+1]])
+        EO_trial_data.append(EO_data[EO_gap_idx[t]:EO_gap_idx[t+1]])
+        
+    # Save data
+    H_data.append([EC_trial_data,EO_trial_data]) # output [subject][eye][trial][time,ch]
+
+# Calculate H for each subject, eye status, trial, freq and channel
+H_arr = np.zeros((n_subjects,n_eye_status,n_trials,n_channels,n_freq_bands))
+w_len = [len(ele) for ele in log_win_sizes]
+DFA_arr = np.empty((n_subjects,n_eye_status,n_trials,n_channels,n_freq_bands,2,np.max(w_len)))
+DFA_arr[:] = np.nan
+
+# Get current time
+c_time1 = time.localtime()
+c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1)
+print("Started",c_time1)
+
+# Nolds are already using all cores so multiprocessing with make it slower
+# Warning occurs when R2 is estimated during detrending - but R2 is not used
+warnings.simplefilter("ignore")
+for i in range(n_subjects):
+    # Pre-allocate memory
+    DFA_temp = np.empty((n_eye_status,n_trials,n_channels,n_freq_bands,2,np.max(w_len)))
+    DFA_temp[:] = np.nan
+    H_temp = np.empty((n_eye_status,n_trials,n_channels,n_freq_bands))
+    for e in range(n_eye_status):
+        for trial in range(n_trials):
+            for c in range(n_channels):
+                # Get the data
+                signal = H_data[i][e][trial][:,c]
+                
+                counter = 0 # prepare counter
+                for fmin, fmax in Freq_Bands.values():
+                    # Filter for each freq band
+                    signal_filtered = mne.filter.filter_data(signal, sfreq=sfreq, verbose=0,
+                                                  l_freq=fmin, h_freq=fmax)
+                    # Hilbert transform
+                    analytic_signal = scipy.signal.hilbert(signal_filtered)
+                    # Get Amplitude envelope
+                    # np.abs is the same as np.linalg.norm, i.e. the length for complex input which is the amplitude
+                    ampltude_envelope = np.abs(analytic_signal)
+                    # Perform DFA using predefined window sizes from simulation
+                    a, dfa_data = nolds.dfa(ampltude_envelope,
+                                            nvals=np.exp(log_win_sizes[counter]).astype("int"),
+                                            debug_data=True)
+                    # Save DFA results
+                    DFA_temp[e,trial,c,counter,:,0:w_len[counter]] = dfa_data[0:2]
+                    H_temp[e,trial,c,counter] = a
+                    # Update counter
+                    counter += 1
+
+    # Print run status
+    print("Finished {} out of {}".format(i+1,n_subjects))
+    # Save the results
+    H_arr[i] = H_temp
+    DFA_arr[i] = DFA_temp
+
+warnings.simplefilter("default")
+
+# Get current time
+c_time2 = time.localtime()
+c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2)
+print("Started", c_time1, "\nCurrent Time",c_time2)
+
+# Save the DFA analysis data 
+np.save(Feature_savepath+"DFA_arr.npy", DFA_arr)
+np.save(Feature_savepath+"H_arr.npy", H_arr)
+
+# Load
+DFA_arr = np.load(Feature_savepath+"DFA_arr.npy")
+H_arr = np.load(Feature_savepath+"H_arr.npy")
+
+# Average the Hurst Exponent across trials
+H_arr = np.mean(H_arr, axis=2)
+
+# Convert to Pandas dataframe (Hurst exponent)
+# The dimensions will each be a column with numbers and the last column will be the actual values
+arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, H_arr.shape), indexing="ij"))) + [H_arr.ravel()])
+H_data_df = pd.DataFrame(arr, columns = ["Subject_ID", "Eye_status", "Channel", "Freq_band", "Value"])
+# Change from numerical coding to actual values
+eye_status = list(final_epochs[0].event_id.keys())
+ch_name = final_epochs[0].info["ch_names"]
+
+index_values = [Subject_id,eye_status,ch_name,list(Freq_Bands.keys())]
+for col in range(len(index_values)):
+    col_name = H_data_df.columns[col]
+    for shape in range(H_arr.shape[col]): # notice this is the shape of original numpy array. Not shape of DF
+        H_data_df.loc[H_data_df.iloc[:,col] == shape,col_name]\
+        = index_values[col][shape]
+
+# Add group status
+Group_status = np.array(["CTRL"]*len(H_data_df["Subject_ID"]))
+Group_status[np.array([i in cases for i in H_data_df["Subject_ID"]])] = "PTSD"
+# Add to dataframe
+H_data_df.insert(2, "Group_status", Group_status)
+
+# Fix Freq_band categorical order
+H_data_df["Freq_band"] = H_data_df["Freq_band"].astype("category").\
+            cat.reorder_categories(list(Freq_Bands.keys()), ordered=True)
+
+# Global Hurst exponent
+H_data_df_global = H_data_df.groupby(["Subject_ID", "Eye_status", "Freq_band"]).mean().reset_index() # by default pandas mean skip nan
+# Add group status (cannot use group_by as each subject only have 1 group, not both)
+Group_status = np.array(["CTRL"]*len(H_data_df_global["Subject_ID"]))
+Group_status[np.array([i in cases for i in H_data_df_global["Subject_ID"]])] = "PTSD"
+# Add to dataframe
+H_data_df_global.insert(2, "Group_status", Group_status)
+# Add dummy variable for re-using plot code
+dummy_variable = ["Global Hurst Exponent"]*H_data_df_global.shape[0]
+H_data_df_global.insert(3, "Measurement", dummy_variable )
+
+# Save the data
+H_data_df.to_pickle(os.path.join(Feature_savepath,"H_data_df.pkl"))
+H_data_df_global.to_pickle(os.path.join(Feature_savepath,"H_data_global_df.pkl"))
+
+# %% Source localization of sensor data
+# Using non-interpolated channels
+# Even interpolated channels during preprocessing and visual inspection
+# are dropped
+
+# Prepare epochs for estimation of source connectivity
+source_epochs = [0]*n_subjects
+for i in range(n_subjects):
+    source_epochs[i] = final_epochs[i].copy()
+
+### Make forward solutions
+# A forward solution is first made for all individuals with no dropped channels
+# Afterwards individual forward solutions are made for subjects with bad
+# channels that were interpolated in preprocessing and these are dropped
+# First forward operator is computed using a template MRI for each dataset
+fs_dir = "/home/glia/MNE-fsaverage-data/fsaverage"
+subjects_dir = os.path.dirname(fs_dir)
+trans = "fsaverage"
+src = os.path.join(fs_dir, "bem", "fsaverage-ico-5-src.fif")
+bem = os.path.join(fs_dir, "bem", "fsaverage-5120-5120-5120-bem-sol.fif")
+
+# Read the template sourcespace
+sourcespace = mne.read_source_spaces(src)
+
+temp_idx = 0 # Index with subject that had no bad channels
+subject_eeg = source_epochs[temp_idx].copy()
+subject_eeg.set_eeg_reference(projection=True) # needed for inverse modelling
+# Make forward solution
+fwd = mne.make_forward_solution(subject_eeg.info, trans=trans, src=src,
+                            bem=bem, eeg=True, mindist=5.0, n_jobs=1)
+# Save forward operator
+fname_fwd = "./Source_fwd/fsaverage-fwd.fif"
+mne.write_forward_solution(fname_fwd, fwd, overwrite=True)
+
+# A specific forward solution is also made for each subject with bad channels
+with open("./Preprocessing/bad_ch.pkl", "rb") as file:
+   bad_ch = pickle.load(file)
+
+All_bad_ch = bad_ch
+All_drop_epochs = dropped_epochs_df
+All_dropped_ch = []
+
+Bad_ch_idx = [idx for idx, item in enumerate(All_bad_ch) if item != 0]
+Bad_ch_subjects = All_drop_epochs["Subject_ID"][Bad_ch_idx]
+# For each subject with bad channels, drop the channels and make forward operator
+for n in range(len(Bad_ch_subjects)):
+    Subject = Bad_ch_subjects.iloc[n]
+    try:
+        Subject_idx = Subject_id.index(Subject)
+        # Get unique bad channels
+        Bad_ch0 = All_bad_ch[Bad_ch_idx[n]]
+        Bad_ch1 = []
+        for i2 in range(len(Bad_ch0)):
+            if type(Bad_ch0[i2]) == list:
+                for i3 in range(len(Bad_ch0[i2])):
+                    Bad_ch1.append(Bad_ch0[i2][i3])
+            elif type(Bad_ch0[i2]) == str:
+                Bad_ch1.append(Bad_ch0[i2])
+        Bad_ch1 = np.unique(Bad_ch1)
+        # Drop the bad channels
+        source_epochs[Subject_idx].drop_channels(Bad_ch1)
+        # Save the overview of dropped channels
+        All_dropped_ch.append([Subject,Subject_idx,Bad_ch1])
+        # Make forward operator
+        subject_eeg = source_epochs[Subject_idx].copy()
+        subject_eeg.set_eeg_reference(projection=True) # needed for inverse modelling
+        # Make forward solution
+        fwd = mne.make_forward_solution(subject_eeg.info, trans=trans, src=src,
+                                    bem=bem, eeg=True, mindist=5.0, n_jobs=1)
+        # Save forward operator
+        fname_fwd = "./Source_fwd/fsaverage_{}-fwd.fif".format(Subject)
+        mne.write_forward_solution(fname_fwd, fwd, overwrite=True)
+    except:
+        print(Subject,"was already dropped")
+
+with open("./Preprocessing/All_datasets_bad_ch.pkl", "wb") as filehandle:
+    pickle.dump(All_dropped_ch, filehandle)
+
+
+# %% Load forward operators
+# Re-use for all subjects without dropped channels
+fname_fwd = "./Source_fwd/fsaverage-fwd.fif"
+fwd = mne.read_forward_solution(fname_fwd)
+
+fwd_list = [fwd]*n_subjects
+
+# Use specific forward solutions for subjects with dropped channels
+with open("./Preprocessing/All_datasets_bad_ch.pkl", "rb") as file:
+   All_dropped_ch = pickle.load(file)
+
+for i in range(len(All_dropped_ch)):
+    Subject = All_dropped_ch[i][0]
+    Subject_idx = All_dropped_ch[i][1]
+    fname_fwd = "./Source_fwd/fsaverage_{}-fwd.fif".format(Subject)
+    fwd = mne.read_forward_solution(fname_fwd)
+    fwd_list[Subject_idx] = fwd
+
+# Check the correct number of channels are present in fwd
+random_point = int(np.random.randint(0,len(All_dropped_ch)-1,1))
+assert len(fwds[All_dropped_ch[random_point][1]].ch_names) == source_epochs[All_dropped_ch[random_point][1]].info["nchan"]
+
+# %% Make parcellation
+# After mapping to source space, I end up with 20484 vertices
+# but I wanted to map to fewer sources and not many more
+# Thus I need to perform parcellation
+# Get labels for FreeSurfer "aparc" cortical parcellation (example with 74 labels/hemi - Destriuex)
+labels_aparc = mne.read_labels_from_annot("fsaverage", parc="aparc.a2009s",
+                                    subjects_dir=subjects_dir)
+labels_aparc = labels_aparc[:-2] # remove unknowns
+
+labels_aparc_names = [label.name for label in labels_aparc]
+
+# Manually adding the 31 ROIs (14-lh/rh + 3 in midline) from Toll et al, 2020
+# Making fuction to take subset of a label
+def label_subset(label, subset, name="ROI_name"):
+    label_subset = mne.Label(label.vertices[subset], label.pos[subset,:],
+                         label.values[subset], label.hemi,
+                         name = "{}-{}".format(name,label.hemi),
+                         subject = label.subject, color = None)
+    return label_subset
+
+### Visual area 1 (V1 and somatosensory cortex BA1-3)
+label_filenames = ["lh.V1.label", "rh.V1.label",
+                   "lh.BA1.label", "rh.BA1.label",
+                   "lh.BA2.label", "rh.BA2.label",
+                   "lh.BA3a.label", "rh.BA3a.label",
+                   "lh.BA3b.label", "rh.BA3b.label"]
+labels0 = [0]*len(label_filenames)
+for i, filename in enumerate(label_filenames):
+    labels0[i] = mne.read_label(os.path.join(fs_dir, "label", filename), subject="fsaverage")
+# Add V1 to final label variable
+labels = labels0[:2]
+# Rename to remove redundant hemi information
+labels[0].name = "V1-{}".format(labels[0].hemi)
+labels[1].name = "V1-{}".format(labels[1].hemi)
+# Assign a color
+labels[0].color = matplotlib.colors.to_rgba("salmon")
+labels[1].color = matplotlib.colors.to_rgba("salmon")
+# Combine Brodmann Areas for SMC. Only use vertices ones to avoid duplication error
+SMC_labels = labels0[2:]
+for hem in range(2):
+    SMC_p1 = SMC_labels[hem]
+    for i in range(1,len(SMC_labels)//2):
+        SMC_p2 = SMC_labels[hem+2*i]
+        p2_idx = np.isin(SMC_p2.vertices, SMC_p1.vertices, invert=True)
+        SMC_p21 = label_subset(SMC_p2, p2_idx, "SMC")
+        SMC_p1 = SMC_p1.__add__(SMC_p21)
+    SMC_p1.name = SMC_p21.name
+    # Assign a color
+    SMC_p1.color = matplotlib.colors.to_rgba("orange")
+    labels.append(SMC_p1)
+
+### Inferior frontal junction
+# Located at junction between inferior frontal and inferior precentral sulcus
+label_aparc_names0 = ["S_front_inf","S_precentral-inf-part"]
+temp_labels = []
+for i in range(len(label_aparc_names0)):
+    labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
+    for i2 in range(len(labels_aparc_idx)):
+        temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())
+
+pos1 = temp_labels[0].pos
+pos2 = temp_labels[2].pos
+distm = scipy.spatial.distance.cdist(pos1,pos2)
+# Find the closest points between the 2 ROIs
+l1_idx = np.unique(np.where(distm<np.quantile(distm, 0.001))[0]) # q chosen to correspond to around 10% of ROI
+l2_idx = np.unique(np.where(distm<np.quantile(distm, 0.0005))[1]) # q chosen to correspond to around 10% of ROI
+
+IFJ_label_p1 = label_subset(temp_labels[0], l1_idx, "IFJ")
+IFJ_label_p2 = label_subset(temp_labels[2], l2_idx, "IFJ")
+# Combine the 2 parts
+IFJ_label = IFJ_label_p1.__add__(IFJ_label_p2)
+IFJ_label.name = IFJ_label_p1.name
+# Assign a color
+IFJ_label.color = matplotlib.colors.to_rgba("chartreuse")
+# Append to final list
+labels.append(IFJ_label)
+
+# Do the same for the right hemisphere
+pos1 = temp_labels[1].pos
+pos2 = temp_labels[3].pos
+distm = scipy.spatial.distance.cdist(pos1,pos2)
+# Find the closest points between the 2 ROIs
+l1_idx = np.unique(np.where(distm<np.quantile(distm, 0.00075))[0]) # q chosen to correspond to around 10% of ROI
+l2_idx = np.unique(np.where(distm<np.quantile(distm, 0.0005))[1]) # q chosen to correspond to around 10% of ROI
+IFJ_label_p1 = label_subset(temp_labels[1], l1_idx, "IFJ")
+IFJ_label_p2 = label_subset(temp_labels[3], l2_idx, "IFJ")
+# Combine the 2 parts
+IFJ_label = IFJ_label_p1.__add__(IFJ_label_p2)
+IFJ_label.name = IFJ_label_p1.name
+# Assign a color
+IFJ_label.color = matplotlib.colors.to_rgba("chartreuse")
+# Append to final list
+labels.append(IFJ_label)
+
+### Intraparietal sulcus
+label_aparc_names0 = ["S_intrapariet_and_P_trans"]
+labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[0])]
+for i in range(len(labels_aparc_idx)):
+    labels.append(labels_aparc[labels_aparc_idx[i]].copy())
+    labels[-1].name = "IPS-{}".format(labels[-1].hemi)
+
+### Frontal eye field as intersection between middle frontal gyrus and precentral gyrus
+label_aparc_names0 = ["G_front_middle","G_precentral"]
+temp_labels = []
+for i in range(len(label_aparc_names0)):
+    labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
+    for i2 in range(len(labels_aparc_idx)):
+        temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())
+
+# Take 10% of middle frontal gyrus closest to precentral gyrus (most posterior)
+temp_label0 = temp_labels[0]
+G_fm_y = temp_label0.pos[:,1]
+thres_G_fm_y = np.sort(G_fm_y)[len(G_fm_y)//10]
+idx_p1 = np.where(G_fm_y<thres_G_fm_y)[0]
+FEF_label_p1 = label_subset(temp_label0, idx_p1, "FEF")
+# Take 10% closest for precentral gyrus (most anterior)
+temp_label0 = temp_labels[2]
+# I cannot only use y (anterior/posterior) but also need to restrict z-position
+G_pre_cen_z = temp_label0.pos[:,2]
+thres_G_pre_cen_z = 0.04 # visually inspected threshold
+G_pre_cen_y = temp_label0.pos[:,1]
+thres_G_pre_cen_y = np.sort(G_pre_cen_y[G_pre_cen_z>thres_G_pre_cen_z])[-len(G_pre_cen_y)//10] # notice - for anterior
+idx_p2 = np.where((G_pre_cen_y>thres_G_pre_cen_y) & (G_pre_cen_z>thres_G_pre_cen_z))[0]
+FEF_label_p2 = label_subset(temp_label0, idx_p2, "FEF")
+# Combine the 2 parts
+FEF_label = FEF_label_p1.__add__(FEF_label_p2)
+FEF_label.name = FEF_label_p1.name
+# Assign a color
+FEF_label.color = matplotlib.colors.to_rgba("aqua")
+# Append to final list
+labels.append(FEF_label)
+
+# Do the same for the right hemisphere
+temp_label0 = temp_labels[1]
+G_fm_y = temp_label0.pos[:,1]
+thres_G_fm_y = np.sort(G_fm_y)[len(G_fm_y)//10]
+idx_p1 = np.where(G_fm_y<thres_G_fm_y)[0]
+FEF_label_p1 = label_subset(temp_label0, idx_p1, "FEF")
+
+temp_label0 = temp_labels[3]
+G_pre_cen_z = temp_label0.pos[:,2]
+thres_G_pre_cen_z = 0.04 # visually inspected threshold
+G_pre_cen_y = temp_label0.pos[:,1]
+thres_G_pre_cen_y = np.sort(G_pre_cen_y[G_pre_cen_z>thres_G_pre_cen_z])[-len(G_pre_cen_y)//10] # notice - for anterior
+idx_p2 = np.where((G_pre_cen_y>thres_G_pre_cen_y) & (G_pre_cen_z>thres_G_pre_cen_z))[0]
+FEF_label_p2 = label_subset(temp_label0, idx_p2, "FEF")
+# Combine the 2 parts
+FEF_label = FEF_label_p1.__add__(FEF_label_p2)
+FEF_label.name = FEF_label_p1.name
+# Assign a color
+FEF_label.color = matplotlib.colors.to_rgba("aqua")
+# Append to final list
+labels.append(FEF_label)
+
+### Supplementary eye fields
+# Located at caudal end of frontal gyrus and upper part of paracentral sulcus
+label_aparc_names0 = ["G_and_S_paracentral","G_front_sup"]
+temp_labels = []
+for i in range(len(label_aparc_names0)):
+    labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
+    for i2 in range(len(labels_aparc_idx)):
+        temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())
+
+pos1 = temp_labels[0].pos
+pos2 = temp_labels[2].pos
+distm = scipy.spatial.distance.cdist(pos1,pos2)
+# Find the closest points between the 2 ROIs
+l1_idx = np.unique(np.where(distm<np.quantile(distm, 0.0005))[0]) # q chosen to correspond to around 15% of ROI
+l2_idx = np.unique(np.where(distm<np.quantile(distm, 0.005))[1]) # q chosen to correspond to around 10% of ROI
+# Notice that superior frontal gyrus is around 4 times bigger than paracentral
+len(l1_idx)/pos1.shape[0]
+len(l2_idx)/pos2.shape[0]
+# Only use upper part
+z_threshold = 0.06 # visually inspected
+l1_idx = l1_idx[pos1[l1_idx,2] > z_threshold]
+l2_idx = l2_idx[pos2[l2_idx,2] > z_threshold]
+
+SEF_label_p1 = label_subset(temp_labels[0], l1_idx, "SEF")
+SEF_label_p2 = label_subset(temp_labels[2], l2_idx, "SEF")
+# Combine the 2 parts
+SEF_label = SEF_label_p1.__add__(SEF_label_p2)
+SEF_label.name = SEF_label_p1.name
+# Assign a color
+SEF_label.color = matplotlib.colors.to_rgba("royalblue")
+# Append to final list
+labels.append(SEF_label)
+
+# Do the same for the right hemisphere
+pos1 = temp_labels[1].pos
+pos2 = temp_labels[3].pos
+distm = scipy.spatial.distance.cdist(pos1,pos2)
+# Find the closest points between the 2 ROIs
+l1_idx = np.unique(np.where(distm<np.quantile(distm, 0.0005))[0]) # q chosen to correspond to around 15% of ROI
+l2_idx = np.unique(np.where(distm<np.quantile(distm, 0.005))[1]) # q chosen to correspond to around 10% of ROI
+# Notice that superior frontal gyrus is around 4 times bigger than paracentral
+len(l1_idx)/pos1.shape[0]
+len(l2_idx)/pos2.shape[0]
+# Only use upper part
+z_threshold = 0.06 # visually inspected
+l1_idx = l1_idx[pos1[l1_idx,2] > z_threshold]
+l2_idx = l2_idx[pos2[l2_idx,2] > z_threshold]
+
+SEF_label_p1 = label_subset(temp_labels[1], l1_idx, "SEF")
+SEF_label_p2 = label_subset(temp_labels[3], l2_idx, "SEF")
+# Combine the 2 parts
+SEF_label = SEF_label_p1.__add__(SEF_label_p2)
+SEF_label.name = SEF_label_p1.name
+# Assign a color
+SEF_label.color = matplotlib.colors.to_rgba("royalblue")
+# Append to final list
+labels.append(SEF_label)
+
+### Posterior cingulate cortex
+label_aparc_names0 = ["G_cingul-Post-dorsal", "G_cingul-Post-ventral"]
+temp_labels = []
+for i in range(len(label_aparc_names0)):
+    labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
+    for i2 in range(len(labels_aparc_idx)):
+        temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())
+labels0 = []
+for hem in range(2):
+    PCC_p1 = temp_labels[hem]
+    for i in range(1,len(temp_labels)//2):
+        PCC_p2 = temp_labels[hem+2*i]
+        PCC_p1 = PCC_p1.__add__(PCC_p2)
+    PCC_p1.name = "PCC-{}".format(PCC_p1.hemi)
+    labels0.append(PCC_p1)
+# Combine the 2 hemisphere in 1 label
+labels.append(labels0[0].__add__(labels0[1]))
+
+### Medial prefrontal cortex
+# From their schematic it looks like rostral 1/4 of superior frontal gyrus
+label_aparc_names0 = ["G_front_sup"]
+temp_labels = []
+for i in range(len(label_aparc_names0)):
+    labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
+    for i2 in range(len(labels_aparc_idx)):
+        temp_labels0 = labels_aparc[labels_aparc_idx[i2]].copy()
+        temp_labels0 = temp_labels0.split(4, subjects_dir=subjects_dir)[3]
+        temp_labels0.name = "mPFC-{}".format(temp_labels0.hemi)
+        temp_labels.append(temp_labels0)
+# Combine the 2 hemisphere in 1 label
+labels.append(temp_labels[0].__add__(temp_labels[1]))
+
+### Angular gyrus
+label_aparc_names0 = ["G_pariet_inf-Angular"]
+for i in range(len(label_aparc_names0)):
+    labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
+    for i2 in range(len(labels_aparc_idx)):
+        temp_labels = labels_aparc[labels_aparc_idx[i2]].copy()
+        temp_labels.name = "ANG-{}".format(temp_labels.hemi)
+        labels.append(temp_labels)
+
+### Posterior middle frontal gyrus
+label_aparc_names0 = ["G_front_middle"]
+for i in range(len(label_aparc_names0)):
+    labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
+    for i2 in range(len(labels_aparc_idx)):
+        temp_labels = labels_aparc[labels_aparc_idx[i2]].copy()
+        temp_labels = temp_labels.split(2, subjects_dir=subjects_dir)[0]
+        temp_labels.name = "PMFG-{}".format(temp_labels.hemi)
+        labels.append(temp_labels)
+
+### Inferior parietal lobule
+# From their parcellation figure seems to be rostral angular gyrus and posterior supramarginal gyrus
+label_aparc_names0 = ["G_pariet_inf-Angular","G_pariet_inf-Supramar"]
+temp_labels = []
+for i in range(len(label_aparc_names0)):
+    labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
+    for i2 in range(len(labels_aparc_idx)):
+        temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())
+# Split angular in 2 and get rostral part
+temp_labels[0] = temp_labels[0].split(2, subjects_dir=subjects_dir)[1]
+temp_labels[1] = temp_labels[1].split(2, subjects_dir=subjects_dir)[1]
+# Split supramarginal in 2 and get posterior part
+temp_labels[2] = temp_labels[2].split(2, subjects_dir=subjects_dir)[0]
+temp_labels[3] = temp_labels[3].split(2, subjects_dir=subjects_dir)[0]
+
+for hem in range(2):
+    PCC_p1 = temp_labels[hem]
+    for i in range(1,len(temp_labels)//2):
+        PCC_p2 = temp_labels[hem+2*i]
+        PCC_p1 = PCC_p1.__add__(PCC_p2)
+    PCC_p1.name = "IPL-{}".format(PCC_p1.hemi)
+    labels.append(PCC_p1)
+
+### Orbital gyrus
+# From their figure it seems to correspond to orbital part of inferior frontal gyrus
+label_aparc_names0 = ["G_front_inf-Orbital"]
+for i in range(len(label_aparc_names0)):
+    labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
+    for i2 in range(len(labels_aparc_idx)):
+        temp_labels = labels_aparc[labels_aparc_idx[i2]].copy()
+        temp_labels.name = "ORB-{}".format(temp_labels.hemi)
+        labels.append(temp_labels)
+
+### Middle temporal gyrus
+# From their figure it seems to only be 1/4 of MTG at the 2nd to last caudal part
+label_aparc_names0 = ["G_temporal_middle"]
+for i in range(len(label_aparc_names0)):
+    labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
+    for i2 in range(len(labels_aparc_idx)):
+        temp_labels = labels_aparc[labels_aparc_idx[i2]].copy()
+        temp_labels = temp_labels.split(4, subjects_dir=subjects_dir)[1]
+        temp_labels.name = "MTG-{}".format(temp_labels.hemi)
+        labels.append(temp_labels)
+
+### Anterior middle frontal gyrus
+label_aparc_names0 = ["G_front_middle"]
+for i in range(len(label_aparc_names0)):
+    labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
+    for i2 in range(len(labels_aparc_idx)):
+        temp_labels = labels_aparc[labels_aparc_idx[i2]].copy()
+        temp_labels = temp_labels.split(2, subjects_dir=subjects_dir)[1]
+        temp_labels.name = "AMFG-{}".format(temp_labels.hemi)
+        labels.append(temp_labels)
+
+### Insula
+label_aparc_names0 = ["G_Ins_lg_and_S_cent_ins","G_insular_short"]
+temp_labels = []
+for i in range(len(label_aparc_names0)):
+    labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
+    for i2 in range(len(labels_aparc_idx)):
+        temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())
+for hem in range(2):
+    PCC_p1 = temp_labels[hem]
+    for i in range(1,len(temp_labels)//2):
+        PCC_p2 = temp_labels[hem+2*i]
+        PCC_p1 = PCC_p1.__add__(PCC_p2)
+    PCC_p1.name = "INS-{}".format(PCC_p1.hemi)
+    labels.append(PCC_p1)
+
+### (Dorsal) Anterior Cingulate Cortex
+label_aparc_names0 = ["G_and_S_cingul-Ant"]
+temp_labels = []
+for i in range(len(label_aparc_names0)):
+    labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
+    for i2 in range(len(labels_aparc_idx)):
+        temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())
+        temp_labels[-1].name = "ACC-{}".format(temp_labels[-1].hemi)
+# Combine the 2 hemisphere in 1 label
+labels.append(temp_labels[0].__add__(temp_labels[1]))
+
+### Supramarginal Gyrus
+label_aparc_names0 = ["G_pariet_inf-Supramar"]
+for i in range(len(label_aparc_names0)):
+    labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
+    for i2 in range(len(labels_aparc_idx)):
+        temp_labels = labels_aparc[labels_aparc_idx[i2]].copy()
+        temp_labels.name = "SUP-{}".format(temp_labels.hemi)
+        labels.append(temp_labels)
+
+print("{} ROIs have been defined".format(len(labels)))
+
+# # Visualize positions
+# fig = plt.figure()
+# ax = fig.add_subplot(111, projection="3d")
+# for i in range(0,3):
+#     temp_pos = temp_labels[i].pos
+#     ax.scatter(temp_pos[:,0],temp_pos[:,1],temp_pos[:,2], marker="o", alpha=0.1)
+# # Add to plot
+# ax.scatter(labels[-1].pos[:,0],labels[-1].pos[:,1],labels[-1].pos[:,2], marker="o")
+
+# # Visualize the labels
+# # temp_l = labels_aparc[labels_aparc_idx[0]]
+# temp_l = labels[-2]
+# l_stc = stc[100].in_label(temp_l)
+# l_stc.vertices
+
+# l_stc.plot(**surfer_kwargs)
+
+# Save the annotation file
+with open("custom_aparc2009_Li_et_al_2022.pkl", "wb") as file:
+    pickle.dump(labels, file)
+
+# %% Calculate orthogonalized power envelope connectivity in source space
+# In non-interpolated channels
+# Updated 22/1 - 2021 to use delta = 1/81 and assumption
+# about non-correlated and equal variance noise covariance matrix for channels
+
+# Load
+with open("custom_aparc2009_Li_et_al_2022.pkl", "rb") as file:
+    labels = pickle.load(file)
+label_names = [label.name for label in labels]
+
+# Define function to estimate PEC
+def PEC_estimation(x, freq_bands, sfreq=200):
+    """
+    This function takes a source timeseries signal x and performs:
+        1. Bandpass filtering
+        2. Hilbert transform to yield analytical signal
+        3. Compute all to all connectivity by iteratively computing for each pair
+            a. Orthogonalization
+            b. Computing power envelopes by squaring the signals |x|^2
+            c. Log-transform to enhance normality
+            d. Pearson's correlation between each pair
+            e. Fisher's r-to-z transform to enhance normality
+    The code has been optimized by inspiration from MNE-Python's function:
+    mne.connectivity.enelope_correlation.
+    
+    In MNE-python version < 0.22 there was a bug, but after the fix in 0.22
+    the mne function is equivalent to my implementation, although they don't
+    use epsilon but gives same result with a RuntimeWarning about log(0)
+    
+    IMPORTANT NOTE:
+        Filtering introduce artifacts for first and last timepoint
+    The values are very low, more than 1e-12 less than the others
+    If they are not removed, then they will heavily influence Pearson's
+    correlation as it is outlier sensitive
+    
+    Inputs:
+        x - The signal in source space as np.array with shape (ROIs,Timepoints)
+        freq_bands - The frequency bands of interest as a dictionary e.g.
+                     {"alpha": [8.0, 13.0], "beta": [13.0, 30.0]}
+        sfreq - The sampling frequency in Hertz
+    
+    Output:
+        The pairwise connectivity matrix
+    """
+    n_roi, n_timepoints = x.shape
+    n_freq_bands = len(freq_bands)
+    
+    epsilon = 1e-100 # small value to prevent log(0) errors
+    
+    # Filter the signal in the different freq bands
+    PEC_con0 = np.zeros((n_roi,n_roi,n_freq_bands))
+    for fname, frange in freq_bands.items():
+        fmin, fmax = [float(interval) for interval in frange]
+        signal_filtered = mne.filter.filter_data(x, sfreq, fmin, fmax,
+                                          fir_design="firwin", verbose=0)
+        # Filtering on finite signals will yield very low values for first
+        # and last timepoint, which can create outliers. E.g. 1e-29 compared to 1e-14
+        # Outlier sensitive methods, like Pearson's correlation, is therefore
+        # heavily affected and this systematic error is removed by removing
+        # the first and last timepoint
+        signal_filtered = signal_filtered[:,1:-1]
+        
+        # Hilbert transform
+        analytic_signal = scipy.signal.hilbert(signal_filtered)
+        # I will use x and y to keep track of orthogonalization
+        x0 = analytic_signal
+        # Get power envelope
+        x0_mag = np.abs(x0)
+        # Get scaled conjugate used for orthogonalization estimation
+        x0_conj_scaled = x0.conj()
+        x0_conj_scaled /= x0_mag
+        # Take square power envelope
+        PEx = np.square(x0_mag)
+        # Take log transform
+        lnPEx = np.log(PEx+epsilon)
+        # Remove mean for Pearson correlation calculation
+        lnPEx_nomean = lnPEx - np.mean(lnPEx, axis=-1, keepdims=True) # normalize each roi timeseries
+        # Get std for Pearson correlation calculation
+        lnPEx_std = np.std(lnPEx, axis=-1)
+        lnPEx_std[lnPEx_std == 0] = 1 # Prevent std = 0 problems
+        # Prepare con matrix
+        con0 = np.zeros((n_roi,n_roi))
+        for roi_r, y0 in enumerate(x0): # for each y0
+            # Calculate orthogonalized signal y with respect to x for all x
+            # Using y_ort = imag(y*x_conj/|x|)
+            # I checked the formula in temp_v3 and it works as intended
+            # I want to orthogonalize element wise for each timepoint
+            y0_ort = (y0*x0_conj_scaled).imag
+            # Here y0_ort.shape = (n_roi, n_timepoints)
+            # So y is current roi and the first axis gives each x it is orthogonalized to
+            # Take the abs to get power envelope
+            y0_ort = np.abs(y0_ort)
+            # Prevent log(0) error when calculating y_ort on y
+            y0_ort[roi_r] = 1. # this will be 0 zero after mean subtraction
+            # Take square power envelope
+            PEy = np.square(y0_ort) # squared power envelope
+            # Take log transform
+            lnPEy = np.log(PEy+epsilon)
+            # Remove mean for pearson correlation calculation
+            lnPEy_nomean = lnPEy - np.mean(lnPEy, axis=-1, keepdims=True)
+            # Get std for Pearson correlation calculation
+            lnPEy_std = np.std(lnPEy, axis=-1)
+            lnPEy_std[lnPEy_std == 0] = 1.
+            # Pearson correlation is expectation of X_nomean * Y_nomean for each time-series divided with standard deviations
+            PEC = np.mean(lnPEx_nomean*lnPEy_nomean, axis=-1)
+            PEC /= lnPEx_std
+            PEC /= lnPEy_std
+            con0[roi_r] = PEC
+        # The con0 connectivity matrix should be read as correlation between
+        # orthogonalized y (row number) and x (column number)
+        # It is not symmetrical, as cor(roi2_ort, roi1) is not cor(roi1_ort, roi2)
+        # To make it symmetrical the average of the absolute correlation
+        # of the 2 possibilities for each pair are taken
+        con0 = np.abs(con0)
+        con0 = (con0.T+con0)/2.
+        # Fisher's z transform - which is equivalent to arctanh
+        con0 = np.arctanh(con0)
+        # The diagonal is not 0 as I wanted to avoid numerical errors with log(0)
+        # and used a small epsilon value. Thus the diagonal is explicitly set to 0
+        
+        # Save to array
+        PEC_con0[:,:,list(freq_bands.keys()).index(fname)] = con0
+    return PEC_con0
+
+# Prepare variables
+Freq_Bands = {"delta": [1.25, 4.0],
+              "theta": [4.0, 8.0],
+              "alpha": [8.0, 13.0],
+              "beta": [13.0, 30.0],
+              "gamma": [30.0, 49.0]}
+n_freq_bands = len(Freq_Bands)
+n_roi = len(labels)
+
+# Get current time
+c_time1 = time.localtime()
+c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1)
+print(c_time1)
+
+# PEC analysis
+PEC_data_list = [0]*n_subjects
+STCs_list = [0]*n_subjects
+
+# Using inverse operator as generator interferes with concurrent processes
+# If I run it for multiple subjects I run out of ram
+# Thus concurrent processes are used inside the for loop
+def PEC_analysis(input_args): # iterable epoch number and corresponding ts
+    i2, ts = input_args
+    # Estimate PEC
+    PEC_con0 = PEC_estimation(ts, Freq_Bands, sfreq)
+    print("Finished {} out of {} epochs".format(i2+1,n_epochs))
+    return i2, PEC_con0, ts
+
+for i in range(n_subjects):
+    n_epochs, n_ch, n_timepoints = source_epochs[i].get_data().shape
+    # Use different forward solutions depending on number of channels
+    cur_subject_id = Subject_id[i]
+    fwd = fwds[i]
+    
+    # Using assumption about equal variance and no correlations I make a diagonal matrix
+    # Using the default option for 0.2µV std for EEG data
+    noise_cov = mne.make_ad_hoc_cov(source_epochs[i].info, None)
+    
+    # Make inverse operator
+    # Using default depth parameter = 0.8 and free orientation (loose = 1)
+    inverse_operator = mne.minimum_norm.make_inverse_operator(source_epochs[i].info,
+                                                              fwd, noise_cov,
+                                                              loose = 1, depth = 0.8,
+                                                              verbose = 0)
+    src_inv = inverse_operator["src"]
+    # Compute inverse solution and retrieve time series for each label
+    # Preallocate memory
+    label_ts = np.full((n_epochs,len(labels),n_timepoints),np.nan)
+    # Define regularization
+    snr = 9 # Zhang et al, 2020 used delta = 1/81, which is inverse SNR and correspond to lambda2
+    # A for loop is used for each label due to memory issues when doing all labels at the same time
+    for l in range(len(labels)):
+        stc = mne.minimum_norm.apply_inverse_epochs(source_epochs[i],inverse_operator,
+                                                    lambda2 = 1/(snr**2),
+                                                    label = labels[l],
+                                                    pick_ori = "vector",
+                                                    return_generator=False,
+                                                    method = "MNE", verbose = 0)
+        # Use PCA to reduce the 3 orthogonal directions to 1 principal direction with max power
+        # There can be ambiguity about the orientation, thus the one that
+        # is pointing most "normal", i.e. closest 90 degrees to the skull is used
+        stc_pca = [0]*len(stc)
+        for ep in range(n_epochs):
+            stc_pca[ep], pca_dir = stc[ep].project(directions="pca", src=src_inv)
+        # Get mean time series for the whole label
+        temp_label_ts = mne.extract_label_time_course(stc_pca, labels[l], src_inv, mode="mean_flip",
+                                         return_generator=False, verbose=0)
+        # Save to array
+        label_ts[:,l,:] = np.squeeze(np.array(temp_label_ts))
+        print("Finished estimating STC for {} out of {} ROIs".format(l+1,len(labels)))
+    
+    # Free up memory
+    del stc
+
+    # Prepare variables
+    sfreq=source_epochs[i].info["sfreq"]
+    n_epochs = len(source_epochs[i])
+    # Estimate the pairwise PEC for each epoch
+    PEC_con_subject = np.zeros((n_epochs,n_roi,n_roi,n_freq_bands))
+    stcs0 = np.zeros((n_epochs,n_roi,int(sfreq)*4)) # 4s epochs
+    # Make list of arguments to pass into PEC_analysis using the helper func
+    args = []
+    for i2 in range(n_epochs):
+        args.append((i2,label_ts[i2]))
+    
+    with concurrent.futures.ProcessPoolExecutor(max_workers=16) as executor:
+        for i2, PEC_result, stc_result in executor.map(PEC_analysis, args): # Function and arguments
+            PEC_con_subject[i2] = PEC_result
+            stcs0[i2] = stc_result
+    
+    # Save to list
+    PEC_data_list[i] = PEC_con_subject # [subject](epoch,ch,ch,freq)
+    STCs_list[i] = stcs0 # [subject][epoch,roi,timepoint]
+    
+    # Print progress
+    print("Finished {} out of {} subjects".format(i+1,n_subjects))
+
+# Get current time
+c_time2 = time.localtime()
+c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2)
+print("Started", c_time1, "\nFinished",c_time2)
+
+with open(Feature_savepath+"PEC_each_epoch_drop_interpol_ch_fix_snr.pkl", "wb") as file:
+    pickle.dump(PEC_data_list, file)
+with open(Feature_savepath+"STCs_each_epoch_drop_interpol_ch_fix_snr.pkl", "wb") as file:
+    pickle.dump(STCs_list, file)
+
+# # # Load
+# with open(Feature_savepath+"PEC_each_epoch_drop_interpol_ch_fix_snr.pkl", "rb") as file:
+#     PEC_data_list = pickle.load(file)
+
+# # Load
+# with open(Feature_savepath+"STCs_each_epoch_drop_interpol_ch_fix_snr.pkl", "rb") as file:
+#     STCs_list = pickle.load(file)
+
+# Average over eye status
+eye_status = list(source_epochs[0].event_id.keys())
+n_eye_status = len(eye_status)
+pec_data = np.zeros((n_subjects,n_eye_status,n_roi,n_roi,n_freq_bands))
+for i in range(n_subjects):
+    # Get indices for eyes open and closed
+    EC_index = source_epochs[i].events[:,2] == 1
+    EO_index = source_epochs[i].events[:,2] == 2
+    # Average over the indices and save to array
+    pec_data[i,0] = np.mean(PEC_data_list[i][EC_index], axis=0)
+    pec_data[i,1] = np.mean(PEC_data_list[i][EO_index], axis=0)
+    # Only use the lower diagonal as the diagonal should be 0 (or very small due to numerical errors)
+    # And it is symmetric
+    for f in range(n_freq_bands):
+        pec_data[i,0,:,:,f] = np.tril(pec_data[i,0,:,:,f],k=-1)
+        pec_data[i,1,:,:,f] = np.tril(pec_data[i,1,:,:,f],k=-1)
+
+# Also save as dataframe format for feature selection
+# Convert to Pandas dataframe
+# The dimensions will each be a column with numbers and the last column will be the actual values
+arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, pec_data.shape), indexing="ij"))) + [pec_data.ravel()])
+pec_data_df = pd.DataFrame(arr, columns = ["Subject_ID", "Eye_status", "chx", "chy", "Freq_band", "Value"])
+# Change from numerical coding to actual values
+eye_status = list(source_epochs[0].event_id.keys())
+freq_bands_name = list(Freq_Bands.keys())
+label_names = [label.name for label in labels]
+
+index_values = [Subject_id,eye_status,label_names,label_names,freq_bands_name]
+for col in range(len(index_values)):
+    col_name = pec_data_df.columns[col]
+    for shape in range(pec_data.shape[col]): # notice not dataframe but the array
+        pec_data_df.loc[pec_data_df.iloc[:,col] == shape,col_name]\
+        = index_values[col][shape]
+
+# Add group status
+Group_status = np.array(["CTRL"]*len(pec_data_df["Subject_ID"]))
+Group_status[np.array([i in cases for i in pec_data_df["Subject_ID"]])] = "PTSD"
+# Add to dataframe
+pec_data_df.insert(3, "Group_status", Group_status)
+
+# Remove all diagonal and upper-matrix entries by removing zeros
+pec_data_df = pec_data_df.iloc[pec_data_df["Value"].to_numpy().nonzero()]
+
+# Save df
+pec_data_df.to_pickle(os.path.join(Feature_savepath,"pec_data_drop_interpol_ch_df.pkl"))
+
+# %% Sparse clustering of PEC for subtyping PTSD group
+# They did it for both eye status together, so all data in one matrix
+# Load PEC df
+# pec_data_df = pd.read_pickle(os.path.join(Feature_savepath,"pec_data_df.pkl"))
+pec_data_df = pd.read_pickle(os.path.join(Feature_savepath,"pec_data_drop_interpol_ch_df.pkl"))
+
+# Convert to wide format
+# Make function to add measurement column for indexing
+def add_measurement_column(df, measurement = "Text"):
+    dummy_variable = [measurement]*df.shape[0]
+    df.insert(1, "Measurement", dummy_variable)
+    return df
+# Make function to convert column tuple to string
+def convertTupleHeader(header):
+    header = list(header)
+    str = "_".join(header)
+    return str
+
+# Prepare overall dataframe
+PEC_df = pd.DataFrame(Subject_id, columns = ["Subject_ID"])
+# Add PEC
+pec_data_df = add_measurement_column(pec_data_df, "PEC")
+temp_df = pec_data_df.pivot_table(index="Subject_ID",columns=["Measurement",
+                                    "Eye_status", "chx", "chy",
+                                    "Freq_band"], dropna=True,
+                               values="Value").reset_index(drop=True)
+# check NaN is properly dropped and subject index is correct
+assert pec_data_df.shape[0] == np.prod(temp_df.shape)
+test1 = pec_data_df.iloc[np.random.randint(n_subjects),:]
+assert test1["Value"] ==\
+    temp_df[test1[1]][test1[2]][test1[3]][test1[5]][test1[6]][Subject_id.index(test1[0])]
+# Fix column names
+temp_df.columns = [convertTupleHeader(temp_df.columns[i]) for i in range(len(temp_df.columns))]
+
+PEC_df = pd.concat([PEC_df,temp_df], axis=1)
+
+# Add group status
+Groups = ["CTRL", "PTSD"]
+Group_status = np.array([0]*PEC_df.shape[0]) # CTRL = 0
+Group_status[np.array([i in cases for i in PEC_df["Subject_ID"]])] = 1 # PTSD = 1
+PEC_df.insert(1, "Group_status", Group_status)
+
+# Only use PTSD patient group
+PEC_df2 = PEC_df.loc[PEC_df["Group_status"]==1,:]
+
+Subject_info_cols = ["Subject_ID","Group_status"]
+
+# Use gridsearch and permutations to estimate gap statistic and use it to 
+# determine number of clusters and sparsity s
+# I will use 100 permutations and test 2 to 6 clusters as Zhang 2020
+# Error when trying to determine Gap statistic for 1 cluster? Also in R package
+max_clusters = 6
+n_sparsity_feat = 20
+perm_res = []
+for k in range(1,max_clusters):
+    # Cannot permute with 1 cluster
+    n_clusters = k+1
+    x = np.array(PEC_df2.copy().drop(Subject_info_cols, axis=1))
+    perm = pysparcl.cluster.permute_modified(x, k=n_clusters, verbose=True,
+                                             nvals=n_sparsity_feat, nperms=100)
+    perm_res.append(perm)
+
+# Save the results
+with open(Feature_savepath+"PEC_drop_interpol_ch_kmeans_perm.pkl", "wb") as file:
+    pickle.dump(perm_res, file)
+
+# # Load
+# with open(Feature_savepath+"PEC_drop_interpol_ch_kmeans_perm.pkl", "rb") as file:
+#     perm_res = pickle.load(file)
+
+# Convert results to array
+perm_res_arr = np.zeros((len(perm_res)*n_sparsity_feat,4))
+for i in range(len(perm_res)):
+    _, gaps, sdgaps, wbounds, _ = perm_res[i].values()
+    for i2 in range(n_sparsity_feat):
+        perm_res_arr[20*i+i2,0] = i+2 # cluster size
+        perm_res_arr[20*i+i2,1] = gaps[i2] # gap statistic
+        perm_res_arr[20*i+i2,2] = sdgaps[i2] # gap statistic std
+        perm_res_arr[20*i+i2,3] = wbounds[i2] # sparsity feature s
+
+# For each sparsity s, determine best k using one-standard-error criterion
+# Meaning the cluster and sparsity is chosen for the smallest value of k for a fixed s
+# that fulfill Gap(k) >= Gap(k+1)-std(k+1)
+def one_standard_deviation_search(gaps, std):
+    best_gaps = np.argmax(gaps)
+    current_gaps = gaps[best_gaps]
+    current_std = std[best_gaps]
+    current_gaps_idx = best_gaps
+    while (gaps[current_gaps_idx-1] >= current_gaps - current_std):
+        if current_gaps_idx == 0:
+            break
+        else:
+            current_gaps_idx -= 1
+            current_gaps = gaps[current_gaps_idx]
+            current_std = std[current_gaps_idx]
+    out = current_gaps, current_std, current_gaps_idx
+    return out
+
+best_ks = np.zeros((n_sparsity_feat, 2))
+all_s = np.unique(perm_res_arr[:,3])
+plt.figure(figsize=(12,12))
+for i2 in range(n_sparsity_feat):
+    current_s = all_s[i2]
+    gaps = perm_res_arr[perm_res_arr[:,3] == current_s,1]
+    std = perm_res_arr[perm_res_arr[:,3] == current_s,2]
+    _, _, idx = one_standard_deviation_search(gaps, std)
+    # Save to array
+    best_ks[i2,0] = current_s
+    best_ks[i2,1] = perm_res_arr[perm_res_arr[:,3] == current_s,0][idx]
+    # Plot gap
+    plt.errorbar(perm_res_arr[perm_res_arr[:,3] == current_s,0].astype("int"),
+             gaps, yerr=std, capsize=5, label = np.round(current_s,3))
+plt.title("Gap statistic for different fixed s")
+plt.legend(loc=1)
+plt.xlabel("Number of clusters")
+plt.ylabel("Gap statistic")
+
+best_k = int(scipy.stats.mode(best_ks[:,1])[0])
+
+# Determine s using fixed k as lowest s within 1 std of max gap statistic
+# According to Witten & Tibshirani, 2010
+best_gaps_idx = np.argmax(perm_res_arr[perm_res_arr[:,0] == best_k,1])
+best_gaps = perm_res_arr[perm_res_arr[:,0] == best_k,1][best_gaps_idx]
+best_gaps_std = perm_res_arr[perm_res_arr[:,0] == best_k,2][best_gaps_idx]
+one_std_crit = perm_res_arr[perm_res_arr[:,0] == best_k,1]>=best_gaps-best_gaps_std
+
+best_s = np.array([perm_res_arr[perm_res_arr[:,0] == best_k,3][one_std_crit][0]])
+
+# Perform clustering with k clusters
+x = np.array(PEC_df2.copy().drop(Subject_info_cols, axis=1))
+sparcl = pysparcl.cluster.kmeans(x, k=best_k, wbounds=best_s)[0]
+
+# Save the results
+with open(Feature_savepath+"PEC_drop_interpol_ch_sparse_kmeans.pkl", "wb") as file:
+    pickle.dump(sparcl, file)
+
+# Get overview of the features chosen and summarize feature type with countplot
+nonzero_idx = sparcl["ws"].nonzero()
+sparcl_features = PEC_df2.copy().drop(Subject_info_cols, axis=1).columns[nonzero_idx]
+
+# Prepare variables
+Freq_Bands = {"delta": [1.25, 4.0],
+              "theta": [4.0, 8.0],
+              "alpha": [8.0, 13.0],
+              "beta": [13.0, 30.0],
+              "gamma": [30.0, 49.0]}
+n_freq_bands = len(Freq_Bands)
+eye_status = list(source_epochs[0].event_id.keys())
+n_eye_status = len(eye_status)
+
+sparcl_feat = []
+sparcl_feat_counts = []
+for e in range(n_eye_status):
+    ee = eye_status[e]
+    for f in range(n_freq_bands):
+        ff = list(Freq_Bands.keys())[f]
+        temp_feat = sparcl_features[sparcl_features.str.contains(("_"+ee))]
+        temp_feat = temp_feat[temp_feat.str.contains(("_"+ff))]
+        # Save to list
+        sparcl_feat.append(temp_feat)
+        sparcl_feat_counts.append(["{}_{}".format(ee,ff), len(temp_feat)])
+
+# Convert the list to dataframe to use in countplot
+sparcl_feat_counts_df = pd.DataFrame(columns=["Eye_status", "Freq_band"])
+for i in range(len(sparcl_feat_counts)):
+    # If this feature type does not exist, then skip it
+    if sparcl_feat_counts[i][1] == 0:
+        continue
+    ee, ff = sparcl_feat_counts[i][0].split("_")
+    counts = sparcl_feat_counts[i][1]
+    temp_df = pd.DataFrame({"Eye_status":np.repeat(ee,counts),
+                            "Freq_band":np.repeat(ff,counts)})
+    sparcl_feat_counts_df = sparcl_feat_counts_df.append(temp_df, ignore_index=True)
+
+# Fix Freq_band categorical order
+cat_type = pd.CategoricalDtype(categories=list(Freq_Bands.keys()), ordered=True)
+sparcl_feat_counts_df["Freq_band"] = sparcl_feat_counts_df["Freq_band"].astype(cat_type)
+
+plt.figure(figsize=(8,8))
+g = sns.countplot(y="Freq_band", hue="Eye_status", data=sparcl_feat_counts_df)
+plt.title("PEC Sparse K-means features")
+plt.xlabel("Number of non-zero weights")
+plt.ylabel("Frequency Band")
+
+# %% Functional connectivity in source space
+# MNE implementation of PLV and wPLI is phase across trials(epochs), e.g. for ERPs
+# I'll use my own manually implemented PLV and wPLI across time and then average across epochs
+# Notice that the new MNE-connectivity library now also takes phase across time
+
+sfreq = final_epochs[0].info["sfreq"]
+# error when using less than 5 cycles for spectrum estimation
+# 1Hz too low with epoch length of 4, thus I changed the fmin to 1.25 for delta
+Freq_Bands = {"delta": [1.25, 4.0],
+              "theta": [4.0, 8.0],
+              "alpha": [8.0, 13.0],
+              "beta": [13.0, 30.0],
+              "gamma": [30.0, 49.0]}
+n_freq_bands = len(Freq_Bands)
+freq_centers = np.array([2.5,6,10.5,21.5,40])
+# Convert to tuples for the mne function
+fmin=tuple([list(Freq_Bands.values())[f][0] for f in range(len(Freq_Bands))])
+fmax=tuple([list(Freq_Bands.values())[f][1] for f in range(len(Freq_Bands))])
+
+# Make linspace array for morlet waves
+freq_centers = np.arange(fmin[0],fmax[-1]+0.25,0.25)
+# Prepare Morlets
+morlets = mne.time_frequency.tfr.morlet(sfreq,freq_centers,n_cycles=3)
+
+# Make freqs array for indexing
+freqs0 = [0]*n_freq_bands
+for f in range(n_freq_bands):
+    freqs0[f] = freq_centers[(freq_centers>=fmin[f]) & (freq_centers<=fmax[f])]
+
+# The in-built connectivity function gives an (n_channel, n_channel, freqs output
+# For loop over subject ID and eye status is implemented
+n_subjects = len(Subject_id)
+eye_status = list(final_epochs[0].event_id.keys())
+n_eye_status = len(eye_status)
+ch_names = final_epochs[0].info["ch_names"]
+
+# Load source labels
+with open("custom_aparc2009_Li_et_al_2022.pkl", "rb") as file:
+    labels = pickle.load(file)
+label_names = [label.name for label in labels]
+n_sources = len(label_names)
+
+# Connectivity methods
+connectivity_methods = ["coh","imcoh","plv","wpli"]
+n_con_methods=len(connectivity_methods)
+
+# Number of pairwise ch connections
+n_ch_connections = scipy.special.comb(n_sources,2, exact=True, repetition=False)
+
+# Load source time series
+with open(Feature_savepath+"STCs_each_epoch_drop_interpol_ch_fix_snr.pkl", "rb") as file:
+    STCs_list = pickle.load(file)
+
+# I made my own slightly-optimized PLV & WPLI function
+# Version 2 based on Filter + Hilbert instead of Morlets
+def calculate_PLV_WPLI_across_time(data):
+    n_ch, n_time0 = data.shape
+    x = data.copy()
+    # Filter the signal in the different freq bands
+    con_array0 = np.zeros((2,n_ch,n_ch,n_freq_bands))
+    # con_array0[con_array0==0] = np.nan
+    for fname, frange in Freq_Bands.items():
+        fmin, fmax = [float(interval) for interval in frange]
+        signal_filtered = mne.filter.filter_data(x, sfreq, fmin, fmax,
+                                          fir_design="firwin", verbose=0)
+        # Filtering on finite signals will yield very low values for first
+        # and last timepoint, which can create outliers. E.g. 1e-29 compared to 1e-14
+        # This systematic error is removed by removing the first and last timepoint
+        signal_filtered = signal_filtered[:,1:-1]
+        # Hilbert transform to get complex signal
+        analytic_signal = scipy.signal.hilbert(signal_filtered)
+        # Calculate for the lower diagnonal only as it is symmetric
+        for ch_r in range(n_ch):
+            for ch_c in range(n_ch):
+                if ch_r>ch_c:
+                    # =========================================================================
+                    # PLV over time correspond to mean across time of the absolute value of
+                    # the circular length of the relative phases. So PLV will be 1 if
+                    # the phases of 2 signals maintain a constant lag
+                    # In equational form: PLV = 1/N * |sum(e^i(phase1-phase2))|
+                    # In code: abs(mean(exp(1i*phase_diff)))
+                    # =========================================================================
+                    # The real part correspond to the amplitude and the imaginary part can be used to calculate the phase
+                    phase_diff = np.angle(analytic_signal[ch_r])-np.angle(analytic_signal[ch_c])
+                    # Convert phase difference to complex part i(phase1-phase2)
+                    phase_diff_im = 0*phase_diff+1j*phase_diff
+                    # Take the exponential, then the mean followed by absolute value
+                    PLV = np.abs(np.mean(np.exp(phase_diff_im)))
+                    # Save to array
+                    con_array0[0,ch_r,ch_c,list(Freq_Bands.keys()).index(fname)] = PLV
+                    # =========================================================================
+                    # PLI over time correspond to the sign of the sine of relative phase
+                    # differences. So PLI will be 1 if one signal is always leading or
+                    # lagging behind the other signal. But it is insensitive to changes in
+                    # relative phase, as long as it is the same signal that leads.
+                    # If 2 signals are almost in phase, they might shift between lead/lag
+                    # due to small fluctuations from noise. This would lead to unstable
+                    # estimation of "phase" synchronisation.
+                    # The wPLI tries to correct for this by weighting the PLI with the
+                    # magnitude of the lag, to attenuate noise sources giving rise to
+                    # near zero phase lag "synchronization"
+                    # In equational form: WPLI = |E{|phase_diff|*sign(phase_diff)}| / E{|phase_diff|}
+                    # =========================================================================
+                    # Calculate the magnitude of phase differences
+                    phase_diff_mag = np.abs(np.sin(phase_diff))
+                    # Calculate the signed phase difference (PLI)
+                    sign_phase_diff = np.sign(np.sin(phase_diff))
+                    # Calculate the nominator (abs and average across time)
+                    WPLI_nominator = np.abs(np.mean(phase_diff_mag*sign_phase_diff))
+                    # Calculate denominator for normalization
+                    WPLI_denom = np.mean(phase_diff_mag)
+                    # Calculate WPLI
+                    WPLI = WPLI_nominator/WPLI_denom
+                    # Save to array
+                    con_array0[1,ch_r,ch_c,list(Freq_Bands.keys()).index(fname)] = WPLI
+    return con_array0
+
+# Pre-allocatate memory
+con_data = np.zeros((n_con_methods,n_subjects,n_eye_status,n_sources,n_sources,n_freq_bands))
+n_epochs_matrix = np.zeros((n_subjects,n_eye_status))
+
+# Get current time
+c_time = time.localtime()
+c_time = time.strftime("%H:%M:%S", c_time)
+print(c_time)
+
+def connectivity_estimation(i):
+    con_data0 = np.zeros((n_con_methods,n_eye_status,n_sources,n_sources,n_freq_bands))
+    con_data0[con_data0==0] = np.nan
+    n_epochs_matrix0 = np.zeros((n_eye_status))
+    for e in range(n_eye_status):
+        ee = eye_status[e]
+        eye_idx = final_epochs[i].events[:,2] == e+1 # EC = 1 and EO = 2
+        # Get source time series
+        temp_STC = STCs_list[i][eye_idx]
+        # Calculate the coherence and ImgCoh for the given subject and eye status
+        con, freqs, times, n_epochs, n_tapers = spectral_connectivity(
+            temp_STC, method=connectivity_methods[0:2],
+            mode="multitaper", sfreq=sfreq, fmin=fmin, fmax=fmax,
+            faverage=True, verbose=0)
+        # Save the results in array
+        con_data0[0,e,:,:,:] = con[0] # coherence
+        con_data0[1,e,:,:,:] = np.abs(con[1]) # Absolute value of ImgCoh to reflect magnitude of ImgCoh
+        
+        # Calculate PLV and wPLI for each epoch and then average
+        n_epochs0 = temp_STC.shape[0]
+        con_data1 = np.zeros((len(connectivity_methods[2:]),n_epochs0,n_sources,n_sources,n_freq_bands))
+        for epoch in range(n_epochs0):
+            # First the data is retrieved and epoch axis dropped
+            temp_data = temp_STC[epoch,:,:]
+            # PLV and WPLI value is calculated across timepoints in each freq band
+            PLV_WPLI_con = calculate_PLV_WPLI_across_time(temp_data)
+            # Save results
+            con_data1[0,epoch,:,:,:] = PLV_WPLI_con[0] # phase locking value
+            con_data1[1,epoch,:,:,:] = PLV_WPLI_con[1] # weighted phase lag index
+        # Take average across epochs for PLV and wPLI
+        con_data2 = np.mean(con_data1,axis=1)
+        # Save to final array
+        con_data0[2,e,:,:,:] = con_data2[0] # phase locking value
+        con_data0[3,e,:,:,:] = con_data2[1] # weighted phase lag index
+        n_epochs_matrix0[e] = n_epochs
+    
+    print("{} out of {} finished".format(i+1,n_subjects))
+    return i, con_data0, n_epochs_matrix0
+
+with concurrent.futures.ProcessPoolExecutor(max_workers=16) as executor:
+    for i, con_result, n_epochs_mat in executor.map(connectivity_estimation, range(n_subjects)): # Function and arguments
+        con_data[:,i,:,:,:,:] = con_result
+        n_epochs_matrix[i] = n_epochs_mat
+
+# Get current time
+c_time = time.localtime()
+c_time = time.strftime("%H:%M:%S", c_time)
+print(c_time)
+
+# Save the results
+np.save(Feature_savepath+"Source_drop_interpol_ch_connectivity_measures_data.npy", con_data) # (con_measure,subject,eye,ch,ch,freq)
+
+# Also save as dataframe format for feature selection
+# Convert to Pandas dataframe
+# The dimensions will each be a column with numbers and the last column will be the actual values
+arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, con_data.shape), indexing="ij"))) + [con_data.ravel()])
+con_data_df = pd.DataFrame(arr, columns = ["Con_measurement", "Subject_ID", "Eye_status", "chx", "chy", "Freq_band", "Value"])
+# Change from numerical coding to actual values
+eye_status = list(final_epochs[0].event_id.keys())
+freq_bands_name = list(Freq_Bands.keys())
+
+index_values = [connectivity_methods,Subject_id,eye_status,label_names,label_names,freq_bands_name]
+for col in range(len(index_values)):
+    col_name = con_data_df.columns[col]
+    for shape in range(con_data.shape[col]): # notice not dataframe but the array
+        con_data_df.loc[con_data_df.iloc[:,col] == shape,col_name]\
+        = index_values[col][shape]
+
+# Add group status
+Group_status = np.array(["CTRL"]*len(con_data_df["Subject_ID"]))
+Group_status[np.array([i in cases for i in con_data_df["Subject_ID"]])] = "PTSD"
+# Add to dataframe
+con_data_df.insert(3, "Group_status", Group_status)
+
+# Remove all diagonal and upper-matrix entries
+con_data_df = con_data_df.iloc[con_data_df["Value"].to_numpy().nonzero()]
+
+# Save df
+con_data_df.to_pickle(os.path.join(Feature_savepath,"con_data_source_drop_interpol_df.pkl"))
+
+# %% Estimate Granger's Causality in source space
+# Load source labels
+with open("custom_aparc2009_Li_et_al_2022.pkl", "rb") as file:
+    labels = pickle.load(file)
+label_names = [label.name for label in labels]
+n_sources = len(label_names)
+
+# Load source time series
+with open(Feature_savepath+"STCs_each_epoch_drop_interpol_ch_fix_snr.pkl", "rb") as file:
+    STCs_list = pickle.load(file)
+
+# Granger's causality might be influenced by volume conduction, thus working with CSD might be beneficial
+# But since I already used source modelling to alleviate this problem I will not apply CSD
+# Barrett et al, 2012 also do not apply CSD on source GC
+
+# GC assumes stationarity, thus I will test for stationarity using ADF test
+# The null hypothesis of ADF is that it has unit root, i.e. is non-stationary
+# I will test how many can reject the null hypothesis, i.e. are stationary
+
+# Due to the low numerical values in STC the ADF test is unstable, thus I multiply it to be around 1e0
+
+stationary_test_arr = [0]*n_subjects
+n_tests = [0]*n_subjects
+for i in range(n_subjects):
+    # Get data
+    data_arr = STCs_list[i]
+    # Get shape
+    n_epochs, n_channels, n_timepoints = data_arr.shape
+    # Create array for indices to print out progress
+    ep_progress_idx = np.arange(n_epochs//5,n_epochs,n_epochs//5)
+    # Calculate number of tests performed for each subject
+    n_tests[i] = n_epochs*n_channels
+    # Prepare empty array (with 2's as 0 and 1 will be used)
+    stationary_test_arr0 = np.zeros((n_epochs,n_channels))+2 # make array of 2's
+    for ep in range(n_epochs):
+        for c in range(n_channels):
+            ADF = adfuller(data_arr[ep,c,:]*1e14) # multilying with a constant does not change ADF, but helps against numerical instability
+            p_value = ADF[1]
+            if p_value < 0.05:
+                stationary_test_arr0[ep,c] = True # Stationary set to 1
+            else:
+                stationary_test_arr0[ep,c] = False # Non-stationary set to 0
+        # Print partial progress
+        if len(np.where(ep_progress_idx==ep)[0]) > 0:
+            print("Finished epoch number: {} out of {}".format(ep,n_epochs))
+    # Indices that were not tested
+    no_test_idx = np.where(stationary_test_arr0==2)[0]
+    if len(no_test_idx) > 0:
+        print("An unexpected error occurred and {} was not tested".format(no_test_idx))
+    # Save to list
+    stationary_test_arr[i] = stationary_test_arr0
+    # Print progress
+    print("Finished subject {} out of {}".format(i+1,n_subjects))
+
+with open(Stat_savepath+"Source_drop_interpol_GC_stationarity_tests.pkl", "wb") as filehandle:
+    # The data is stored as binary data stream
+    pickle.dump(stationary_test_arr, filehandle)
+
+# I used a threshold of 0.05
+# This means that on average I would expect 5% false positives among the tests that showed significance for stationarity
+ratio_stationary = [0]*n_subjects
+for i in range(n_subjects):
+    # Ratio of tests that showed stationarity
+    ratio_stationary[i] = np.sum(stationary_test_arr[i])/n_tests[i]
+
+print("Ratio of stationary time series: {0:.3f}".format(np.mean(ratio_stationary))) # 88%
+
+# The pre-processing have already ensured that most of the data fulfills the stationarity assumption.
+
+# Divide the data into eyes closed and open
+ch_names = label_names
+n_channels = len(ch_names)
+
+STC_eye_data = []
+for i in range(n_subjects):
+    # Get index for eyes open and eyes closed
+    EC_index = final_epochs[i].events[:,2] == 1
+    EO_index = final_epochs[i].events[:,2] == 2
+    # Get the data
+    EC_epoch_data = STCs_list[i][EC_index,:,:] # eye index
+    EO_epoch_data = STCs_list[i][EO_index,:,:]
+    # Save to list
+    STC_eye_data.append([EC_epoch_data, EO_epoch_data])
+
+# Make each epoch a TimeSeries object
+# Input for TimeSeries is: (ch, time)
+eye_status = list(final_epochs[0].event_id.keys())
+n_eye_status = len(eye_status)
+sfreq = final_epochs[0].info["sfreq"]
+
+Timeseries_data = []
+for i in range(n_subjects):
+    temp_list1 = []
+    for e in range(n_eye_status):
+        temp_list2 = []
+        n_epochs = STC_eye_data[i][e].shape[0]
+        for ep in range(n_epochs):
+            # Convert to TimeSeries
+            time_series = nts.TimeSeries(STC_eye_data[i][e][ep,:,:], sampling_rate=sfreq)
+            # Save the object
+            temp_list2.append(time_series)
+        # Save the timeseries across eye status
+        temp_list1.append(temp_list2)
+    # Save the timeseries across subjects
+    Timeseries_data.append(temp_list1) # output [subject][eye][epoch](ch,time)
+
+# Test multiple specified model orders of AR models, each combination has its own model
+m_orders = np.linspace(1,25,25) # the model orders tested
+m_orders = np.round(m_orders)
+n_timepoints = len(Timeseries_data[0][0][0])
+n_ch_combinations = scipy.special.comb(n_channels,2, exact=True, repetition=False)
+
+# To reduce computation time I only test representative epochs (1 from each 1 min session)
+# There will be 5 epochs from eyes closed and 5 from eyes open
+n_rep_epoch = 5
+# The subjects have different number of epochs due to dropped epochs
+gaps_trials_idx = np.load("Gaps_trials_idx.npy") # time_points between sessions
+# I convert the gap time points to epoch number used as representative epoch
+epoch_idx = np.zeros((n_subjects,n_eye_status,n_rep_epoch), dtype=int) # prepare array
+epoch_idx[:,:,0:4] = np.round(gaps_trials_idx/n_timepoints,0)-8 # take random epoch from sessions 1 to 4
+epoch_idx[:,:,4] = np.round(gaps_trials_idx[:,:,3]/n_timepoints,0)+5 # take random epoch from session 5
+
+# Checking if all epoch idx exists
+for i in range(n_subjects):
+    EC_index = final_epochs[i].events[:,2] == 1
+    EO_index = final_epochs[i].events[:,2] == 2
+    assert np.sum(EC_index) >= epoch_idx[i,0,4]
+    assert np.sum(EO_index) >= epoch_idx[i,1,4]
+
+# Prepare model order estimation
+AIC_arr = np.zeros((len(m_orders),n_subjects,n_eye_status,n_rep_epoch,n_ch_combinations))
+BIC_arr = np.zeros((len(m_orders),n_subjects,n_eye_status,n_rep_epoch,n_ch_combinations))
+
+def GC_model_order_est(i):
+    AIC_arr0 = np.zeros((len(m_orders),n_eye_status,n_rep_epoch,n_ch_combinations))
+    BIC_arr0 = np.zeros((len(m_orders),n_eye_status,n_rep_epoch,n_ch_combinations))
+    for e in range(n_eye_status):
+        n_epochs = STC_eye_data[i][e].shape[0]
+        N_total = n_timepoints*n_epochs # total number of datapoints for specific eye condition
+        for ep in range(n_rep_epoch):
+            epp = epoch_idx[i,e,ep]
+            for o in range(len(m_orders)):
+                order = int(m_orders[o])
+                # Make the Granger Causality object
+                GCA1 = nta.GrangerAnalyzer(Timeseries_data[i][e][epp-1], order=order,
+                                           n_freqs=2000)
+                for c in range(n_ch_combinations):
+                    # Retrieve error covariance matrix for all combinations
+                    ecov = np.array(list(GCA1.error_cov.values()))
+                    # Calculate AIC
+                    AIC = ntsu.akaike_information_criterion(ecov[c,:,:], p = n_channels,
+                                                            m=order, Ntotal=N_total)
+                    # Calculate BIC
+                    BIC = ntsu.bayesian_information_criterion(ecov[c,:,:], p = n_channels,
+                                                              m=order, Ntotal=N_total)
+                    # Save the information criterions
+                    AIC_arr0[o,e,ep,c] = AIC
+                    BIC_arr0[o,e,ep,c] = BIC
+
+    print("{} out of {} finished testing".format(i+1,n_subjects))
+    return i, AIC_arr0, BIC_arr0
+
+# Get current time
+c_time1 = time.localtime()
+c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1)
+print(c_time1)
+
+with concurrent.futures.ProcessPoolExecutor() as executor:
+    for i, AIC_result, BIC_result in executor.map(GC_model_order_est, range(n_subjects)): # Function and arguments
+        AIC_arr[:,i] = AIC_result
+        BIC_arr[:,i] = BIC_result
+
+# Get current time
+c_time2 = time.localtime()
+c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2)
+print("Started", c_time1, "\nCurrent Time",c_time2)
+
+# Save the AIC and BIC results
+np.save(Feature_savepath+"AIC_Source_drop_interpol_GC_model_order.npy", AIC_arr) # (m. order, subject, eye, epoch, combination)
+np.save(Feature_savepath+"BIC_Source_drop_interpol_GC_model_order.npy", BIC_arr) # (m. order, subject, eye, epoch, combination)
+
+# Load data
+AIC_arr = np.load(Feature_savepath+"AIC_Source_drop_interpol_GC_model_order.npy")
+BIC_arr = np.load(Feature_savepath+"BIC_Source_drop_interpol_GC_model_order.npy")
+
+# Average across all subjects, eye status, epochs and combinations
+plt.figure(figsize=(8,6))
+plt.plot(m_orders, np.nanmean(AIC_arr, axis=(1,2,3,4)), label="AIC")
+plt.plot(m_orders, np.nanmean(BIC_arr, axis=(1,2,3,4)), label="BIC")
+plt.title("Average information criteria value")
+plt.xlabel("Model order (Lag)")
+plt.legend()
+
+np.sum(np.isnan(AIC_arr))/AIC_arr.size # around 0.07% NaN due to non-convergence
+np.sum(np.isnan(BIC_arr))/BIC_arr.size # around 0.07% NaN due to non-convergence
+
+# If we look at each subject
+mean_subject_AIC = np.nanmean(AIC_arr, axis=(2,3,4))
+
+plt.figure(figsize=(8,6))
+for i in range(n_subjects):
+    plt.plot(m_orders, mean_subject_AIC[:,i])
+plt.title("Average AIC for each subject")
+plt.xlabel("Model order (Lag)")
+
+mean_subject_BIC = np.nanmean(BIC_arr, axis=(2,3,4))
+plt.figure(figsize=(8,6))
+for i in range(n_subjects):
+    plt.plot(m_orders, mean_subject_BIC[:,i])
+plt.title("Average BIC for each subject")
+plt.xlabel("Model order (Lag)")
+
+# We see that for many cases in BIC, it does not converge. Monotonic increasing!
+
+# We can look at the distribution of chosen order for each time series analyzed
+# I.e. I will find the minima in model order for each model
+AIC_min_arr = np.argmin(AIC_arr, axis=0)
+BIC_min_arr = np.argmin(BIC_arr, axis=0)
+
+# Plot the distributions of the model order chosen
+plt.figure(figsize=(8,6))
+sns.distplot(AIC_min_arr.reshape(-1)+1, kde=False, norm_hist=True,
+             bins=np.linspace(0.75,30.25,60), label="AIC")
+plt.ylabel("Frequency density")
+plt.xlabel("Model order")
+plt.title("AIC Model Order Estimation")
+
+plt.figure(figsize=(8,6))
+sns.distplot(BIC_min_arr.reshape(-1)+1, kde=False, norm_hist=True, color="blue",
+             bins=np.linspace(0.75,30.25,60), label="BIC")
+plt.ylabel("Frequency density")
+plt.xlabel("Model order")
+plt.title("BIC Model Order Estimation")
+# It is clear from the BIC model that most have model order 1
+# which reflect their monotonic increasing nature without convergence
+# Thus I will only use AIC
+
+# There is a bias variance trade-off with model order [Stokes & Purdon, 2017]
+# Lower order is associated with higher bias and higher order with variance
+# I will choose the model order that is chosen the most (i.e. majority voting)
+AR_order = int(np.nanquantile(AIC_min_arr.reshape(-1), q=0.5))
+# Order = 5
+
+# Calculate Granger Causality for each subject, eye and epoch
+Freq_Bands = {"delta": [1.25, 4.0],
+              "theta": [4.0, 8.0],
+              "alpha": [8.0, 13.0],
+              "beta": [13.0, 30.0],
+              "gamma": [30.0, 49.0]}
+n_freq_bands = len(Freq_Bands)
+
+# Pre-allocate memory
+GC_data = np.zeros((2,n_subjects,n_eye_status,n_channels,n_channels,n_freq_bands))
+
+def GC_analysis(i):
+    GC_data0 = np.zeros((2,n_eye_status,n_channels,n_channels,n_freq_bands))
+    for e in range(n_eye_status):
+        n_epochs = STC_eye_data[i][e].shape[0]
+        # Make temporary array to save GC for each epoch
+        temp_GC_data = np.zeros((2,n_epochs,n_channels,n_channels,n_freq_bands))
+        for ep in range(n_epochs):
+            # Fit the AR model
+            GCA = nta.GrangerAnalyzer(Timeseries_data[i][e][ep], order=AR_order,
+                                       n_freqs=int(800)) # n_Freq=800 correspond to step of 0.25Hz, the same as multitaper for power estimation
+            for f in range(n_freq_bands):
+                # Define lower and upper band
+                f_lb = list(Freq_Bands.values())[f][0]
+                f_ub = list(Freq_Bands.values())[f][1]
+                # Get index corresponding to the frequency bands of interest
+                freq_idx_G = np.where((GCA.frequencies >= f_lb) * (GCA.frequencies < f_ub))[0]
+                # Calculcate Granger causality quantities
+                g_xy = np.mean(GCA.causality_xy[:, :, freq_idx_G], -1) # avg on last dimension
+                g_yx = np.mean(GCA.causality_yx[:, :, freq_idx_G], -1) # avg on last dimension
+                # Transpose to use same format as con_measurement and save
+                temp_GC_data[0,ep,:,:,f] = np.transpose(g_xy)
+                temp_GC_data[1,ep,:,:,f] = np.transpose(g_yx)
+        
+        # Average over epochs for each person, eye condition, direction and frequency band
+        temp_GC_epoch_mean = np.nanmean(temp_GC_data, axis=1) # sometimes Log(Sxx/xx_auto_component) is nan
+        # Save to array
+        GC_data0[:,e,:,:,:] = temp_GC_epoch_mean
+        
+    print("{} out of {} finished analyzing".format(i+1,n_subjects))
+    return i, GC_data0
+
+# Get current time
+c_time1 = time.localtime()
+c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1)
+print(c_time1)
+
+with concurrent.futures.ProcessPoolExecutor() as executor:
+    for i, GC_result in executor.map(GC_analysis, range(n_subjects)): # Function and arguments
+        GC_data[:,i] = GC_result
+        
+# Get current time
+c_time2 = time.localtime()
+c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2)
+print("Started", c_time1, "\nCurrent Time",c_time2)
+
+# Output: GC_data (g_xy/g_yx, subject, eye, chx, chy, freq)
+# Notice that for g_xy ([0,...]) it means "chy" Granger causes "chx"
+# and for g_yx ([1,...]) it means "chx" Granger causes "chy"
+# This is due to the transposing which flipped the results on to the lower-part of the diagonal
+
+# Save the Granger_Causality data
+np.save(Feature_savepath+"Source_drop_interpol_GrangerCausality_data.npy", GC_data)
+
+# Theoretically negative GC values should be impossible, but in practice
+# they can still occur due to problems with model fitting (see Stokes & Purdon, 2017)
+print("{:.3f}% negative GC values".\
+      format(np.sum(GC_data[~np.isnan(GC_data)]<0)/np.sum(~np.isnan(GC_data))*100)) # 0.08% negative values
+# These values cannot be interpreted, but seems to occur mostly for true non-causal connections
+# Hence I set them to 0
+with np.errstate(invalid="ignore"): # invalid number refers to np.nan, which will be set to False for comparisons
+    GC_data[(GC_data<0)] = 0
+
+# Save as dataframe for further processing with other features
+# Convert to Pandas dataframe
+# The dimensions will each be a column with numbers and the last column will be the actual values
+arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, GC_data.shape), indexing="ij"))) + [GC_data.ravel()])
+GC_data_df = pd.DataFrame(arr, columns = ["GC_direction", "Subject_ID", "Eye_status", "chx", "chy", "Freq_band", "Value"])
+# Change from numerical coding to actual values
+eye_status = list(final_epochs[0].event_id.keys())
+freq_bands_name = list(Freq_Bands.keys())
+GC_directions_info = ["chy -> chx", "chx -> chy"]
+
+index_values = [GC_directions_info,Subject_id,eye_status,ch_names,ch_names,freq_bands_name]
+for col in range(len(index_values)):
+    col_name = GC_data_df.columns[col]
+    for shape in range(GC_data.shape[col]): # notice not dataframe but the array
+        GC_data_df.loc[GC_data_df.iloc[:,col] == shape,col_name]\
+        = index_values[col][shape]
+
+# Add group status
+Group_status = np.array(["CTRL"]*len(GC_data_df["Subject_ID"]))
+Group_status[np.array([i in cases for i in GC_data_df["Subject_ID"]])] = "PTSD"
+# Add to dataframe
+GC_data_df.insert(3, "Group_status", Group_status)
+
+# Remove all nan (including diagonal and upper-matrix entries)
+GC_data_df = GC_data_df.iloc[np.invert(np.isnan(GC_data_df["Value"].to_numpy()))]
+
+# Swap ch values for GC_direction chy -> chx (so it is always chx -> chy)
+tempchy = GC_data_df[GC_data_df["GC_direction"] == "chy -> chx"]["chy"] # save chy
+GC_data_df.loc[GC_data_df["GC_direction"] == "chy -> chx","chy"] =\
+             GC_data_df.loc[GC_data_df["GC_direction"] == "chy -> chx","chx"] # overwrite old chy
+GC_data_df.loc[GC_data_df["GC_direction"] == "chy -> chx","chx"] = tempchy # overwrite chx
+
+# Drop the GC_direction column
+GC_data_df = GC_data_df.drop("GC_direction", axis=1)
+
+# Save df
+GC_data_df.to_pickle(os.path.join(Feature_savepath,"GC_data_source_drop_interpol_df.pkl"))
+
+# Testing if df was formatted correctly
+expected_GC_values = n_subjects*n_eye_status*n_ch_combinations*n_freq_bands*2 # 2 because it is bidirectional
+assert GC_data_df.shape[0] == expected_GC_values
+# Testing a random GC value
+random_connection = np.random.randint(0,GC_data_df.shape[0])
+test_connection = GC_data_df.iloc[random_connection,:]
+i = np.where(Subject_id==test_connection["Subject_ID"])[0]
+e = np.where(np.array(eye_status)==test_connection["Eye_status"])[0]
+chx = np.where(np.array(ch_names)==test_connection["chx"])[0]
+chy = np.where(np.array(ch_names)==test_connection["chy"])[0]
+f = np.where(np.array(freq_bands_name)==test_connection["Freq_band"])[0]
+value = test_connection["Value"]
+if chx < chy: # the GC array is only lower diagonal to save memory
+    assert GC_data[0,i,e,chy,chx,f] == value
+if chx > chy:
+    assert GC_data[1,i,e,chx,chy,f] == value