diff --git a/FeatureEstimation.py b/FeatureEstimation.py
index 612ce0d02737451720cc3cc12515e505a3817c94..10e0e55f36a396d6b22aaba3ea9877c519bd4bde 100644
--- a/FeatureEstimation.py
+++ b/FeatureEstimation.py
@@ -255,16 +255,27 @@ frontal_theta_mean_subject = power_df_sub1[power_df_sub1["Freq_band"] == "theta"
 # Calculate average frontal power beta
 frontal_beta_mean_subject = power_df_sub1[power_df_sub1["Freq_band"] == "beta"].\
     groupby(["Subject_ID","Group_status","Eye_status"]).mean().reset_index()
+# Extract all values
+frontal_beta_subject_values = power_df_sub1[power_df_sub1["Freq_band"] == "beta"].copy() # copy so the dB conversion below does not trigger SettingWithCopyWarning
 
 # Calculate average frontal, midline power theta
 frontal_midline_theta_mean_subject = power_df_sub2[power_df_sub2["Freq_band"] == "theta"].\
     groupby(["Subject_ID","Group_status","Eye_status"]).mean().reset_index()
+# Extract all values
+frontal_midline_theta_subject_values = power_df_sub2[power_df_sub2["Freq_band"] == "theta"].copy() # copy so the dB conversion below does not trigger SettingWithCopyWarning
 
 
 # Convert from dB to raw power
 frontal_theta_mean_subject["PSD"] = 10**(frontal_theta_mean_subject["PSD"]/10)
 frontal_beta_mean_subject["PSD"] = 10**(frontal_beta_mean_subject["PSD"]/10)
 frontal_midline_theta_mean_subject["PSD"] = 10**(frontal_midline_theta_mean_subject["PSD"]/10)
+frontal_beta_subject_values["PSD"] = 10**(frontal_beta_subject_values["PSD"]/10)
+frontal_midline_theta_subject_values["PSD"] = 10**(frontal_midline_theta_subject_values["PSD"]/10)
+
+frontal_beta_mean_subject.to_pickle(os.path.join(Feature_savepath,"fBMS_df.pkl"))
+frontal_midline_theta_mean_subject.to_pickle(os.path.join(Feature_savepath,"fMTMS_df.pkl"))
+frontal_beta_subject_values.to_pickle(os.path.join(Feature_savepath,"fBSV_df.pkl"))
+frontal_midline_theta_subject_values.to_pickle(os.path.join(Feature_savepath,"fMTSV_df.pkl"))
 
 # Calculate mean for each group and take ratio for whole group
 # To confirm trend observed in PSD plots
@@ -285,7 +296,7 @@ frontal_theta_beta_ratio.rename(columns={"PSD":"TBR"},inplace=True)
 dummy_variable = ["Frontal Theta Beta Ratio"]*frontal_theta_beta_ratio.shape[0]
 frontal_theta_beta_ratio.insert(3, "Measurement", dummy_variable )
 
-frontal_theta_beta_ratio.to_pickle(os.path.join(Feature_savepath,"fTBR_df.pkl"))
+# frontal_theta_beta_ratio.to_pickle(os.path.join(Feature_savepath,"fTBR_df.pkl"))
 
 """
 # %% Frequency bands asymmetry
@@ -922,8 +933,12 @@ for i in range(n_subjects):
     temp_EC_p_hat = p_empirical(micro_labels[i][0], n_maps)
     temp_EO_p_hat = p_empirical(micro_labels[i][1], n_maps)
     # Calculate transition matrix
+    """
     temp_EC_T_hat = T_empirical(micro_labels[i][0], n_maps, gaps_idx[i][0])
     temp_EO_T_hat = T_empirical(micro_labels[i][1], n_maps, gaps_idx[i][1])
+    """
+    temp_EC_T_hat = T_empirical(micro_labels[i][0], n_maps)
+    temp_EO_T_hat = T_empirical(micro_labels[i][1], n_maps)
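+    # Sketch of the assumed behaviour of T_empirical (an assumption, not verified here):
+    # it counts consecutive label transitions and row-normalises, e.g. labels [0, 1, 1, 0]
+    # with n_maps=2 give counts [[0, 1], [1, 1]] and thus T_hat = [[0, 1], [0.5, 0.5]]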
     # Calculate Shannon entropy
     temp_EC_h_hat = H_1(micro_labels[i][0], n_maps)
     temp_EO_h_hat = H_1(micro_labels[i][1], n_maps)
@@ -942,10 +957,11 @@ np.save(Feature_savepath+"microstate_transition_data.npy", microstate_transition
 # Transition matrix should be read as probability of row to column
 microstate_transition_data_arr =\
      microstate_transition_data.reshape((n_subjects,n_eye_status,n_maps*n_maps)) # flatten 4 x 4 matrix to 1D
-transition_info = ["M1->M1", "M1->M2", "M1->M3", "M1->M4",
-                   "M2->M1", "M2->M2", "M2->M3", "M2->M4",
-                   "M3->M1", "M3->M2", "M3->M3", "M3->M4",
-                   "M4->M1", "M4->M2", "M4->M3", "M4->M4"]
+transition_info = ["M1->M1", "M1->M2", "M1->M3", "M1->M4", "M1->M5",
+                   "M2->M1", "M2->M2", "M2->M3", "M2->M4", "M2->M5",
+                   "M3->M1", "M3->M2", "M3->M3", "M3->M4", "M3->M5",
+                   "M4->M1", "M4->M2", "M4->M3", "M4->M4", "M4->M5",
+                   "M5->M1", "M5->M2", "M5->M3", "M5->M4", "M5->M5"]
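+# Sketch of how the flattened matrices line up with transition_info (assuming the row-major
+# reshape above and rows = "from" state): flat index k = 5*i + j holds P(M{i+1} -> M{j+1}),
+# e.g. index 7 corresponds to transition_info[7] == "M2->M3", i.e. entry [1, 2] of the matrix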
 
 arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, microstate_transition_data_arr.shape), indexing="ij"))) + [microstate_transition_data_arr.ravel()])
 microstate_transition_data_df = pd.DataFrame(arr, columns = ["Subject_ID", "Eye_status", "Transition", "Value"])
@@ -974,7 +990,7 @@ arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, microstate_
 microstate_time_df = pd.DataFrame(arr, columns = ["Subject_ID", "Eye_status", "Microstate", "Value"])
 # Change from numerical coding to actual values
 eye_status = list(final_epochs[0].event_id.keys())
-microstates = [1,2,3,4]
+microstates = [1,2,3,4,5]
 
 index_values = [Subject_id,eye_status,microstates]
 for col in range(len(index_values)):
@@ -1032,1724 +1048,1724 @@ microstate_entropy_df.insert(3, "Measurement", dummy_variable)
 # Save df
 microstate_entropy_df.to_pickle(os.path.join(Feature_savepath,"microstate_entropy_df.pkl"))
 
-# %% Long-range temporal correlations (LRTC)
-"""
-See Hardstone et al, 2012
-Hurst exponent estimation steps:
-    1. Preprocess
-    2. Band-pass filter for frequency band of interest
-    3. Hilbert transform to obtain amplitude envelope
-    4. Perform DFA
-        4.1 Compute cumulative sum of time series to create signal profile
-        4.2 Define set of window sizes (see below)
-        4.3 Remove the linear trend using least-squares for each window
-        4.4 Calculate standard deviation for each window and take the mean
-        4.5 Plot fluctuation function (Standard deviation) as function
-            for all window sizes, on double logarithmic scale
-        4.6 The DFA exponent alpha correspond to Hurst exponent
-            f(L) = sd = L^alpha (with alpha as linear coefficient in log plot)
-
-If 0 < alpha < 0.5: The process exhibits anti-correlations
-If 0.5 < alpha < 1: The process exhibits positive correlations
-If alpha = 0.5: The process is indistinguishable from a random process
-If 1.0 < alpha < 2.0: The process is non-stationary. H = alpha - 1
-
-Window sizes should be equally spaced on a logarithmic scale
-Sizes should be at least 4 samples and up to 10% of total signal length
-Filters can influence neighboring samples, thus filters should be tested
-on white noise to estimate window sizes that are unaffected by filters
-
-filter_length=str(2*1/fmin)+"s" # cannot be used with default transition bandwidth
-
-"""
-# From simulations with white noise I determined window size thresholds for the 5 frequency bands:
-thresholds = [7,7,7,6.5,6.5]
-# And their corresponding log step sizes
-with open("LRTC_log_win_sizes.pkl", "rb") as filehandle:
-    log_win_sizes = pickle.load(filehandle)
-
-# Variables for the the different conditions
-# Sampling frequency
-sfreq = final_epochs[0].info["sfreq"]
-# Channels
-ch_names = final_epochs[0].info["ch_names"]
-n_channels = len(ch_names)
-# Frequency
-Freq_Bands = {"delta": [1.25, 4.0],
-              "theta": [4.0, 8.0],
-              "alpha": [8.0, 13.0],
-              "beta": [13.0, 30.0],
-              "gamma": [30.0, 49.0]}
-n_freq_bands = len(Freq_Bands)
-# Eye status
-eye_status = list(final_epochs[0].event_id.keys())
-n_eye_status = len(eye_status)
-
-### Estimating Hurst exponent for the data
-# The data should be re-referenced to common average (Already done)
-
-# Data are transformed to numpy arrays
-# Then divided into EO and EC and further into each of the 5 trials
-# So DFA is estimated for each trial separately, which was concluded from simulations
-gaps_trials_idx = np.load("Gaps_trials_idx.npy") # re-used from microstate analysis
-n_trials = 5
-
-H_data = []
-for i in range(n_subjects):
-    # Transform data to correct shape
-    temp_arr = final_epochs[i].get_data() # get data
-    arr_shape = temp_arr.shape # get shape
-    temp_arr = temp_arr.swapaxes(1,2) # swap ch and time axis
-    temp_arr = temp_arr.reshape(arr_shape[0]*arr_shape[2],arr_shape[1]) # reshape by combining epochs and times
-    # Get indices for eyes open and closed
-    EC_index = final_epochs[i].events[:,2] == 1
-    EO_index = final_epochs[i].events[:,2] == 2
-    # Repeat with 4s * sample frequency to correct for concatenation of times and epochs
-    EC_index = np.repeat(EC_index,4*sfreq)
-    EO_index = np.repeat(EO_index,4*sfreq)
-    # Divide into eye status
-    EC_data = temp_arr[EC_index]
-    EO_data = temp_arr[EO_index]
-    # Divide into trials
-    EC_gap_idx = np.array([0]+list(gaps_trials_idx[i,0])+[len(EC_data)])
-    EO_gap_idx = np.array([0]+list(gaps_trials_idx[i,1])+[len(EO_data)])
+# # %% Long-range temporal correlations (LRTC)
+# """
+# See Hardstone et al, 2012
+# Hurst exponent estimation steps:
+#     1. Preprocess
+#     2. Band-pass filter for frequency band of interest
+#     3. Hilbert transform to obtain amplitude envelope
+#     4. Perform DFA
+#         4.1 Compute cumulative sum of time series to create signal profile
+#         4.2 Define set of window sizes (see below)
+#         4.3 Remove the linear trend using least-squares for each window
+#         4.4 Calculate standard deviation for each window and take the mean
+#         4.5 Plot fluctuation function (Standard deviation) as function
+#             for all window sizes, on double logarithmic scale
+#         4.6 The DFA exponent alpha corresponds to the Hurst exponent
+#             f(L) = sd = L^alpha (with alpha as linear coefficient in log plot)
+
+# If 0 < alpha < 0.5: The process exhibits anti-correlations
+# If 0.5 < alpha < 1: The process exhibits positive correlations
+# If alpha = 0.5: The process is indistinguishable from a random process
+# If 1.0 < alpha < 2.0: The process is non-stationary. H = alpha - 1
+
+# Window sizes should be equally spaced on a logarithmic scale
+# Sizes should be at least 4 samples and up to 10% of total signal length
+# Filters can influence neighboring samples, thus filters should be tested
+# on white noise to estimate window sizes that are unaffected by filters
+
+# filter_length=str(2*1/fmin)+"s" # cannot be used with default transition bandwidth
+
+# """
+# # From simulations with white noise I determined window size thresholds for the 5 frequency bands:
+# thresholds = [7,7,7,6.5,6.5]
+# # And their corresponding log step sizes
+# with open("LRTC_log_win_sizes.pkl", "rb") as filehandle:
+#     log_win_sizes = pickle.load(filehandle)
+
+# # Variables for the different conditions
+# # Sampling frequency
+# sfreq = final_epochs[0].info["sfreq"]
+# # Channels
+# ch_names = final_epochs[0].info["ch_names"]
+# n_channels = len(ch_names)
+# # Frequency
+# Freq_Bands = {"delta": [1.25, 4.0],
+#               "theta": [4.0, 8.0],
+#               "alpha": [8.0, 13.0],
+#               "beta": [13.0, 30.0],
+#               "gamma": [30.0, 49.0]}
+# n_freq_bands = len(Freq_Bands)
+# # Eye status
+# eye_status = list(final_epochs[0].event_id.keys())
+# n_eye_status = len(eye_status)
+
+# ### Estimating Hurst exponent for the data
+# # The data should be re-referenced to common average (Already done)
+
+# # Data are transformed to numpy arrays
+# # Then divided into EO and EC and further into each of the 5 trials
+# # So DFA is estimated for each trial separately, which was concluded from simulations
+# gaps_trials_idx = np.load("Gaps_trials_idx.npy") # re-used from microstate analysis
+# n_trials = 5
+
+# H_data = []
+# for i in range(n_subjects):
+#     # Transform data to correct shape
+#     temp_arr = final_epochs[i].get_data() # get data
+#     arr_shape = temp_arr.shape # get shape
+#     temp_arr = temp_arr.swapaxes(1,2) # swap ch and time axis
+#     temp_arr = temp_arr.reshape(arr_shape[0]*arr_shape[2],arr_shape[1]) # reshape by combining epochs and times
+#     # Get indices for eyes open and closed
+#     EC_index = final_epochs[i].events[:,2] == 1
+#     EO_index = final_epochs[i].events[:,2] == 2
+#     # Repeat with 4s * sample frequency to correct for concatenation of times and epochs
+#     EC_index = np.repeat(EC_index,4*sfreq)
+#     EO_index = np.repeat(EO_index,4*sfreq)
+#     # Divide into eye status
+#     EC_data = temp_arr[EC_index]
+#     EO_data = temp_arr[EO_index]
+#     # Divide into trials
+#     EC_gap_idx = np.array([0]+list(gaps_trials_idx[i,0])+[len(EC_data)])
+#     EO_gap_idx = np.array([0]+list(gaps_trials_idx[i,1])+[len(EO_data)])
     
-    EC_trial_data = []
-    EO_trial_data = []
-    for t in range(n_trials):
-        EC_trial_data.append(EC_data[EC_gap_idx[t]:EC_gap_idx[t+1]])
-        EO_trial_data.append(EO_data[EO_gap_idx[t]:EO_gap_idx[t+1]])
+#     EC_trial_data = []
+#     EO_trial_data = []
+#     for t in range(n_trials):
+#         EC_trial_data.append(EC_data[EC_gap_idx[t]:EC_gap_idx[t+1]])
+#         EO_trial_data.append(EO_data[EO_gap_idx[t]:EO_gap_idx[t+1]])
         
-    # Save data
-    H_data.append([EC_trial_data,EO_trial_data]) # output [subject][eye][trial][time,ch]
-
-# Calculate H for each subject, eye status, trial, freq and channel
-H_arr = np.zeros((n_subjects,n_eye_status,n_trials,n_channels,n_freq_bands))
-w_len = [len(ele) for ele in log_win_sizes]
-DFA_arr = np.empty((n_subjects,n_eye_status,n_trials,n_channels,n_freq_bands,2,np.max(w_len)))
-DFA_arr[:] = np.nan
-
-# Get current time
-c_time1 = time.localtime()
-c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1)
-print("Started",c_time1)
-
-# Nolds are already using all cores so multiprocessing with make it slower
-# Warning occurs when R2 is estimated during detrending - but R2 is not used
-warnings.simplefilter("ignore")
-for i in range(n_subjects):
-    # Pre-allocate memory
-    DFA_temp = np.empty((n_eye_status,n_trials,n_channels,n_freq_bands,2,np.max(w_len)))
-    DFA_temp[:] = np.nan
-    H_temp = np.empty((n_eye_status,n_trials,n_channels,n_freq_bands))
-    for e in range(n_eye_status):
-        for trial in range(n_trials):
-            for c in range(n_channels):
-                # Get the data
-                signal = H_data[i][e][trial][:,c]
+#     # Save data
+#     H_data.append([EC_trial_data,EO_trial_data]) # output [subject][eye][trial][time,ch]
+
+# # Calculate H for each subject, eye status, trial, freq and channel
+# H_arr = np.zeros((n_subjects,n_eye_status,n_trials,n_channels,n_freq_bands))
+# w_len = [len(ele) for ele in log_win_sizes]
+# DFA_arr = np.empty((n_subjects,n_eye_status,n_trials,n_channels,n_freq_bands,2,np.max(w_len)))
+# DFA_arr[:] = np.nan
+
+# # Get current time
+# c_time1 = time.localtime()
+# c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1)
+# print("Started",c_time1)
+
+# # Nolds is already using all cores so multiprocessing will make it slower
+# # Warning occurs when R2 is estimated during detrending - but R2 is not used
+# warnings.simplefilter("ignore")
+# for i in range(n_subjects):
+#     # Pre-allocate memory
+#     DFA_temp = np.empty((n_eye_status,n_trials,n_channels,n_freq_bands,2,np.max(w_len)))
+#     DFA_temp[:] = np.nan
+#     H_temp = np.empty((n_eye_status,n_trials,n_channels,n_freq_bands))
+#     for e in range(n_eye_status):
+#         for trial in range(n_trials):
+#             for c in range(n_channels):
+#                 # Get the data
+#                 signal = H_data[i][e][trial][:,c]
                 
-                counter = 0 # prepare counter
-                for fmin, fmax in Freq_Bands.values():
-                    # Filter for each freq band
-                    signal_filtered = mne.filter.filter_data(signal, sfreq=sfreq, verbose=0,
-                                                  l_freq=fmin, h_freq=fmax)
-                    # Hilbert transform
-                    analytic_signal = scipy.signal.hilbert(signal_filtered)
-                    # Get Amplitude envelope
-                    # np.abs is the same as np.linalg.norm, i.e. the length for complex input which is the amplitude
-                    ampltude_envelope = np.abs(analytic_signal)
-                    # Perform DFA using predefined window sizes from simulation
-                    a, dfa_data = nolds.dfa(ampltude_envelope,
-                                            nvals=np.exp(log_win_sizes[counter]).astype("int"),
-                                            debug_data=True)
-                    # Save DFA results
-                    DFA_temp[e,trial,c,counter,:,0:w_len[counter]] = dfa_data[0:2]
-                    H_temp[e,trial,c,counter] = a
-                    # Update counter
-                    counter += 1
-
-    # Print run status
-    print("Finished {} out of {}".format(i+1,n_subjects))
-    # Save the results
-    H_arr[i] = H_temp
-    DFA_arr[i] = DFA_temp
-
-warnings.simplefilter("default")
-
-# Get current time
-c_time2 = time.localtime()
-c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2)
-print("Started", c_time1, "\nCurrent Time",c_time2)
-
-# Save the DFA analysis data 
-np.save(Feature_savepath+"DFA_arr.npy", DFA_arr)
-np.save(Feature_savepath+"H_arr.npy", H_arr)
-
-# Load
-DFA_arr = np.load(Feature_savepath+"DFA_arr.npy")
-H_arr = np.load(Feature_savepath+"H_arr.npy")
+#                 counter = 0 # prepare counter
+#                 for fmin, fmax in Freq_Bands.values():
+#                     # Filter for each freq band
+#                     signal_filtered = mne.filter.filter_data(signal, sfreq=sfreq, verbose=0,
+#                                                   l_freq=fmin, h_freq=fmax)
+#                     # Hilbert transform
+#                     analytic_signal = scipy.signal.hilbert(signal_filtered)
+#                     # Get Amplitude envelope
+#                     # np.abs is the same as np.linalg.norm, i.e. the length for complex input which is the amplitude
+#                     amplitude_envelope = np.abs(analytic_signal)
+#                     # Perform DFA using predefined window sizes from simulation
+#                     a, dfa_data = nolds.dfa(amplitude_envelope,
+#                                             nvals=np.exp(log_win_sizes[counter]).astype("int"),
+#                                             debug_data=True)
+#                     # Save DFA results
+#                     DFA_temp[e,trial,c,counter,:,0:w_len[counter]] = dfa_data[0:2]
+#                     H_temp[e,trial,c,counter] = a
+#                     # Update counter
+#                     counter += 1
+
+#     # Print run status
+#     print("Finished {} out of {}".format(i+1,n_subjects))
+#     # Save the results
+#     H_arr[i] = H_temp
+#     DFA_arr[i] = DFA_temp
+
+# warnings.simplefilter("default")
+
+# # Get current time
+# c_time2 = time.localtime()
+# c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2)
+# print("Started", c_time1, "\nCurrent Time",c_time2)
+
+# # Save the DFA analysis data 
+# np.save(Feature_savepath+"DFA_arr.npy", DFA_arr)
+# np.save(Feature_savepath+"H_arr.npy", H_arr)
 
-# Average the Hurst Exponent across trials
-H_arr = np.mean(H_arr, axis=2)
-
-# Convert to Pandas dataframe (Hurst exponent)
-# The dimensions will each be a column with numbers and the last column will be the actual values
-arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, H_arr.shape), indexing="ij"))) + [H_arr.ravel()])
-H_data_df = pd.DataFrame(arr, columns = ["Subject_ID", "Eye_status", "Channel", "Freq_band", "Value"])
-# Change from numerical coding to actual values
-eye_status = list(final_epochs[0].event_id.keys())
-ch_name = final_epochs[0].info["ch_names"]
-
-index_values = [Subject_id,eye_status,ch_name,list(Freq_Bands.keys())]
-for col in range(len(index_values)):
-    col_name = H_data_df.columns[col]
-    for shape in range(H_arr.shape[col]): # notice this is the shape of original numpy array. Not shape of DF
-        H_data_df.loc[H_data_df.iloc[:,col] == shape,col_name]\
-        = index_values[col][shape]
-
-# Add group status
-Group_status = np.array(["CTRL"]*len(H_data_df["Subject_ID"]))
-Group_status[np.array([i in cases for i in H_data_df["Subject_ID"]])] = "PTSD"
-# Add to dataframe
-H_data_df.insert(2, "Group_status", Group_status)
-
-# Fix Freq_band categorical order
-H_data_df["Freq_band"] = H_data_df["Freq_band"].astype("category").\
-            cat.reorder_categories(list(Freq_Bands.keys()), ordered=True)
-
-# Global Hurst exponent
-H_data_df_global = H_data_df.groupby(["Subject_ID", "Eye_status", "Freq_band"]).mean().reset_index() # by default pandas mean skip nan
-# Add group status (cannot use group_by as each subject only have 1 group, not both)
-Group_status = np.array(["CTRL"]*len(H_data_df_global["Subject_ID"]))
-Group_status[np.array([i in cases for i in H_data_df_global["Subject_ID"]])] = "PTSD"
-# Add to dataframe
-H_data_df_global.insert(2, "Group_status", Group_status)
-# Add dummy variable for re-using plot code
-dummy_variable = ["Global Hurst Exponent"]*H_data_df_global.shape[0]
-H_data_df_global.insert(3, "Measurement", dummy_variable )
-
-# Save the data
-H_data_df.to_pickle(os.path.join(Feature_savepath,"H_data_df.pkl"))
-H_data_df_global.to_pickle(os.path.join(Feature_savepath,"H_data_global_df.pkl"))
-
-# %% Source localization of sensor data
-# Using non-interpolated channels
-# Even interpolated channels during preprocessing and visual inspection
-# are dropped
+# # Load
+# DFA_arr = np.load(Feature_savepath+"DFA_arr.npy")
+# H_arr = np.load(Feature_savepath+"H_arr.npy")
+
+# # Average the Hurst Exponent across trials
+# H_arr = np.mean(H_arr, axis=2)
+
+# # Convert to Pandas dataframe (Hurst exponent)
+# # The dimensions will each be a column with numbers and the last column will be the actual values
+# arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, H_arr.shape), indexing="ij"))) + [H_arr.ravel()])
+# H_data_df = pd.DataFrame(arr, columns = ["Subject_ID", "Eye_status", "Channel", "Freq_band", "Value"])
+# # Change from numerical coding to actual values
+# eye_status = list(final_epochs[0].event_id.keys())
+# ch_name = final_epochs[0].info["ch_names"]
+
+# index_values = [Subject_id,eye_status,ch_name,list(Freq_Bands.keys())]
+# for col in range(len(index_values)):
+#     col_name = H_data_df.columns[col]
+#     for shape in range(H_arr.shape[col]): # notice this is the shape of original numpy array. Not shape of DF
+#         H_data_df.loc[H_data_df.iloc[:,col] == shape,col_name]\
+#         = index_values[col][shape]
+
+# # Add group status
+# Group_status = np.array(["CTRL"]*len(H_data_df["Subject_ID"]))
+# Group_status[np.array([i in cases for i in H_data_df["Subject_ID"]])] = "PTSD"
+# # Add to dataframe
+# H_data_df.insert(2, "Group_status", Group_status)
+
+# # Fix Freq_band categorical order
+# H_data_df["Freq_band"] = H_data_df["Freq_band"].astype("category").\
+#             cat.reorder_categories(list(Freq_Bands.keys()), ordered=True)
+
+# # Global Hurst exponent
+# H_data_df_global = H_data_df.groupby(["Subject_ID", "Eye_status", "Freq_band"]).mean().reset_index() # by default pandas mean skip nan
+# # Add group status (cannot use groupby as each subject only has 1 group, not both)
+# Group_status = np.array(["CTRL"]*len(H_data_df_global["Subject_ID"]))
+# Group_status[np.array([i in cases for i in H_data_df_global["Subject_ID"]])] = "PTSD"
+# # Add to dataframe
+# H_data_df_global.insert(2, "Group_status", Group_status)
+# # Add dummy variable for re-using plot code
+# dummy_variable = ["Global Hurst Exponent"]*H_data_df_global.shape[0]
+# H_data_df_global.insert(3, "Measurement", dummy_variable )
+
+# # Save the data
+# H_data_df.to_pickle(os.path.join(Feature_savepath,"H_data_df.pkl"))
+# H_data_df_global.to_pickle(os.path.join(Feature_savepath,"H_data_global_df.pkl"))
+
+# # %% Source localization of sensor data
+# # Using non-interpolated channels
+# # Even interpolated channels during preprocessing and visual inspection
+# # are dropped
+
+# # Prepare epochs for estimation of source connectivity
+# source_epochs = [0]*n_subjects
+# for i in range(n_subjects):
+#     source_epochs[i] = final_epochs[i].copy()
+
+# ### Make forward solutions
+# # A forward solution is first made for all individuals with no dropped channels
+# # Afterwards individual forward solutions are made for subjects with bad
+# # channels that were interpolated in preprocessing and these are dropped
+# # First forward operator is computed using a template MRI for each dataset
+# fs_dir = "/home/glia/MNE-fsaverage-data/fsaverage"
+# subjects_dir = os.path.dirname(fs_dir)
+# trans = "fsaverage"
+# src = os.path.join(fs_dir, "bem", "fsaverage-ico-5-src.fif")
+# bem = os.path.join(fs_dir, "bem", "fsaverage-5120-5120-5120-bem-sol.fif")
+
+# # Read the template sourcespace
+# sourcespace = mne.read_source_spaces(src)
+
+# temp_idx = 0 # Index with subject that had no bad channels
+# subject_eeg = source_epochs[temp_idx].copy()
+# subject_eeg.set_eeg_reference(projection=True) # needed for inverse modelling
+# # Make forward solution
+# fwd = mne.make_forward_solution(subject_eeg.info, trans=trans, src=src,
+#                             bem=bem, eeg=True, mindist=5.0, n_jobs=1)
+# # Save forward operator
+# fname_fwd = "./Source_fwd/fsaverage-fwd.fif"
+# mne.write_forward_solution(fname_fwd, fwd, overwrite=True)
+
+# # A specific forward solution is also made for each subject with bad channels
+# with open("./Preprocessing/bad_ch.pkl", "rb") as file:
+#    bad_ch = pickle.load(file)
+
+# All_bad_ch = bad_ch
+# All_drop_epochs = dropped_epochs_df
+# All_dropped_ch = []
+
+# Bad_ch_idx = [idx for idx, item in enumerate(All_bad_ch) if item != 0]
+# Bad_ch_subjects = All_drop_epochs["Subject_ID"][Bad_ch_idx]
+# # For each subject with bad channels, drop the channels and make forward operator
+# for n in range(len(Bad_ch_subjects)):
+#     Subject = Bad_ch_subjects.iloc[n]
+#     try:
+#         Subject_idx = Subject_id.index(Subject)
+#         # Get unique bad channels
+#         Bad_ch0 = All_bad_ch[Bad_ch_idx[n]]
+#         Bad_ch1 = []
+#         for i2 in range(len(Bad_ch0)):
+#             if type(Bad_ch0[i2]) == list:
+#                 for i3 in range(len(Bad_ch0[i2])):
+#                     Bad_ch1.append(Bad_ch0[i2][i3])
+#             elif type(Bad_ch0[i2]) == str:
+#                 Bad_ch1.append(Bad_ch0[i2])
+#         Bad_ch1 = np.unique(Bad_ch1)
+#         # Drop the bad channels
+#         source_epochs[Subject_idx].drop_channels(Bad_ch1)
+#         # Save the overview of dropped channels
+#         All_dropped_ch.append([Subject,Subject_idx,Bad_ch1])
+#         # Make forward operator
+#         subject_eeg = source_epochs[Subject_idx].copy()
+#         subject_eeg.set_eeg_reference(projection=True) # needed for inverse modelling
+#         # Make forward solution
+#         fwd = mne.make_forward_solution(subject_eeg.info, trans=trans, src=src,
+#                                     bem=bem, eeg=True, mindist=5.0, n_jobs=1)
+#         # Save forward operator
+#         fname_fwd = "./Source_fwd/fsaverage_{}-fwd.fif".format(Subject)
+#         mne.write_forward_solution(fname_fwd, fwd, overwrite=True)
+#     except:
+#         print(Subject,"was already dropped")
+
+# with open("./Preprocessing/All_datasets_bad_ch.pkl", "wb") as filehandle:
+#     pickle.dump(All_dropped_ch, filehandle)
+
+
+# # %% Load forward operators
+# # Re-use for all subjects without dropped channels
+# fname_fwd = "./Source_fwd/fsaverage-fwd.fif"
+# fwd = mne.read_forward_solution(fname_fwd)
+
+# fwd_list = [fwd]*n_subjects
+
+# # Use specific forward solutions for subjects with dropped channels
+# with open("./Preprocessing/All_datasets_bad_ch.pkl", "rb") as file:
+#    All_dropped_ch = pickle.load(file)
+
+# for i in range(len(All_dropped_ch)):
+#     Subject = All_dropped_ch[i][0]
+#     Subject_idx = All_dropped_ch[i][1]
+#     fname_fwd = "./Source_fwd/fsaverage_{}-fwd.fif".format(Subject)
+#     fwd = mne.read_forward_solution(fname_fwd)
+#     fwd_list[Subject_idx] = fwd
+
+# # Check the correct number of channels are present in fwd
+# random_point = int(np.random.randint(0,len(All_dropped_ch)-1,1))
+# assert len(fwd_list[All_dropped_ch[random_point][1]].ch_names) == source_epochs[All_dropped_ch[random_point][1]].info["nchan"]
+
+# # %% Make parcellation
+# # After mapping to source space, I end up with 20484 vertices
+# # but I wanted to map to fewer sources and not many more
+# # Thus I need to perform parcellation
+# # Get labels for FreeSurfer "aparc" cortical parcellation (example with 74 labels/hemi - Destrieux)
+# labels_aparc = mne.read_labels_from_annot("fsaverage", parc="aparc.a2009s",
+#                                     subjects_dir=subjects_dir)
+# labels_aparc = labels_aparc[:-2] # remove unknowns
+
+# labels_aparc_names = [label.name for label in labels_aparc]
+
+# # Manually adding the 31 ROIs (14-lh/rh + 3 in midline) from Toll et al, 2020
+# # Making function to take a subset of a label
+# def label_subset(label, subset, name="ROI_name"):
+#     label_subset = mne.Label(label.vertices[subset], label.pos[subset,:],
+#                          label.values[subset], label.hemi,
+#                          name = "{}-{}".format(name,label.hemi),
+#                          subject = label.subject, color = None)
+#     return label_subset
+
+# ### Visual area 1 (V1 and somatosensory cortex BA1-3)
+# label_filenames = ["lh.V1.label", "rh.V1.label",
+#                    "lh.BA1.label", "rh.BA1.label",
+#                    "lh.BA2.label", "rh.BA2.label",
+#                    "lh.BA3a.label", "rh.BA3a.label",
+#                    "lh.BA3b.label", "rh.BA3b.label"]
+# labels0 = [0]*len(label_filenames)
+# for i, filename in enumerate(label_filenames):
+#     labels0[i] = mne.read_label(os.path.join(fs_dir, "label", filename), subject="fsaverage")
+# # Add V1 to final label variable
+# labels = labels0[:2]
+# # Rename to remove redundant hemi information
+# labels[0].name = "V1-{}".format(labels[0].hemi)
+# labels[1].name = "V1-{}".format(labels[1].hemi)
+# # Assign a color
+# labels[0].color = matplotlib.colors.to_rgba("salmon")
+# labels[1].color = matplotlib.colors.to_rgba("salmon")
+# # Combine Brodmann Areas for SMC. Only use vertices once to avoid duplication errors
+# SMC_labels = labels0[2:]
+# for hem in range(2):
+#     SMC_p1 = SMC_labels[hem]
+#     for i in range(1,len(SMC_labels)//2):
+#         SMC_p2 = SMC_labels[hem+2*i]
+#         p2_idx = np.isin(SMC_p2.vertices, SMC_p1.vertices, invert=True)
+#         SMC_p21 = label_subset(SMC_p2, p2_idx, "SMC")
+#         SMC_p1 = SMC_p1.__add__(SMC_p21)
+#     SMC_p1.name = SMC_p21.name
+#     # Assign a color
+#     SMC_p1.color = matplotlib.colors.to_rgba("orange")
+#     labels.append(SMC_p1)
+
+# ### Inferior frontal junction
+# # Located at junction between inferior frontal and inferior precentral sulcus
+# label_aparc_names0 = ["S_front_inf","S_precentral-inf-part"]
+# temp_labels = []
+# for i in range(len(label_aparc_names0)):
+#     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
+#     for i2 in range(len(labels_aparc_idx)):
+#         temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())
+
+# pos1 = temp_labels[0].pos
+# pos2 = temp_labels[2].pos
+# distm = scipy.spatial.distance.cdist(pos1,pos2)
+# # Find the closest points between the 2 ROIs
+# l1_idx = np.unique(np.where(distm<np.quantile(distm, 0.001))[0]) # q chosen to correspond to around 10% of ROI
+# l2_idx = np.unique(np.where(distm<np.quantile(distm, 0.0005))[1]) # q chosen to correspond to around 10% of ROI
+
+# IFJ_label_p1 = label_subset(temp_labels[0], l1_idx, "IFJ")
+# IFJ_label_p2 = label_subset(temp_labels[2], l2_idx, "IFJ")
+# # Combine the 2 parts
+# IFJ_label = IFJ_label_p1.__add__(IFJ_label_p2)
+# IFJ_label.name = IFJ_label_p1.name
+# # Assign a color
+# IFJ_label.color = matplotlib.colors.to_rgba("chartreuse")
+# # Append to final list
+# labels.append(IFJ_label)
+
+# # Do the same for the right hemisphere
+# pos1 = temp_labels[1].pos
+# pos2 = temp_labels[3].pos
+# distm = scipy.spatial.distance.cdist(pos1,pos2)
+# # Find the closest points between the 2 ROIs
+# l1_idx = np.unique(np.where(distm<np.quantile(distm, 0.00075))[0]) # q chosen to correspond to around 10% of ROI
+# l2_idx = np.unique(np.where(distm<np.quantile(distm, 0.0005))[1]) # q chosen to correspond to around 10% of ROI
+# IFJ_label_p1 = label_subset(temp_labels[1], l1_idx, "IFJ")
+# IFJ_label_p2 = label_subset(temp_labels[3], l2_idx, "IFJ")
+# # Combine the 2 parts
+# IFJ_label = IFJ_label_p1.__add__(IFJ_label_p2)
+# IFJ_label.name = IFJ_label_p1.name
+# # Assign a color
+# IFJ_label.color = matplotlib.colors.to_rgba("chartreuse")
+# # Append to final list
+# labels.append(IFJ_label)
+
+# ### Intraparietal sulcus
+# label_aparc_names0 = ["S_intrapariet_and_P_trans"]
+# labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[0])]
+# for i in range(len(labels_aparc_idx)):
+#     labels.append(labels_aparc[labels_aparc_idx[i]].copy())
+#     labels[-1].name = "IPS-{}".format(labels[-1].hemi)
+
+# ### Frontal eye field as intersection between middle frontal gyrus and precentral gyrus
+# label_aparc_names0 = ["G_front_middle","G_precentral"]
+# temp_labels = []
+# for i in range(len(label_aparc_names0)):
+#     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
+#     for i2 in range(len(labels_aparc_idx)):
+#         temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())
+
+# # Take 10% of middle frontal gyrus closest to precentral gyrus (most posterior)
+# temp_label0 = temp_labels[0]
+# G_fm_y = temp_label0.pos[:,1]
+# thres_G_fm_y = np.sort(G_fm_y)[len(G_fm_y)//10]
+# idx_p1 = np.where(G_fm_y<thres_G_fm_y)[0]
+# FEF_label_p1 = label_subset(temp_label0, idx_p1, "FEF")
+# # Take 10% closest for precentral gyrus (most anterior)
+# temp_label0 = temp_labels[2]
+# # I cannot only use y (anterior/posterior) but also need to restrict z-position
+# G_pre_cen_z = temp_label0.pos[:,2]
+# thres_G_pre_cen_z = 0.04 # visually inspected threshold
+# G_pre_cen_y = temp_label0.pos[:,1]
+# thres_G_pre_cen_y = np.sort(G_pre_cen_y[G_pre_cen_z>thres_G_pre_cen_z])[-len(G_pre_cen_y)//10] # notice - for anterior
+# idx_p2 = np.where((G_pre_cen_y>thres_G_pre_cen_y) & (G_pre_cen_z>thres_G_pre_cen_z))[0]
+# FEF_label_p2 = label_subset(temp_label0, idx_p2, "FEF")
+# # Combine the 2 parts
+# FEF_label = FEF_label_p1.__add__(FEF_label_p2)
+# FEF_label.name = FEF_label_p1.name
+# # Assign a color
+# FEF_label.color = matplotlib.colors.to_rgba("aqua")
+# # Append to final list
+# labels.append(FEF_label)
+
+# # Do the same for the right hemisphere
+# temp_label0 = temp_labels[1]
+# G_fm_y = temp_label0.pos[:,1]
+# thres_G_fm_y = np.sort(G_fm_y)[len(G_fm_y)//10]
+# idx_p1 = np.where(G_fm_y<thres_G_fm_y)[0]
+# FEF_label_p1 = label_subset(temp_label0, idx_p1, "FEF")
+
+# temp_label0 = temp_labels[3]
+# G_pre_cen_z = temp_label0.pos[:,2]
+# thres_G_pre_cen_z = 0.04 # visually inspected threshold
+# G_pre_cen_y = temp_label0.pos[:,1]
+# thres_G_pre_cen_y = np.sort(G_pre_cen_y[G_pre_cen_z>thres_G_pre_cen_z])[-len(G_pre_cen_y)//10] # notice - for anterior
+# idx_p2 = np.where((G_pre_cen_y>thres_G_pre_cen_y) & (G_pre_cen_z>thres_G_pre_cen_z))[0]
+# FEF_label_p2 = label_subset(temp_label0, idx_p2, "FEF")
+# # Combine the 2 parts
+# FEF_label = FEF_label_p1.__add__(FEF_label_p2)
+# FEF_label.name = FEF_label_p1.name
+# # Assign a color
+# FEF_label.color = matplotlib.colors.to_rgba("aqua")
+# # Append to final list
+# labels.append(FEF_label)
+
+# ### Supplementary eye fields
+# # Located at caudal end of frontal gyrus and upper part of paracentral sulcus
+# label_aparc_names0 = ["G_and_S_paracentral","G_front_sup"]
+# temp_labels = []
+# for i in range(len(label_aparc_names0)):
+#     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
+#     for i2 in range(len(labels_aparc_idx)):
+#         temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())
+
+# pos1 = temp_labels[0].pos
+# pos2 = temp_labels[2].pos
+# distm = scipy.spatial.distance.cdist(pos1,pos2)
+# # Find the closest points between the 2 ROIs
+# l1_idx = np.unique(np.where(distm<np.quantile(distm, 0.0005))[0]) # q chosen to correspond to around 15% of ROI
+# l2_idx = np.unique(np.where(distm<np.quantile(distm, 0.005))[1]) # q chosen to correspond to around 10% of ROI
+# # Notice that superior frontal gyrus is around 4 times bigger than paracentral
+# len(l1_idx)/pos1.shape[0]
+# len(l2_idx)/pos2.shape[0]
+# # Only use upper part
+# z_threshold = 0.06 # visually inspected
+# l1_idx = l1_idx[pos1[l1_idx,2] > z_threshold]
+# l2_idx = l2_idx[pos2[l2_idx,2] > z_threshold]
+
+# SEF_label_p1 = label_subset(temp_labels[0], l1_idx, "SEF")
+# SEF_label_p2 = label_subset(temp_labels[2], l2_idx, "SEF")
+# # Combine the 2 parts
+# SEF_label = SEF_label_p1.__add__(SEF_label_p2)
+# SEF_label.name = SEF_label_p1.name
+# # Assign a color
+# SEF_label.color = matplotlib.colors.to_rgba("royalblue")
+# # Append to final list
+# labels.append(SEF_label)
+
+# # Do the same for the right hemisphere
+# pos1 = temp_labels[1].pos
+# pos2 = temp_labels[3].pos
+# distm = scipy.spatial.distance.cdist(pos1,pos2)
+# # Find the closest points between the 2 ROIs
+# l1_idx = np.unique(np.where(distm<np.quantile(distm, 0.0005))[0]) # q chosen to correspond to around 15% of ROI
+# l2_idx = np.unique(np.where(distm<np.quantile(distm, 0.005))[1]) # q chosen to correspond to around 10% of ROI
+# # Notice that superior frontal gyrus is around 4 times bigger than paracentral
+# len(l1_idx)/pos1.shape[0]
+# len(l2_idx)/pos2.shape[0]
+# # Only use upper part
+# z_threshold = 0.06 # visually inspected
+# l1_idx = l1_idx[pos1[l1_idx,2] > z_threshold]
+# l2_idx = l2_idx[pos2[l2_idx,2] > z_threshold]
+
+# SEF_label_p1 = label_subset(temp_labels[1], l1_idx, "SEF")
+# SEF_label_p2 = label_subset(temp_labels[3], l2_idx, "SEF")
+# # Combine the 2 parts
+# SEF_label = SEF_label_p1.__add__(SEF_label_p2)
+# SEF_label.name = SEF_label_p1.name
+# # Assign a color
+# SEF_label.color = matplotlib.colors.to_rgba("royalblue")
+# # Append to final list
+# labels.append(SEF_label)
+
+# ### Posterior cingulate cortex
+# label_aparc_names0 = ["G_cingul-Post-dorsal", "G_cingul-Post-ventral"]
+# temp_labels = []
+# for i in range(len(label_aparc_names0)):
+#     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
+#     for i2 in range(len(labels_aparc_idx)):
+#         temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())
+# labels0 = []
+# for hem in range(2):
+#     PCC_p1 = temp_labels[hem]
+#     for i in range(1,len(temp_labels)//2):
+#         PCC_p2 = temp_labels[hem+2*i]
+#         PCC_p1 = PCC_p1.__add__(PCC_p2)
+#     PCC_p1.name = "PCC-{}".format(PCC_p1.hemi)
+#     labels0.append(PCC_p1)
+# # Combine the 2 hemispheres in 1 label
+# labels.append(labels0[0].__add__(labels0[1]))
+
+# ### Medial prefrontal cortex
+# # From their schematic it looks like rostral 1/4 of superior frontal gyrus
+# label_aparc_names0 = ["G_front_sup"]
+# temp_labels = []
+# for i in range(len(label_aparc_names0)):
+#     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
+#     for i2 in range(len(labels_aparc_idx)):
+#         temp_labels0 = labels_aparc[labels_aparc_idx[i2]].copy()
+#         temp_labels0 = temp_labels0.split(4, subjects_dir=subjects_dir)[3]
+#         temp_labels0.name = "mPFC-{}".format(temp_labels0.hemi)
+#         temp_labels.append(temp_labels0)
+# # Combine the 2 hemispheres in 1 label
+# labels.append(temp_labels[0].__add__(temp_labels[1]))
+
+# ### Angular gyrus
+# label_aparc_names0 = ["G_pariet_inf-Angular"]
+# for i in range(len(label_aparc_names0)):
+#     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
+#     for i2 in range(len(labels_aparc_idx)):
+#         temp_labels = labels_aparc[labels_aparc_idx[i2]].copy()
+#         temp_labels.name = "ANG-{}".format(temp_labels.hemi)
+#         labels.append(temp_labels)
+
+# ### Posterior middle frontal gyrus
+# label_aparc_names0 = ["G_front_middle"]
+# for i in range(len(label_aparc_names0)):
+#     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
+#     for i2 in range(len(labels_aparc_idx)):
+#         temp_labels = labels_aparc[labels_aparc_idx[i2]].copy()
+#         temp_labels = temp_labels.split(2, subjects_dir=subjects_dir)[0]
+#         temp_labels.name = "PMFG-{}".format(temp_labels.hemi)
+#         labels.append(temp_labels)
+
+# ### Inferior parietal lobule
+# # From their parcellation figure this seems to be rostral angular gyrus and posterior supramarginal gyrus
+# label_aparc_names0 = ["G_pariet_inf-Angular","G_pariet_inf-Supramar"]
+# temp_labels = []
+# for i in range(len(label_aparc_names0)):
+#     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
+#     for i2 in range(len(labels_aparc_idx)):
+#         temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())
+# # Split angular in 2 and get rostral part
+# temp_labels[0] = temp_labels[0].split(2, subjects_dir=subjects_dir)[1]
+# temp_labels[1] = temp_labels[1].split(2, subjects_dir=subjects_dir)[1]
+# # Split supramarginal in 2 and get posterior part
+# temp_labels[2] = temp_labels[2].split(2, subjects_dir=subjects_dir)[0]
+# temp_labels[3] = temp_labels[3].split(2, subjects_dir=subjects_dir)[0]
+
+# for hem in range(2):
+#     PCC_p1 = temp_labels[hem]
+#     for i in range(1,len(temp_labels)//2):
+#         PCC_p2 = temp_labels[hem+2*i]
+#         PCC_p1 = PCC_p1.__add__(PCC_p2)
+#     PCC_p1.name = "IPL-{}".format(PCC_p1.hemi)
+#     labels.append(PCC_p1)
+
+# ### Orbital gyrus
+# # From their figure it seems to correspond to orbital part of inferior frontal gyrus
+# label_aparc_names0 = ["G_front_inf-Orbital"]
+# for i in range(len(label_aparc_names0)):
+#     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
+#     for i2 in range(len(labels_aparc_idx)):
+#         temp_labels = labels_aparc[labels_aparc_idx[i2]].copy()
+#         temp_labels.name = "ORB-{}".format(temp_labels.hemi)
+#         labels.append(temp_labels)
+
+# ### Middle temporal gyrus
+# # From their figure it seems to only be 1/4 of MTG at the 2nd to last caudal part
+# label_aparc_names0 = ["G_temporal_middle"]
+# for i in range(len(label_aparc_names0)):
+#     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
+#     for i2 in range(len(labels_aparc_idx)):
+#         temp_labels = labels_aparc[labels_aparc_idx[i2]].copy()
+#         temp_labels = temp_labels.split(4, subjects_dir=subjects_dir)[1]
+#         temp_labels.name = "MTG-{}".format(temp_labels.hemi)
+#         labels.append(temp_labels)
+
+# ### Anterior middle frontal gyrus
+# label_aparc_names0 = ["G_front_middle"]
+# for i in range(len(label_aparc_names0)):
+#     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
+#     for i2 in range(len(labels_aparc_idx)):
+#         temp_labels = labels_aparc[labels_aparc_idx[i2]].copy()
+#         temp_labels = temp_labels.split(2, subjects_dir=subjects_dir)[1]
+#         temp_labels.name = "AMFG-{}".format(temp_labels.hemi)
+#         labels.append(temp_labels)
+
+# ### Insula
+# label_aparc_names0 = ["G_Ins_lg_and_S_cent_ins","G_insular_short"]
+# temp_labels = []
+# for i in range(len(label_aparc_names0)):
+#     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
+#     for i2 in range(len(labels_aparc_idx)):
+#         temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())
+# for hem in range(2):
+#     PCC_p1 = temp_labels[hem]
+#     for i in range(1,len(temp_labels)//2):
+#         PCC_p2 = temp_labels[hem+2*i]
+#         PCC_p1 = PCC_p1.__add__(PCC_p2)
+#     PCC_p1.name = "INS-{}".format(PCC_p1.hemi)
+#     labels.append(PCC_p1)
+
+# ### (Dorsal) Anterior Cingulate Cortex
+# label_aparc_names0 = ["G_and_S_cingul-Ant"]
+# temp_labels = []
+# for i in range(len(label_aparc_names0)):
+#     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
+#     for i2 in range(len(labels_aparc_idx)):
+#         temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())
+#         temp_labels[-1].name = "ACC-{}".format(temp_labels[-1].hemi)
+# # Combine the 2 hemispheres in 1 label
+# labels.append(temp_labels[0].__add__(temp_labels[1]))
+
+# ### Supramarginal Gyrus
+# label_aparc_names0 = ["G_pariet_inf-Supramar"]
+# for i in range(len(label_aparc_names0)):
+#     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
+#     for i2 in range(len(labels_aparc_idx)):
+#         temp_labels = labels_aparc[labels_aparc_idx[i2]].copy()
+#         temp_labels.name = "SUP-{}".format(temp_labels.hemi)
+#         labels.append(temp_labels)
+
+# print("{} ROIs have been defined".format(len(labels)))
+
+# # # Visualize positions
+# # fig = plt.figure()
+# # ax = fig.add_subplot(111, projection="3d")
+# # for i in range(0,3):
+# #     temp_pos = temp_labels[i].pos
+# #     ax.scatter(temp_pos[:,0],temp_pos[:,1],temp_pos[:,2], marker="o", alpha=0.1)
+# # # Add to plot
+# # ax.scatter(labels[-1].pos[:,0],labels[-1].pos[:,1],labels[-1].pos[:,2], marker="o")
+
+# # # Visualize the labels
+# # # temp_l = labels_aparc[labels_aparc_idx[0]]
+# # temp_l = labels[-2]
+# # l_stc = stc[100].in_label(temp_l)
+# # l_stc.vertices
+
+# # l_stc.plot(**surfer_kwargs)
+
+# # Save the annotation file
+# with open("custom_aparc2009_Li_et_al_2022.pkl", "wb") as file:
+#     pickle.dump(labels, file)
+
+# # %% Calculate orthogonalized power envelope connectivity in source space
+# # In non-interpolated channels
+# # Updated 22/1 - 2021 to use delta = 1/81 and assumption
+# # about non-correlated and equal variance noise covariance matrix for channels
 
-# Prepare epochs for estimation of source connectivity
-source_epochs = [0]*n_subjects
-for i in range(n_subjects):
-    source_epochs[i] = final_epochs[i].copy()
-
-### Make forward solutions
-# A forward solution is first made for all individuals with no dropped channels
-# Afterwards individual forward solutions are made for subjects with bad
-# channels that were interpolated in preprocessing and these are dropped
-# First forward operator is computed using a template MRI for each dataset
-fs_dir = "/home/glia/MNE-fsaverage-data/fsaverage"
-subjects_dir = os.path.dirname(fs_dir)
-trans = "fsaverage"
-src = os.path.join(fs_dir, "bem", "fsaverage-ico-5-src.fif")
-bem = os.path.join(fs_dir, "bem", "fsaverage-5120-5120-5120-bem-sol.fif")
-
-# Read the template sourcespace
-sourcespace = mne.read_source_spaces(src)
-
-temp_idx = 0 # Index with subject that had no bad channels
-subject_eeg = source_epochs[temp_idx].copy()
-subject_eeg.set_eeg_reference(projection=True) # needed for inverse modelling
-# Make forward solution
-fwd = mne.make_forward_solution(subject_eeg.info, trans=trans, src=src,
-                            bem=bem, eeg=True, mindist=5.0, n_jobs=1)
-# Save forward operator
-fname_fwd = "./Source_fwd/fsaverage-fwd.fif"
-mne.write_forward_solution(fname_fwd, fwd, overwrite=True)
-
-# A specific forward solution is also made for each subject with bad channels
-with open("./Preprocessing/bad_ch.pkl", "rb") as file:
-   bad_ch = pickle.load(file)
-
-All_bad_ch = bad_ch
-All_drop_epochs = dropped_epochs_df
-All_dropped_ch = []
-
-Bad_ch_idx = [idx for idx, item in enumerate(All_bad_ch) if item != 0]
-Bad_ch_subjects = All_drop_epochs["Subject_ID"][Bad_ch_idx]
-# For each subject with bad channels, drop the channels and make forward operator
-for n in range(len(Bad_ch_subjects)):
-    Subject = Bad_ch_subjects.iloc[n]
-    try:
-        Subject_idx = Subject_id.index(Subject)
-        # Get unique bad channels
-        Bad_ch0 = All_bad_ch[Bad_ch_idx[n]]
-        Bad_ch1 = []
-        for i2 in range(len(Bad_ch0)):
-            if type(Bad_ch0[i2]) == list:
-                for i3 in range(len(Bad_ch0[i2])):
-                    Bad_ch1.append(Bad_ch0[i2][i3])
-            elif type(Bad_ch0[i2]) == str:
-                Bad_ch1.append(Bad_ch0[i2])
-        Bad_ch1 = np.unique(Bad_ch1)
-        # Drop the bad channels
-        source_epochs[Subject_idx].drop_channels(Bad_ch1)
-        # Save the overview of dropped channels
-        All_dropped_ch.append([Subject,Subject_idx,Bad_ch1])
-        # Make forward operator
-        subject_eeg = source_epochs[Subject_idx].copy()
-        subject_eeg.set_eeg_reference(projection=True) # needed for inverse modelling
-        # Make forward solution
-        fwd = mne.make_forward_solution(subject_eeg.info, trans=trans, src=src,
-                                    bem=bem, eeg=True, mindist=5.0, n_jobs=1)
-        # Save forward operator
-        fname_fwd = "./Source_fwd/fsaverage_{}-fwd.fif".format(Subject)
-        mne.write_forward_solution(fname_fwd, fwd, overwrite=True)
-    except:
-        print(Subject,"was already dropped")
-
-with open("./Preprocessing/All_datasets_bad_ch.pkl", "wb") as filehandle:
-    pickle.dump(All_dropped_ch, filehandle)
-
-
-# %% Load forward operators
-# Re-use for all subjects without dropped channels
-fname_fwd = "./Source_fwd/fsaverage-fwd.fif"
-fwd = mne.read_forward_solution(fname_fwd)
-
-fwd_list = [fwd]*n_subjects
-
-# Use specific forward solutions for subjects with dropped channels
-with open("./Preprocessing/All_datasets_bad_ch.pkl", "rb") as file:
-   All_dropped_ch = pickle.load(file)
-
-for i in range(len(All_dropped_ch)):
-    Subject = All_dropped_ch[i][0]
-    Subject_idx = All_dropped_ch[i][1]
-    fname_fwd = "./Source_fwd/fsaverage_{}-fwd.fif".format(Subject)
-    fwd = mne.read_forward_solution(fname_fwd)
-    fwd_list[Subject_idx] = fwd
-
-# Check the correct number of channels are present in fwd
-random_point = int(np.random.randint(0,len(All_dropped_ch)-1,1))
-assert len(fwds[All_dropped_ch[random_point][1]].ch_names) == source_epochs[All_dropped_ch[random_point][1]].info["nchan"]
-
-# %% Make parcellation
-# After mapping to source space, I end up with 20484 vertices
-# but I wanted to map to fewer sources and not many more
-# Thus I need to perform parcellation
-# Get labels for FreeSurfer "aparc" cortical parcellation (example with 74 labels/hemi - Destriuex)
-labels_aparc = mne.read_labels_from_annot("fsaverage", parc="aparc.a2009s",
-                                    subjects_dir=subjects_dir)
-labels_aparc = labels_aparc[:-2] # remove unknowns
-
-labels_aparc_names = [label.name for label in labels_aparc]
-
-# Manually adding the 31 ROIs (14-lh/rh + 3 in midline) from Toll et al, 2020
-# Making fuction to take subset of a label
-def label_subset(label, subset, name="ROI_name"):
-    label_subset = mne.Label(label.vertices[subset], label.pos[subset,:],
-                         label.values[subset], label.hemi,
-                         name = "{}-{}".format(name,label.hemi),
-                         subject = label.subject, color = None)
-    return label_subset
-
-### Visual area 1 (V1 and somatosensory cortex BA1-3)
-label_filenames = ["lh.V1.label", "rh.V1.label",
-                   "lh.BA1.label", "rh.BA1.label",
-                   "lh.BA2.label", "rh.BA2.label",
-                   "lh.BA3a.label", "rh.BA3a.label",
-                   "lh.BA3b.label", "rh.BA3b.label"]
-labels0 = [0]*len(label_filenames)
-for i, filename in enumerate(label_filenames):
-    labels0[i] = mne.read_label(os.path.join(fs_dir, "label", filename), subject="fsaverage")
-# Add V1 to final label variable
-labels = labels0[:2]
-# Rename to remove redundant hemi information
-labels[0].name = "V1-{}".format(labels[0].hemi)
-labels[1].name = "V1-{}".format(labels[1].hemi)
-# Assign a color
-labels[0].color = matplotlib.colors.to_rgba("salmon")
-labels[1].color = matplotlib.colors.to_rgba("salmon")
-# Combine Brodmann Areas for SMC. Only use vertices ones to avoid duplication error
-SMC_labels = labels0[2:]
-for hem in range(2):
-    SMC_p1 = SMC_labels[hem]
-    for i in range(1,len(SMC_labels)//2):
-        SMC_p2 = SMC_labels[hem+2*i]
-        p2_idx = np.isin(SMC_p2.vertices, SMC_p1.vertices, invert=True)
-        SMC_p21 = label_subset(SMC_p2, p2_idx, "SMC")
-        SMC_p1 = SMC_p1.__add__(SMC_p21)
-    SMC_p1.name = SMC_p21.name
-    # Assign a color
-    SMC_p1.color = matplotlib.colors.to_rgba("orange")
-    labels.append(SMC_p1)
-
-### Inferior frontal junction
-# Located at junction between inferior frontal and inferior precentral sulcus
-label_aparc_names0 = ["S_front_inf","S_precentral-inf-part"]
-temp_labels = []
-for i in range(len(label_aparc_names0)):
-    labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
-    for i2 in range(len(labels_aparc_idx)):
-        temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())
-
-pos1 = temp_labels[0].pos
-pos2 = temp_labels[2].pos
-distm = scipy.spatial.distance.cdist(pos1,pos2)
-# Find the closest points between the 2 ROIs
-l1_idx = np.unique(np.where(distm<np.quantile(distm, 0.001))[0]) # q chosen to correspond to around 10% of ROI
-l2_idx = np.unique(np.where(distm<np.quantile(distm, 0.0005))[1]) # q chosen to correspond to around 10% of ROI
-
-IFJ_label_p1 = label_subset(temp_labels[0], l1_idx, "IFJ")
-IFJ_label_p2 = label_subset(temp_labels[2], l2_idx, "IFJ")
-# Combine the 2 parts
-IFJ_label = IFJ_label_p1.__add__(IFJ_label_p2)
-IFJ_label.name = IFJ_label_p1.name
-# Assign a color
-IFJ_label.color = matplotlib.colors.to_rgba("chartreuse")
-# Append to final list
-labels.append(IFJ_label)
-
-# Do the same for the right hemisphere
-pos1 = temp_labels[1].pos
-pos2 = temp_labels[3].pos
-distm = scipy.spatial.distance.cdist(pos1,pos2)
-# Find the closest points between the 2 ROIs
-l1_idx = np.unique(np.where(distm<np.quantile(distm, 0.00075))[0]) # q chosen to correspond to around 10% of ROI
-l2_idx = np.unique(np.where(distm<np.quantile(distm, 0.0005))[1]) # q chosen to correspond to around 10% of ROI
-IFJ_label_p1 = label_subset(temp_labels[1], l1_idx, "IFJ")
-IFJ_label_p2 = label_subset(temp_labels[3], l2_idx, "IFJ")
-# Combine the 2 parts
-IFJ_label = IFJ_label_p1.__add__(IFJ_label_p2)
-IFJ_label.name = IFJ_label_p1.name
-# Assign a color
-IFJ_label.color = matplotlib.colors.to_rgba("chartreuse")
-# Append to final list
-labels.append(IFJ_label)
-
-### Intraparietal sulcus
-label_aparc_names0 = ["S_intrapariet_and_P_trans"]
-labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[0])]
-for i in range(len(labels_aparc_idx)):
-    labels.append(labels_aparc[labels_aparc_idx[i]].copy())
-    labels[-1].name = "IPS-{}".format(labels[-1].hemi)
-
-### Frontal eye field as intersection between middle frontal gyrus and precentral gyrus
-label_aparc_names0 = ["G_front_middle","G_precentral"]
-temp_labels = []
-for i in range(len(label_aparc_names0)):
-    labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
-    for i2 in range(len(labels_aparc_idx)):
-        temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())
-
-# Take 10% of middle frontal gyrus closest to precentral gyrus (most posterior)
-temp_label0 = temp_labels[0]
-G_fm_y = temp_label0.pos[:,1]
-thres_G_fm_y = np.sort(G_fm_y)[len(G_fm_y)//10]
-idx_p1 = np.where(G_fm_y<thres_G_fm_y)[0]
-FEF_label_p1 = label_subset(temp_label0, idx_p1, "FEF")
-# Take 10% closest for precentral gyrus (most anterior)
-temp_label0 = temp_labels[2]
-# I cannot only use y (anterior/posterior) but also need to restrict z-position
-G_pre_cen_z = temp_label0.pos[:,2]
-thres_G_pre_cen_z = 0.04 # visually inspected threshold
-G_pre_cen_y = temp_label0.pos[:,1]
-thres_G_pre_cen_y = np.sort(G_pre_cen_y[G_pre_cen_z>thres_G_pre_cen_z])[-len(G_pre_cen_y)//10] # notice - for anterior
-idx_p2 = np.where((G_pre_cen_y>thres_G_pre_cen_y) & (G_pre_cen_z>thres_G_pre_cen_z))[0]
-FEF_label_p2 = label_subset(temp_label0, idx_p2, "FEF")
-# Combine the 2 parts
-FEF_label = FEF_label_p1.__add__(FEF_label_p2)
-FEF_label.name = FEF_label_p1.name
-# Assign a color
-FEF_label.color = matplotlib.colors.to_rgba("aqua")
-# Append to final list
-labels.append(FEF_label)
-
-# Do the same for the right hemisphere
-temp_label0 = temp_labels[1]
-G_fm_y = temp_label0.pos[:,1]
-thres_G_fm_y = np.sort(G_fm_y)[len(G_fm_y)//10]
-idx_p1 = np.where(G_fm_y<thres_G_fm_y)[0]
-FEF_label_p1 = label_subset(temp_label0, idx_p1, "FEF")
-
-temp_label0 = temp_labels[3]
-G_pre_cen_z = temp_label0.pos[:,2]
-thres_G_pre_cen_z = 0.04 # visually inspected threshold
-G_pre_cen_y = temp_label0.pos[:,1]
-thres_G_pre_cen_y = np.sort(G_pre_cen_y[G_pre_cen_z>thres_G_pre_cen_z])[-len(G_pre_cen_y)//10] # note the negative index to take the most anterior part
-idx_p2 = np.where((G_pre_cen_y>thres_G_pre_cen_y) & (G_pre_cen_z>thres_G_pre_cen_z))[0]
-FEF_label_p2 = label_subset(temp_label0, idx_p2, "FEF")
-# Combine the 2 parts
-FEF_label = FEF_label_p1.__add__(FEF_label_p2)
-FEF_label.name = FEF_label_p1.name
-# Assign a color
-FEF_label.color = matplotlib.colors.to_rgba("aqua")
-# Append to final list
-labels.append(FEF_label)
-
-### Supplementary eye fields
-# Located at the caudal end of the superior frontal gyrus and the upper part of the paracentral sulcus
-label_aparc_names0 = ["G_and_S_paracentral","G_front_sup"]
-temp_labels = []
-for i in range(len(label_aparc_names0)):
-    labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
-    for i2 in range(len(labels_aparc_idx)):
-        temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())
-
-pos1 = temp_labels[0].pos
-pos2 = temp_labels[2].pos
-distm = scipy.spatial.distance.cdist(pos1,pos2)
-# Find the closest points between the 2 ROIs
-l1_idx = np.unique(np.where(distm<np.quantile(distm, 0.0005))[0]) # q chosen to correspond to around 15% of ROI
-l2_idx = np.unique(np.where(distm<np.quantile(distm, 0.005))[1]) # q chosen to correspond to around 10% of ROI
-# Notice that superior frontal gyrus is around 4 times bigger than paracentral
-len(l1_idx)/pos1.shape[0]
-len(l2_idx)/pos2.shape[0]
-# Only use upper part
-z_threshold = 0.06 # visually inspected
-l1_idx = l1_idx[pos1[l1_idx,2] > z_threshold]
-l2_idx = l2_idx[pos2[l2_idx,2] > z_threshold]
-
-SEF_label_p1 = label_subset(temp_labels[0], l1_idx, "SEF")
-SEF_label_p2 = label_subset(temp_labels[2], l2_idx, "SEF")
-# Combine the 2 parts
-SEF_label = SEF_label_p1.__add__(SEF_label_p2)
-SEF_label.name = SEF_label_p1.name
-# Assign a color
-SEF_label.color = matplotlib.colors.to_rgba("royalblue")
-# Append to final list
-labels.append(SEF_label)
-
-# Do the same for the right hemisphere
-pos1 = temp_labels[1].pos
-pos2 = temp_labels[3].pos
-distm = scipy.spatial.distance.cdist(pos1,pos2)
-# Find the closest points between the 2 ROIs
-l1_idx = np.unique(np.where(distm<np.quantile(distm, 0.0005))[0]) # q chosen to correspond to around 15% of ROI
-l2_idx = np.unique(np.where(distm<np.quantile(distm, 0.005))[1]) # q chosen to correspond to around 10% of ROI
-# Notice that superior frontal gyrus is around 4 times bigger than paracentral
-len(l1_idx)/pos1.shape[0]
-len(l2_idx)/pos2.shape[0]
-# Only use upper part
-z_threshold = 0.06 # visually inspected
-l1_idx = l1_idx[pos1[l1_idx,2] > z_threshold]
-l2_idx = l2_idx[pos2[l2_idx,2] > z_threshold]
-
-SEF_label_p1 = label_subset(temp_labels[1], l1_idx, "SEF")
-SEF_label_p2 = label_subset(temp_labels[3], l2_idx, "SEF")
-# Combine the 2 parts
-SEF_label = SEF_label_p1.__add__(SEF_label_p2)
-SEF_label.name = SEF_label_p1.name
-# Assign a color
-SEF_label.color = matplotlib.colors.to_rgba("royalblue")
-# Append to final list
-labels.append(SEF_label)
-
-### Posterior cingulate cortex
-label_aparc_names0 = ["G_cingul-Post-dorsal", "G_cingul-Post-ventral"]
-temp_labels = []
-for i in range(len(label_aparc_names0)):
-    labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
-    for i2 in range(len(labels_aparc_idx)):
-        temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())
-labels0 = []
-for hem in range(2):
-    PCC_p1 = temp_labels[hem]
-    for i in range(1,len(temp_labels)//2):
-        PCC_p2 = temp_labels[hem+2*i]
-        PCC_p1 = PCC_p1.__add__(PCC_p2)
-    PCC_p1.name = "PCC-{}".format(PCC_p1.hemi)
-    labels0.append(PCC_p1)
-# Combine the 2 hemispheres into 1 label
-labels.append(labels0[0].__add__(labels0[1]))
-
-### Medial prefrontal cortex
-# From their schematic it looks like rostral 1/4 of superior frontal gyrus
-label_aparc_names0 = ["G_front_sup"]
-temp_labels = []
-for i in range(len(label_aparc_names0)):
-    labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
-    for i2 in range(len(labels_aparc_idx)):
-        temp_labels0 = labels_aparc[labels_aparc_idx[i2]].copy()
-        temp_labels0 = temp_labels0.split(4, subjects_dir=subjects_dir)[3]
-        temp_labels0.name = "mPFC-{}".format(temp_labels0.hemi)
-        temp_labels.append(temp_labels0)
-# Combine the 2 hemispheres into 1 label
-labels.append(temp_labels[0].__add__(temp_labels[1]))
-
-### Angular gyrus
-label_aparc_names0 = ["G_pariet_inf-Angular"]
-for i in range(len(label_aparc_names0)):
-    labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
-    for i2 in range(len(labels_aparc_idx)):
-        temp_labels = labels_aparc[labels_aparc_idx[i2]].copy()
-        temp_labels.name = "ANG-{}".format(temp_labels.hemi)
-        labels.append(temp_labels)
-
-### Posterior middle frontal gyrus
-label_aparc_names0 = ["G_front_middle"]
-for i in range(len(label_aparc_names0)):
-    labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
-    for i2 in range(len(labels_aparc_idx)):
-        temp_labels = labels_aparc[labels_aparc_idx[i2]].copy()
-        temp_labels = temp_labels.split(2, subjects_dir=subjects_dir)[0]
-        temp_labels.name = "PMFG-{}".format(temp_labels.hemi)
-        labels.append(temp_labels)
-
-### Inferior parietal lobule
-# From their parcellation figure seems to be rostral angular gyrus and posterior supramarginal gyrus
-label_aparc_names0 = ["G_pariet_inf-Angular","G_pariet_inf-Supramar"]
-temp_labels = []
-for i in range(len(label_aparc_names0)):
-    labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
-    for i2 in range(len(labels_aparc_idx)):
-        temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())
-# Split angular in 2 and get rostral part
-temp_labels[0] = temp_labels[0].split(2, subjects_dir=subjects_dir)[1]
-temp_labels[1] = temp_labels[1].split(2, subjects_dir=subjects_dir)[1]
-# Split supramarginal in 2 and get posterior part
-temp_labels[2] = temp_labels[2].split(2, subjects_dir=subjects_dir)[0]
-temp_labels[3] = temp_labels[3].split(2, subjects_dir=subjects_dir)[0]
-
-for hem in range(2):
-    PCC_p1 = temp_labels[hem]
-    for i in range(1,len(temp_labels)//2):
-        PCC_p2 = temp_labels[hem+2*i]
-        PCC_p1 = PCC_p1.__add__(PCC_p2)
-    PCC_p1.name = "IPL-{}".format(PCC_p1.hemi)
-    labels.append(PCC_p1)
-
-### Orbital gyrus
-# From their figure it seems to correspond to orbital part of inferior frontal gyrus
-label_aparc_names0 = ["G_front_inf-Orbital"]
-for i in range(len(label_aparc_names0)):
-    labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
-    for i2 in range(len(labels_aparc_idx)):
-        temp_labels = labels_aparc[labels_aparc_idx[i2]].copy()
-        temp_labels.name = "ORB-{}".format(temp_labels.hemi)
-        labels.append(temp_labels)
-
-### Middle temporal gyrus
-# From their figure it seems to only be 1/4 of MTG at the 2nd to last caudal part
-label_aparc_names0 = ["G_temporal_middle"]
-for i in range(len(label_aparc_names0)):
-    labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
-    for i2 in range(len(labels_aparc_idx)):
-        temp_labels = labels_aparc[labels_aparc_idx[i2]].copy()
-        temp_labels = temp_labels.split(4, subjects_dir=subjects_dir)[1]
-        temp_labels.name = "MTG-{}".format(temp_labels.hemi)
-        labels.append(temp_labels)
-
-### Anterior middle frontal gyrus
-label_aparc_names0 = ["G_front_middle"]
-for i in range(len(label_aparc_names0)):
-    labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
-    for i2 in range(len(labels_aparc_idx)):
-        temp_labels = labels_aparc[labels_aparc_idx[i2]].copy()
-        temp_labels = temp_labels.split(2, subjects_dir=subjects_dir)[1]
-        temp_labels.name = "AMFG-{}".format(temp_labels.hemi)
-        labels.append(temp_labels)
-
-### Insula
-label_aparc_names0 = ["G_Ins_lg_and_S_cent_ins","G_insular_short"]
-temp_labels = []
-for i in range(len(label_aparc_names0)):
-    labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
-    for i2 in range(len(labels_aparc_idx)):
-        temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())
-for hem in range(2):
-    PCC_p1 = temp_labels[hem]
-    for i in range(1,len(temp_labels)//2):
-        PCC_p2 = temp_labels[hem+2*i]
-        PCC_p1 = PCC_p1.__add__(PCC_p2)
-    PCC_p1.name = "INS-{}".format(PCC_p1.hemi)
-    labels.append(PCC_p1)
-
-### (Dorsal) Anterior Cingulate Cortex
-label_aparc_names0 = ["G_and_S_cingul-Ant"]
-temp_labels = []
-for i in range(len(label_aparc_names0)):
-    labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
-    for i2 in range(len(labels_aparc_idx)):
-        temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())
-        temp_labels[-1].name = "ACC-{}".format(temp_labels[-1].hemi)
-# Combine the 2 hemispheres into 1 label
-labels.append(temp_labels[0].__add__(temp_labels[1]))
-
-### Supramarginal Gyrus
-label_aparc_names0 = ["G_pariet_inf-Supramar"]
-for i in range(len(label_aparc_names0)):
-    labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
-    for i2 in range(len(labels_aparc_idx)):
-        temp_labels = labels_aparc[labels_aparc_idx[i2]].copy()
-        temp_labels.name = "SUP-{}".format(temp_labels.hemi)
-        labels.append(temp_labels)
-
-print("{} ROIs have been defined".format(len(labels)))
-
-# # Visualize positions
-# fig = plt.figure()
-# ax = fig.add_subplot(111, projection="3d")
-# for i in range(0,3):
-#     temp_pos = temp_labels[i].pos
-#     ax.scatter(temp_pos[:,0],temp_pos[:,1],temp_pos[:,2], marker="o", alpha=0.1)
-# # Add to plot
-# ax.scatter(labels[-1].pos[:,0],labels[-1].pos[:,1],labels[-1].pos[:,2], marker="o")
-
-# # Visualize the labels
-# # temp_l = labels_aparc[labels_aparc_idx[0]]
-# temp_l = labels[-2]
-# l_stc = stc[100].in_label(temp_l)
-# l_stc.vertices
-
-# l_stc.plot(**surfer_kwargs)
-
-# Save the custom labels
-with open("custom_aparc2009_Li_et_al_2022.pkl", "wb") as file:
-    pickle.dump(labels, file)
-
-# %% Calculate orthogonalized power envelope connectivity in source space
-# In non-interpolated channels
-# Updated 22/1-2021 to use delta = 1/81 and the assumption of an
-# uncorrelated, equal-variance noise covariance matrix for the channels
-
-# Load
-with open("custom_aparc2009_Li_et_al_2022.pkl", "rb") as file:
-    labels = pickle.load(file)
-label_names = [label.name for label in labels]
-
-# Define function to estimate PEC
-def PEC_estimation(x, freq_bands, sfreq=200):
-    """
-    This function takes a source timeseries signal x and performs:
-        1. Bandpass filtering
-        2. Hilbert transform to yield analytical signal
-        3. Compute all to all connectivity by iteratively computing for each pair
-            a. Orthogonalization
-            b. Computing power envelopes by squaring the signals |x|^2
-            c. Log-transform to enhance normality
-            d. Pearson's correlation between each pair
-            e. Fisher's r-to-z transform to enhance normality
-    The code has been optimized by inspiration from MNE-Python's function:
-    mne.connectivity.envelope_correlation.
+# # Load
+# with open("custom_aparc2009_Li_et_al_2022.pkl", "rb") as file:
+#     labels = pickle.load(file)
+# label_names = [label.name for label in labels]
+
+# # Define function to estimate PEC
+# def PEC_estimation(x, freq_bands, sfreq=200):
+#     """
+#     This function takes a source timeseries signal x and performs:
+#         1. Bandpass filtering
+#         2. Hilbert transform to yield analytical signal
+#         3. Compute all to all connectivity by iteratively computing for each pair
+#             a. Orthogonalization
+#             b. Computing power envelopes by squaring the signals |x|^2
+#             c. Log-transform to enhance normality
+#             d. Pearson's correlation between each pair
+#             e. Fisher's r-to-z transform to enhance normality
+#     The code has been optimized by inspiration from MNE-Python's function:
+#     mne.connectivity.envelope_correlation.
     
-    In MNE-Python versions < 0.22 there was a bug, but after the fix in 0.22
-    the MNE function is equivalent to my implementation, although it doesn't
-    use an epsilon and thus gives the same result with a RuntimeWarning about log(0)
+#     In MNE-Python versions < 0.22 there was a bug, but after the fix in 0.22
+#     the MNE function is equivalent to my implementation, although it doesn't
+#     use an epsilon and thus gives the same result with a RuntimeWarning about log(0)
     
-    IMPORTANT NOTE:
-        Filtering introduces artifacts for the first and last timepoints
-    The values are very low, more than a factor of 1e-12 smaller than the others
-    If they are not removed, they will heavily influence Pearson's
-    correlation, as it is outlier sensitive
+#     IMPORTANT NOTE:
+#         Filtering introduces artifacts for the first and last timepoints
+#     The values are very low, more than a factor of 1e-12 smaller than the others
+#     If they are not removed, they will heavily influence Pearson's
+#     correlation, as it is outlier sensitive
     
-    Inputs:
-        x - The signal in source space as np.array with shape (ROIs,Timepoints)
-        freq_bands - The frequency bands of interest as a dictionary e.g.
-                     {"alpha": [8.0, 13.0], "beta": [13.0, 30.0]}
-        sfreq - The sampling frequency in Hertz
+#     Inputs:
+#         x - The signal in source space as np.array with shape (ROIs,Timepoints)
+#         freq_bands - The frequency bands of interest as a dictionary e.g.
+#                      {"alpha": [8.0, 13.0], "beta": [13.0, 30.0]}
+#         sfreq - The sampling frequency in Hertz
     
-    Output:
-        The pairwise connectivity matrix
-    """
-    n_roi, n_timepoints = x.shape
-    n_freq_bands = len(freq_bands)
+#     Output:
+#         The pairwise connectivity matrix
+#     """
+#     n_roi, n_timepoints = x.shape
+#     n_freq_bands = len(freq_bands)
     
-    epsilon = 1e-100 # small value to prevent log(0) errors
+#     epsilon = 1e-100 # small value to prevent log(0) errors
     
-    # Filter the signal in the different freq bands
-    PEC_con0 = np.zeros((n_roi,n_roi,n_freq_bands))
-    for fname, frange in freq_bands.items():
-        fmin, fmax = [float(interval) for interval in frange]
-        signal_filtered = mne.filter.filter_data(x, sfreq, fmin, fmax,
-                                          fir_design="firwin", verbose=0)
-        # Filtering on finite signals will yield very low values for the first
-        # and last timepoints, which can create outliers, e.g. 1e-29 compared to 1e-14
-        # Outlier-sensitive methods, like Pearson's correlation, are therefore
-        # heavily affected, so this systematic error is removed by dropping
-        # the first and last timepoints
-        signal_filtered = signal_filtered[:,1:-1]
+#     # Filter the signal in the different freq bands
+#     PEC_con0 = np.zeros((n_roi,n_roi,n_freq_bands))
+#     for fname, frange in freq_bands.items():
+#         fmin, fmax = [float(interval) for interval in frange]
+#         signal_filtered = mne.filter.filter_data(x, sfreq, fmin, fmax,
+#                                           fir_design="firwin", verbose=0)
+#         # Filtering on finite signals will yield very low values for the first
+#         # and last timepoints, which can create outliers, e.g. 1e-29 compared to 1e-14
+#         # Outlier-sensitive methods, like Pearson's correlation, are therefore
+#         # heavily affected, so this systematic error is removed by dropping
+#         # the first and last timepoints
+#         signal_filtered = signal_filtered[:,1:-1]
         
-        # Hilbert transform
-        analytic_signal = scipy.signal.hilbert(signal_filtered)
-        # I will use x and y to keep track of orthogonalization
-        x0 = analytic_signal
-        # Get power envelope
-        x0_mag = np.abs(x0)
-        # Get scaled conjugate used for orthogonalization estimation
-        x0_conj_scaled = x0.conj()
-        x0_conj_scaled /= x0_mag
-        # Take square power envelope
-        PEx = np.square(x0_mag)
-        # Take log transform
-        lnPEx = np.log(PEx+epsilon)
-        # Remove mean for Pearson correlation calculation
-        lnPEx_nomean = lnPEx - np.mean(lnPEx, axis=-1, keepdims=True) # normalize each roi timeseries
-        # Get std for Pearson correlation calculation
-        lnPEx_std = np.std(lnPEx, axis=-1)
-        lnPEx_std[lnPEx_std == 0] = 1 # Prevent std = 0 problems
-        # Prepare con matrix
-        con0 = np.zeros((n_roi,n_roi))
-        for roi_r, y0 in enumerate(x0): # for each y0
-            # Calculate orthogonalized signal y with respect to x for all x
-            # Using y_ort = imag(y*x_conj/|x|)
-            # I checked the formula in temp_v3 and it works as intended
-            # I want to orthogonalize element wise for each timepoint
-            y0_ort = (y0*x0_conj_scaled).imag
-            # Here y0_ort.shape = (n_roi, n_timepoints)
-            # So y is current roi and the first axis gives each x it is orthogonalized to
-            # Take the abs to get power envelope
-            y0_ort = np.abs(y0_ort)
-            # Prevent log(0) error when calculating y_ort on y
-            y0_ort[roi_r] = 1. # this will be 0 after mean subtraction
-            # Take square power envelope
-            PEy = np.square(y0_ort) # squared power envelope
-            # Take log transform
-            lnPEy = np.log(PEy+epsilon)
-            # Remove mean for pearson correlation calculation
-            lnPEy_nomean = lnPEy - np.mean(lnPEy, axis=-1, keepdims=True)
-            # Get std for Pearson correlation calculation
-            lnPEy_std = np.std(lnPEy, axis=-1)
-            lnPEy_std[lnPEy_std == 0] = 1.
-            # Pearson correlation is the expectation of X_nomean * Y_nomean for each time series, divided by the standard deviations
-            PEC = np.mean(lnPEx_nomean*lnPEy_nomean, axis=-1)
-            PEC /= lnPEx_std
-            PEC /= lnPEy_std
-            con0[roi_r] = PEC
-        # The con0 connectivity matrix should be read as correlation between
-        # orthogonalized y (row number) and x (column number)
-        # It is not symmetrical, as cor(roi2_ort, roi1) is not cor(roi1_ort, roi2)
-        # To make it symmetrical, the average of the absolute correlations
-        # of the 2 possibilities for each pair is taken
-        con0 = np.abs(con0)
-        con0 = (con0.T+con0)/2.
-        # Fisher's z transform - which is equivalent to arctanh
-        con0 = np.arctanh(con0)
-        # The diagonal is 0 by construction: y0_ort was set to 1 for the ROI itself,
-        # which becomes exactly 0 after the mean subtraction, so no explicit zeroing is needed
+#         # Hilbert transform
+#         analytic_signal = scipy.signal.hilbert(signal_filtered)
+#         # I will use x and y to keep track of orthogonalization
+#         x0 = analytic_signal
+#         # Get power envelope
+#         x0_mag = np.abs(x0)
+#         # Get scaled conjugate used for orthogonalization estimation
+#         x0_conj_scaled = x0.conj()
+#         x0_conj_scaled /= x0_mag
+#         # Take square power envelope
+#         PEx = np.square(x0_mag)
+#         # Take log transform
+#         lnPEx = np.log(PEx+epsilon)
+#         # Remove mean for Pearson correlation calculation
+#         lnPEx_nomean = lnPEx - np.mean(lnPEx, axis=-1, keepdims=True) # normalize each roi timeseries
+#         # Get std for Pearson correlation calculation
+#         lnPEx_std = np.std(lnPEx, axis=-1)
+#         lnPEx_std[lnPEx_std == 0] = 1 # Prevent std = 0 problems
+#         # Prepare con matrix
+#         con0 = np.zeros((n_roi,n_roi))
+#         for roi_r, y0 in enumerate(x0): # for each y0
+#             # Calculate orthogonalized signal y with respect to x for all x
+#             # Using y_ort = imag(y*x_conj/|x|)
+#             # I checked the formula in temp_v3 and it works as intended
+#             # I want to orthogonalize element wise for each timepoint
+#             y0_ort = (y0*x0_conj_scaled).imag
+#             # Here y0_ort.shape = (n_roi, n_timepoints)
+#             # So y is current roi and the first axis gives each x it is orthogonalized to
+#             # Take the abs to get power envelope
+#             y0_ort = np.abs(y0_ort)
+#             # Prevent log(0) error when calculating y_ort on y
+#             y0_ort[roi_r] = 1. # this will be 0 after mean subtraction
+#             # Take square power envelope
+#             PEy = np.square(y0_ort) # squared power envelope
+#             # Take log transform
+#             lnPEy = np.log(PEy+epsilon)
+#             # Remove mean for pearson correlation calculation
+#             lnPEy_nomean = lnPEy - np.mean(lnPEy, axis=-1, keepdims=True)
+#             # Get std for Pearson correlation calculation
+#             lnPEy_std = np.std(lnPEy, axis=-1)
+#             lnPEy_std[lnPEy_std == 0] = 1.
+#             # Pearson correlation is the expectation of X_nomean * Y_nomean for each time series, divided by the standard deviations
+#             PEC = np.mean(lnPEx_nomean*lnPEy_nomean, axis=-1)
+#             PEC /= lnPEx_std
+#             PEC /= lnPEy_std
+#             con0[roi_r] = PEC
+#         # The con0 connectivity matrix should be read as correlation between
+#         # orthogonalized y (row number) and x (column number)
+#         # It is not symmetrical, as cor(roi2_ort, roi1) is not cor(roi1_ort, roi2)
+#         # To make it symmetrical, the average of the absolute correlations
+#         # of the 2 possibilities for each pair is taken
+#         con0 = np.abs(con0)
+#         con0 = (con0.T+con0)/2.
+#         # Fisher's z transform - which is equivalent to arctanh
+#         con0 = np.arctanh(con0)
+#         # The diagonal is 0 by construction: y0_ort was set to 1 for the ROI itself,
+#         # which becomes exactly 0 after the mean subtraction, so no explicit zeroing is needed
         
-        # Save to array
-        PEC_con0[:,:,list(freq_bands.keys()).index(fname)] = con0
-    return PEC_con0
-
-# Prepare variables
-Freq_Bands = {"delta": [1.25, 4.0],
-              "theta": [4.0, 8.0],
-              "alpha": [8.0, 13.0],
-              "beta": [13.0, 30.0],
-              "gamma": [30.0, 49.0]}
-n_freq_bands = len(Freq_Bands)
-n_roi = len(labels)
-
-# Get current time
-c_time1 = time.localtime()
-c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1)
-print(c_time1)
-
-# PEC analysis
-PEC_data_list = [0]*n_subjects
-STCs_list = [0]*n_subjects
-
-# Using inverse operator as generator interferes with concurrent processes
-# If I run it for multiple subjects I run out of ram
-# Thus concurrent processes are used inside the for loop
-def PEC_analysis(input_args): # iterable epoch number and corresponding ts
-    i2, ts = input_args
-    # Estimate PEC
-    PEC_con0 = PEC_estimation(ts, Freq_Bands, sfreq)
-    print("Finished {} out of {} epochs".format(i2+1,n_epochs))
-    return i2, PEC_con0, ts
-
-for i in range(n_subjects):
-    n_epochs, n_ch, n_timepoints = source_epochs[i].get_data().shape
-    # Use different forward solutions depending on number of channels
-    cur_subject_id = Subject_id[i]
-    fwd = fwds[i]
+#         # Save to array
+#         PEC_con0[:,:,list(freq_bands.keys()).index(fname)] = con0
+#     return PEC_con0
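+
+# # Minimal usage sketch (illustrative assumption, not study data): calling e.g.
+# # PEC_estimation(np.random.randn(5, 800), {"alpha": [8.0, 13.0]}, sfreq=200)
+# # would return a (5, 5, 1) array, i.e. ROI x ROI x frequency band, with a zero diagonal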
+
+# # Prepare variables
+# Freq_Bands = {"delta": [1.25, 4.0],
+#               "theta": [4.0, 8.0],
+#               "alpha": [8.0, 13.0],
+#               "beta": [13.0, 30.0],
+#               "gamma": [30.0, 49.0]}
+# n_freq_bands = len(Freq_Bands)
+# n_roi = len(labels)
+
+# # Get current time
+# c_time1 = time.localtime()
+# c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1)
+# print(c_time1)
+
+# # PEC analysis
+# PEC_data_list = [0]*n_subjects
+# STCs_list = [0]*n_subjects
+
+# # Using inverse operator as generator interferes with concurrent processes
+# # If I run it for multiple subjects I run out of ram
+# # Thus concurrent processes are used inside the for loop
+# def PEC_analysis(input_args): # iterable epoch number and corresponding ts
+#     i2, ts = input_args
+#     # Estimate PEC
+#     PEC_con0 = PEC_estimation(ts, Freq_Bands, sfreq)
+#     print("Finished {} out of {} epochs".format(i2+1,n_epochs))
+#     return i2, PEC_con0, ts
+
+# for i in range(n_subjects):
+#     n_epochs, n_ch, n_timepoints = source_epochs[i].get_data().shape
+#     # Use different forward solutions depending on number of channels
+#     cur_subject_id = Subject_id[i]
+#     fwd = fwds[i]
     
-    # Using assumption about equal variance and no correlations I make a diagonal matrix
-    # Using the default option for 0.2µV std for EEG data
-    noise_cov = mne.make_ad_hoc_cov(source_epochs[i].info, None)
+#     # Using assumption about equal variance and no correlations I make a diagonal matrix
+#     # Using the default option for 0.2µV std for EEG data
+#     noise_cov = mne.make_ad_hoc_cov(source_epochs[i].info, None)
     
-    # Make inverse operator
-    # Using default depth parameter = 0.8 and free orientation (loose = 1)
-    inverse_operator = mne.minimum_norm.make_inverse_operator(source_epochs[i].info,
-                                                              fwd, noise_cov,
-                                                              loose = 1, depth = 0.8,
-                                                              verbose = 0)
-    src_inv = inverse_operator["src"]
-    # Compute inverse solution and retrieve time series for each label
-    # Preallocate memory
-    label_ts = np.full((n_epochs,len(labels),n_timepoints),np.nan)
-    # Define regularization
-    snr = 9 # Zhang et al., 2020 used delta = 1/81, i.e. the inverse squared SNR, which corresponds to lambda2 = 1/snr**2
-    # A for loop is used for each label due to memory issues when doing all labels at the same time
-    for l in range(len(labels)):
-        stc = mne.minimum_norm.apply_inverse_epochs(source_epochs[i],inverse_operator,
-                                                    lambda2 = 1/(snr**2),
-                                                    label = labels[l],
-                                                    pick_ori = "vector",
-                                                    return_generator=False,
-                                                    method = "MNE", verbose = 0)
-        # Use PCA to reduce the 3 orthogonal directions to 1 principal direction with max power
-        # There can be ambiguity about the orientation, thus the one that
-        # is pointing most "normal", i.e. closest to 90 degrees to the skull, is used
-        stc_pca = [0]*len(stc)
-        for ep in range(n_epochs):
-            stc_pca[ep], pca_dir = stc[ep].project(directions="pca", src=src_inv)
-        # Get mean time series for the whole label
-        temp_label_ts = mne.extract_label_time_course(stc_pca, labels[l], src_inv, mode="mean_flip",
-                                         return_generator=False, verbose=0)
-        # Save to array
-        label_ts[:,l,:] = np.squeeze(np.array(temp_label_ts))
-        print("Finished estimating STC for {} out of {} ROIs".format(l+1,len(labels)))
+#     # Make inverse operator
+#     # Using default depth parameter = 0.8 and free orientation (loose = 1)
+#     inverse_operator = mne.minimum_norm.make_inverse_operator(source_epochs[i].info,
+#                                                               fwd, noise_cov,
+#                                                               loose = 1, depth = 0.8,
+#                                                               verbose = 0)
+#     src_inv = inverse_operator["src"]
+#     # Compute inverse solution and retrieve time series for each label
+#     # Preallocate memory
+#     label_ts = np.full((n_epochs,len(labels),n_timepoints),np.nan)
+#     # Define regularization
+#     snr = 9 # Zhang et al., 2020 used delta = 1/81, i.e. the inverse squared SNR, which corresponds to lambda2 = 1/snr**2
+#     # A for loop is used for each label due to memory issues when doing all labels at the same time
+#     for l in range(len(labels)):
+#         stc = mne.minimum_norm.apply_inverse_epochs(source_epochs[i],inverse_operator,
+#                                                     lambda2 = 1/(snr**2),
+#                                                     label = labels[l],
+#                                                     pick_ori = "vector",
+#                                                     return_generator=False,
+#                                                     method = "MNE", verbose = 0)
+#         # Use PCA to reduce the 3 orthogonal directions to 1 principal direction with max power
+#         # There can be ambiguity about the orientation, thus the one that
+#         # is pointing most "normal", i.e. closest to 90 degrees to the skull, is used
+#         stc_pca = [0]*len(stc)
+#         for ep in range(n_epochs):
+#             stc_pca[ep], pca_dir = stc[ep].project(directions="pca", src=src_inv)
+#         # Get mean time series for the whole label
+#         temp_label_ts = mne.extract_label_time_course(stc_pca, labels[l], src_inv, mode="mean_flip",
+#                                          return_generator=False, verbose=0)
+#         # Save to array
+#         label_ts[:,l,:] = np.squeeze(np.array(temp_label_ts))
+#         print("Finished estimating STC for {} out of {} ROIs".format(l+1,len(labels)))
     
-    # Free up memory
-    del stc
-
-    # Prepare variables
-    sfreq=source_epochs[i].info["sfreq"]
-    n_epochs = len(source_epochs[i])
-    # Estimate the pairwise PEC for each epoch
-    PEC_con_subject = np.zeros((n_epochs,n_roi,n_roi,n_freq_bands))
-    stcs0 = np.zeros((n_epochs,n_roi,int(sfreq)*4)) # 4s epochs
-    # Make list of arguments to pass into PEC_analysis using the helper func
-    args = []
-    for i2 in range(n_epochs):
-        args.append((i2,label_ts[i2]))
+#     # Free up memory
+#     del stc
+
+#     # Prepare variables
+#     sfreq=source_epochs[i].info["sfreq"]
+#     n_epochs = len(source_epochs[i])
+#     # Estimate the pairwise PEC for each epoch
+#     PEC_con_subject = np.zeros((n_epochs,n_roi,n_roi,n_freq_bands))
+#     stcs0 = np.zeros((n_epochs,n_roi,int(sfreq)*4)) # 4s epochs
+#     # Make list of arguments to pass into PEC_analysis using the helper func
+#     args = []
+#     for i2 in range(n_epochs):
+#         args.append((i2,label_ts[i2]))
     
-    with concurrent.futures.ProcessPoolExecutor(max_workers=16) as executor:
-        for i2, PEC_result, stc_result in executor.map(PEC_analysis, args): # Function and arguments
-            PEC_con_subject[i2] = PEC_result
-            stcs0[i2] = stc_result
+#     with concurrent.futures.ProcessPoolExecutor(max_workers=16) as executor:
+#         for i2, PEC_result, stc_result in executor.map(PEC_analysis, args): # Function and arguments
+#             PEC_con_subject[i2] = PEC_result
+#             stcs0[i2] = stc_result
     
-    # Save to list
-    PEC_data_list[i] = PEC_con_subject # [subject](epoch,ch,ch,freq)
-    STCs_list[i] = stcs0 # [subject][epoch,roi,timepoint]
+#     # Save to list
+#     PEC_data_list[i] = PEC_con_subject # [subject](epoch,ch,ch,freq)
+#     STCs_list[i] = stcs0 # [subject][epoch,roi,timepoint]
     
-    # Print progress
-    print("Finished {} out of {} subjects".format(i+1,n_subjects))
+#     # Print progress
+#     print("Finished {} out of {} subjects".format(i+1,n_subjects))
 
-# Get current time
-c_time2 = time.localtime()
-c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2)
-print("Started", c_time1, "\nFinished",c_time2)
+# # Get current time
+# c_time2 = time.localtime()
+# c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2)
+# print("Started", c_time1, "\nFinished",c_time2)
 
-with open(Feature_savepath+"PEC_each_epoch_drop_interpol_ch_fix_snr.pkl", "wb") as file:
-    pickle.dump(PEC_data_list, file)
-with open(Feature_savepath+"STCs_each_epoch_drop_interpol_ch_fix_snr.pkl", "wb") as file:
-    pickle.dump(STCs_list, file)
+# with open(Feature_savepath+"PEC_each_epoch_drop_interpol_ch_fix_snr.pkl", "wb") as file:
+#     pickle.dump(PEC_data_list, file)
+# with open(Feature_savepath+"STCs_each_epoch_drop_interpol_ch_fix_snr.pkl", "wb") as file:
+#     pickle.dump(STCs_list, file)
+
+# # # # Load
+# # with open(Feature_savepath+"PEC_each_epoch_drop_interpol_ch_fix_snr.pkl", "rb") as file:
+# #     PEC_data_list = pickle.load(file)
 
 # # # Load
-# with open(Feature_savepath+"PEC_each_epoch_drop_interpol_ch_fix_snr.pkl", "rb") as file:
-#     PEC_data_list = pickle.load(file)
+# # with open(Feature_savepath+"STCs_each_epoch_drop_interpol_ch_fix_snr.pkl", "rb") as file:
+# #     STCs_list = pickle.load(file)
+
+# # Average over eye status
+# eye_status = list(source_epochs[0].event_id.keys())
+# n_eye_status = len(eye_status)
+# pec_data = np.zeros((n_subjects,n_eye_status,n_roi,n_roi,n_freq_bands))
+# for i in range(n_subjects):
+#     # Get indices for eyes open and closed
+#     EC_index = source_epochs[i].events[:,2] == 1
+#     EO_index = source_epochs[i].events[:,2] == 2
+#     # Average over the indices and save to array
+#     pec_data[i,0] = np.mean(PEC_data_list[i][EC_index], axis=0)
+#     pec_data[i,1] = np.mean(PEC_data_list[i][EO_index], axis=0)
+#     # Only use the lower triangle, as the diagonal should be 0 (or very small due to numerical errors)
+#     # and the matrix is symmetric
+#     for f in range(n_freq_bands):
+#         pec_data[i,0,:,:,f] = np.tril(pec_data[i,0,:,:,f],k=-1)
+#         pec_data[i,1,:,:,f] = np.tril(pec_data[i,1,:,:,f],k=-1)
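+#     # Reminder: np.tril(..., k=-1) zeroes the diagonal and upper triangle, e.g.
+#     # np.tril(np.ones((3,3)), k=-1) keeps only the entries (1,0), (2,0) and (2,1),
+#     # which is why the .nonzero() filtering further below keeps only lower-triangle values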
+
+# # Also save as dataframe format for feature selection
+# # Convert to Pandas dataframe
+# # The dimensions will each be a column with numbers and the last column will be the actual values
+# arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, pec_data.shape), indexing="ij"))) + [pec_data.ravel()])
+# pec_data_df = pd.DataFrame(arr, columns = ["Subject_ID", "Eye_status", "chx", "chy", "Freq_band", "Value"])
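+# # As a small illustration of the meshgrid/ravel idiom above (toy array, not study data):
+# # for A = np.arange(4).reshape(2, 2) the same expression yields rows [i, j, A[i, j]],
+# # i.e. [[0,0,0],[0,1,1],[1,0,2],[1,1,3]], one row per element with its indices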
+# # Change from numerical coding to actual values
+# eye_status = list(source_epochs[0].event_id.keys())
+# freq_bands_name = list(Freq_Bands.keys())
+# label_names = [label.name for label in labels]
+
+# index_values = [Subject_id,eye_status,label_names,label_names,freq_bands_name]
+# for col in range(len(index_values)):
+#     col_name = pec_data_df.columns[col]
+#     for shape in range(pec_data.shape[col]): # notice not dataframe but the array
+#         pec_data_df.loc[pec_data_df.iloc[:,col] == shape,col_name]\
+#         = index_values[col][shape]
+
+# # Add group status
+# Group_status = np.array(["CTRL"]*len(pec_data_df["Subject_ID"]))
+# Group_status[np.array([i in cases for i in pec_data_df["Subject_ID"]])] = "PTSD"
+# # Add to dataframe
+# pec_data_df.insert(3, "Group_status", Group_status)
+
+# # Remove all diagonal and upper-triangle entries by removing zeros
+# pec_data_df = pec_data_df.iloc[pec_data_df["Value"].to_numpy().nonzero()]
+
+# # Save df
+# pec_data_df.to_pickle(os.path.join(Feature_savepath,"pec_data_drop_interpol_ch_df.pkl"))
+
+# # %% Sparse clustering of PEC for subtyping PTSD group
+# # They did it for both eye status together, so all data in one matrix
+# # Load PEC df
+# # pec_data_df = pd.read_pickle(os.path.join(Feature_savepath,"pec_data_df.pkl"))
+# pec_data_df = pd.read_pickle(os.path.join(Feature_savepath,"pec_data_drop_interpol_ch_df.pkl"))
+
+# # Convert to wide format
+# # Make function to add measurement column for indexing
+# def add_measurement_column(df, measurement = "Text"):
+#     dummy_variable = [measurement]*df.shape[0]
+#     df.insert(1, "Measurement", dummy_variable)
+#     return df
+# # Make function to convert column tuple to string
+# def convertTupleHeader(header):
+#     header = list(header)
+#     header_str = "_".join(header)
+#     return header_str
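+# # For example, a hypothetical column tuple ("PEC", "EC", "ANG-lh", "IPS-rh", "alpha")
+# # would be converted to the string "PEC_EC_ANG-lh_IPS-rh_alpha"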
+
+# # Prepare overall dataframe
+# PEC_df = pd.DataFrame(Subject_id, columns = ["Subject_ID"])
+# # Add PEC
+# pec_data_df = add_measurement_column(pec_data_df, "PEC")
+# temp_df = pec_data_df.pivot_table(index="Subject_ID",columns=["Measurement",
+#                                     "Eye_status", "chx", "chy",
+#                                     "Freq_band"], dropna=True,
+#                                values="Value").reset_index(drop=True)
+# # check NaN is properly dropped and subject index is correct
+# assert pec_data_df.shape[0] == np.prod(temp_df.shape)
+# test1 = pec_data_df.iloc[np.random.randint(n_subjects),:]
+# assert test1["Value"] ==\
+#     temp_df[test1[1]][test1[2]][test1[3]][test1[5]][test1[6]][Subject_id.index(test1[0])]
+# # Fix column names
+# temp_df.columns = [convertTupleHeader(temp_df.columns[i]) for i in range(len(temp_df.columns))]
+
+# PEC_df = pd.concat([PEC_df,temp_df], axis=1)
+
+# # Add group status
+# Groups = ["CTRL", "PTSD"]
+# Group_status = np.array([0]*PEC_df.shape[0]) # CTRL = 0
+# Group_status[np.array([i in cases for i in PEC_df["Subject_ID"]])] = 1 # PTSD = 1
+# PEC_df.insert(1, "Group_status", Group_status)
+
+# # Only use PTSD patient group
+# PEC_df2 = PEC_df.loc[PEC_df["Group_status"]==1,:]
+
+# Subject_info_cols = ["Subject_ID","Group_status"]
+
+# # Use grid search and permutations to estimate the gap statistic and use it to
+# # determine the number of clusters and the sparsity s
+# # I will use 100 permutations and test 2 to 6 clusters, as in Zhang et al., 2020
+# # (There is an error when trying to determine the gap statistic for 1 cluster; the same happens in the R package)
+# max_clusters = 6
+# n_sparsity_feat = 20
+# perm_res = []
+# for k in range(1,max_clusters):
+#     # Cannot permute with 1 cluster
+#     n_clusters = k+1
+#     x = np.array(PEC_df2.copy().drop(Subject_info_cols, axis=1))
+#     perm = pysparcl.cluster.permute_modified(x, k=n_clusters, verbose=True,
+#                                              nvals=n_sparsity_feat, nperms=100)
+#     perm_res.append(perm)
+
+# # Save the results
+# with open(Feature_savepath+"PEC_drop_interpol_ch_kmeans_perm.pkl", "wb") as file:
+#     pickle.dump(perm_res, file)
 
-# # Load
+# # # Load
+# # with open(Feature_savepath+"PEC_drop_interpol_ch_kmeans_perm.pkl", "rb") as file:
+# #     perm_res = pickle.load(file)
+
+# # Convert results to array
+# perm_res_arr = np.zeros((len(perm_res)*n_sparsity_feat,4))
+# for i in range(len(perm_res)):
+#     _, gaps, sdgaps, wbounds, _ = perm_res[i].values()
+#     for i2 in range(n_sparsity_feat):
+#         perm_res_arr[20*i+i2,0] = i+2 # cluster size
+#         perm_res_arr[20*i+i2,1] = gaps[i2] # gap statistic
+#         perm_res_arr[20*i+i2,2] = sdgaps[i2] # gap statistic std
+#         perm_res_arr[20*i+i2,3] = wbounds[i2] # sparsity feature s
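+# # Note: the hard-coded 20 equals n_sparsity_feat, so row 20*i + i2 holds
+# # (n_clusters = i+2, gap, gap std, sparsity s) for sparsity index i2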
+
+# # For each sparsity s, determine the best k using the one-standard-error criterion,
+# # meaning that, for a fixed s, the smallest value of k is chosen
+# # that fulfills Gap(k) >= Gap(k+1) - std(k+1)
+# def one_standard_deviation_search(gaps, std):
+#     best_gaps = np.argmax(gaps)
+#     current_gaps = gaps[best_gaps]
+#     current_std = std[best_gaps]
+#     current_gaps_idx = best_gaps
+#     while (gaps[current_gaps_idx-1] >= current_gaps - current_std):
+#         if current_gaps_idx == 0:
+#             break
+#         else:
+#             current_gaps_idx -= 1
+#             current_gaps = gaps[current_gaps_idx]
+#             current_std = std[current_gaps_idx]
+#     out = current_gaps, current_std, current_gaps_idx
+#     return out
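+# # Toy example with assumed numbers: for gaps = [1.0, 1.4, 1.5] and std = [0.1, 0.1, 0.2]
+# # the search starts at the maximum (index 2) and steps down while the smaller-k gap is
+# # within one std (1.4 >= 1.5 - 0.2), so it returns index 1, i.e. the smallest such k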
+
+# best_ks = np.zeros((n_sparsity_feat, 2))
+# all_s = np.unique(perm_res_arr[:,3])
+# plt.figure(figsize=(12,12))
+# for i2 in range(n_sparsity_feat):
+#     current_s = all_s[i2]
+#     gaps = perm_res_arr[perm_res_arr[:,3] == current_s,1]
+#     std = perm_res_arr[perm_res_arr[:,3] == current_s,2]
+#     _, _, idx = one_standard_deviation_search(gaps, std)
+#     # Save to array
+#     best_ks[i2,0] = current_s
+#     best_ks[i2,1] = perm_res_arr[perm_res_arr[:,3] == current_s,0][idx]
+#     # Plot gap
+#     plt.errorbar(perm_res_arr[perm_res_arr[:,3] == current_s,0].astype("int"),
+#              gaps, yerr=std, capsize=5, label = np.round(current_s,3))
+# plt.title("Gap statistic for different fixed s")
+# plt.legend(loc=1)
+# plt.xlabel("Number of clusters")
+# plt.ylabel("Gap statistic")
+
+# best_k = int(scipy.stats.mode(best_ks[:,1])[0])
+
+# # Determine s using fixed k as lowest s within 1 std of max gap statistic
+# # According to Witten & Tibshirani, 2010
+# best_gaps_idx = np.argmax(perm_res_arr[perm_res_arr[:,0] == best_k,1])
+# best_gaps = perm_res_arr[perm_res_arr[:,0] == best_k,1][best_gaps_idx]
+# best_gaps_std = perm_res_arr[perm_res_arr[:,0] == best_k,2][best_gaps_idx]
+# one_std_crit = perm_res_arr[perm_res_arr[:,0] == best_k,1]>=best_gaps-best_gaps_std
+
+# best_s = np.array([perm_res_arr[perm_res_arr[:,0] == best_k,3][one_std_crit][0]])
+
+# # Perform clustering with k clusters
+# x = np.array(PEC_df2.copy().drop(Subject_info_cols, axis=1))
+# sparcl = pysparcl.cluster.kmeans(x, k=best_k, wbounds=best_s)[0]
+
+# # Save the results
+# with open(Feature_savepath+"PEC_drop_interpol_ch_sparse_kmeans.pkl", "wb") as file:
+#     pickle.dump(sparcl, file)
+
+# # Get overview of the features chosen and summarize feature type with countplot
+# nonzero_idx = sparcl["ws"].nonzero()
+# sparcl_features = PEC_df2.copy().drop(Subject_info_cols, axis=1).columns[nonzero_idx]
+
+# # Prepare variables
+# Freq_Bands = {"delta": [1.25, 4.0],
+#               "theta": [4.0, 8.0],
+#               "alpha": [8.0, 13.0],
+#               "beta": [13.0, 30.0],
+#               "gamma": [30.0, 49.0]}
+# n_freq_bands = len(Freq_Bands)
+# eye_status = list(source_epochs[0].event_id.keys())
+# n_eye_status = len(eye_status)
+
+# sparcl_feat = []
+# sparcl_feat_counts = []
+# for e in range(n_eye_status):
+#     ee = eye_status[e]
+#     for f in range(n_freq_bands):
+#         ff = list(Freq_Bands.keys())[f]
+#         temp_feat = sparcl_features[sparcl_features.str.contains(("_"+ee))]
+#         temp_feat = temp_feat[temp_feat.str.contains(("_"+ff))]
+#         # Save to list
+#         sparcl_feat.append(temp_feat)
+#         sparcl_feat_counts.append(["{}_{}".format(ee,ff), len(temp_feat)])
+
+# # Convert the list to dataframe to use in countplot
+# sparcl_feat_counts_df = pd.DataFrame(columns=["Eye_status", "Freq_band"])
+# for i in range(len(sparcl_feat_counts)):
+#     # If this feature type does not exist, then skip it
+#     if sparcl_feat_counts[i][1] == 0:
+#         continue
+#     ee, ff = sparcl_feat_counts[i][0].split("_")
+#     counts = sparcl_feat_counts[i][1]
+#     temp_df = pd.DataFrame({"Eye_status":np.repeat(ee,counts),
+#                             "Freq_band":np.repeat(ff,counts)})
+#     sparcl_feat_counts_df = sparcl_feat_counts_df.append(temp_df, ignore_index=True)
+
+# # Fix Freq_band categorical order
+# cat_type = pd.CategoricalDtype(categories=list(Freq_Bands.keys()), ordered=True)
+# sparcl_feat_counts_df["Freq_band"] = sparcl_feat_counts_df["Freq_band"].astype(cat_type)
+
+# plt.figure(figsize=(8,8))
+# g = sns.countplot(y="Freq_band", hue="Eye_status", data=sparcl_feat_counts_df)
+# plt.title("PEC Sparse K-means features")
+# plt.xlabel("Number of non-zero weights")
+# plt.ylabel("Frequency Band")
+
+# # %% Functional connectivity in source space
+# # The MNE implementation of PLV and wPLI uses phase across trials (epochs), e.g. for ERPs
+# # I'll use my own manually implemented PLV and wPLI across time and then average across epochs
+# # Notice that the newer MNE-Connectivity library can now also use phase across time
+
+# sfreq = final_epochs[0].info["sfreq"]
+# # There is an error when using fewer than 5 cycles for spectrum estimation
+# # 1 Hz is too low with an epoch length of 4 s, thus I changed fmin to 1.25 Hz for delta
+# Freq_Bands = {"delta": [1.25, 4.0],
+#               "theta": [4.0, 8.0],
+#               "alpha": [8.0, 13.0],
+#               "beta": [13.0, 30.0],
+#               "gamma": [30.0, 49.0]}
+# n_freq_bands = len(Freq_Bands)
+# freq_centers = np.array([2.5,6,10.5,21.5,40])
+# # Convert to tuples for the mne function
+# fmin=tuple([list(Freq_Bands.values())[f][0] for f in range(len(Freq_Bands))])
+# fmax=tuple([list(Freq_Bands.values())[f][1] for f in range(len(Freq_Bands))])
+
+# # Make linspace array for morlet waves
+# freq_centers = np.arange(fmin[0],fmax[-1]+0.25,0.25)
+# # Prepare Morlets
+# morlets = mne.time_frequency.tfr.morlet(sfreq,freq_centers,n_cycles=3)
+
+# # Make freqs array for indexing
+# freqs0 = [0]*n_freq_bands
+# for f in range(n_freq_bands):
+#     freqs0[f] = freq_centers[(freq_centers>=fmin[f]) & (freq_centers<=fmax[f])]
+
+# # The built-in connectivity function gives an (n_channel, n_channel, freqs) output
+# # A for loop over subject ID and eye status is implemented
+# n_subjects = len(Subject_id)
+# eye_status = list(final_epochs[0].event_id.keys())
+# n_eye_status = len(eye_status)
+# ch_names = final_epochs[0].info["ch_names"]
+
+# # Load source labels
+# with open("custom_aparc2009_Li_et_al_2022.pkl", "rb") as file:
+#     labels = pickle.load(file)
+# label_names = [label.name for label in labels]
+# n_sources = len(label_names)
+
+# # Connectivity methods
+# connectivity_methods = ["coh","imcoh","plv","wpli"]
+# n_con_methods=len(connectivity_methods)
+
+# # Number of pairwise ch connections
+# n_ch_connections = scipy.special.comb(n_sources,2, exact=True, repetition=False)
+
+# # Load source time series
 # with open(Feature_savepath+"STCs_each_epoch_drop_interpol_ch_fix_snr.pkl", "rb") as file:
 #     STCs_list = pickle.load(file)
 
-# Average over eye status
-eye_status = list(source_epochs[0].event_id.keys())
-n_eye_status = len(eye_status)
-pec_data = np.zeros((n_subjects,n_eye_status,n_roi,n_roi,n_freq_bands))
-for i in range(n_subjects):
-    # Get indices for eyes open and closed
-    EC_index = source_epochs[i].events[:,2] == 1
-    EO_index = source_epochs[i].events[:,2] == 2
-    # Average over the indices and save to array
-    pec_data[i,0] = np.mean(PEC_data_list[i][EC_index], axis=0)
-    pec_data[i,1] = np.mean(PEC_data_list[i][EO_index], axis=0)
-    # Only use the lower triangle, as the diagonal should be 0 (or very small due to numerical errors)
-    # and the matrix is symmetric
-    for f in range(n_freq_bands):
-        pec_data[i,0,:,:,f] = np.tril(pec_data[i,0,:,:,f],k=-1)
-        pec_data[i,1,:,:,f] = np.tril(pec_data[i,1,:,:,f],k=-1)
-
-# Also save as dataframe format for feature selection
-# Convert to Pandas dataframe
-# The dimensions will each be a column with numbers and the last column will be the actual values
-arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, pec_data.shape), indexing="ij"))) + [pec_data.ravel()])
-pec_data_df = pd.DataFrame(arr, columns = ["Subject_ID", "Eye_status", "chx", "chy", "Freq_band", "Value"])
-# Change from numerical coding to actual values
-eye_status = list(source_epochs[0].event_id.keys())
-freq_bands_name = list(Freq_Bands.keys())
-label_names = [label.name for label in labels]
-
-index_values = [Subject_id,eye_status,label_names,label_names,freq_bands_name]
-for col in range(len(index_values)):
-    col_name = pec_data_df.columns[col]
-    for shape in range(pec_data.shape[col]): # notice not dataframe but the array
-        pec_data_df.loc[pec_data_df.iloc[:,col] == shape,col_name]\
-        = index_values[col][shape]
-
-# Add group status
-Group_status = np.array(["CTRL"]*len(pec_data_df["Subject_ID"]))
-Group_status[np.array([i in cases for i in pec_data_df["Subject_ID"]])] = "PTSD"
-# Add to dataframe
-pec_data_df.insert(3, "Group_status", Group_status)
-
-# Remove all diagonal and upper-triangle entries by removing zeros
-pec_data_df = pec_data_df.iloc[pec_data_df["Value"].to_numpy().nonzero()]
-
-# Save df
-pec_data_df.to_pickle(os.path.join(Feature_savepath,"pec_data_drop_interpol_ch_df.pkl"))
-
-# %% Sparse clustering of PEC for subtyping PTSD group
-# They did it for both eye status together, so all data in one matrix
-# Load PEC df
-# pec_data_df = pd.read_pickle(os.path.join(Feature_savepath,"pec_data_df.pkl"))
-pec_data_df = pd.read_pickle(os.path.join(Feature_savepath,"pec_data_drop_interpol_ch_df.pkl"))
-
-# Convert to wide format
-# Make function to add measurement column for indexing
-def add_measurement_column(df, measurement = "Text"):
-    dummy_variable = [measurement]*df.shape[0]
-    df.insert(1, "Measurement", dummy_variable)
-    return df
-# Make function to convert column tuple to string
-def convertTupleHeader(header):
-    header = list(header)
-    str = "_".join(header)
-    return str
-
-# Prepare overall dataframe
-PEC_df = pd.DataFrame(Subject_id, columns = ["Subject_ID"])
-# Add PEC
-pec_data_df = add_measurement_column(pec_data_df, "PEC")
-temp_df = pec_data_df.pivot_table(index="Subject_ID",columns=["Measurement",
-                                    "Eye_status", "chx", "chy",
-                                    "Freq_band"], dropna=True,
-                               values="Value").reset_index(drop=True)
-# check NaN is properly dropped and subject index is correct
-assert pec_data_df.shape[0] == np.prod(temp_df.shape)
-test1 = pec_data_df.iloc[np.random.randint(n_subjects),:]
-assert test1["Value"] ==\
-    temp_df[test1[1]][test1[2]][test1[3]][test1[5]][test1[6]][Subject_id.index(test1[0])]
-# Fix column names
-temp_df.columns = [convertTupleHeader(temp_df.columns[i]) for i in range(len(temp_df.columns))]
-
-PEC_df = pd.concat([PEC_df,temp_df], axis=1)
-
-# Add group status
-Groups = ["CTRL", "PTSD"]
-Group_status = np.array([0]*PEC_df.shape[0]) # CTRL = 0
-Group_status[np.array([i in cases for i in PEC_df["Subject_ID"]])] = 1 # PTSD = 1
-PEC_df.insert(1, "Group_status", Group_status)
-
-# Only use PTSD patient group
-PEC_df2 = PEC_df.loc[PEC_df["Group_status"]==1,:]
-
-Subject_info_cols = ["Subject_ID","Group_status"]
-
-# Use grid search and permutations to estimate the gap statistic and use it to
-# determine the number of clusters and the sparsity s
-# I will use 100 permutations and test 2 to 6 clusters, as in Zhang et al., 2020
-# (There is an error when trying to determine the gap statistic for 1 cluster; the same happens in the R package)
-max_clusters = 6
-n_sparsity_feat = 20
-perm_res = []
-for k in range(1,max_clusters):
-    # Cannot permute with 1 cluster
-    n_clusters = k+1
-    x = np.array(PEC_df2.copy().drop(Subject_info_cols, axis=1))
-    perm = pysparcl.cluster.permute_modified(x, k=n_clusters, verbose=True,
-                                             nvals=n_sparsity_feat, nperms=100)
-    perm_res.append(perm)
-
-# Save the results
-with open(Feature_savepath+"PEC_drop_interpol_ch_kmeans_perm.pkl", "wb") as file:
-    pickle.dump(perm_res, file)
-
-# # Load
-# with open(Feature_savepath+"PEC_drop_interpol_ch_kmeans_perm.pkl", "rb") as file:
-#     perm_res = pickle.load(file)
-
-# Convert results to array
-perm_res_arr = np.zeros((len(perm_res)*n_sparsity_feat,4))
-for i in range(len(perm_res)):
-    _, gaps, sdgaps, wbounds, _ = perm_res[i].values()
-    for i2 in range(n_sparsity_feat):
-        perm_res_arr[20*i+i2,0] = i+2 # cluster size
-        perm_res_arr[20*i+i2,1] = gaps[i2] # gap statistic
-        perm_res_arr[20*i+i2,2] = sdgaps[i2] # gap statistic std
-        perm_res_arr[20*i+i2,3] = wbounds[i2] # sparsity feature s
-
-# For each sparsity s, determine the best k using the one-standard-error criterion,
-# meaning that, for a fixed s, the smallest value of k is chosen
-# that fulfills Gap(k) >= Gap(k+1) - std(k+1)
-def one_standard_deviation_search(gaps, std):
-    best_gaps = np.argmax(gaps)
-    current_gaps = gaps[best_gaps]
-    current_std = std[best_gaps]
-    current_gaps_idx = best_gaps
-    while (gaps[current_gaps_idx-1] >= current_gaps - current_std):
-        if current_gaps_idx == 0:
-            break
-        else:
-            current_gaps_idx -= 1
-            current_gaps = gaps[current_gaps_idx]
-            current_std = std[current_gaps_idx]
-    out = current_gaps, current_std, current_gaps_idx
-    return out
-
-best_ks = np.zeros((n_sparsity_feat, 2))
-all_s = np.unique(perm_res_arr[:,3])
-plt.figure(figsize=(12,12))
-for i2 in range(n_sparsity_feat):
-    current_s = all_s[i2]
-    gaps = perm_res_arr[perm_res_arr[:,3] == current_s,1]
-    std = perm_res_arr[perm_res_arr[:,3] == current_s,2]
-    _, _, idx = one_standard_deviation_search(gaps, std)
-    # Save to array
-    best_ks[i2,0] = current_s
-    best_ks[i2,1] = perm_res_arr[perm_res_arr[:,3] == current_s,0][idx]
-    # Plot gap
-    plt.errorbar(perm_res_arr[perm_res_arr[:,3] == current_s,0].astype("int"),
-             gaps, yerr=std, capsize=5, label = np.round(current_s,3))
-plt.title("Gap statistic for different fixed s")
-plt.legend(loc=1)
-plt.xlabel("Number of clusters")
-plt.ylabel("Gap statistic")
-
-best_k = int(scipy.stats.mode(best_ks[:,1])[0])
-
-# Determine s using fixed k as lowest s within 1 std of max gap statistic
-# According to Witten & Tibshirani, 2010
-best_gaps_idx = np.argmax(perm_res_arr[perm_res_arr[:,0] == best_k,1])
-best_gaps = perm_res_arr[perm_res_arr[:,0] == best_k,1][best_gaps_idx]
-best_gaps_std = perm_res_arr[perm_res_arr[:,0] == best_k,2][best_gaps_idx]
-one_std_crit = perm_res_arr[perm_res_arr[:,0] == best_k,1]>=best_gaps-best_gaps_std
-
-best_s = np.array([perm_res_arr[perm_res_arr[:,0] == best_k,3][one_std_crit][0]])
-
-# Perform clustering with k clusters
-x = np.array(PEC_df2.copy().drop(Subject_info_cols, axis=1))
-sparcl = pysparcl.cluster.kmeans(x, k=best_k, wbounds=best_s)[0]
-
-# Save the results
-with open(Feature_savepath+"PEC_drop_interpol_ch_sparse_kmeans.pkl", "wb") as file:
-    pickle.dump(sparcl, file)
-
-# Get overview of the features chosen and summarize feature type with countplot
-nonzero_idx = sparcl["ws"].nonzero()
-sparcl_features = PEC_df2.copy().drop(Subject_info_cols, axis=1).columns[nonzero_idx]
-
-# Prepare variables
-Freq_Bands = {"delta": [1.25, 4.0],
-              "theta": [4.0, 8.0],
-              "alpha": [8.0, 13.0],
-              "beta": [13.0, 30.0],
-              "gamma": [30.0, 49.0]}
-n_freq_bands = len(Freq_Bands)
-eye_status = list(source_epochs[0].event_id.keys())
-n_eye_status = len(eye_status)
-
-sparcl_feat = []
-sparcl_feat_counts = []
-for e in range(n_eye_status):
-    ee = eye_status[e]
-    for f in range(n_freq_bands):
-        ff = list(Freq_Bands.keys())[f]
-        temp_feat = sparcl_features[sparcl_features.str.contains(("_"+ee))]
-        temp_feat = temp_feat[temp_feat.str.contains(("_"+ff))]
-        # Save to list
-        sparcl_feat.append(temp_feat)
-        sparcl_feat_counts.append(["{}_{}".format(ee,ff), len(temp_feat)])
-
-# Convert the list to dataframe to use in countplot
-sparcl_feat_counts_df = pd.DataFrame(columns=["Eye_status", "Freq_band"])
-for i in range(len(sparcl_feat_counts)):
-    # If this feature type does not exist, then skip it
-    if sparcl_feat_counts[i][1] == 0:
-        continue
-    ee, ff = sparcl_feat_counts[i][0].split("_")
-    counts = sparcl_feat_counts[i][1]
-    temp_df = pd.DataFrame({"Eye_status":np.repeat(ee,counts),
-                            "Freq_band":np.repeat(ff,counts)})
-    sparcl_feat_counts_df = sparcl_feat_counts_df.append(temp_df, ignore_index=True)
-
-# Fix Freq_band categorical order
-cat_type = pd.CategoricalDtype(categories=list(Freq_Bands.keys()), ordered=True)
-sparcl_feat_counts_df["Freq_band"] = sparcl_feat_counts_df["Freq_band"].astype(cat_type)
-
-plt.figure(figsize=(8,8))
-g = sns.countplot(y="Freq_band", hue="Eye_status", data=sparcl_feat_counts_df)
-plt.title("PEC Sparse K-means features")
-plt.xlabel("Number of non-zero weights")
-plt.ylabel("Frequency Band")
-
-# %% Functional connectivity in source space
-# The MNE implementation of PLV and wPLI uses phase across trials (epochs), e.g. for ERPs
-# I'll use my own manually implemented PLV and wPLI across time and then average across epochs
-# Notice that the newer MNE-Connectivity library can now also use phase across time
-
-sfreq = final_epochs[0].info["sfreq"]
-# There is an error when using fewer than 5 cycles for spectrum estimation
-# 1 Hz is too low with an epoch length of 4 s, thus I changed fmin to 1.25 Hz for delta
-Freq_Bands = {"delta": [1.25, 4.0],
-              "theta": [4.0, 8.0],
-              "alpha": [8.0, 13.0],
-              "beta": [13.0, 30.0],
-              "gamma": [30.0, 49.0]}
-n_freq_bands = len(Freq_Bands)
-freq_centers = np.array([2.5,6,10.5,21.5,40])
-# Convert to tuples for the mne function
-fmin=tuple([list(Freq_Bands.values())[f][0] for f in range(len(Freq_Bands))])
-fmax=tuple([list(Freq_Bands.values())[f][1] for f in range(len(Freq_Bands))])
-
-# Make linspace array for morlet waves
-freq_centers = np.arange(fmin[0],fmax[-1]+0.25,0.25)
-# Prepare Morlets
-morlets = mne.time_frequency.tfr.morlet(sfreq,freq_centers,n_cycles=3)
-
-# Make freqs array for indexing
-freqs0 = [0]*n_freq_bands
-for f in range(n_freq_bands):
-    freqs0[f] = freq_centers[(freq_centers>=fmin[f]) & (freq_centers<=fmax[f])]
-
-# The in-built connectivity function gives an (n_channel, n_channel, freqs) output
-# For loop over subject ID and eye status is implemented
-n_subjects = len(Subject_id)
-eye_status = list(final_epochs[0].event_id.keys())
-n_eye_status = len(eye_status)
-ch_names = final_epochs[0].info["ch_names"]
-
-# Load source labels
-with open("custom_aparc2009_Li_et_al_2022.pkl", "rb") as file:
-    labels = pickle.load(file)
-label_names = [label.name for label in labels]
-n_sources = len(label_names)
-
-# Connectivity methods
-connectivity_methods = ["coh","imcoh","plv","wpli"]
-n_con_methods=len(connectivity_methods)
-
-# Number of pairwise ch connections
-n_ch_connections = scipy.special.comb(n_sources,2, exact=True, repetition=False)
-
-# Load source time series
-with open(Feature_savepath+"STCs_each_epoch_drop_interpol_ch_fix_snr.pkl", "rb") as file:
-    STCs_list = pickle.load(file)
-
-# I made my own slightly-optimized PLV & WPLI function
-# Version 2 based on Filter + Hilbert instead of Morlets
-def calculate_PLV_WPLI_across_time(data):
-    n_ch, n_time0 = data.shape
-    x = data.copy()
-    # Filter the signal in the different freq bands
-    con_array0 = np.zeros((2,n_ch,n_ch,n_freq_bands))
-    # con_array0[con_array0==0] = np.nan
-    for fname, frange in Freq_Bands.items():
-        fmin, fmax = [float(interval) for interval in frange]
-        signal_filtered = mne.filter.filter_data(x, sfreq, fmin, fmax,
-                                          fir_design="firwin", verbose=0)
-        # Filtering on finite signals will yield very low values for first
-        # and last timepoint, which can create outliers. E.g. 1e-29 compared to 1e-14
-        # This systematic error is removed by removing the first and last timepoint
-        signal_filtered = signal_filtered[:,1:-1]
-        # Hilbert transform to get complex signal
-        analytic_signal = scipy.signal.hilbert(signal_filtered)
-        # Calculate for the lower diagonal only as it is symmetric
-        for ch_r in range(n_ch):
-            for ch_c in range(n_ch):
-                if ch_r>ch_c:
-                    # =========================================================================
-                    # PLV over time corresponds to the absolute value (circular length) of the
-                    # mean across time of the unit vectors of the relative phases. So PLV will be 1 if
-                    # the phases of 2 signals maintain a constant lag
-                    # In equational form: PLV = 1/N * |sum(e^i(phase1-phase2))|
-                    # In code: abs(mean(exp(1i*phase_diff)))
-                    # =========================================================================
-                    # The real part corresponds to the amplitude and the imaginary part can be used to calculate the phase
-                    phase_diff = np.angle(analytic_signal[ch_r])-np.angle(analytic_signal[ch_c])
-                    # Convert phase difference to complex part i(phase1-phase2)
-                    phase_diff_im = 0*phase_diff+1j*phase_diff
-                    # Take the exponential, then the mean followed by absolute value
-                    PLV = np.abs(np.mean(np.exp(phase_diff_im)))
-                    # Save to array
-                    con_array0[0,ch_r,ch_c,list(Freq_Bands.keys()).index(fname)] = PLV
-                    # =========================================================================
-                    # PLI over time corresponds to the sign of the sine of relative phase
-                    # differences. So PLI will be 1 if one signal is always leading or
-                    # lagging behind the other signal. But it is insensitive to changes in
-                    # relative phase, as long as it is the same signal that leads.
-                    # If 2 signals are almost in phase, they might shift between lead/lag
-                    # due to small fluctuations from noise. This would lead to unstable
-                    # estimation of "phase" synchronisation.
-                    # The wPLI tries to correct for this by weighting the PLI with the
-                    # magnitude of the lag, to attenuate noise sources giving rise to
-                    # near zero phase lag "synchronization"
-                    # In equational form: WPLI = |E{|sin(phase_diff)| * sign(sin(phase_diff))}| / E{|sin(phase_diff)|}
-                    # =========================================================================
-                    # Calculate the magnitude of phase differences
-                    phase_diff_mag = np.abs(np.sin(phase_diff))
-                    # Calculate the signed phase difference (PLI)
-                    sign_phase_diff = np.sign(np.sin(phase_diff))
-                    # Calculate the numerator (abs and average across time)
-                    WPLI_nominator = np.abs(np.mean(phase_diff_mag*sign_phase_diff))
-                    # Calculate denominator for normalization
-                    WPLI_denom = np.mean(phase_diff_mag)
-                    # Calculate WPLI
-                    WPLI = WPLI_nominator/WPLI_denom
-                    # Save to array
-                    con_array0[1,ch_r,ch_c,list(Freq_Bands.keys()).index(fname)] = WPLI
-    return con_array0
-
-# Pre-allocate memory
-con_data = np.zeros((n_con_methods,n_subjects,n_eye_status,n_sources,n_sources,n_freq_bands))
-n_epochs_matrix = np.zeros((n_subjects,n_eye_status))
-
-# Get current time
-c_time = time.localtime()
-c_time = time.strftime("%H:%M:%S", c_time)
-print(c_time)
-
-def connectivity_estimation(i):
-    con_data0 = np.zeros((n_con_methods,n_eye_status,n_sources,n_sources,n_freq_bands))
-    con_data0[con_data0==0] = np.nan
-    n_epochs_matrix0 = np.zeros((n_eye_status))
-    for e in range(n_eye_status):
-        ee = eye_status[e]
-        eye_idx = final_epochs[i].events[:,2] == e+1 # EC = 1 and EO = 2
-        # Get source time series
-        temp_STC = STCs_list[i][eye_idx]
-        # Calculate the coherence and ImgCoh for the given subject and eye status
-        con, freqs, times, n_epochs, n_tapers = spectral_connectivity(
-            temp_STC, method=connectivity_methods[0:2],
-            mode="multitaper", sfreq=sfreq, fmin=fmin, fmax=fmax,
-            faverage=True, verbose=0)
-        # Save the results in array
-        con_data0[0,e,:,:,:] = con[0] # coherence
-        con_data0[1,e,:,:,:] = np.abs(con[1]) # Absolute value of ImgCoh to reflect magnitude of ImgCoh
+# # I made my own slightly-optimized PLV & WPLI function
+# # Version 2 based on Filter + Hilbert instead of Morlets
+# def calculate_PLV_WPLI_across_time(data):
+#     n_ch, n_time0 = data.shape
+#     x = data.copy()
+#     # Filter the signal in the different freq bands
+#     con_array0 = np.zeros((2,n_ch,n_ch,n_freq_bands))
+#     # con_array0[con_array0==0] = np.nan
+#     for fname, frange in Freq_Bands.items():
+#         fmin, fmax = [float(interval) for interval in frange]
+#         signal_filtered = mne.filter.filter_data(x, sfreq, fmin, fmax,
+#                                           fir_design="firwin", verbose=0)
+#         # Filtering on finite signals will yield very low values for first
+#         # and last timepoint, which can create outliers. E.g. 1e-29 compared to 1e-14
+#         # This systematic error is removed by removing the first and last timepoint
+#         signal_filtered = signal_filtered[:,1:-1]
+#         # Hilbert transform to get complex signal
+#         analytic_signal = scipy.signal.hilbert(signal_filtered)
+#         # Calculate for the lower diagonal only as it is symmetric
+#         for ch_r in range(n_ch):
+#             for ch_c in range(n_ch):
+#                 if ch_r>ch_c:
+#                     # =========================================================================
+#                     # PLV over time corresponds to the absolute value (circular length) of the
+#                     # mean across time of the unit vectors of the relative phases. So PLV will be 1 if
+#                     # the phases of 2 signals maintain a constant lag
+#                     # In equational form: PLV = 1/N * |sum(e^i(phase1-phase2))|
+#                     # In code: abs(mean(exp(1i*phase_diff)))
+#                     # =========================================================================
+#                     # The real part corresponds to the amplitude and the imaginary part can be used to calculate the phase
+#                     phase_diff = np.angle(analytic_signal[ch_r])-np.angle(analytic_signal[ch_c])
+#                     # Convert phase difference to complex part i(phase1-phase2)
+#                     phase_diff_im = 0*phase_diff+1j*phase_diff
+#                     # Take the exponential, then the mean followed by absolute value
+#                     PLV = np.abs(np.mean(np.exp(phase_diff_im)))
+#                     # Save to array
+#                     con_array0[0,ch_r,ch_c,list(Freq_Bands.keys()).index(fname)] = PLV
+#                     # =========================================================================
+#                     # PLI over time corresponds to the sign of the sine of relative phase
+#                     # differences. So PLI will be 1 if one signal is always leading or
+#                     # lagging behind the other signal. But it is insensitive to changes in
+#                     # relative phase, as long as it is the same signal that leads.
+#                     # If 2 signals are almost in phase, they might shift between lead/lag
+#                     # due to small fluctuations from noise. This would lead to unstable
+#                     # estimation of "phase" synchronisation.
+#                     # The wPLI tries to correct for this by weighting the PLI with the
+#                     # magnitude of the lag, to attenuate noise sources giving rise to
+#                     # near zero phase lag "synchronization"
+#                     # In equational form: WPLI = |E{|sin(phase_diff)| * sign(sin(phase_diff))}| / E{|sin(phase_diff)|}
+#                     # =========================================================================
+#                     # Calculate the magnitude of phase differences
+#                     phase_diff_mag = np.abs(np.sin(phase_diff))
+#                     # Calculate the signed phase difference (PLI)
+#                     sign_phase_diff = np.sign(np.sin(phase_diff))
+#                     # Calculate the numerator (abs and average across time)
+#                     WPLI_nominator = np.abs(np.mean(phase_diff_mag*sign_phase_diff))
+#                     # Calculate denominator for normalization
+#                     WPLI_denom = np.mean(phase_diff_mag)
+#                     # Calculate WPLI
+#                     WPLI = WPLI_nominator/WPLI_denom
+#                     # Save to array
+#                     con_array0[1,ch_r,ch_c,list(Freq_Bands.keys()).index(fname)] = WPLI
+#     return con_array0
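+
+# # A minimal, self-contained sanity check of the PLV and wPLI definitions implemented above:
+# # two synthetic sine waves with a constant, non-zero phase lag should give values close to 1
+# # for both measures. All names and parameters below are illustrative only and not part of the pipeline.
+# import numpy as np
+# import scipy.signal
+# _fs = 256
+# _t = np.arange(0, 4, 1/_fs)                      # 4 s of data at 256 Hz
+# _sig1 = np.sin(2*np.pi*10*_t)                    # 10 Hz sine
+# _sig2 = np.sin(2*np.pi*10*_t + np.pi/4)          # same frequency, constant 45 degree lag
+# _dphi = np.angle(scipy.signal.hilbert(_sig1)) - np.angle(scipy.signal.hilbert(_sig2))
+# _plv = np.abs(np.mean(np.exp(1j*_dphi)))         # ~1 for a constant lag
+# _sin_dphi = np.sin(_dphi)
+# _wpli = np.abs(np.mean(np.abs(_sin_dphi)*np.sign(_sin_dphi)))/np.mean(np.abs(_sin_dphi))  # ~1 for a constant non-zero lag
+# print("PLV: {:.3f}, wPLI: {:.3f}".format(_plv, _wpli))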
+
+# # Pre-allocate memory
+# con_data = np.zeros((n_con_methods,n_subjects,n_eye_status,n_sources,n_sources,n_freq_bands))
+# n_epochs_matrix = np.zeros((n_subjects,n_eye_status))
+
+# # Get current time
+# c_time = time.localtime()
+# c_time = time.strftime("%H:%M:%S", c_time)
+# print(c_time)
+
+# def connectivity_estimation(i):
+#     con_data0 = np.zeros((n_con_methods,n_eye_status,n_sources,n_sources,n_freq_bands))
+#     con_data0[con_data0==0] = np.nan
+#     n_epochs_matrix0 = np.zeros((n_eye_status))
+#     for e in range(n_eye_status):
+#         ee = eye_status[e]
+#         eye_idx = final_epochs[i].events[:,2] == e+1 # EC = 1 and EO = 2
+#         # Get source time series
+#         temp_STC = STCs_list[i][eye_idx]
+#         # Calculate the coherence and ImgCoh for the given subject and eye status
+#         con, freqs, times, n_epochs, n_tapers = spectral_connectivity(
+#             temp_STC, method=connectivity_methods[0:2],
+#             mode="multitaper", sfreq=sfreq, fmin=fmin, fmax=fmax,
+#             faverage=True, verbose=0)
+#         # Save the results in array
+#         con_data0[0,e,:,:,:] = con[0] # coherence
+#         con_data0[1,e,:,:,:] = np.abs(con[1]) # Absolute value of ImgCoh to reflect magnitude of ImgCoh
         
-        # Calculate PLV and wPLI for each epoch and then average
-        n_epochs0 = temp_STC.shape[0]
-        con_data1 = np.zeros((len(connectivity_methods[2:]),n_epochs0,n_sources,n_sources,n_freq_bands))
-        for epoch in range(n_epochs0):
-            # First the data is retrieved and epoch axis dropped
-            temp_data = temp_STC[epoch,:,:]
-            # PLV and WPLI value is calculated across timepoints in each freq band
-            PLV_WPLI_con = calculate_PLV_WPLI_across_time(temp_data)
-            # Save results
-            con_data1[0,epoch,:,:,:] = PLV_WPLI_con[0] # phase locking value
-            con_data1[1,epoch,:,:,:] = PLV_WPLI_con[1] # weighted phase lag index
-        # Take average across epochs for PLV and wPLI
-        con_data2 = np.mean(con_data1,axis=1)
-        # Save to final array
-        con_data0[2,e,:,:,:] = con_data2[0] # phase locking value
-        con_data0[3,e,:,:,:] = con_data2[1] # weighted phase lag index
-        n_epochs_matrix0[e] = n_epochs
+#         # Calculate PLV and wPLI for each epoch and then average
+#         n_epochs0 = temp_STC.shape[0]
+#         con_data1 = np.zeros((len(connectivity_methods[2:]),n_epochs0,n_sources,n_sources,n_freq_bands))
+#         for epoch in range(n_epochs0):
+#             # First the data is retrieved and epoch axis dropped
+#             temp_data = temp_STC[epoch,:,:]
+#             # PLV and WPLI value is calculated across timepoints in each freq band
+#             PLV_WPLI_con = calculate_PLV_WPLI_across_time(temp_data)
+#             # Save results
+#             con_data1[0,epoch,:,:,:] = PLV_WPLI_con[0] # phase locking value
+#             con_data1[1,epoch,:,:,:] = PLV_WPLI_con[1] # weighted phase lag index
+#         # Take average across epochs for PLV and wPLI
+#         con_data2 = np.mean(con_data1,axis=1)
+#         # Save to final array
+#         con_data0[2,e,:,:,:] = con_data2[0] # phase locking value
+#         con_data0[3,e,:,:,:] = con_data2[1] # weighted phase lag index
+#         n_epochs_matrix0[e] = n_epochs
     
-    print("{} out of {} finished".format(i+1,n_subjects))
-    return i, con_data0, n_epochs_matrix0
-
-with concurrent.futures.ProcessPoolExecutor(max_workers=16) as executor:
-    for i, con_result, n_epochs_mat in executor.map(connectivity_estimation, range(n_subjects)): # Function and arguments
-        con_data[:,i,:,:,:,:] = con_result
-        n_epochs_matrix[i] = n_epochs_mat
-
-# Get current time
-c_time = time.localtime()
-c_time = time.strftime("%H:%M:%S", c_time)
-print(c_time)
-
-# Save the results
-np.save(Feature_savepath+"Source_drop_interpol_ch_connectivity_measures_data.npy", con_data) # (con_measure,subject,eye,ch,ch,freq)
-
-# Also save as dataframe format for feature selection
-# Convert to Pandas dataframe
-# The dimensions will each be a column with numbers and the last column will be the actual values
-arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, con_data.shape), indexing="ij"))) + [con_data.ravel()])
-con_data_df = pd.DataFrame(arr, columns = ["Con_measurement", "Subject_ID", "Eye_status", "chx", "chy", "Freq_band", "Value"])
-# Change from numerical coding to actual values
-eye_status = list(final_epochs[0].event_id.keys())
-freq_bands_name = list(Freq_Bands.keys())
-
-index_values = [connectivity_methods,Subject_id,eye_status,label_names,label_names,freq_bands_name]
-for col in range(len(index_values)):
-    col_name = con_data_df.columns[col]
-    for shape in range(con_data.shape[col]): # notice not dataframe but the array
-        con_data_df.loc[con_data_df.iloc[:,col] == shape,col_name]\
-        = index_values[col][shape]
-
-# Add group status
-Group_status = np.array(["CTRL"]*len(con_data_df["Subject_ID"]))
-Group_status[np.array([i in cases for i in con_data_df["Subject_ID"]])] = "PTSD"
-# Add to dataframe
-con_data_df.insert(3, "Group_status", Group_status)
-
-# Remove all diagonal and upper-matrix entries
-con_data_df = con_data_df.iloc[con_data_df["Value"].to_numpy().nonzero()]
-
-# Save df
-con_data_df.to_pickle(os.path.join(Feature_savepath,"con_data_source_drop_interpol_df.pkl"))
-
-# %% Estimate Granger's Causality in source space
-# Load source labels
-with open("custom_aparc2009_Li_et_al_2022.pkl", "rb") as file:
-    labels = pickle.load(file)
-label_names = [label.name for label in labels]
-n_sources = len(label_names)
-
-# Load source time series
-with open(Feature_savepath+"STCs_each_epoch_drop_interpol_ch_fix_snr.pkl", "rb") as file:
-    STCs_list = pickle.load(file)
-
-# Granger's causality might be influenced by volume conduction, thus working with CSD might be beneficial
-# But since I already used source modelling to alleviate this problem I will not apply CSD
-# Barrett et al, 2012 also do not apply CSD on source GC
-
-# GC assumes stationarity, thus I will test for stationarity using ADF test
-# The null hypothesis of ADF is that it has unit root, i.e. is non-stationary
-# I will test how many can reject the null hypothesis, i.e. are stationary
-
-# Due to the low numerical values in STC the ADF test is unstable, thus I multiply it to be around 1e0
-
-stationary_test_arr = [0]*n_subjects
-n_tests = [0]*n_subjects
-for i in range(n_subjects):
-    # Get data
-    data_arr = STCs_list[i]
-    # Get shape
-    n_epochs, n_channels, n_timepoints = data_arr.shape
-    # Create array for indices to print out progress
-    ep_progress_idx = np.arange(n_epochs//5,n_epochs,n_epochs//5)
-    # Calculate number of tests performed for each subject
-    n_tests[i] = n_epochs*n_channels
-    # Prepare empty array (with 2's as 0 and 1 will be used)
-    stationary_test_arr0 = np.zeros((n_epochs,n_channels))+2 # make array of 2's
-    for ep in range(n_epochs):
-        for c in range(n_channels):
-            ADF = adfuller(data_arr[ep,c,:]*1e14) # multiplying with a constant does not change ADF, but helps against numerical instability
-            p_value = ADF[1]
-            if p_value < 0.05:
-                stationary_test_arr0[ep,c] = True # Stationary set to 1
-            else:
-                stationary_test_arr0[ep,c] = False # Non-stationary set to 0
-        # Print partial progress
-        if len(np.where(ep_progress_idx==ep)[0]) > 0:
-            print("Finished epoch number: {} out of {}".format(ep,n_epochs))
-    # Indices that were not tested
-    no_test_idx = np.where(stationary_test_arr0==2)[0]
-    if len(no_test_idx) > 0:
-        print("An unexpected error occurred and {} was not tested".format(no_test_idx))
-    # Save to list
-    stationary_test_arr[i] = stationary_test_arr0
-    # Print progress
-    print("Finished subject {} out of {}".format(i+1,n_subjects))
-
-with open(Stat_savepath+"Source_drop_interpol_GC_stationarity_tests.pkl", "wb") as filehandle:
-    # The data is stored as binary data stream
-    pickle.dump(stationary_test_arr, filehandle)
-
-# I used a threshold of 0.05
-# This means that, on average, 5% of the truly non-stationary series would be expected to falsely test as stationary
-ratio_stationary = [0]*n_subjects
-for i in range(n_subjects):
-    # Ratio of tests that showed stationarity
-    ratio_stationary[i] = np.sum(stationary_test_arr[i])/n_tests[i]
-
-print("Ratio of stationary time series: {0:.3f}".format(np.mean(ratio_stationary))) # 88%
-
-# The pre-processing has already ensured that most of the data fulfills the stationarity assumption.
-
-# Divide the data into eyes closed and open
-ch_names = label_names
-n_channels = len(ch_names)
-
-STC_eye_data = []
-for i in range(n_subjects):
-    # Get index for eyes open and eyes closed
-    EC_index = final_epochs[i].events[:,2] == 1
-    EO_index = final_epochs[i].events[:,2] == 2
-    # Get the data
-    EC_epoch_data = STCs_list[i][EC_index,:,:] # eye index
-    EO_epoch_data = STCs_list[i][EO_index,:,:]
-    # Save to list
-    STC_eye_data.append([EC_epoch_data, EO_epoch_data])
-
-# Make each epoch a TimeSeries object
-# Input for TimeSeries is: (ch, time)
-eye_status = list(final_epochs[0].event_id.keys())
-n_eye_status = len(eye_status)
-sfreq = final_epochs[0].info["sfreq"]
-
-Timeseries_data = []
-for i in range(n_subjects):
-    temp_list1 = []
-    for e in range(n_eye_status):
-        temp_list2 = []
-        n_epochs = STC_eye_data[i][e].shape[0]
-        for ep in range(n_epochs):
-            # Convert to TimeSeries
-            time_series = nts.TimeSeries(STC_eye_data[i][e][ep,:,:], sampling_rate=sfreq)
-            # Save the object
-            temp_list2.append(time_series)
-        # Save the timeseries across eye status
-        temp_list1.append(temp_list2)
-    # Save the timeseries across subjects
-    Timeseries_data.append(temp_list1) # output [subject][eye][epoch](ch,time)
-
-# Test multiple specified model orders of AR models, each combination has its own model
-m_orders = np.linspace(1,25,25) # the model orders tested
-m_orders = np.round(m_orders)
-n_timepoints = len(Timeseries_data[0][0][0])
-n_ch_combinations = scipy.special.comb(n_channels,2, exact=True, repetition=False)
-
-# To reduce computation time I only test representative epochs (1 from each 1 min session)
-# There will be 5 epochs from eyes closed and 5 from eyes open
-n_rep_epoch = 5
-# The subjects have different number of epochs due to dropped epochs
-gaps_trials_idx = np.load("Gaps_trials_idx.npy") # time_points between sessions
-# I convert the gap time points to epoch number used as representative epoch
-epoch_idx = np.zeros((n_subjects,n_eye_status,n_rep_epoch), dtype=int) # prepare array
-epoch_idx[:,:,0:4] = np.round(gaps_trials_idx/n_timepoints,0)-8 # take random epoch from sessions 1 to 4
-epoch_idx[:,:,4] = np.round(gaps_trials_idx[:,:,3]/n_timepoints,0)+5 # take random epoch from session 5
-
-# Checking if all epoch idx exists
-for i in range(n_subjects):
-    EC_index = final_epochs[i].events[:,2] == 1
-    EO_index = final_epochs[i].events[:,2] == 2
-    assert np.sum(EC_index) >= epoch_idx[i,0,4]
-    assert np.sum(EO_index) >= epoch_idx[i,1,4]
-
-# Prepare model order estimation
-AIC_arr = np.zeros((len(m_orders),n_subjects,n_eye_status,n_rep_epoch,n_ch_combinations))
-BIC_arr = np.zeros((len(m_orders),n_subjects,n_eye_status,n_rep_epoch,n_ch_combinations))
-
-def GC_model_order_est(i):
-    AIC_arr0 = np.zeros((len(m_orders),n_eye_status,n_rep_epoch,n_ch_combinations))
-    BIC_arr0 = np.zeros((len(m_orders),n_eye_status,n_rep_epoch,n_ch_combinations))
-    for e in range(n_eye_status):
-        n_epochs = STC_eye_data[i][e].shape[0]
-        N_total = n_timepoints*n_epochs # total number of datapoints for specific eye condition
-        for ep in range(n_rep_epoch):
-            epp = epoch_idx[i,e,ep]
-            for o in range(len(m_orders)):
-                order = int(m_orders[o])
-                # Make the Granger Causality object
-                GCA1 = nta.GrangerAnalyzer(Timeseries_data[i][e][epp-1], order=order,
-                                           n_freqs=2000)
-                for c in range(n_ch_combinations):
-                    # Retrieve error covariance matrix for all combinations
-                    ecov = np.array(list(GCA1.error_cov.values()))
-                    # Calculate AIC
-                    AIC = ntsu.akaike_information_criterion(ecov[c,:,:], p = n_channels,
-                                                            m=order, Ntotal=N_total)
-                    # Calculate BIC
-                    BIC = ntsu.bayesian_information_criterion(ecov[c,:,:], p = n_channels,
-                                                              m=order, Ntotal=N_total)
-                    # Save the information criterions
-                    AIC_arr0[o,e,ep,c] = AIC
-                    BIC_arr0[o,e,ep,c] = BIC
-
-    print("{} out of {} finished testing".format(i+1,n_subjects))
-    return i, AIC_arr0, BIC_arr0
-
-# Get current time
-c_time1 = time.localtime()
-c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1)
-print(c_time1)
-
-with concurrent.futures.ProcessPoolExecutor() as executor:
-    for i, AIC_result, BIC_result in executor.map(GC_model_order_est, range(n_subjects)): # Function and arguments
-        AIC_arr[:,i] = AIC_result
-        BIC_arr[:,i] = BIC_result
-
-# Get current time
-c_time2 = time.localtime()
-c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2)
-print("Started", c_time1, "\nCurrent Time",c_time2)
-
-# Save the AIC and BIC results
-np.save(Feature_savepath+"AIC_Source_drop_interpol_GC_model_order.npy", AIC_arr) # (m. order, subject, eye, epoch, combination)
-np.save(Feature_savepath+"BIC_Source_drop_interpol_GC_model_order.npy", BIC_arr) # (m. order, subject, eye, epoch, combination)
-
-# Load data
-AIC_arr = np.load(Feature_savepath+"AIC_Source_drop_interpol_GC_model_order.npy")
-BIC_arr = np.load(Feature_savepath+"BIC_Source_drop_interpol_GC_model_order.npy")
-
-# Average across all subjects, eye status, epochs and combinations
-plt.figure(figsize=(8,6))
-plt.plot(m_orders, np.nanmean(AIC_arr, axis=(1,2,3,4)), label="AIC")
-plt.plot(m_orders, np.nanmean(BIC_arr, axis=(1,2,3,4)), label="BIC")
-plt.title("Average information criteria value")
-plt.xlabel("Model order (Lag)")
-plt.legend()
-
-np.sum(np.isnan(AIC_arr))/AIC_arr.size # around 0.07% NaN due to non-convergence
-np.sum(np.isnan(BIC_arr))/BIC_arr.size # around 0.07% NaN due to non-convergence
-
-# If we look at each subject
-mean_subject_AIC = np.nanmean(AIC_arr, axis=(2,3,4))
-
-plt.figure(figsize=(8,6))
-for i in range(n_subjects):
-    plt.plot(m_orders, mean_subject_AIC[:,i])
-plt.title("Average AIC for each subject")
-plt.xlabel("Model order (Lag)")
-
-mean_subject_BIC = np.nanmean(BIC_arr, axis=(2,3,4))
-plt.figure(figsize=(8,6))
-for i in range(n_subjects):
-    plt.plot(m_orders, mean_subject_BIC[:,i])
-plt.title("Average BIC for each subject")
-plt.xlabel("Model order (Lag)")
-
-# We see that for many cases BIC does not converge; it increases monotonically.
-
-# We can look at the distribution of chosen order for each time series analyzed
-# I.e. I will find the minima in model order for each model
-AIC_min_arr = np.argmin(AIC_arr, axis=0)
-BIC_min_arr = np.argmin(BIC_arr, axis=0)
-
-# Plot the distributions of the model order chosen
-plt.figure(figsize=(8,6))
-sns.distplot(AIC_min_arr.reshape(-1)+1, kde=False, norm_hist=True,
-             bins=np.linspace(0.75,30.25,60), label="AIC")
-plt.ylabel("Frequency density")
-plt.xlabel("Model order")
-plt.title("AIC Model Order Estimation")
-
-plt.figure(figsize=(8,6))
-sns.distplot(BIC_min_arr.reshape(-1)+1, kde=False, norm_hist=True, color="blue",
-             bins=np.linspace(0.75,30.25,60), label="BIC")
-plt.ylabel("Frequency density")
-plt.xlabel("Model order")
-plt.title("BIC Model Order Estimation")
-# It is clear from the BIC model that most have model order 1
-# which reflects their monotonically increasing nature without convergence
-# Thus I will only use AIC
-
-# There is a bias variance trade-off with model order [Stokes & Purdon, 2017]
-# Lower order is associated with higher bias and higher order with variance
-# I will choose the model order chosen by most models (approximated here by the median of the per-model minima)
-AR_order = int(np.nanquantile(AIC_min_arr.reshape(-1), q=0.5))
-# Order = 5
-
-# Calculate Granger Causality for each subject, eye and epoch
-Freq_Bands = {"delta": [1.25, 4.0],
-              "theta": [4.0, 8.0],
-              "alpha": [8.0, 13.0],
-              "beta": [13.0, 30.0],
-              "gamma": [30.0, 49.0]}
-n_freq_bands = len(Freq_Bands)
-
-# Pre-allocate memory
-GC_data = np.zeros((2,n_subjects,n_eye_status,n_channels,n_channels,n_freq_bands))
+#     print("{} out of {} finished".format(i+1,n_subjects))
+#     return i, con_data0, n_epochs_matrix0
+
+# with concurrent.futures.ProcessPoolExecutor(max_workers=16) as executor:
+#     for i, con_result, n_epochs_mat in executor.map(connectivity_estimation, range(n_subjects)): # Function and arguments
+#         con_data[:,i,:,:,:,:] = con_result
+#         n_epochs_matrix[i] = n_epochs_mat
+
+# # Get current time
+# c_time = time.localtime()
+# c_time = time.strftime("%H:%M:%S", c_time)
+# print(c_time)
+
+# # Save the results
+# np.save(Feature_savepath+"Source_drop_interpol_ch_connectivity_measures_data.npy", con_data) # (con_measure,subject,eye,ch,ch,freq)
+
+# # Also save as dataframe format for feature selection
+# # Convert to Pandas dataframe
+# # The dimensions will each be a column with numbers and the last column will be the actual values
+# arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, con_data.shape), indexing="ij"))) + [con_data.ravel()])
+# con_data_df = pd.DataFrame(arr, columns = ["Con_measurement", "Subject_ID", "Eye_status", "chx", "chy", "Freq_band", "Value"])
+# # Change from numerical coding to actual values
+# eye_status = list(final_epochs[0].event_id.keys())
+# freq_bands_name = list(Freq_Bands.keys())
+
+# index_values = [connectivity_methods,Subject_id,eye_status,label_names,label_names,freq_bands_name]
+# for col in range(len(index_values)):
+#     col_name = con_data_df.columns[col]
+#     for shape in range(con_data.shape[col]): # notice not dataframe but the array
+#         con_data_df.loc[con_data_df.iloc[:,col] == shape,col_name]\
+#         = index_values[col][shape]
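+
+# # A small standalone illustration of the array-to-long-format conversion used above: every
+# # axis of the array becomes an index column and the values become the last column, after
+# # which the numeric codes are replaced by labels. The toy array, labels and column names
+# # below are made up for this example only.
+# import numpy as np
+# import pandas as pd
+# _toy = np.arange(6).reshape(2, 3)                # e.g. (subject, freq band)
+# _cols = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, _toy.shape), indexing="ij"))) + [_toy.ravel()])
+# _toy_df = pd.DataFrame(_cols, columns=["Subject_ID", "Freq_band", "Value"])
+# _labels = [["S1", "S2"], ["delta", "theta", "alpha"]]
+# for _c in range(len(_labels)):
+#     _name = _toy_df.columns[_c]
+#     for _code in range(_toy.shape[_c]):
+#         _toy_df.loc[_toy_df.iloc[:, _c] == _code, _name] = _labels[_c][_code]
+# print(_toy_df)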
+
+# # Add group status
+# Group_status = np.array(["CTRL"]*len(con_data_df["Subject_ID"]))
+# Group_status[np.array([i in cases for i in con_data_df["Subject_ID"]])] = "PTSD"
+# # Add to dataframe
+# con_data_df.insert(3, "Group_status", Group_status)
+
+# # Remove all diagonal and upper-matrix entries
+# con_data_df = con_data_df.iloc[con_data_df["Value"].to_numpy().nonzero()]
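+# # Note that nonzero() keeps only non-zero entries, so this assumes the retained
+# # connectivity values are never exactly zero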
+
+# # Save df
+# con_data_df.to_pickle(os.path.join(Feature_savepath,"con_data_source_drop_interpol_df.pkl"))
+
+# # %% Estimate Granger's Causality in source space
+# # Load source labels
+# with open("custom_aparc2009_Li_et_al_2022.pkl", "rb") as file:
+#     labels = pickle.load(file)
+# label_names = [label.name for label in labels]
+# n_sources = len(label_names)
+
+# # Load source time series
+# with open(Feature_savepath+"STCs_each_epoch_drop_interpol_ch_fix_snr.pkl", "rb") as file:
+#     STCs_list = pickle.load(file)
 
-def GC_analysis(i):
-    GC_data0 = np.zeros((2,n_eye_status,n_channels,n_channels,n_freq_bands))
-    for e in range(n_eye_status):
-        n_epochs = STC_eye_data[i][e].shape[0]
-        # Make temporary array to save GC for each epoch
-        temp_GC_data = np.zeros((2,n_epochs,n_channels,n_channels,n_freq_bands))
-        for ep in range(n_epochs):
-            # Fit the AR model
-            GCA = nta.GrangerAnalyzer(Timeseries_data[i][e][ep], order=AR_order,
-                                       n_freqs=int(800)) # n_Freq=800 correspond to step of 0.25Hz, the same as multitaper for power estimation
-            for f in range(n_freq_bands):
-                # Define lower and upper band
-                f_lb = list(Freq_Bands.values())[f][0]
-                f_ub = list(Freq_Bands.values())[f][1]
-                # Get index corresponding to the frequency bands of interest
-                freq_idx_G = np.where((GCA.frequencies >= f_lb) * (GCA.frequencies < f_ub))[0]
-                # Calculate Granger causality quantities
-                g_xy = np.mean(GCA.causality_xy[:, :, freq_idx_G], -1) # avg on last dimension
-                g_yx = np.mean(GCA.causality_yx[:, :, freq_idx_G], -1) # avg on last dimension
-                # Transpose to use same format as con_measurement and save
-                temp_GC_data[0,ep,:,:,f] = np.transpose(g_xy)
-                temp_GC_data[1,ep,:,:,f] = np.transpose(g_yx)
+# # Granger's causality might be influenced by volume conduction, thus working with CSD might be beneficial
+# # But since I already used source modelling to alleviate this problem I will not apply CSD
+# # Barrett et al, 2012 also do not apply CSD on source GC
+
+# # GC assumes stationarity, thus I will test for stationarity using ADF test
+# # The null hypothesis of ADF is that it has unit root, i.e. is non-stationary
+# # I will test how many can reject the null hypothesis, i.e. are stationary
+
+# # Due to the low numerical values in STC the ADF test is unstable, thus I multiply it to be around 1e0
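+
+# # A brief, self-contained illustration of the ADF logic described above on made-up data:
+# # white noise (stationary) should reject the unit-root null, whereas a random walk
+# # (non-stationary) should not. Values and names are for illustration only.
+# import numpy as np
+# from statsmodels.tsa.stattools import adfuller
+# _rng = np.random.default_rng(0)
+# _noise = _rng.standard_normal(1000)              # stationary
+# _walk = np.cumsum(_rng.standard_normal(1000))    # unit root, non-stationary
+# print("White noise ADF p-value: {:.4f}".format(adfuller(_noise)[1]))  # typically < 0.05
+# print("Random walk ADF p-value: {:.4f}".format(adfuller(_walk)[1]))   # typically > 0.05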
+
+# stationary_test_arr = [0]*n_subjects
+# n_tests = [0]*n_subjects
+# for i in range(n_subjects):
+#     # Get data
+#     data_arr = STCs_list[i]
+#     # Get shape
+#     n_epochs, n_channels, n_timepoints = data_arr.shape
+#     # Create array for indices to print out progress
+#     ep_progress_idx = np.arange(n_epochs//5,n_epochs,n_epochs//5)
+#     # Calculate number of tests performed for each subject
+#     n_tests[i] = n_epochs*n_channels
+#     # Prepare empty array (with 2's as 0 and 1 will be used)
+#     stationary_test_arr0 = np.zeros((n_epochs,n_channels))+2 # make array of 2's
+#     for ep in range(n_epochs):
+#         for c in range(n_channels):
+#             ADF = adfuller(data_arr[ep,c,:]*1e14) # multiplying with a constant does not change ADF, but helps against numerical instability
+#             p_value = ADF[1]
+#             if p_value < 0.05:
+#                 stationary_test_arr0[ep,c] = True # Stationary set to 1
+#             else:
+#                 stationary_test_arr0[ep,c] = False # Non-stationary set to 0
+#         # Print partial progress
+#         if len(np.where(ep_progress_idx==ep)[0]) > 0:
+#             print("Finished epoch number: {} out of {}".format(ep,n_epochs))
+#     # Indices that were not tested
+#     no_test_idx = np.where(stationary_test_arr0==2)[0]
+#     if len(no_test_idx) > 0:
+#         print("An unexpected error occurred and {} was not tested".format(no_test_idx))
+#     # Save to list
+#     stationary_test_arr[i] = stationary_test_arr0
+#     # Print progress
+#     print("Finished subject {} out of {}".format(i+1,n_subjects))
+
+# with open(Stat_savepath+"Source_drop_interpol_GC_stationarity_tests.pkl", "wb") as filehandle:
+#     # The data is stored as binary data stream
+#     pickle.dump(stationary_test_arr, filehandle)
+
+# # I used a threshold of 0.05
+# # This means that, on average, 5% of the truly non-stationary series would be expected to falsely test as stationary
+# ratio_stationary = [0]*n_subjects
+# for i in range(n_subjects):
+#     # Ratio of tests that showed stationarity
+#     ratio_stationary[i] = np.sum(stationary_test_arr[i])/n_tests[i]
+
+# print("Ratio of stationary time series: {0:.3f}".format(np.mean(ratio_stationary))) # 88%
+
+# # The pre-processing has already ensured that most of the data fulfills the stationarity assumption.
+
+# # Divide the data into eyes closed and open
+# ch_names = label_names
+# n_channels = len(ch_names)
+
+# STC_eye_data = []
+# for i in range(n_subjects):
+#     # Get index for eyes open and eyes closed
+#     EC_index = final_epochs[i].events[:,2] == 1
+#     EO_index = final_epochs[i].events[:,2] == 2
+#     # Get the data
+#     EC_epoch_data = STCs_list[i][EC_index,:,:] # eye index
+#     EO_epoch_data = STCs_list[i][EO_index,:,:]
+#     # Save to list
+#     STC_eye_data.append([EC_epoch_data, EO_epoch_data])
+
+# # Make each epoch a TimeSeries object
+# # Input for TimeSeries is: (ch, time)
+# eye_status = list(final_epochs[0].event_id.keys())
+# n_eye_status = len(eye_status)
+# sfreq = final_epochs[0].info["sfreq"]
+
+# Timeseries_data = []
+# for i in range(n_subjects):
+#     temp_list1 = []
+#     for e in range(n_eye_status):
+#         temp_list2 = []
+#         n_epochs = STC_eye_data[i][e].shape[0]
+#         for ep in range(n_epochs):
+#             # Convert to TimeSeries
+#             time_series = nts.TimeSeries(STC_eye_data[i][e][ep,:,:], sampling_rate=sfreq)
+#             # Save the object
+#             temp_list2.append(time_series)
+#         # Save the timeseries across eye status
+#         temp_list1.append(temp_list2)
+#     # Save the timeseries across subjects
+#     Timeseries_data.append(temp_list1) # output [subject][eye][epoch](ch,time)
+
+# # Test multiple specified model orders of AR models, each combination has its own model
+# m_orders = np.linspace(1,25,25) # the model orders tested
+# m_orders = np.round(m_orders)
+# n_timepoints = len(Timeseries_data[0][0][0])
+# n_ch_combinations = scipy.special.comb(n_channels,2, exact=True, repetition=False)
+
+# # To reduce computation time I only test representative epochs (1 from each 1 min session)
+# # There will be 5 epochs from eyes closed and 5 from eyes open
+# n_rep_epoch = 5
+# # The subjects have different number of epochs due to dropped epochs
+# gaps_trials_idx = np.load("Gaps_trials_idx.npy") # time_points between sessions
+# # I convert the gap time points to epoch number used as representative epoch
+# epoch_idx = np.zeros((n_subjects,n_eye_status,n_rep_epoch), dtype=int) # prepare array
+# epoch_idx[:,:,0:4] = np.round(gaps_trials_idx/n_timepoints,0)-8 # take random epoch from sessions 1 to 4
+# epoch_idx[:,:,4] = np.round(gaps_trials_idx[:,:,3]/n_timepoints,0)+5 # take random epoch from session 5
+
+# # Checking if all epoch idx exists
+# for i in range(n_subjects):
+#     EC_index = final_epochs[i].events[:,2] == 1
+#     EO_index = final_epochs[i].events[:,2] == 2
+#     assert np.sum(EC_index) >= epoch_idx[i,0,4]
+#     assert np.sum(EO_index) >= epoch_idx[i,1,4]
+
+# # Prepare model order estimation
+# AIC_arr = np.zeros((len(m_orders),n_subjects,n_eye_status,n_rep_epoch,n_ch_combinations))
+# BIC_arr = np.zeros((len(m_orders),n_subjects,n_eye_status,n_rep_epoch,n_ch_combinations))
+
+# def GC_model_order_est(i):
+#     AIC_arr0 = np.zeros((len(m_orders),n_eye_status,n_rep_epoch,n_ch_combinations))
+#     BIC_arr0 = np.zeros((len(m_orders),n_eye_status,n_rep_epoch,n_ch_combinations))
+#     for e in range(n_eye_status):
+#         n_epochs = STC_eye_data[i][e].shape[0]
+#         N_total = n_timepoints*n_epochs # total number of datapoints for specific eye condition
+#         for ep in range(n_rep_epoch):
+#             epp = epoch_idx[i,e,ep]
+#             for o in range(len(m_orders)):
+#                 order = int(m_orders[o])
+#                 # Make the Granger Causality object
+#                 GCA1 = nta.GrangerAnalyzer(Timeseries_data[i][e][epp-1], order=order,
+#                                            n_freqs=2000)
+#                 for c in range(n_ch_combinations):
+#                     # Retrieve error covariance matrix for all combinations
+#                     ecov = np.array(list(GCA1.error_cov.values()))
+#                     # Calculate AIC
+#                     AIC = ntsu.akaike_information_criterion(ecov[c,:,:], p = n_channels,
+#                                                             m=order, Ntotal=N_total)
+#                     # Calculate BIC
+#                     BIC = ntsu.bayesian_information_criterion(ecov[c,:,:], p = n_channels,
+#                                                               m=order, Ntotal=N_total)
+#                     # Save the information criterions
+#                     AIC_arr0[o,e,ep,c] = AIC
+#                     BIC_arr0[o,e,ep,c] = BIC
+
+#     print("{} out of {} finished testing".format(i+1,n_subjects))
+#     return i, AIC_arr0, BIC_arr0
+
+# # Get current time
+# c_time1 = time.localtime()
+# c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1)
+# print(c_time1)
+
+# with concurrent.futures.ProcessPoolExecutor() as executor:
+#     for i, AIC_result, BIC_result in executor.map(GC_model_order_est, range(n_subjects)): # Function and arguments
+#         AIC_arr[:,i] = AIC_result
+#         BIC_arr[:,i] = BIC_result
+
+# # Get current time
+# c_time2 = time.localtime()
+# c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2)
+# print("Started", c_time1, "\nCurrent Time",c_time2)
+
+# # Save the AIC and BIC results
+# np.save(Feature_savepath+"AIC_Source_drop_interpol_GC_model_order.npy", AIC_arr) # (m. order, subject, eye, epoch, combination)
+# np.save(Feature_savepath+"BIC_Source_drop_interpol_GC_model_order.npy", BIC_arr) # (m. order, subject, eye, epoch, combination)
+
+# # Load data
+# AIC_arr = np.load(Feature_savepath+"AIC_Source_drop_interpol_GC_model_order.npy")
+# BIC_arr = np.load(Feature_savepath+"BIC_Source_drop_interpol_GC_model_order.npy")
+
+# # Average across all subjects, eye status, epochs and combinations
+# plt.figure(figsize=(8,6))
+# plt.plot(m_orders, np.nanmean(AIC_arr, axis=(1,2,3,4)), label="AIC")
+# plt.plot(m_orders, np.nanmean(BIC_arr, axis=(1,2,3,4)), label="BIC")
+# plt.title("Average information criteria value")
+# plt.xlabel("Model order (Lag)")
+# plt.legend()
+
+# np.sum(np.isnan(AIC_arr))/AIC_arr.size # around 0.07% NaN due to non-convergence
+# np.sum(np.isnan(BIC_arr))/BIC_arr.size # around 0.07% NaN due to non-convergence
+
+# # If we look at each subject
+# mean_subject_AIC = np.nanmean(AIC_arr, axis=(2,3,4))
+
+# plt.figure(figsize=(8,6))
+# for i in range(n_subjects):
+#     plt.plot(m_orders, mean_subject_AIC[:,i])
+# plt.title("Average AIC for each subject")
+# plt.xlabel("Model order (Lag)")
+
+# mean_subject_BIC = np.nanmean(BIC_arr, axis=(2,3,4))
+# plt.figure(figsize=(8,6))
+# for i in range(n_subjects):
+#     plt.plot(m_orders, mean_subject_BIC[:,i])
+# plt.title("Average BIC for each subject")
+# plt.xlabel("Model order (Lag)")
+
+# # We see that for many cases BIC does not converge; it increases monotonically.
+
+# # We can look at the distribution of chosen order for each time series analyzed
+# # I.e. I will find the minima in model order for each model
+# AIC_min_arr = np.argmin(AIC_arr, axis=0)
+# BIC_min_arr = np.argmin(BIC_arr, axis=0)
+
+# # Plot the distributions of the model order chosen
+# plt.figure(figsize=(8,6))
+# sns.distplot(AIC_min_arr.reshape(-1)+1, kde=False, norm_hist=True,
+#              bins=np.linspace(0.75,30.25,60), label="AIC")
+# plt.ylabel("Frequency density")
+# plt.xlabel("Model order")
+# plt.title("AIC Model Order Estimation")
+
+# plt.figure(figsize=(8,6))
+# sns.distplot(BIC_min_arr.reshape(-1)+1, kde=False, norm_hist=True, color="blue",
+#              bins=np.linspace(0.75,30.25,60), label="BIC")
+# plt.ylabel("Frequency density")
+# plt.xlabel("Model order")
+# plt.title("BIC Model Order Estimation")
+# # It is clear from the BIC model that most have model order 1
+# # which reflects their monotonically increasing nature without convergence
+# # Thus I will only use AIC
+
+# # There is a bias variance trade-off with model order [Stokes & Purdon, 2017]
+# # Lower order is associated with higher bias and higher order with variance
+# # I will choose the model order chosen by most models (approximated here by the median of the per-model minima)
+# AR_order = int(np.nanquantile(AIC_min_arr.reshape(-1), q=0.5))
+# # Order = 5
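+
+# # A small illustration of this order-selection step on made-up per-model AIC-minimizing
+# # orders: the median is used as a robust summary and, for a unimodal distribution, it
+# # usually coincides with the most frequent (mode) order. Example values are arbitrary.
+# import numpy as np
+# import scipy.stats
+# _opt_orders = np.array([4, 5, 5, 5, 6, 5, 7, 5, 4, 6])
+# _median_order = int(np.nanquantile(_opt_orders, q=0.5))   # 5
+# _mode_order = int(scipy.stats.mode(_opt_orders)[0])       # 5
+# print(_median_order, _mode_order)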
+
+# # Calculate Granger Causality for each subject, eye and epoch
+# Freq_Bands = {"delta": [1.25, 4.0],
+#               "theta": [4.0, 8.0],
+#               "alpha": [8.0, 13.0],
+#               "beta": [13.0, 30.0],
+#               "gamma": [30.0, 49.0]}
+# n_freq_bands = len(Freq_Bands)
+
+# # Pre-allocate memory
+# GC_data = np.zeros((2,n_subjects,n_eye_status,n_channels,n_channels,n_freq_bands))
+
+# def GC_analysis(i):
+#     GC_data0 = np.zeros((2,n_eye_status,n_channels,n_channels,n_freq_bands))
+#     for e in range(n_eye_status):
+#         n_epochs = STC_eye_data[i][e].shape[0]
+#         # Make temporary array to save GC for each epoch
+#         temp_GC_data = np.zeros((2,n_epochs,n_channels,n_channels,n_freq_bands))
+#         for ep in range(n_epochs):
+#             # Fit the AR model
+#             GCA = nta.GrangerAnalyzer(Timeseries_data[i][e][ep], order=AR_order,
+#                                        n_freqs=int(800)) # n_Freq=800 correspond to step of 0.25Hz, the same as multitaper for power estimation
+#             for f in range(n_freq_bands):
+#                 # Define lower and upper band
+#                 f_lb = list(Freq_Bands.values())[f][0]
+#                 f_ub = list(Freq_Bands.values())[f][1]
+#                 # Get index corresponding to the frequency bands of interest
+#                 freq_idx_G = np.where((GCA.frequencies >= f_lb) * (GCA.frequencies < f_ub))[0]
+#                 # Calculate Granger causality quantities
+#                 g_xy = np.mean(GCA.causality_xy[:, :, freq_idx_G], -1) # avg on last dimension
+#                 g_yx = np.mean(GCA.causality_yx[:, :, freq_idx_G], -1) # avg on last dimension
+#                 # Transpose to use same format as con_measurement and save
+#                 temp_GC_data[0,ep,:,:,f] = np.transpose(g_xy)
+#                 temp_GC_data[1,ep,:,:,f] = np.transpose(g_yx)
         
-        # Average over epochs for each person, eye condition, direction and frequency band
-        temp_GC_epoch_mean = np.nanmean(temp_GC_data, axis=1) # sometimes Log(Sxx/xx_auto_component) is nan
-        # Save to array
-        GC_data0[:,e,:,:,:] = temp_GC_epoch_mean
+#         # Average over epochs for each person, eye condition, direction and frequency band
+#         temp_GC_epoch_mean = np.nanmean(temp_GC_data, axis=1) # sometimes Log(Sxx/xx_auto_component) is nan
+#         # Save to array
+#         GC_data0[:,e,:,:,:] = temp_GC_epoch_mean
         
-    print("{} out of {} finished analyzing".format(i+1,n_subjects))
-    return i, GC_data0
+#     print("{} out of {} finished analyzing".format(i+1,n_subjects))
+#     return i, GC_data0
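+
+# # A rough, standalone sketch of the nitime workflow used above (TimeSeries -> GrangerAnalyzer
+# # -> band-averaged causality) on two made-up coupled AR(1) channels, where channel 0 drives
+# # channel 1. The coupling, sampling rate, order and band below are illustrative only.
+# import numpy as np
+# import nitime.timeseries as nts
+# import nitime.analysis as nta
+# _rng = np.random.default_rng(0)
+# _n = 1000
+# _x = np.zeros((2, _n))
+# for _k in range(1, _n):
+#     _x[0, _k] = 0.5*_x[0, _k-1] + _rng.standard_normal()
+#     _x[1, _k] = 0.5*_x[1, _k-1] + 0.4*_x[0, _k-1] + _rng.standard_normal()
+# _ts = nts.TimeSeries(_x, sampling_rate=200.0)
+# _gca = nta.GrangerAnalyzer(_ts, order=5, n_freqs=400)
+# _band = (_gca.frequencies >= 4.0) & (_gca.frequencies < 8.0)   # arbitrary 4-8 Hz band
+# _g_xy = np.nanmean(_gca.causality_xy[:, :, _band], -1)
+# _g_yx = np.nanmean(_gca.causality_yx[:, :, _band], -1)
+# print(_g_xy, _g_yx)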
 
-# Get current time
-c_time1 = time.localtime()
-c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1)
-print(c_time1)
+# # Get current time
+# c_time1 = time.localtime()
+# c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1)
+# print(c_time1)
 
-with concurrent.futures.ProcessPoolExecutor() as executor:
-    for i, GC_result in executor.map(GC_analysis, range(n_subjects)): # Function and arguments
-        GC_data[:,i] = GC_result
+# with concurrent.futures.ProcessPoolExecutor() as executor:
+#     for i, GC_result in executor.map(GC_analysis, range(n_subjects)): # Function and arguments
+#         GC_data[:,i] = GC_result
         
-# Get current time
-c_time2 = time.localtime()
-c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2)
-print("Started", c_time1, "\nCurrent Time",c_time2)
-
-# Output: GC_data (g_xy/g_yx, subject, eye, chx, chy, freq)
-# Notice that for g_xy ([0,...]) it means "chy" Granger causes "chx"
-# and for g_yx ([1,...]) it means "chx" Granger causes "chy"
-# This is due to the transposing, which flipped the results onto the lower triangle of the matrix
-
-# Save the Granger_Causality data
-np.save(Feature_savepath+"Source_drop_interpol_GrangerCausality_data.npy", GC_data)
-
-# Theoretically negative GC values should be impossible, but in practice
-# they can still occur due to problems with model fitting (see Stokes & Purdon, 2017)
-print("{:.3f}% negative GC values".\
-      format(np.sum(GC_data[~np.isnan(GC_data)]<0)/np.sum(~np.isnan(GC_data))*100)) # 0.08% negative values
-# These values cannot be interpreted, but they seem to occur mostly for true non-causal connections
-# Hence I set them to 0
-with np.errstate(invalid="ignore"): # invalid number refers to np.nan, which will be set to False for comparisons
-    GC_data[(GC_data<0)] = 0
-
-# Save as dataframe for further processing with other features
-# Convert to Pandas dataframe
-# The dimensions will each be a column with numbers and the last column will be the actual values
-arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, GC_data.shape), indexing="ij"))) + [GC_data.ravel()])
-GC_data_df = pd.DataFrame(arr, columns = ["GC_direction", "Subject_ID", "Eye_status", "chx", "chy", "Freq_band", "Value"])
-# Change from numerical coding to actual values
-eye_status = list(final_epochs[0].event_id.keys())
-freq_bands_name = list(Freq_Bands.keys())
-GC_directions_info = ["chy -> chx", "chx -> chy"]
-
-index_values = [GC_directions_info,Subject_id,eye_status,ch_names,ch_names,freq_bands_name]
-for col in range(len(index_values)):
-    col_name = GC_data_df.columns[col]
-    for shape in range(GC_data.shape[col]): # notice not dataframe but the array
-        GC_data_df.loc[GC_data_df.iloc[:,col] == shape,col_name]\
-        = index_values[col][shape]
-
-# Add group status
-Group_status = np.array(["CTRL"]*len(GC_data_df["Subject_ID"]))
-Group_status[np.array([i in cases for i in GC_data_df["Subject_ID"]])] = "PTSD"
-# Add to dataframe
-GC_data_df.insert(3, "Group_status", Group_status)
-
-# Remove all nan (including diagonal and upper-matrix entries)
-GC_data_df = GC_data_df.iloc[np.invert(np.isnan(GC_data_df["Value"].to_numpy()))]
-
-# Swap ch values for GC_direction chy -> chx (so it is always chx -> chy)
-tempchy = GC_data_df[GC_data_df["GC_direction"] == "chy -> chx"]["chy"] # save chy
-GC_data_df.loc[GC_data_df["GC_direction"] == "chy -> chx","chy"] =\
-             GC_data_df.loc[GC_data_df["GC_direction"] == "chy -> chx","chx"] # overwrite old chy
-GC_data_df.loc[GC_data_df["GC_direction"] == "chy -> chx","chx"] = tempchy # overwrite chx
-
-# Drop the GC_direction column
-GC_data_df = GC_data_df.drop("GC_direction", axis=1)
-
-# Save df
-GC_data_df.to_pickle(os.path.join(Feature_savepath,"GC_data_source_drop_interpol_df.pkl"))
-
-# Testing if df was formatted correctly
-expected_GC_values = n_subjects*n_eye_status*n_ch_combinations*n_freq_bands*2 # 2 because it is bidirectional
-assert GC_data_df.shape[0] == expected_GC_values
-# Testing a random GC value
-random_connection = np.random.randint(0,GC_data_df.shape[0])
-test_connection = GC_data_df.iloc[random_connection,:]
-i = np.where(Subject_id==test_connection["Subject_ID"])[0]
-e = np.where(np.array(eye_status)==test_connection["Eye_status"])[0]
-chx = np.where(np.array(ch_names)==test_connection["chx"])[0]
-chy = np.where(np.array(ch_names)==test_connection["chy"])[0]
-f = np.where(np.array(freq_bands_name)==test_connection["Freq_band"])[0]
-value = test_connection["Value"]
-if chx < chy: # the GC array is only lower diagonal to save memory
-    assert GC_data[0,i,e,chy,chx,f] == value
-if chx > chy:
-    assert GC_data[1,i,e,chx,chy,f] == value
+# # Get current time
+# c_time2 = time.localtime()
+# c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2)
+# print("Started", c_time1, "\nCurrent Time",c_time2)
+
+# # Output: GC_data (g_xy/g_yx, subject, eye, chx, chy, freq)
+# # Notice that for g_xy ([0,...]) it means "chy" Granger causes "chx"
+# # and for g_yx ([1,...]) it means "chx" Granger causes "chy"
+# # This is due to the transposing, which flipped the results onto the lower triangle of the matrix
+
+# # Save the Granger_Causality data
+# np.save(Feature_savepath+"Source_drop_interpol_GrangerCausality_data.npy", GC_data)
+
+# # Theoretically negative GC values should be impossible, but in practice
+# # they can still occur due to problems with model fitting (see Stokes & Purdon, 2017)
+# print("{:.3f}% negative GC values".\
+#       format(np.sum(GC_data[~np.isnan(GC_data)]<0)/np.sum(~np.isnan(GC_data))*100)) # 0.08% negative values
+# # These values cannot be interpreted, but they seem to occur mostly for true non-causal connections
+# # Hence I set them to 0
+# with np.errstate(invalid="ignore"): # invalid number refers to np.nan, which will be set to False for comparisons
+#     GC_data[(GC_data<0)] = 0
+
+# # Save as dataframe for further processing with other features
+# # Convert to Pandas dataframe
+# # The dimensions will each be a column with numbers and the last column will be the actual values
+# arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, GC_data.shape), indexing="ij"))) + [GC_data.ravel()])
+# GC_data_df = pd.DataFrame(arr, columns = ["GC_direction", "Subject_ID", "Eye_status", "chx", "chy", "Freq_band", "Value"])
+# # Change from numerical coding to actual values
+# eye_status = list(final_epochs[0].event_id.keys())
+# freq_bands_name = list(Freq_Bands.keys())
+# GC_directions_info = ["chy -> chx", "chx -> chy"]
+
+# index_values = [GC_directions_info,Subject_id,eye_status,ch_names,ch_names,freq_bands_name]
+# for col in range(len(index_values)):
+#     col_name = GC_data_df.columns[col]
+#     for shape in range(GC_data.shape[col]): # notice not dataframe but the array
+#         GC_data_df.loc[GC_data_df.iloc[:,col] == shape,col_name]\
+#         = index_values[col][shape]
+
+# # Add group status
+# Group_status = np.array(["CTRL"]*len(GC_data_df["Subject_ID"]))
+# Group_status[np.array([i in cases for i in GC_data_df["Subject_ID"]])] = "PTSD"
+# # Add to dataframe
+# GC_data_df.insert(3, "Group_status", Group_status)
+
+# # Remove all nan (including diagonal and upper-matrix entries)
+# GC_data_df = GC_data_df.iloc[np.invert(np.isnan(GC_data_df["Value"].to_numpy()))]
+
+# # Swap ch values for GC_direction chy -> chx (so it is always chx -> chy)
+# tempchy = GC_data_df[GC_data_df["GC_direction"] == "chy -> chx"]["chy"] # save chy
+# GC_data_df.loc[GC_data_df["GC_direction"] == "chy -> chx","chy"] =\
+#              GC_data_df.loc[GC_data_df["GC_direction"] == "chy -> chx","chx"] # overwrite old chy
+# GC_data_df.loc[GC_data_df["GC_direction"] == "chy -> chx","chx"] = tempchy # overwrite chx
+
+# # Drop the GC_direction column
+# GC_data_df = GC_data_df.drop("GC_direction", axis=1)
+
+# # Save df
+# GC_data_df.to_pickle(os.path.join(Feature_savepath,"GC_data_source_drop_interpol_df.pkl"))
+
+# # Testing if df was formatted correctly
+# expected_GC_values = n_subjects*n_eye_status*n_ch_combinations*n_freq_bands*2 # 2 because it is bidirectional
+# assert GC_data_df.shape[0] == expected_GC_values
+# # Testing a random GC value
+# random_connection = np.random.randint(0,GC_data_df.shape[0])
+# test_connection = GC_data_df.iloc[random_connection,:]
+# i = np.where(Subject_id==test_connection["Subject_ID"])[0]
+# e = np.where(np.array(eye_status)==test_connection["Eye_status"])[0]
+# chx = np.where(np.array(ch_names)==test_connection["chx"])[0]
+# chy = np.where(np.array(ch_names)==test_connection["chy"])[0]
+# f = np.where(np.array(freq_bands_name)==test_connection["Freq_band"])[0]
+# value = test_connection["Value"]
+# if chx < chy: # the GC array is only lower diagonal to save memory
+#     assert GC_data[0,i,e,chy,chx,f] == value
+# if chx > chy:
+#     assert GC_data[1,i,e,chx,chy,f] == value
diff --git a/Preprocessing.py b/Preprocessing.py
index b3381ae6252478b97de7ee18d179f4d4cd768dbd..4c14f0e1803be275fa8ca659b3083b5039841e6c 100644
--- a/Preprocessing.py
+++ b/Preprocessing.py
@@ -40,6 +40,7 @@ from mne.datasets import eegbci
 n_subjects = 78
 Subject_id = list(range(1,n_subjects+1))
 # Download and get filenames
+<<<<<<< HEAD
 data_path = "/data/may2020/QEEG"
 files = []
 for r, d, f in os.walk(data_path):
@@ -54,18 +55,37 @@ n_subjects = 51
 Subject_id = list(range(1,n_subjects+1)) 
 # Original code to get filenames in a folder
 data_path = "/data/raw/FOR_DTU/rawEEGforDTU"
+=======
+
+from mne.datasets import eegbci
+files = []
+for i in range(n_subjects):
+    raw_fnames = eegbci.load_data(Subject_id[i], [1,2]) # The first 2 runs
+    files.append(raw_fnames)
+"""
+# Original code to get filenames in a folder
+data_path = "/Users/benj3542/Desktop/Uni/Noter/Semester_6/Bachelor/Test_bdf"
+>>>>>>> 592a824fc18e7b7d354f8c8638fd1d4cda16ad34
 # Get filenames
 holder = []
 files = []
 for r, d, f in os.walk(data_path):
     for file in f:
         if ".bdf" in file:
+<<<<<<< HEAD
             holder.append(os.path.join(r, file))
     files.append(holder)
     holder = []
 files.pop(0)
 """            
 
+=======
+            holder.append((os.path.join(r, file)))
+    files.append(holder)
+    holder = []
+files.pop(0)
+"""
+>>>>>>> 592a824fc18e7b7d354f8c8638fd1d4cda16ad34
 # Eye status
 anno_to_event = {'Eyes Closed': 1, 'Eyes Open': 2} # manually defined event id
 eye_status = list(anno_to_event.keys())