# FeatureEstimation.py

# Reversed in inner loop is used to avoid sequential data being overwritten.
# E.g. if 0 is renamed to 1, then in the next loop all 1's will be renamed to 2

# Label every row of the microstate time dataframe with its group:
# rows whose Subject_ID is listed in `cases` are "PTSD", all others "CTRL"
is_ptsd_row = np.array([sid in cases for sid in microstate_time_df["Subject_ID"]])
Group_status = np.where(is_ptsd_row, "PTSD", "CTRL")
# Place the labels as the third column of the dataframe
microstate_time_df.insert(2, "Group_status", Group_status)

# Persist the dataframe as a pickle in the feature folder
microstate_time_df.to_pickle(os.path.join(Feature_savepath, "microstate_time_df.pkl"))

# Transition data - group means
# Boolean masks over Subject_id: True where the subject is (not) listed in `cases`
PTSD_idx = np.array([subject in cases for subject in Subject_id])
CTRL_idx = np.array([subject not in cases for subject in Subject_id])
n_groups = 2

# Average the transition data over the first axis separately for each group.
# Index 0 of the result holds the PTSD mean, index 1 the CTRL mean.
microstate_transition_data_mean = np.zeros((n_groups, n_eye_status, n_maps, n_maps))
for group_i, group_mask in enumerate((PTSD_idx, CTRL_idx)):
    microstate_transition_data_mean[group_i, :, :, :] = np.mean(
        microstate_transition_data[group_mask, :, :, :], axis=0)

# Convert entropy data to Pandas dataframe (long format)
# Each array dimension becomes a column holding that dimension's integer index
# and the last column holds the actual values
dim_codes = np.meshgrid(*map(np.arange, microstate_entropy_data.shape), indexing="ij")
arr = np.column_stack([grid.ravel() for grid in dim_codes] + [microstate_entropy_data.ravel()])
microstate_entropy_df = pd.DataFrame(arr, columns = ["Subject_ID", "Eye_status", "Value"])
# Change from numerical coding to actual values
eye_status = list(final_epochs[0].event_id.keys())

index_values = [Subject_id,eye_status]
for col, labels in enumerate(index_values):
    col_name = microstate_entropy_df.columns[col]
    # Snapshot the numeric codes once, before any label is written to the column.
    # Masks are built from this snapshot instead of from the column being edited,
    # so a label that happens to equal another (not yet processed) code can never
    # be renamed a second time, whatever the iteration order.
    codes = microstate_entropy_df.iloc[:,col].to_numpy(copy=True)
    for code in range(microstate_entropy_data.shape[col]): # notice this is the shape of original numpy array. Not shape of DF
        microstate_entropy_df.loc[codes == code, col_name] = labels[code]

# Add group status: rows whose Subject_ID is listed in `cases` are "PTSD",
# every other row is "CTRL"
ptsd_rows = np.array([sid in cases for sid in microstate_entropy_df["Subject_ID"]])
Group_status = np.where(ptsd_rows, "PTSD", "CTRL")
# Insert the labels as the third column
microstate_entropy_df.insert(2, "Group_status", Group_status)
# Constant "Measurement" column so the plot code can be re-used
dummy_variable = ["Entropy"] * len(Group_status)
microstate_entropy_df.insert(3, "Measurement", dummy_variable)

# Save the dataframe as a pickle in the feature folder
microstate_entropy_df.to_pickle(os.path.join(Feature_savepath, "microstate_entropy_df.pkl"))

# # %% Long-range temporal correlations (LRTC)
# """
# See Hardstone et al, 2012
# Hurst exponent estimation steps:
#     1. Preprocess
#     2. Band-pass filter for frequency band of interest
#     3. Hilbert transform to obtain amplitude envelope
#     4. Perform DFA
#         4.1 Compute cumulative sum of time series to create signal profile
#         4.2 Define set of window sizes (see below)
#         4.3 Remove the linear trend using least-squares for each window
#         4.4 Calculate standard deviation for each window and take the mean
#         4.5 Plot fluctuation function (Standard deviation) as function
#             for all window sizes, on double logarithmic scale
#         4.6 The DFA exponent alpha correspond to Hurst exponent
#             f(L) = sd = L^alpha (with alpha as linear coefficient in log plot)

# If 0 < alpha < 0.5: The process exhibits anti-correlations
# If 0.5 < alpha < 1: The process exhibits positive correlations
# If alpha = 0.5: The process is indistinguishable from a random process
# If 1.0 < alpha < 2.0: The process is non-stationary. H = alpha - 1

# Window sizes should be equally spaced on a logarithmic scale
# Sizes should be at least 4 samples and up to 10% of total signal length
# Filters can influence neighboring samples, thus filters should be tested
# on white noise to estimate window sizes that are unaffected by filters

# filter_length=str(2*1/fmin)+"s" # cannot be used with default transition bandwidth

# """
# # From simulations with white noise I determined window size thresholds for the 5 frequency bands:
# thresholds = [7,7,7,6.5,6.5]
# # And their corresponding log step sizes
# with open("LRTC_log_win_sizes.pkl", "rb") as filehandle:
#     log_win_sizes = pickle.load(filehandle)

# # Variables for the different conditions
# # Sampling frequency
# sfreq = final_epochs[0].info["sfreq"]
# # Channels
# ch_names = final_epochs[0].info["ch_names"]
# n_channels = len(ch_names)
# # Frequency
# Freq_Bands = {"delta": [1.25, 4.0],
#               "theta": [4.0, 8.0],
#               "alpha": [8.0, 13.0],
#               "beta": [13.0, 30.0],
#               "gamma": [30.0, 49.0]}
# n_freq_bands = len(Freq_Bands)
# # Eye status
# eye_status = list(final_epochs[0].event_id.keys())
# n_eye_status = len(eye_status)

# ### Estimating Hurst exponent for the data
# # The data should be re-referenced to common average (Already done)

# # Data are transformed to numpy arrays
# # Then divided into EO and EC and further into each of the 5 trials
# # So DFA is estimated for each trial separately, which was concluded from simulations
# gaps_trials_idx = np.load("Gaps_trials_idx.npy") # re-used from microstate analysis
# n_trials = 5

# H_data = []
# for i in range(n_subjects):
#     # Transform data to correct shape
#     temp_arr = final_epochs[i].get_data() # get data
#     arr_shape = temp_arr.shape # get shape
#     temp_arr = temp_arr.swapaxes(1,2) # swap ch and time axis
#     temp_arr = temp_arr.reshape(arr_shape[0]*arr_shape[2],arr_shape[1]) # reshape by combining epochs and times
#     # Get indices for eyes open and closed
#     EC_index = final_epochs[i].events[:,2] == 1
#     EO_index = final_epochs[i].events[:,2] == 2
#     # Repeat with 4s * sample frequency to correct for concatenation of times and epochs
#     EC_index = np.repeat(EC_index,4*sfreq)
#     EO_index = np.repeat(EO_index,4*sfreq)
#     # Divide into eye status
#     EC_data = temp_arr[EC_index]
#     EO_data = temp_arr[EO_index]
#     # Divide into trials
#     EC_gap_idx = np.array([0]+list(gaps_trials_idx[i,0])+[len(EC_data)])
#     EO_gap_idx = np.array([0]+list(gaps_trials_idx[i,1])+[len(EO_data)])
    
#     EC_trial_data = []
#     EO_trial_data = []
#     for t in range(n_trials):
#         EC_trial_data.append(EC_data[EC_gap_idx[t]:EC_gap_idx[t+1]])
#         EO_trial_data.append(EO_data[EO_gap_idx[t]:EO_gap_idx[t+1]])
        
#     # Save data
#     H_data.append([EC_trial_data,EO_trial_data]) # output [subject][eye][trial][time,ch]

# # Calculate H for each subject, eye status, trial, freq and channel
# H_arr = np.zeros((n_subjects,n_eye_status,n_trials,n_channels,n_freq_bands))
# w_len = [len(ele) for ele in log_win_sizes]
# DFA_arr = np.empty((n_subjects,n_eye_status,n_trials,n_channels,n_freq_bands,2,np.max(w_len)))
# DFA_arr[:] = np.nan

# # Get current time
# c_time1 = time.localtime()
# c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1)
# print("Started",c_time1)

# # Nolds already uses all cores, so multiprocessing will make it slower
# # Warning occurs when R2 is estimated during detrending - but R2 is not used
# warnings.simplefilter("ignore")
# for i in range(n_subjects):
#     # Pre-allocate memory
#     DFA_temp = np.empty((n_eye_status,n_trials,n_channels,n_freq_bands,2,np.max(w_len)))
#     DFA_temp[:] = np.nan
#     H_temp = np.empty((n_eye_status,n_trials,n_channels,n_freq_bands))
#     for e in range(n_eye_status):
#         for trial in range(n_trials):
#             for c in range(n_channels):
#                 # Get the data
#                 signal = H_data[i][e][trial][:,c]
                
#                 counter = 0 # prepare counter
#                 for fmin, fmax in Freq_Bands.values():
#                     # Filter for each freq band
#                     signal_filtered = mne.filter.filter_data(signal, sfreq=sfreq, verbose=0,
#                                                   l_freq=fmin, h_freq=fmax)
#                     # Hilbert transform
#                     analytic_signal = scipy.signal.hilbert(signal_filtered)
#                     # Get Amplitude envelope
#                     # np.abs is the same as np.linalg.norm, i.e. the length for complex input which is the amplitude
#                     ampltude_envelope = np.abs(analytic_signal)
#                     # Perform DFA using predefined window sizes from simulation
#                     a, dfa_data = nolds.dfa(ampltude_envelope,
#                                             nvals=np.exp(log_win_sizes[counter]).astype("int"),
#                                             debug_data=True)
#                     # Save DFA results
#                     DFA_temp[e,trial,c,counter,:,0:w_len[counter]] = dfa_data[0:2]
#                     H_temp[e,trial,c,counter] = a
#                     # Update counter
#                     counter += 1

#     # Print run status
#     print("Finished {} out of {}".format(i+1,n_subjects))
#     # Save the results
#     H_arr[i] = H_temp
#     DFA_arr[i] = DFA_temp

# warnings.simplefilter("default")

# # Get current time
# c_time2 = time.localtime()
# c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2)
# print("Started", c_time1, "\nCurrent Time",c_time2)

# # Save the DFA analysis data 
# np.save(Feature_savepath+"DFA_arr.npy", DFA_arr)
# np.save(Feature_savepath+"H_arr.npy", H_arr)

# # Load
# DFA_arr = np.load(Feature_savepath+"DFA_arr.npy")
# H_arr = np.load(Feature_savepath+"H_arr.npy")

# # Average the Hurst Exponent across trials
# H_arr = np.mean(H_arr, axis=2)

# # Convert to Pandas dataframe (Hurst exponent)
# # The dimensions will each be a column with numbers and the last column will be the actual values
# arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, H_arr.shape), indexing="ij"))) + [H_arr.ravel()])
# H_data_df = pd.DataFrame(arr, columns = ["Subject_ID", "Eye_status", "Channel", "Freq_band", "Value"])
# # Change from numerical coding to actual values
# eye_status = list(final_epochs[0].event_id.keys())
# ch_name = final_epochs[0].info["ch_names"]

# index_values = [Subject_id,eye_status,ch_name,list(Freq_Bands.keys())]
# for col in range(len(index_values)):
#     col_name = H_data_df.columns[col]
#     for shape in range(H_arr.shape[col]): # notice this is the shape of original numpy array. Not shape of DF
#         H_data_df.loc[H_data_df.iloc[:,col] == shape,col_name]\
#         = index_values[col][shape]

# # Add group status
# Group_status = np.array(["CTRL"]*len(H_data_df["Subject_ID"]))
# Group_status[np.array([i in cases for i in H_data_df["Subject_ID"]])] = "PTSD"
# # Add to dataframe
# H_data_df.insert(2, "Group_status", Group_status)

# # Fix Freq_band categorical order
# H_data_df["Freq_band"] = H_data_df["Freq_band"].astype("category").\
#             cat.reorder_categories(list(Freq_Bands.keys()), ordered=True)

# # Global Hurst exponent
# H_data_df_global = H_data_df.groupby(["Subject_ID", "Eye_status", "Freq_band"]).mean().reset_index() # by default pandas mean skip nan
# # Add group status (cannot use group_by as each subject only have 1 group, not both)
# Group_status = np.array(["CTRL"]*len(H_data_df_global["Subject_ID"]))
# Group_status[np.array([i in cases for i in H_data_df_global["Subject_ID"]])] = "PTSD"
# # Add to dataframe
# H_data_df_global.insert(2, "Group_status", Group_status)
# # Add dummy variable for re-using plot code
# dummy_variable = ["Global Hurst Exponent"]*H_data_df_global.shape[0]
# H_data_df_global.insert(3, "Measurement", dummy_variable )

# # Save the data
# H_data_df.to_pickle(os.path.join(Feature_savepath,"H_data_df.pkl"))
# H_data_df_global.to_pickle(os.path.join(Feature_savepath,"H_data_global_df.pkl"))

# # %% Source localization of sensor data
# # Using non-interpolated channels
# # Even interpolated channels during preprocessing and visual inspection
# # are dropped

# # Prepare epochs for estimation of source connectivity
# source_epochs = [0]*n_subjects
# for i in range(n_subjects):
#     source_epochs[i] = final_epochs[i].copy()

# ### Make forward solutions
# # A forward solution is first made for all individuals with no dropped channels
# # Afterwards individual forward solutions are made for subjects with bad
# # channels that were interpolated in preprocessing and these are dropped
# # First forward operator is computed using a template MRI for each dataset
# fs_dir = "/home/glia/MNE-fsaverage-data/fsaverage"
# subjects_dir = os.path.dirname(fs_dir)
# trans = "fsaverage"
# src = os.path.join(fs_dir, "bem", "fsaverage-ico-5-src.fif")
# bem = os.path.join(fs_dir, "bem", "fsaverage-5120-5120-5120-bem-sol.fif")

# # Read the template sourcespace
# sourcespace = mne.read_source_spaces(src)

# temp_idx = 0 # Index with subject that had no bad channels
# subject_eeg = source_epochs[temp_idx].copy()
# subject_eeg.set_eeg_reference(projection=True) # needed for inverse modelling
# # Make forward solution
# fwd = mne.make_forward_solution(subject_eeg.info, trans=trans, src=src,
#                             bem=bem, eeg=True, mindist=5.0, n_jobs=1)
# # Save forward operator
# fname_fwd = "./Source_fwd/fsaverage-fwd.fif"
# mne.write_forward_solution(fname_fwd, fwd, overwrite=True)

# # A specific forward solution is also made for each subject with bad channels
# with open("./Preprocessing/bad_ch.pkl", "rb") as file:
#    bad_ch = pickle.load(file)

# All_bad_ch = bad_ch
# All_drop_epochs = dropped_epochs_df
# All_dropped_ch = []

# Bad_ch_idx = [idx for idx, item in enumerate(All_bad_ch) if item != 0]
# Bad_ch_subjects = All_drop_epochs["Subject_ID"][Bad_ch_idx]
# # For each subject with bad channels, drop the channels and make forward operator
# for n in range(len(Bad_ch_subjects)):
#     Subject = Bad_ch_subjects.iloc[n]
#     try:
#         Subject_idx = Subject_id.index(Subject)
#         # Get unique bad channels
#         Bad_ch0 = All_bad_ch[Bad_ch_idx[n]]
#         Bad_ch1 = []
#         for i2 in range(len(Bad_ch0)):
#             if type(Bad_ch0[i2]) == list:
#                 for i3 in range(len(Bad_ch0[i2])):
#                     Bad_ch1.append(Bad_ch0[i2][i3])
#             elif type(Bad_ch0[i2]) == str:
#                 Bad_ch1.append(Bad_ch0[i2])
#         Bad_ch1 = np.unique(Bad_ch1)
#         # Drop the bad channels
#         source_epochs[Subject_idx].drop_channels(Bad_ch1)
#         # Save the overview of dropped channels
#         All_dropped_ch.append([Subject,Subject_idx,Bad_ch1])
#         # Make forward operator
#         subject_eeg = source_epochs[Subject_idx].copy()
#         subject_eeg.set_eeg_reference(projection=True) # needed for inverse modelling
#         # Make forward solution
#         fwd = mne.make_forward_solution(subject_eeg.info, trans=trans, src=src,
#                                     bem=bem, eeg=True, mindist=5.0, n_jobs=1)
#         # Save forward operator
#         fname_fwd = "./Source_fwd/fsaverage_{}-fwd.fif".format(Subject)
#         mne.write_forward_solution(fname_fwd, fwd, overwrite=True)
#     except:
#         print(Subject,"was already dropped")

# with open("./Preprocessing/All_datasets_bad_ch.pkl", "wb") as filehandle:
#     pickle.dump(All_dropped_ch, filehandle)


# # %% Load forward operators
# # Re-use for all subjects without dropped channels
# fname_fwd = "./Source_fwd/fsaverage-fwd.fif"
# fwd = mne.read_forward_solution(fname_fwd)

# fwd_list = [fwd]*n_subjects

# # Use specific forward solutions for subjects with dropped channels
# with open("./Preprocessing/All_datasets_bad_ch.pkl", "rb") as file:
#    All_dropped_ch = pickle.load(file)

# for i in range(len(All_dropped_ch)):
#     Subject = All_dropped_ch[i][0]
#     Subject_idx = All_dropped_ch[i][1]
#     fname_fwd = "./Source_fwd/fsaverage_{}-fwd.fif".format(Subject)
#     fwd = mne.read_forward_solution(fname_fwd)
#     fwd_list[Subject_idx] = fwd

# # Check the correct number of channels are present in fwd
# random_point = int(np.random.randint(0,len(All_dropped_ch)-1,1))
# assert len(fwds[All_dropped_ch[random_point][1]].ch_names) == source_epochs[All_dropped_ch[random_point][1]].info["nchan"]

# # %% Make parcellation
# # After mapping to source space, I end up with 20484 vertices
# # but I wanted to map to fewer sources and not many more
# # Thus I need to perform parcellation
# # Get labels for FreeSurfer "aparc" cortical parcellation (example with 74 labels/hemi - Destrieux)
# labels_aparc = mne.read_labels_from_annot("fsaverage", parc="aparc.a2009s",
#                                     subjects_dir=subjects_dir)
# labels_aparc = labels_aparc[:-2] # remove unknowns

# labels_aparc_names = [label.name for label in labels_aparc]

# # Manually adding the 31 ROIs (14-lh/rh + 3 in midline) from Toll et al, 2020
# # Making function to take subset of a label
# def label_subset(label, subset, name="ROI_name"):
#     label_subset = mne.Label(label.vertices[subset], label.pos[subset,:],
#                          label.values[subset], label.hemi,
#                          name = "{}-{}".format(name,label.hemi),
#                          subject = label.subject, color = None)
#     return label_subset

# ### Visual area 1 (V1 and somatosensory cortex BA1-3)
# label_filenames = ["lh.V1.label", "rh.V1.label",
#                    "lh.BA1.label", "rh.BA1.label",
#                    "lh.BA2.label", "rh.BA2.label",
#                    "lh.BA3a.label", "rh.BA3a.label",
#                    "lh.BA3b.label", "rh.BA3b.label"]
# labels0 = [0]*len(label_filenames)
# for i, filename in enumerate(label_filenames):
#     labels0[i] = mne.read_label(os.path.join(fs_dir, "label", filename), subject="fsaverage")
# # Add V1 to final label variable
# labels = labels0[:2]
# # Rename to remove redundant hemi information
# labels[0].name = "V1-{}".format(labels[0].hemi)
# labels[1].name = "V1-{}".format(labels[1].hemi)
# # Assign a color
# labels[0].color = matplotlib.colors.to_rgba("salmon")
# labels[1].color = matplotlib.colors.to_rgba("salmon")
# # Combine Brodmann Areas for SMC. Only use vertices once to avoid duplication error
# SMC_labels = labels0[2:]
# for hem in range(2):
#     SMC_p1 = SMC_labels[hem]
#     for i in range(1,len(SMC_labels)//2):
#         SMC_p2 = SMC_labels[hem+2*i]
#         p2_idx = np.isin(SMC_p2.vertices, SMC_p1.vertices, invert=True)
#         SMC_p21 = label_subset(SMC_p2, p2_idx, "SMC")
#         SMC_p1 = SMC_p1.__add__(SMC_p21)
#     SMC_p1.name = SMC_p21.name
#     # Assign a color
#     SMC_p1.color = matplotlib.colors.to_rgba("orange")
#     labels.append(SMC_p1)

# ### Inferior frontal junction
# # Located at junction between inferior frontal and inferior precentral sulcus
# label_aparc_names0 = ["S_front_inf","S_precentral-inf-part"]
# temp_labels = []
# for i in range(len(label_aparc_names0)):
#     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
#     for i2 in range(len(labels_aparc_idx)):
#         temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())

# pos1 = temp_labels[0].pos
# pos2 = temp_labels[2].pos
# distm = scipy.spatial.distance.cdist(pos1,pos2)
# # Find the closest points between the 2 ROIs
# l1_idx = np.unique(np.where(distm<np.quantile(distm, 0.001))[0]) # q chosen to correspond to around 10% of ROI
# l2_idx = np.unique(np.where(distm<np.quantile(distm, 0.0005))[1]) # q chosen to correspond to around 10% of ROI

# IFJ_label_p1 = label_subset(temp_labels[0], l1_idx, "IFJ")
# IFJ_label_p2 = label_subset(temp_labels[2], l2_idx, "IFJ")
# # Combine the 2 parts
# IFJ_label = IFJ_label_p1.__add__(IFJ_label_p2)
# IFJ_label.name = IFJ_label_p1.name
# # Assign a color
# IFJ_label.color = matplotlib.colors.to_rgba("chartreuse")
# # Append to final list
# labels.append(IFJ_label)

# # Do the same for the right hemisphere
# pos1 = temp_labels[1].pos
# pos2 = temp_labels[3].pos
# distm = scipy.spatial.distance.cdist(pos1,pos2)
# # Find the closest points between the 2 ROIs
# l1_idx = np.unique(np.where(distm<np.quantile(distm, 0.00075))[0]) # q chosen to correspond to around 10% of ROI
# l2_idx = np.unique(np.where(distm<np.quantile(distm, 0.0005))[1]) # q chosen to correspond to around 10% of ROI
# IFJ_label_p1 = label_subset(temp_labels[1], l1_idx, "IFJ")
# IFJ_label_p2 = label_subset(temp_labels[3], l2_idx, "IFJ")
# # Combine the 2 parts
# IFJ_label = IFJ_label_p1.__add__(IFJ_label_p2)
# IFJ_label.name = IFJ_label_p1.name
# # Assign a color
# IFJ_label.color = matplotlib.colors.to_rgba("chartreuse")
# # Append to final list
# labels.append(IFJ_label)

# ### Intraparietal sulcus
# label_aparc_names0 = ["S_intrapariet_and_P_trans"]
# labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[0])]
# for i in range(len(labels_aparc_idx)):
#     labels.append(labels_aparc[labels_aparc_idx[i]].copy())
#     labels[-1].name = "IPS-{}".format(labels[-1].hemi)

# ### Frontal eye field as intersection between middle frontal gyrus and precentral gyrus
# label_aparc_names0 = ["G_front_middle","G_precentral"]
# temp_labels = []
# for i in range(len(label_aparc_names0)):
#     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
#     for i2 in range(len(labels_aparc_idx)):
#         temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())

# # Take 10% of middle frontal gyrus closest to precentral gyrus (most posterior)
# temp_label0 = temp_labels[0]
# G_fm_y = temp_label0.pos[:,1]
# thres_G_fm_y = np.sort(G_fm_y)[len(G_fm_y)//10]
# idx_p1 = np.where(G_fm_y<thres_G_fm_y)[0]
# FEF_label_p1 = label_subset(temp_label0, idx_p1, "FEF")
# # Take 10% closest for precentral gyrus (most anterior)
# temp_label0 = temp_labels[2]
# # I cannot only use y (anterior/posterior) but also need to restrict z-position
# G_pre_cen_z = temp_label0.pos[:,2]
# thres_G_pre_cen_z = 0.04 # visually inspected threshold
# G_pre_cen_y = temp_label0.pos[:,1]
# thres_G_pre_cen_y = np.sort(G_pre_cen_y[G_pre_cen_z>thres_G_pre_cen_z])[-len(G_pre_cen_y)//10] # notice - for anterior
# idx_p2 = np.where((G_pre_cen_y>thres_G_pre_cen_y) & (G_pre_cen_z>thres_G_pre_cen_z))[0]
# FEF_label_p2 = label_subset(temp_label0, idx_p2, "FEF")
# # Combine the 2 parts
# FEF_label = FEF_label_p1.__add__(FEF_label_p2)
# FEF_label.name = FEF_label_p1.name
# # Assign a color
# FEF_label.color = matplotlib.colors.to_rgba("aqua")
# # Append to final list
# labels.append(FEF_label)

# # Do the same for the right hemisphere
# temp_label0 = temp_labels[1]
# G_fm_y = temp_label0.pos[:,1]
# thres_G_fm_y = np.sort(G_fm_y)[len(G_fm_y)//10]
# idx_p1 = np.where(G_fm_y<thres_G_fm_y)[0]
# FEF_label_p1 = label_subset(temp_label0, idx_p1, "FEF")

# temp_label0 = temp_labels[3]
# G_pre_cen_z = temp_label0.pos[:,2]
# thres_G_pre_cen_z = 0.04 # visually inspected threshold
# G_pre_cen_y = temp_label0.pos[:,1]
# thres_G_pre_cen_y = np.sort(G_pre_cen_y[G_pre_cen_z>thres_G_pre_cen_z])[-len(G_pre_cen_y)//10] # notice - for anterior
# idx_p2 = np.where((G_pre_cen_y>thres_G_pre_cen_y) & (G_pre_cen_z>thres_G_pre_cen_z))[0]
# FEF_label_p2 = label_subset(temp_label0, idx_p2, "FEF")
# # Combine the 2 parts
# FEF_label = FEF_label_p1.__add__(FEF_label_p2)
# FEF_label.name = FEF_label_p1.name
# # Assign a color
# FEF_label.color = matplotlib.colors.to_rgba("aqua")
# # Append to final list
# labels.append(FEF_label)

# ### Supplementary eye fields
# # Located at caudal end of frontal gyrus and upper part of paracentral sulcus
# label_aparc_names0 = ["G_and_S_paracentral","G_front_sup"]
# temp_labels = []
# for i in range(len(label_aparc_names0)):
#     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
#     for i2 in range(len(labels_aparc_idx)):
#         temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())

# pos1 = temp_labels[0].pos
# pos2 = temp_labels[2].pos
# distm = scipy.spatial.distance.cdist(pos1,pos2)
# # Find the closest points between the 2 ROIs
# l1_idx = np.unique(np.where(distm<np.quantile(distm, 0.0005))[0]) # q chosen to correspond to around 15% of ROI
# l2_idx = np.unique(np.where(distm<np.quantile(distm, 0.005))[1]) # q chosen to correspond to around 10% of ROI
# # Notice that superior frontal gyrus is around 4 times bigger than paracentral
# len(l1_idx)/pos1.shape[0]
# len(l2_idx)/pos2.shape[0]
# # Only use upper part
# z_threshold = 0.06 # visually inspected
# l1_idx = l1_idx[pos1[l1_idx,2] > z_threshold]
# l2_idx = l2_idx[pos2[l2_idx,2] > z_threshold]

# SEF_label_p1 = label_subset(temp_labels[0], l1_idx, "SEF")
# SEF_label_p2 = label_subset(temp_labels[2], l2_idx, "SEF")
# # Combine the 2 parts
# SEF_label = SEF_label_p1.__add__(SEF_label_p2)
# SEF_label.name = SEF_label_p1.name
# # Assign a color
# SEF_label.color = matplotlib.colors.to_rgba("royalblue")
# # Append to final list
# labels.append(SEF_label)

# # Do the same for the right hemisphere
# pos1 = temp_labels[1].pos
# pos2 = temp_labels[3].pos
# distm = scipy.spatial.distance.cdist(pos1,pos2)
# # Find the closest points between the 2 ROIs
# l1_idx = np.unique(np.where(distm<np.quantile(distm, 0.0005))[0]) # q chosen to correspond to around 15% of ROI
# l2_idx = np.unique(np.where(distm<np.quantile(distm, 0.005))[1]) # q chosen to correspond to around 10% of ROI
# # Notice that superior frontal gyrus is around 4 times bigger than paracentral
# len(l1_idx)/pos1.shape[0]
# len(l2_idx)/pos2.shape[0]
# # Only use upper part
# z_threshold = 0.06 # visually inspected
# l1_idx = l1_idx[pos1[l1_idx,2] > z_threshold]
# l2_idx = l2_idx[pos2[l2_idx,2] > z_threshold]

# SEF_label_p1 = label_subset(temp_labels[1], l1_idx, "SEF")
# SEF_label_p2 = label_subset(temp_labels[3], l2_idx, "SEF")
# # Combine the 2 parts
# SEF_label = SEF_label_p1.__add__(SEF_label_p2)
# SEF_label.name = SEF_label_p1.name
# # Assign a color
# SEF_label.color = matplotlib.colors.to_rgba("royalblue")
# # Append to final list
# labels.append(SEF_label)

# ### Posterior cingulate cortex
# label_aparc_names0 = ["G_cingul-Post-dorsal", "G_cingul-Post-ventral"]
# temp_labels = []
# for i in range(len(label_aparc_names0)):
#     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
#     for i2 in range(len(labels_aparc_idx)):
#         temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())
# labels0 = []
# for hem in range(2):
#     PCC_p1 = temp_labels[hem]
#     for i in range(1,len(temp_labels)//2):
#         PCC_p2 = temp_labels[hem+2*i]
#         PCC_p1 = PCC_p1.__add__(PCC_p2)
#     PCC_p1.name = "PCC-{}".format(PCC_p1.hemi)
#     labels0.append(PCC_p1)
# # Combine the 2 hemisphere in 1 label
# labels.append(labels0[0].__add__(labels0[1]))

# ### Medial prefrontal cortex
# # From their schematic it looks like rostral 1/4 of superior frontal gyrus
# label_aparc_names0 = ["G_front_sup"]
# temp_labels = []
# for i in range(len(label_aparc_names0)):
#     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
#     for i2 in range(len(labels_aparc_idx)):
#         temp_labels0 = labels_aparc[labels_aparc_idx[i2]].copy()
#         temp_labels0 = temp_labels0.split(4, subjects_dir=subjects_dir)[3]
#         temp_labels0.name = "mPFC-{}".format(temp_labels0.hemi)
#         temp_labels.append(temp_labels0)
# # Combine the 2 hemisphere in 1 label
# labels.append(temp_labels[0].__add__(temp_labels[1]))

# ### Angular gyrus
# label_aparc_names0 = ["G_pariet_inf-Angular"]
# for i in range(len(label_aparc_names0)):
#     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
#     for i2 in range(len(labels_aparc_idx)):
#         temp_labels = labels_aparc[labels_aparc_idx[i2]].copy()
#         temp_labels.name = "ANG-{}".format(temp_labels.hemi)
#         labels.append(temp_labels)

# ### Posterior middle frontal gyrus
# label_aparc_names0 = ["G_front_middle"]
# for i in range(len(label_aparc_names0)):
#     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
#     for i2 in range(len(labels_aparc_idx)):
#         temp_labels = labels_aparc[labels_aparc_idx[i2]].copy()
#         temp_labels = temp_labels.split(2, subjects_dir=subjects_dir)[0]
#         temp_labels.name = "PMFG-{}".format(temp_labels.hemi)
#         labels.append(temp_labels)

# ### Inferior parietal lobule
# # From their parcellation figure seems to be rostral angular gyrus and posterior supramarginal gyrus
# label_aparc_names0 = ["G_pariet_inf-Angular","G_pariet_inf-Supramar"]
# temp_labels = []
# for i in range(len(label_aparc_names0)):
#     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
#     for i2 in range(len(labels_aparc_idx)):
#         temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())
# # Split angular in 2 and get rostral part
# temp_labels[0] = temp_labels[0].split(2, subjects_dir=subjects_dir)[1]
# temp_labels[1] = temp_labels[1].split(2, subjects_dir=subjects_dir)[1]
# # Split supramarginal in 2 and get posterior part
# temp_labels[2] = temp_labels[2].split(2, subjects_dir=subjects_dir)[0]
# temp_labels[3] = temp_labels[3].split(2, subjects_dir=subjects_dir)[0]

# for hem in range(2):
#     PCC_p1 = temp_labels[hem]
#     for i in range(1,len(temp_labels)//2):
#         PCC_p2 = temp_labels[hem+2*i]
#         PCC_p1 = PCC_p1.__add__(PCC_p2)
#     PCC_p1.name = "IPL-{}".format(PCC_p1.hemi)
#     labels.append(PCC_p1)

# ### Orbital gyrus
# # From their figure it seems to correspond to orbital part of inferior frontal gyrus
# label_aparc_names0 = ["G_front_inf-Orbital"]
# for i in range(len(label_aparc_names0)):
#     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
#     for i2 in range(len(labels_aparc_idx)):
#         temp_labels = labels_aparc[labels_aparc_idx[i2]].copy()
#         temp_labels.name = "ORB-{}".format(temp_labels.hemi)
#         labels.append(temp_labels)

# ### Middle temporal gyrus
# # From their figure it seems to only be 1/4 of MTG at the 2nd to last caudal part
# label_aparc_names0 = ["G_temporal_middle"]
# for i in range(len(label_aparc_names0)):
#     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
#     for i2 in range(len(labels_aparc_idx)):
#         temp_labels = labels_aparc[labels_aparc_idx[i2]].copy()
#         temp_labels = temp_labels.split(4, subjects_dir=subjects_dir)[1]
#         temp_labels.name = "MTG-{}".format(temp_labels.hemi)
#         labels.append(temp_labels)

# ### Anterior middle frontal gyrus
# label_aparc_names0 = ["G_front_middle"]
# for i in range(len(label_aparc_names0)):
#     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
#     for i2 in range(len(labels_aparc_idx)):
#         temp_labels = labels_aparc[labels_aparc_idx[i2]].copy()
#         temp_labels = temp_labels.split(2, subjects_dir=subjects_dir)[1]
#         temp_labels.name = "AMFG-{}".format(temp_labels.hemi)
#         labels.append(temp_labels)

# ### Insula
# label_aparc_names0 = ["G_Ins_lg_and_S_cent_ins","G_insular_short"]
# temp_labels = []
# for i in range(len(label_aparc_names0)):
#     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
#     for i2 in range(len(labels_aparc_idx)):
#         temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())
# for hem in range(2):
#     PCC_p1 = temp_labels[hem]
#     for i in range(1,len(temp_labels)//2):
#         PCC_p2 = temp_labels[hem+2*i]
#         PCC_p1 = PCC_p1.__add__(PCC_p2)
#     PCC_p1.name = "INS-{}".format(PCC_p1.hemi)
#     labels.append(PCC_p1)

# ### (Dorsal) Anterior Cingulate Cortex
# label_aparc_names0 = ["G_and_S_cingul-Ant"]
# temp_labels = []
# for i in range(len(label_aparc_names0)):
#     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
#     for i2 in range(len(labels_aparc_idx)):
#         temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())
#         temp_labels[-1].name = "ACC-{}".format(temp_labels[-1].hemi)
# # Combine the 2 hemispheres into 1 label
# labels.append(temp_labels[0].__add__(temp_labels[1]))

# ### Supramarginal Gyrus
# label_aparc_names0 = ["G_pariet_inf-Supramar"]
# for i in range(len(label_aparc_names0)):
#     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
#     for i2 in range(len(labels_aparc_idx)):
#         temp_labels = labels_aparc[labels_aparc_idx[i2]].copy()
#         temp_labels.name = "SUP-{}".format(temp_labels.hemi)
#         labels.append(temp_labels)

# print("{} ROIs have been defined".format(len(labels)))

# # # Visualize positions
# # fig = plt.figure()
# # ax = fig.add_subplot(111, projection="3d")
# # for i in range(0,3):
# #     temp_pos = temp_labels[i].pos
# #     ax.scatter(temp_pos[:,0],temp_pos[:,1],temp_pos[:,2], marker="o", alpha=0.1)
# # # Add to plot
# # ax.scatter(labels[-1].pos[:,0],labels[-1].pos[:,1],labels[-1].pos[:,2], marker="o")

# # # Visualize the labels
# # # temp_l = labels_aparc[labels_aparc_idx[0]]
# # temp_l = labels[-2]
# # l_stc = stc[100].in_label(temp_l)
# # l_stc.vertices

# # l_stc.plot(**surfer_kwargs)

# # Save the annotation file
# with open("custom_aparc2009_Li_et_al_2022.pkl", "wb") as file:
#     pickle.dump(labels, file)

# # %% Calculate orthogonalized power envelope connectivity in source space
# # In non-interpolated channels
# # Updated 22/1 - 2021 to use delta = 1/81 and the assumption of
# # uncorrelated, equal-variance channel noise (diagonal noise covariance matrix)

# # Load
# with open("custom_aparc2009_Li_et_al_2022.pkl", "rb") as file:
#     labels = pickle.load(file)
# label_names = [label.name for label in labels]

# # Define function to estimate PEC
# def PEC_estimation(x, freq_bands, sfreq=200):
#     """
#     This function takes a source timeseries signal x and performs:
#         1. Bandpass filtering
#         2. Hilbert transform to yield analytical signal
#         3. Compute all to all connectivity by iteratively computing for each pair
#             a. Orthogonalization
#             b. Computing power envelopes by squaring the signals |x|^2
#             c. Log-transform to enhance normality
#             d. Pearson's correlation between each pair
#             e. Fisher's r-to-z transform to enhance normality
#     The code has been optimized with inspiration from MNE-Python's function:
#     mne.connectivity.envelope_correlation.
    
#     In MNE-Python versions < 0.22 there was a bug, but after the fix in 0.22
#     the MNE function is equivalent to my implementation. It does not use an
#     epsilon, but gives the same result with a RuntimeWarning about log(0).
    
#     IMPORTANT NOTE:
#         Filtering introduces artifacts at the first and last timepoint.
#     Their values are very low, more than a factor of 1e12 smaller than the others.
#     If they are not removed, they will heavily influence Pearson's
#     correlation, as it is outlier sensitive.
    
#     Inputs:
#         x - The signal in source space as np.array with shape (ROIs,Timepoints)
#         freq_bands - The frequency bands of interest as a dictionary e.g.
#                      {"alpha": [8.0, 13.0], "beta": [13.0, 30.0]}
#         sfreq - The sampling frequency in Hertz
    
#     Output:
#         The pairwise connectivity matrix
#     """
#     n_roi, n_timepoints = x.shape
#     n_freq_bands = len(freq_bands)
    
#     epsilon = 1e-100 # small value to prevent log(0) errors
    
#     # Filter the signal in the different freq bands
#     PEC_con0 = np.zeros((n_roi,n_roi,n_freq_bands))
#     for fname, frange in freq_bands.items():
#         fmin, fmax = [float(interval) for interval in frange]
#         signal_filtered = mne.filter.filter_data(x, sfreq, fmin, fmax,
#                                           fir_design="firwin", verbose=0)
#         # Filtering finite signals yields very low values at the first and
#         # last timepoint, which can create outliers, e.g. 1e-29 compared to 1e-14.
#         # Outlier-sensitive methods, like Pearson's correlation, are therefore
#         # heavily affected, so this systematic error is removed by dropping
#         # the first and last timepoint
#         signal_filtered = signal_filtered[:,1:-1]
        
#         # Hilbert transform
#         analytic_signal = scipy.signal.hilbert(signal_filtered)
#         # I will use x and y to keep track of orthogonalization
#         x0 = analytic_signal
#         # Get power envelope
#         x0_mag = np.abs(x0)
#         # Get scaled conjugate used for orthogonalization estimation
#         x0_conj_scaled = x0.conj()
#         x0_conj_scaled /= x0_mag
#         # Take square power envelope
#         PEx = np.square(x0_mag)
#         # Take log transform
#         lnPEx = np.log(PEx+epsilon)
#         # Remove mean for Pearson correlation calculation
#         lnPEx_nomean = lnPEx - np.mean(lnPEx, axis=-1, keepdims=True) # normalize each roi timeseries
#         # Get std for Pearson correlation calculation
#         lnPEx_std = np.std(lnPEx, axis=-1)
#         lnPEx_std[lnPEx_std == 0] = 1 # Prevent std = 0 problems
#         # Prepare con matrix
#         con0 = np.zeros((n_roi,n_roi))
#         for roi_r, y0 in enumerate(x0): # for each y0
#             # Calculate orthogonalized signal y with respect to x for all x
#             # Using y_ort = imag(y*x_conj/|x|)
#             # I checked the formula in temp_v3 and it works as intended
#             # I want to orthogonalize element wise for each timepoint
#             y0_ort = (y0*x0_conj_scaled).imag
#             # Here y0_ort.shape = (n_roi, n_timepoints)
#             # So y is current roi and the first axis gives each x it is orthogonalized to
#             # Take the abs to get power envelope
#             y0_ort = np.abs(y0_ort)
#             # Prevent log(0) error when calculating y_ort on y
#             y0_ort[roi_r] = 1. # this will be 0 after mean subtraction
#             # Take square power envelope
#             PEy = np.square(y0_ort) # squared power envelope
#             # Take log transform
#             lnPEy = np.log(PEy+epsilon)
#             # Remove mean for pearson correlation calculation
#             lnPEy_nomean = lnPEy - np.mean(lnPEy, axis=-1, keepdims=True)
#             # Get std for Pearson correlation calculation
#             lnPEy_std = np.std(lnPEy, axis=-1)
#             lnPEy_std[lnPEy_std == 0] = 1.
#             # Pearson correlation is expectation of X_nomean * Y_nomean for each time-series divided with standard deviations
#             PEC = np.mean(lnPEx_nomean*lnPEy_nomean, axis=-1)
#             PEC /= lnPEx_std
#             PEC /= lnPEy_std
#             con0[roi_r] = PEC
#         # The con0 connectivity matrix should be read as correlation between
#         # orthogonalized y (row number) and x (column number)
#         # It is not symmetrical, as cor(roi2_ort, roi1) is not cor(roi1_ort, roi2)
#         # To make it symmetrical the average of the absolute correlation
#         # of the 2 possibilities for each pair are taken
#         con0 = np.abs(con0)
#         con0 = (con0.T+con0)/2.
#         # Fisher's z transform - which is equivalent to arctanh
#         con0 = np.arctanh(con0)
#         # The diagonal may not be exactly 0, as a small epsilon value is used to
#         # avoid numerical errors with log(0), so it was meant to be explicitly set to 0.
#         # NOTE(review): no statement zeroing the diagonal follows here - confirm
        
#         # Save to array
#         PEC_con0[:,:,list(freq_bands.keys()).index(fname)] = con0
#     return PEC_con0

# # Prepare variables
# Freq_Bands = {"delta": [1.25, 4.0],
#               "theta": [4.0, 8.0],
#               "alpha": [8.0, 13.0],
#               "beta": [13.0, 30.0],
#               "gamma": [30.0, 49.0]}
# n_freq_bands = len(Freq_Bands)
# n_roi = len(labels)

# # Get current time
# c_time1 = time.localtime()
# c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1)
# print(c_time1)

# # PEC analysis
# PEC_data_list = [0]*n_subjects
# STCs_list = [0]*n_subjects

# # Using the inverse operator as a generator interferes with concurrent processes.
# # If it is run for multiple subjects, I run out of RAM.
# # Thus concurrent processes are used inside the for loop
# def PEC_analysis(input_args): # iterable epoch number and corresponding ts
#     i2, ts = input_args
#     # Estimate PEC
#     PEC_con0 = PEC_estimation(ts, Freq_Bands, sfreq)
#     print("Finished {} out of {} epochs".format(i2+1,n_epochs))
#     return i2, PEC_con0, ts

# for i in range(n_subjects):
#     n_epochs, n_ch, n_timepoints = source_epochs[i].get_data().shape
#     # Use different forward solutions depending on number of channels
#     cur_subject_id = Subject_id[i]
#     fwd = fwds[i]
    
#     # Assuming equal variance and no correlations, a diagonal noise covariance matrix is made
#     # using the default option of 0.2 µV std for EEG data
#     noise_cov = mne.make_ad_hoc_cov(source_epochs[i].info, None)
    
#     # Make inverse operator
#     # Using default depth parameter = 0.8 and free orientation (loose = 1)
#     inverse_operator = mne.minimum_norm.make_inverse_operator(source_epochs[i].info,
#                                                               fwd, noise_cov,
#                                                               loose = 1, depth = 0.8,
#                                                               verbose = 0)
#     src_inv = inverse_operator["src"]
#     # Compute inverse solution and retrieve time series for each label
#     # Preallocate memory
#     label_ts = np.full((n_epochs,len(labels),n_timepoints),np.nan)
#     # Define regularization
#     snr = 9 # Zhang et al, 2020 used delta = 1/81, which is the inverse squared SNR and corresponds to lambda2
#     # A for loop is used for each label due to memory issues when doing all labels at the same time
#     for l in range(len(labels)):
#         stc = mne.minimum_norm.apply_inverse_epochs(source_epochs[i],inverse_operator,
#                                                     lambda2 = 1/(snr**2),
#                                                     label = labels[l],
#                                                     pick_ori = "vector",
#                                                     return_generator=False,
#                                                     method = "MNE", verbose = 0)
#         # Use PCA to reduce the 3 orthogonal directions to 1 principal direction with max power
#         # There can be ambiguity about the orientation, thus the one that
#         # is pointing most "normal", i.e. closest to 90 degrees to the skull, is used
#         stc_pca = [0]*len(stc)
#         for ep in range(n_epochs):
#             stc_pca[ep], pca_dir = stc[ep].project(directions="pca", src=src_inv)
#         # Get mean time series for the whole label
#         temp_label_ts = mne.extract_label_time_course(stc_pca, labels[l], src_inv, mode="mean_flip",
#                                          return_generator=False, verbose=0)
#         # Save to array
#         label_ts[:,l,:] = np.squeeze(np.array(temp_label_ts))
#         print("Finished estimating STC for {} out of {} ROIs".format(l+1,len(labels)))
    
#     # Free up memory
#     del stc

#     # Prepare variables
#     sfreq=source_epochs[i].info["sfreq"]
#     n_epochs = len(source_epochs[i])
#     # Estimate the pairwise PEC for each epoch
#     PEC_con_subject = np.zeros((n_epochs,n_roi,n_roi,n_freq_bands))
#     stcs0 = np.zeros((n_epochs,n_roi,int(sfreq)*4)) # 4s epochs
#     # Make list of arguments to pass into PEC_analysis using the helper func
#     args = []
#     for i2 in range(n_epochs):
#         args.append((i2,label_ts[i2]))
    
#     with concurrent.futures.ProcessPoolExecutor(max_workers=16) as executor:
#         for i2, PEC_result, stc_result in executor.map(PEC_analysis, args): # Function and arguments
#             PEC_con_subject[i2] = PEC_result
#             stcs0[i2] = stc_result
    
#     # Save to list
#     PEC_data_list[i] = PEC_con_subject # [subject](epoch,roi,roi,freq)
#     STCs_list[i] = stcs0 # [subject][epoch,roi,timepoint]
    
#     # Print progress
#     print("Finished {} out of {} subjects".format(i+1,n_subjects))

# # Get current time
# c_time2 = time.localtime()
# c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2)
# print("Started", c_time1, "\nFinished",c_time2)

# with open(Feature_savepath+"PEC_each_epoch_drop_interpol_ch_fix_snr.pkl", "wb") as file:
#     pickle.dump(PEC_data_list, file)
# with open(Feature_savepath+"STCs_each_epoch_drop_interpol_ch_fix_snr.pkl", "wb") as file:
#     pickle.dump(STCs_list, file)

# # # # Load
# # with open(Feature_savepath+"PEC_each_epoch_drop_interpol_ch_fix_snr.pkl", "rb") as file:
# #     PEC_data_list = pickle.load(file)

# # # Load
# # with open(Feature_savepath+"STCs_each_epoch_drop_interpol_ch_fix_snr.pkl", "rb") as file:
# #     STCs_list = pickle.load(file)

# # Average over eye status
# eye_status = list(source_epochs[0].event_id.keys())
# n_eye_status = len(eye_status)
# pec_data = np.zeros((n_subjects,n_eye_status,n_roi,n_roi,n_freq_bands))
# for i in range(n_subjects):
#     # Get indices for eyes open and closed
#     EC_index = source_epochs[i].events[:,2] == 1
#     EO_index = source_epochs[i].events[:,2] == 2
#     # Average over the indices and save to array
#     pec_data[i,0] = np.mean(PEC_data_list[i][EC_index], axis=0)
#     pec_data[i,1] = np.mean(PEC_data_list[i][EO_index], axis=0)
#     # Only use the lower triangle (excluding the diagonal), as the diagonal should be 0
#     # (or very small due to numerical errors) and the matrix is symmetric
#     for f in range(n_freq_bands):
#         pec_data[i,0,:,:,f] = np.tril(pec_data[i,0,:,:,f],k=-1)
#         pec_data[i,1,:,:,f] = np.tril(pec_data[i,1,:,:,f],k=-1)

# # Also save as dataframe format for feature selection
# # Convert to Pandas dataframe
# # The dimensions will each be a column with numbers and the last column will be the actual values
# arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, pec_data.shape), indexing="ij"))) + [pec_data.ravel()])
# pec_data_df = pd.DataFrame(arr, columns = ["Subject_ID", "Eye_status", "chx", "chy", "Freq_band", "Value"])
# # Change from numerical coding to actual values
# eye_status = list(source_epochs[0].event_id.keys())
# freq_bands_name = list(Freq_Bands.keys())
# label_names = [label.name for label in labels]

# index_values = [Subject_id,eye_status,label_names,label_names,freq_bands_name]
# for col in range(len(index_values)):
#     col_name = pec_data_df.columns[col]
#     for shape in range(pec_data.shape[col]): # notice not dataframe but the array
#         pec_data_df.loc[pec_data_df.iloc[:,col] == shape,col_name]\
#         = index_values[col][shape]

# # Add group status