FeatureEstimation.py

PBF_data_df = PBF_data_df[(PBF_data_df["Brain_region"] == "Frontal")] # Frontal beta peak frequencys


# Save the dataframes
PAF_data_df.to_pickle(os.path.join(Feature_savepath,"PAF_data_FOOOF_df.pkl"))
PAF_data_df_global.to_pickle(os.path.join(Feature_savepath,"PAF_data_FOOOF_global_df.pkl"))
PTF_data_df.to_pickle(os.path.join(Feature_savepath,"PTF_data_FOOOF_df.pkl"))
PTF_data_df_global.to_pickle(os.path.join(Feature_savepath,"PTF_data_FOOOF_global_df.pkl"))
PBF_data_df.to_pickle(os.path.join(Feature_savepath,"PBF_data_FOOOF_df.pkl"))
PBF_data_df_global.to_pickle(os.path.join(Feature_savepath,"PBF_data_FOOOF_global_df.pkl"))
"""
# # Convert to Pandas dataframe (only keep exponent parameter for OOF)
# # The dimensions will each be a column with numbers and the last column will be the actual values
# ori = OOF_data[:,:,:,1]
# arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, ori.shape), indexing="ij"))) + [ori.ravel()])
# PAF_data_df = pd.DataFrame(arr, columns = ["Subject_ID", "Eye_status", "Channel", "Value"])
# # Change from numerical coding to actual values

# index_values = [Subject_id,eye_status,ch_names]
# temp_df = PAF_data_df.copy() # make temp df to not sequential overwrite what is changed
# for col in range(len(index_values)):
#     col_name = PAF_data_df.columns[col]
#     for shape in range(ori.shape[col]):
#         temp_df.loc[PAF_data_df.iloc[:,col] == shape,col_name]\
#         = index_values[col][shape]
# OOF_data_df = temp_df # replace original df 

# # Add group status
# Group_status = np.array(["CTRL"]*len(OOF_data_df["Subject_ID"]))
# Group_status[np.array([i in cases for i in OOF_data_df["Subject_ID"]])] = "PTSD"
# # Add to dataframe
# OOF_data_df.insert(3, "Group_status", Group_status)

# # Regional OOF
# OOF_data_df.insert(4, "Brain_region", list(Brain_region)*int(PAF_data_df.shape[0]/len(Brain_region)))

# # Save the dataframes
# OOF_data_df.to_pickle(os.path.join(Feature_savepath,"OOF_data_FOOOF_df.pkl"))
"""
"""
# %% Microstate analysis
# The clustering function takes the data as a numpy array (n_t, n_ch)
# The data is already re-referenced to common average
# Variables for the clustering function are extracted from the first subject
sfreq = final_epochs[0].info["sfreq"]
eye_status = list(final_epochs[0].event_id.keys())
n_eye_status = len(eye_status)
ch_names = final_epochs[0].info["ch_names"]
n_channels = len(ch_names)
locs = np.zeros((n_channels,2)) # xy coordinates of the electrodes
for c in range(n_channels):
    locs[c] = final_epochs[0].info["chs"][c]["loc"][0:2]

# The epochs are transformed to numpy arrays
micro_data = [] # per subject: all samples, shape (n_epochs*n_times, n_channels)
EC_micro_data = [] # per subject: samples from epochs with event id 1
EO_micro_data = [] # per subject: samples from epochs with event id 2
for i in range(n_subjects):
    # Transform data to correct shape
    epoch_data = final_epochs[i].get_data() # (epochs, channels, times)
    arr_shape = epoch_data.shape
    # Swap ch and time axis, then combine epochs and times into one sample axis
    micro_data.append(
        epoch_data.swapaxes(1,2).reshape(arr_shape[0]*arr_shape[2],arr_shape[1]))
    # Get per-epoch masks for eyes closed (1) and eyes open (2)
    EC_index = final_epochs[i].events[:,2] == 1
    EO_index = final_epochs[i].events[:,2] == 2
    # Expand the per-epoch masks to per-sample masks.
    # The repeat count is the number of samples per epoch taken from the data
    # itself: it is an integer (4*sfreq is a float, which np.repeat rejects)
    # and it always matches the reshape above.
    EC_index = np.repeat(EC_index,arr_shape[2])
    EO_index = np.repeat(EO_index,arr_shape[2])
    # Save data where it is divided into eye status
    EC_micro_data.append(micro_data[i][EC_index])
    EO_micro_data.append(micro_data[i][EO_index])

# Global explained variance and Cross-validation criterion is used to determine number of microstates
# First all data is concatenated to find the optimal number of maps for all data
micro_data_all = np.vstack(micro_data)

# Determine the number of clusters
# I use a slightly modified kmeans function which returns the cv_min
"""
global_gev = []
cv_criterion = []
for n_maps in range(2,7):
    maps, L, gfp_peaks, gev, cv_min = kmeans_return_all(micro_data_all, n_maps)
    global_gev.append(np.sum(gev))
    cv_criterion.append(cv_min)
# Save run results
cluster_results = np.array([global_gev,cv_criterion])
np.save("Microstate_n_cluster_test_results.npy", cluster_results) # (gev/cv_crit, n_maps from 2 to 6)

#cluster_results = np.load("Microstate_n_cluster_test_results.npy")
#global_gev = cluster_results[0,:]
#cv_criterion = cluster_results[1,:]

# Evaluate best n_maps
plt.figure()
plt.plot(np.linspace(2,6,len(cv_criterion)),(cv_criterion/np.sum(cv_criterion)), label="CV Criterion")
plt.plot(np.linspace(2,6,len(cv_criterion)),(global_gev/np.sum(global_gev)), label="GEV")
plt.legend()
plt.ylabel("Normalized to total")
"""
# The lower CV the better.
# But the higher GEV the better.
# Based on the plots and the recommendation by von Wegner & Laufs 2018
# we used 5 microstates

# In order to compare between groups, the microstates are fixed by clustering on data from both groups
# NOTE(review): an earlier comment here said n_maps was increased "from 4 to 6"
# because of unstable maps, but the value actually used is 5 - confirm.
n_maps = 5
mode = ["aahc", "kmeans", "kmedoids", "pca", "ica"][1]

# K-means is stochastic, thus it is run multiple times in order to find the maps with highest GEV
# Each K-means is run 5 times and best map is chosen. But this is done 10 times more, so in total 50 times!
n_run = 10
# One entry per run: [maps, m_labels, gfp_peaks, gev], each holding [EC, EO]
microstate_cluster_results = []

# Parallel processing can only be implemented by ensuring different seeds
# Otherwise the iteration would be the same.
# However the k-means already use parallel processing so making outer loop with
# concurrent processes make it use too many processors
# Get current time
c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", time.localtime())
print(c_time1)
# Change datatype due to error with computational power in clustering
EC_down = np.array(EC_micro_data, dtype = object)
#EC_down = EC_down.astype('float32')
EO_down = np.array(EO_micro_data, dtype = object)
#EO_down = EO_down.astype('float32')

for r in range(n_run):
    maps = [0]*2
    m_labels = [0]*2
    gfp_peaks = [0]*2
    gev = [0]*2
    # Index 0 is eyes closed, index 1 is eyes open.
    # The data is stacked inside the loop so only one stacked copy is alive at a time.
    for counter, eye_data in enumerate((EC_down, EO_down)):
        maps_, x_, gfp_peaks_, gev_ = clustering(
            np.vstack(eye_data), sfreq, ch_names, locs, mode, n_maps, doplot=False) # doplot=True is bugged
        maps[counter] = maps_
        m_labels[counter] = x_
        gfp_peaks[counter] = gfp_peaks_
        gev[counter] = gev_

    microstate_cluster_results.append([maps, m_labels, gfp_peaks, gev])
    print("Finished {} out of {}".format(r+1, n_run))

# Get current time
c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", time.localtime())
print("Started", c_time1, "\nFinished",c_time2)

# Save the results
# os.path.join is used so the file ends up inside Feature_savepath even when
# the path has no trailing separator
with open(os.path.join(Feature_savepath,"Microstate_5_maps_10x5_k_means_results.pkl"), "wb") as file:
    pickle.dump(microstate_cluster_results, file)

# # Load
# with open(Feature_savepath+"Microstate_4_maps_10x5_k_means_results.pkl", "rb") as file:
#     microstate_cluster_results = pickle.load(file)

# Find the best maps (highest total GEV across all the K-means runs)
# Each run is stored as [maps, m_labels, gfp_peaks, gev] and each of those as [EC, EO].
# The totals are computed per run instead of through np.array(microstate_cluster_results):
# the nested result list is ragged and cannot be converted to a regular array.
EC_total_gevs = np.array([np.sum(run[3][0]) for run in microstate_cluster_results])
EO_total_gevs = np.array([np.sum(run[3][1]) for run in microstate_cluster_results])
Best_EC_idx = np.argmax(EC_total_gevs)
Best_EO_idx = np.argmax(EO_total_gevs)
# Update the variables for the best maps (EC and EO may come from different runs)
maps = [microstate_cluster_results[Best_EC_idx][0][0],microstate_cluster_results[Best_EO_idx][0][1]]
m_labels = [microstate_cluster_results[Best_EC_idx][1][0],microstate_cluster_results[Best_EO_idx][1][1]]
gfp_peaks = [microstate_cluster_results[Best_EC_idx][2][0],microstate_cluster_results[Best_EO_idx][2][1]]
gev = [microstate_cluster_results[Best_EC_idx][3][0],microstate_cluster_results[Best_EO_idx][3][1]]

# Show the selected maps of each eye condition (one figure per condition, one panel per map)
plt.style.use('default')
labels = ["EC", "EO"] #Eyes-closed, Eyes-open
for i, condition in enumerate(labels):
    fig, axarr = plt.subplots(1, n_maps, figsize=(20,5))
    fig.patch.set_facecolor('white')
    for imap in range(n_maps):
        ax = axarr[imap]
        topography = maps[i][imap,:]
        mne.viz.plot_topomap(topography, pos = final_epochs[0].info, axes = ax) # plot
        ax.set_title("GEV: {:.2f}".format(gev[i][imap]), fontsize=16, fontweight="bold") # title
    fig.suptitle("Microstates: {}".format(condition), fontsize=20, fontweight="bold")

# Manual re-ordering of the maps
# K-means is randomly initiated, so these orders must be revised every time new clusters are made!
# order[i][k] is the current map index that should end up at position k
order = [[3,0,1,2,4], # EC
         [3,1,0,2,4]] # EO
for i in range(len(order)):
    new_order = order[i]
    maps[i] = maps[i][new_order,:] # re-order maps
    gev[i] = gev[i][new_order] # re-order GEV
    # Lookup table from old label to new label
    dic0 = {old_label: new_label for new_label, old_label in enumerate(new_order)}
    m_labels[i][:] = [dic0.get(n, n) for n in m_labels[i]] # re-order labels (in place)

# The maps seem to be correlated both negatively and positively (see spatial correlation plots)
# So the sign of a map reflects relative differences between areas, not absolute polarity
# The sign of selected maps is therefore flipped for the visualization
sign_swap = [[1,-1,1,1,1],[1,1,1,-1,1]]
for i in range(len(order)):
    eye_signs = sign_swap[i]
    for m in range(n_maps):
        maps[i][m] *= eye_signs[m]

# Plot the re-ordered maps with the total GEV in the title and save one PNG per eye condition
save_path = "/home/s200431/Figures/Microstates"
labels = ["EC", "EO"]
for i in range(len(labels)):
    fig, axarr = plt.subplots(1, n_maps, figsize=(20,5))
    fig.patch.set_facecolor('white')
    for imap in range(n_maps):
        ax = axarr[imap]
        mne.viz.plot_topomap(maps[i][imap,:], pos = final_epochs[0].info, axes = ax) # plot
        ax.set_title("GEV: {:.2f}".format(gev[i][imap]), fontsize=16, fontweight="bold") # title
    figure_title = "Microstates: {} - Total GEV: {:.2f}".format(labels[i],sum(gev[i]))
    fig.suptitle(figure_title, fontsize=20, fontweight="bold")
    # Save the figure
    figure_name = "Microstates_{}".format(labels[i]) + ".png"
    fig.savefig(os.path.join(save_path,figure_name))

# Calculate spatial correlation between maps and actual data points (topography)
# The sign of a map may be flipped (see sign_swap) so the correlation is positive
# By default the clustering code looks for the highest squared spatial correlation,
# i.e. regardless of sign
# Thus, depending on the random initiation point, a map might come out with opposite sign
# Switch plotting style for the distribution plots below
plt.style.use('ggplot')
def spatial_correlation(data, maps):
    """Return the signed spatial correlation between every sample and every map.

    Parameters
    ----------
    data : ndarray, shape (n_t, n_ch)
        One topography per row. Each row is re-referenced to its own mean
        on a copy; the input array is not modified.
    maps : ndarray, shape (n_maps, n_ch)
        Microstate maps. They are used as given (not re-centred), so the
        result equals the Pearson correlation only when every map has zero
        mean across channels.

    Returns
    -------
    C : ndarray, shape (n_t, n_maps)
        C[t, k] is the correlation between sample t and map k. The sign is
        kept (it is not squared). Samples with zero standard deviation
        across channels yield nan/inf because of the division.
    """
    n_ch = data.shape[1]
    # Remove the mean across channels of every sample
    data = data - data.mean(axis=1, keepdims=True)

    # Standard deviation across channels of every sample (global field power)
    gfp = np.std(data, axis=1)

    # Covariance-like dot product, normalised by both standard deviations
    C = np.dot(data, maps.T)
    C /= (n_ch*np.outer(gfp, np.std(maps, axis=1)))

    return C

# Stack the per-subject arrays directly: subjects have different numbers of
# samples, so wrapping the list in np.array() first would build a ragged array
C_EC = spatial_correlation(np.vstack(EC_micro_data), maps[0])
C_EO = spatial_correlation(np.vstack(EO_micro_data), maps[1])
C = [C_EC, C_EO] # [EC, EO], each (n_samples, n_maps)

# Plot the distribution of spatial correlation for each label and each map
# Row = label assigned to the sample, column = map the sample is correlated with
# NOTE(review): sns.distplot is deprecated in recent seaborn releases;
# consider histplot/kdeplot when upgrading.
labels = ["EC", "EO"]
for i in range(len(labels)):
    fig, axarr = plt.subplots(n_maps, n_maps, figsize=(16,16))
    for Lmap in range(n_maps):
        for Mmap in range(n_maps):
            sns.distplot(C[i][m_labels[i] == Lmap,Mmap], ax = axarr[Lmap,Mmap])
            axarr[Lmap,Mmap].set_xlabel("Spatial correlation")
    plt.suptitle("Distribution of spatial correlation_{}".format(labels[i]), fontsize=20, fontweight="bold")
    # Add common x and y axis labels by making one big axis
    fig.add_subplot(111, frameon=False)
    # tick_params expects booleans; the string "off" is truthy and would leave the ticks visible
    plt.tick_params(labelcolor="none", top=False, bottom=False, left=False, right=False) # hide tick labels and ticks
    plt.grid(False) # remove global grid
    plt.xlabel("Microstate number", labelpad=20)
    plt.ylabel("Label number", labelpad=10)
    fig.savefig(os.path.join(save_path,str("Microstates_Spatial_Correlation_Label_State_{}".format(labels[i]) + ".png")))

# Plot the distribution of spatial correlation for all data and each map
# NOTE(review): sns.distplot is deprecated in recent seaborn releases;
# consider histplot/kdeplot when upgrading.
labels = ["EC", "EO"]
for i in range(len(labels)):
    fig, axarr = plt.subplots(1,n_maps, figsize=(20,5))
    for imap in range(n_maps):
        sns.distplot(C[i][:,imap], ax = axarr[imap])
        # Label the panel that was just drawn (plt.xlabel would only hit the current axes)
        axarr[imap].set_xlabel("Spatial correlation")
    plt.suptitle("Distribution of spatial correlation", fontsize=20, fontweight="bold")
    # Add common x and y axis labels by making one big axis
    fig.add_subplot(111, frameon=False)
    # tick_params expects booleans; the string "off" is truthy and would leave the ticks visible
    plt.tick_params(labelcolor="none", top=False, bottom=False, left=False, right=False) # hide tick labels and ticks
    plt.grid(False) # remove global grid
    plt.xlabel("Microstate number", labelpad=20)
    plt.ylabel("Label number")

# Prepare for calculation of transition matrix
# The transition function was modified, so it takes the list argument gap_index
# gap_index should have the indices right before gaps in data
# This block also splits the pooled label sequences (m_labels) back into
# per-subject sequences (micro_labels).

# Gaps: Between dropped epochs, trials (eo/ec) and subjects
# The between subjects gaps is removed by dividing the data into subjects
n_trials = 5 # NOTE(review): not used in this block
n_epoch_length = final_epochs[0].get_data().shape[2] # samples per epoch

micro_labels = [] # per subject: [EC labels, EO labels]
micro_subject_EC_idx = [0] # cumulative sample offsets of each subject in the pooled EC labels
micro_subject_EO_idx = [0] # cumulative sample offsets of each subject in the pooled EO labels
gaps_idx = [] # per subject: [EC gap indices, EO gap indices] (dropped epochs + trial borders)
gaps_trials_idx = [] # per subject: [EC trial-border indices, EO trial-border indices]
for i in range(n_subjects):
    # Get indices for subject
    # The pooled labels were made from the vertically stacked per-subject data,
    # so the subject's slice has as many samples as its data array
    micro_subject_EC_idx.append(micro_subject_EC_idx[i]+EC_micro_data[i].shape[0])
    temp_EC = m_labels[0][micro_subject_EC_idx[i]:micro_subject_EC_idx[i+1]]
    # Get labels for subject i EO
    micro_subject_EO_idx.append(micro_subject_EO_idx[i]+EO_micro_data[i].shape[0])
    temp_EO = m_labels[1][micro_subject_EO_idx[i]:micro_subject_EO_idx[i+1]]
    # Save
    micro_labels.append([temp_EC,temp_EO]) # (subject, eye)
    
    # Get indices with gaps
    # Dropped epochs are first considered
    # Each epoch last 4s, which correspond to 2000 samples and a trial is 15 epochs - dropped epochs
    # Get epochs for each condition
    # Row i of Drop_epochs_df is read from column 1 onwards as dropped epoch numbers
    # (presumably column 0 is the subject id - verify)
    # NOTE(review): epoch number 75 satisfies both "<= 75" (EC) and ">= 75" (EO).
    # The trial filters below treat epochs as 0-indexed with EC = 0..74 and
    # EO = 75..149, so "< 75" / "< 150" may have been intended - confirm.
    EC_drop_epochs = Drop_epochs_df.iloc[i,1:][Drop_epochs_df.iloc[i,1:] <= 75].to_numpy()
    EO_drop_epochs = Drop_epochs_df.iloc[i,1:][(Drop_epochs_df.iloc[i,1:] >= 75)&
                                            (Drop_epochs_df.iloc[i,1:] <= 150)].to_numpy()
    # Get indices for the epochs for EC that were dropped and correct for changing index due to drop
    EC_drop_epochs_gaps_idx = []
    counter = 0 # number of dropped epochs already passed
    for d in range(len(EC_drop_epochs)):
        drop_epoch_number = EC_drop_epochs[d]
        Drop_epoch_idx = (drop_epoch_number-counter)*n_epoch_length # counter subtracted as the drop index is before dropped
        EC_drop_epochs_gaps_idx.append(Drop_epoch_idx-1) # -1 for point just before gap
        counter += 1
    # Negative index might occur if the first epochs were removed. This index is not needed for transition matrix
    # At most one leading -1 is removed per iteration
    if len(EC_drop_epochs_gaps_idx) > 0:
        for d in range(len(EC_drop_epochs_gaps_idx)): # check all, e.g. if epoch 0,1,2,3 are dropped then all should be caught
            if EC_drop_epochs_gaps_idx[0] == -1:
                EC_drop_epochs_gaps_idx = EC_drop_epochs_gaps_idx[1:len(EC_drop_epochs)]
    
    # Get indices for the epochs for EO that were dropped and correct for changing index due to drop
    EO_drop_epochs_gaps_idx = []
    counter = 0
    for d in range(len(EO_drop_epochs)):
        drop_epoch_number = EO_drop_epochs[d]-75 # shift so the first EO epoch is number 0
        Drop_epoch_idx = (drop_epoch_number-counter)*n_epoch_length # counter subtracted as the drop index is before dropped
        EO_drop_epochs_gaps_idx.append(Drop_epoch_idx-1) # -1 for point just before gap
        counter += 1
    # Negative index might occur if the first epoch was removed. This index is not needed for transition matrix
    if len(EO_drop_epochs_gaps_idx) > 0:
        for d in range(len(EO_drop_epochs_gaps_idx)): # check all, e.g. if epoch 0,1,2,3 are dropped then all should be caught
            if EO_drop_epochs_gaps_idx[0] == -1:
                EO_drop_epochs_gaps_idx = EO_drop_epochs_gaps_idx[1:len(EO_drop_epochs)]
    
    # Gaps between trials
    Trial_indices = [0, 15, 30, 45, 60, 75] # all the indices for start and end of the 5 trials
    EC_trial_gaps_idx = []
    EO_trial_gaps_idx = []
    counter_EC = 0 # cumulative number of dropped EC epochs up to and including trial t
    counter_EO = 0 # cumulative number of dropped EO epochs up to and including trial t
    for t in range(len(Trial_indices)-2): # -2 as start and end is not used in transition matrix
        temp_drop = EC_drop_epochs[(EC_drop_epochs >= Trial_indices[t])&
                            (EC_drop_epochs < Trial_indices[t+1])]
        # Correct the trial id for any potential drops within that trial
        counter_EC += len(temp_drop)
        trial_idx_corrected_for_drops = 15*(t+1)-counter_EC
        EC_trial_gaps_idx.append((trial_idx_corrected_for_drops*n_epoch_length)-1) # multiply id with length of epoch and subtract 1
        
        temp_drop = EO_drop_epochs[(EO_drop_epochs >= Trial_indices[t]+75)&
                            (EO_drop_epochs < Trial_indices[t+1]+75)]
        # Correct the trial id for any potential drops within that trial
        counter_EO += len(temp_drop)
        trial_idx_corrected_for_drops = 15*(t+1)-counter_EO
        EO_trial_gaps_idx.append((trial_idx_corrected_for_drops*n_epoch_length)-1) # multiply id with length of epoch and subtract 1
    
    # Concatenate all drop indices (sorted, duplicates removed)
    gaps_idx.append([np.unique(np.sort(EC_drop_epochs_gaps_idx+EC_trial_gaps_idx)),
                    np.unique(np.sort(EO_drop_epochs_gaps_idx+EO_trial_gaps_idx))])
    # Make one with trial gaps only for use in LRTC analysis
    gaps_trials_idx.append([EC_trial_gaps_idx,EO_trial_gaps_idx])

# Save the gap idx files
# The number of gaps differs between subjects and eye conditions, so gaps_idx
# is ragged. It is stored as an explicit (n_subjects, 2) object array;
# load it with np.load(..., allow_pickle=True).
gaps_idx_arr = np.empty((len(gaps_idx), 2), dtype=object)
for s, (EC_gaps, EO_gaps) in enumerate(gaps_idx):
    gaps_idx_arr[s, 0] = EC_gaps
    gaps_idx_arr[s, 1] = EO_gaps
np.save("Gaps_idx.npy",gaps_idx_arr)
# The trial gaps always have the same length, so a regular array is fine
np.save("Gaps_trials_idx.npy",np.array(gaps_trials_idx))

# %% Calculate microstate features
# Symbol distribution (also called ratio of time covered RTT)
# Number of occurrences and mean duration (in samples) of each microstate
# Transition matrix
# Shannon entropy
EC_p_hat = p_empirical(m_labels[0], n_maps)
EO_p_hat = p_empirical(m_labels[1], n_maps)
# Sanity check: Overall between EC and EO

microstate_time_data = np.zeros((n_subjects,n_eye_status,n_maps))
microstate_transition_data = np.zeros((n_subjects,n_eye_status,n_maps,n_maps))
microstate_entropy_data = np.zeros((n_subjects,n_eye_status))
microstate_orrurence_data = np.zeros((n_subjects,n_eye_status,n_maps))
microstate_mean_duration_data = np.zeros((n_subjects,n_eye_status,n_maps))
for i in range(n_subjects):
    # micro_labels[i] holds the label sequences of subject i as [EC, EO]
    for e, eye_labels in enumerate(micro_labels[i]):
        int_labels = np.asarray(eye_labels).astype(int)

        # Ratio of time covered
        microstate_time_data[i,e,:] = p_empirical(eye_labels, n_maps)

        # Number of occurrences for each microstate = number of runs of that label.
        # A run starts at the first sample and wherever the label changes, so the
        # final run is counted too (counting only label changes would miss it).
        run_starts = np.ones(int_labels.shape, dtype=bool)
        run_starts[1:] = int_labels[1:] != int_labels[:-1]
        occurrences = np.bincount(int_labels[run_starts], minlength=n_maps)
        microstate_orrurence_data[i,e,:] = occurrences

        # Mean duration of each microstate in samples: total samples / number of runs.
        # States that never occur get NaN instead of a division by zero.
        samples_per_state = np.bincount(int_labels, minlength=n_maps)
        mean_duration = np.full(n_maps, np.nan)
        np.divide(samples_per_state, occurrences, out=mean_duration, where=occurrences > 0)
        microstate_mean_duration_data[i,e,:] = mean_duration

        # Transition matrix
        # Variant that takes the gaps into account (currently disabled):
        # T_empirical(eye_labels, n_maps, gaps_idx[i][e])
        microstate_transition_data[i,e,:,:] = T_empirical(eye_labels, n_maps)

        # Shannon entropy as ratio of max entropy
        microstate_entropy_data[i,e] = H_1(eye_labels, n_maps)/max_entropy(n_maps)

# Save transition data
np.save(os.path.join(Feature_savepath,"microstate_transition_data.npy"), microstate_transition_data)
# Convert transition data to dataframe for further processing with other features
# Transition matrix should be read as probability of row to column
microstate_transition_data_arr =\
     microstate_transition_data.reshape((n_subjects,n_eye_status,n_maps*n_maps)) # flatten n_maps x n_maps matrix to 1D
# Labels in the same row-major order as the flattened matrix: "M1->M1", "M1->M2", ...
# (generated so every label uses "->" and the list follows n_maps)
transition_info = ["M{}->M{}".format(src+1, dst+1)
                   for src in range(n_maps) for dst in range(n_maps)]

# Long format: one row per (subject, eye status, transition).
# The labels are built from the cartesian product of the three axes, which
# iterates in the same order as ravel() (last axis fastest). This avoids
# renaming numeric codes in place, where a new label can collide with a code
# that is still to be renamed and where strings are written into float columns.
eye_status = list(final_epochs[0].event_id.keys())
index_values = [Subject_id,eye_status,transition_info]
long_index = pd.MultiIndex.from_product(index_values, names=["Subject_ID", "Eye_status", "Transition"])
microstate_transition_data_df = pd.DataFrame(
    {"Value": microstate_transition_data_arr.ravel()}, index=long_index).reset_index()

# Add group status: subjects listed in cases are PTSD, all others CTRL
Group_status = np.where([i in cases for i in microstate_transition_data_df["Subject_ID"]], "PTSD", "CTRL")
# Add to dataframe
microstate_transition_data_df.insert(2, "Group_status", Group_status)

# Save df
microstate_transition_data_df.to_pickle(os.path.join(Feature_savepath,"microstate_transition_data_df.pkl"))

# Convert time covered data to Pandas dataframe
# Convert occurrence data to Pandas dataframe
# Convert mean duration data to Pandas dataframe
# Long format: one row per (subject, eye status, microstate) plus the value
eye_status = list(final_epochs[0].event_id.keys())
microstates = list(range(1, n_maps+1)) # microstates are numbered from 1

# The labels are built from the cartesian product of the three axes, which
# iterates in the same order as ravel() (last axis fastest). This avoids
# renaming numeric codes in place, where a new label can collide with a code
# that is still to be renamed and where strings are written into float columns.
index_values = [Subject_id,eye_status,microstates]
long_index = pd.MultiIndex.from_product(index_values, names=["Subject_ID", "Eye_status", "Microstate"])
microstate_time_df = pd.DataFrame(
    {"Value": microstate_time_data.ravel()}, index=long_index).reset_index()
microstate_orrurence_df = pd.DataFrame(
    {"Value": microstate_orrurence_data.ravel()}, index=long_index).reset_index()
microstate_mean_duration_df = pd.DataFrame(
    {"Value": microstate_mean_duration_data.ravel()}, index=long_index).reset_index()

# Add group status: subjects listed in cases are PTSD, all others CTRL
Group_status = np.where([i in cases for i in microstate_time_df["Subject_ID"]], "PTSD", "CTRL")
Group_status_2 = np.where([i in cases for i in microstate_orrurence_df["Subject_ID"]], "PTSD", "CTRL")
Group_status_3 = np.where([i in cases for i in microstate_mean_duration_df["Subject_ID"]], "PTSD", "CTRL")

# Add to dataframe
microstate_time_df.insert(2, "Group_status", Group_status)
microstate_orrurence_df.insert(2, "Group_status", Group_status_2)
microstate_mean_duration_df.insert(2, "Group_status", Group_status_3)

# Save df
microstate_time_df.to_pickle(os.path.join(Feature_savepath,"microstate_time_df.pkl"))
microstate_orrurence_df.to_pickle(os.path.join(Feature_savepath,"microstate_orrurence_df.pkl"))
microstate_mean_duration_df.to_pickle(os.path.join(Feature_savepath,"microstate_mean_duration_df.pkl"))

# Transition data - group means
# Boolean masks over Subject_id: subjects listed in cases are PTSD, the rest CTRL
PTSD_idx = np.array([i in cases for i in Subject_id])
CTRL_idx = np.array([i not in cases for i in Subject_id])
n_groups = 2

# Average the transition matrices over subjects; group 0 = PTSD, group 1 = CTRL
microstate_transition_data_mean = np.zeros((n_groups,n_eye_status,n_maps,n_maps))
for group_number, group_mask in enumerate((PTSD_idx, CTRL_idx)):
    group_transitions = microstate_transition_data[group_mask,:,:,:]
    microstate_transition_data_mean[group_number,:,:,:] = np.mean(group_transitions, axis=0)

# Convert entropy data to Pandas dataframe
# Long format: one row per (subject, eye status) plus the value
eye_status = list(final_epochs[0].event_id.keys())

# The labels are built from the cartesian product of the two axes, which
# iterates in the same order as ravel() (last axis fastest). This avoids
# renaming numeric codes in place, where a new label can collide with a code
# that is still to be renamed and where strings are written into float columns.
index_values = [Subject_id,eye_status]
long_index = pd.MultiIndex.from_product(index_values, names=["Subject_ID", "Eye_status"])
microstate_entropy_df = pd.DataFrame(
    {"Value": microstate_entropy_data.ravel()}, index=long_index).reset_index()

# Add group status: subjects listed in cases are PTSD, all others CTRL
Group_status = np.where([i in cases for i in microstate_entropy_df["Subject_ID"]], "PTSD", "CTRL")
# Add to dataframe
microstate_entropy_df.insert(2, "Group_status", Group_status)
# Add dummy variable for re-using plot code
dummy_variable = ["Entropy"]*len(Group_status)
microstate_entropy_df.insert(3, "Measurement", dummy_variable)

# Save df
microstate_entropy_df.to_pickle(os.path.join(Feature_savepath,"microstate_entropy_df.pkl"))

# # %% Long-range temporal correlations (LRTC)
# """
# See Hardstone et al, 2012
# Hurst exponent estimation steps:
#     1. Preprocess
#     2. Band-pass filter for frequency band of interest
#     3. Hilbert transform to obtain amplitude envelope
#     4. Perform DFA
#         4.1 Compute cumulative sum of time series to create signal profile
#         4.2 Define set of window sizes (see below)
#         4.3 Remove the linear trend using least-squares for each window
#         4.4 Calculate standard deviation for each window and take the mean
#         4.5 Plot fluctuation function (Standard deviation) as function
#             for all window sizes, on double logarithmic scale
#         4.6 The DFA exponent alpha correspond to Hurst exponent
#             f(L) = sd = L^alpha (with alpha as linear coefficient in log plot)

# If 0 < alpha < 0.5: The process exhibits anti-correlations
# If 0.5 < alpha < 1: The process exhibits positive correlations
# If alpha = 0.5: The process is indistinguishable from a random process
# If 1.0 < alpha < 2.0: The process is non-stationary. H = alpha - 1

# Window sizes should be equally spaced on a logarithmic scale
# Sizes should be at least 4 samples and up to 10% of total signal length
# Filters can influence neighboring samples, thus filters should be tested
# on white noise to estimate window sizes that are unaffected by filters

# filter_length=str(2*1/fmin)+"s" # cannot be used with default transition bandwidth

# """
# # From simulations with white noise I determined window size thresholds for the 5 frequency bands:
# thresholds = [7,7,7,6.5,6.5]
# # And their corresponding log step sizes
# with open("LRTC_log_win_sizes.pkl", "rb") as filehandle:
#     log_win_sizes = pickle.load(filehandle)

# # Variables for the the different conditions
# # Sampling frequency
# sfreq = final_epochs[0].info["sfreq"]
# # Channels
# ch_names = final_epochs[0].info["ch_names"]
# n_channels = len(ch_names)
# # Frequency
# Freq_Bands = {"delta": [1.25, 4.0],
#               "theta": [4.0, 8.0],
#               "alpha": [8.0, 13.0],
#               "beta": [13.0, 30.0],
#               "gamma": [30.0, 49.0]}
# n_freq_bands = len(Freq_Bands)
# # Eye status
# eye_status = list(final_epochs[0].event_id.keys())
# n_eye_status = len(eye_status)

# ### Estimating Hurst exponent for the data
# # The data should be re-referenced to common average (Already done)

# # Data are transformed to numpy arrays
# # Then divided into EO and EC and further into each of the 5 trials
# # So DFA is estimated for each trial separately, which was concluded from simulations
# gaps_trials_idx = np.load("Gaps_trials_idx.npy") # re-used from microstate analysis
# n_trials = 5

# H_data = []
# for i in range(n_subjects):
#     # Transform data to correct shape
#     temp_arr = final_epochs[i].get_data() # get data
#     arr_shape = temp_arr.shape # get shape
#     temp_arr = temp_arr.swapaxes(1,2) # swap ch and time axis
#     temp_arr = temp_arr.reshape(arr_shape[0]*arr_shape[2],arr_shape[1]) # reshape by combining epochs and times
#     # Get indices for eyes open and closed
#     EC_index = final_epochs[i].events[:,2] == 1
#     EO_index = final_epochs[i].events[:,2] == 2
#     # Repeat with 4s * sample frequency to correct for concatenation of times and epochs
#     EC_index = np.repeat(EC_index,4*sfreq)
#     EO_index = np.repeat(EO_index,4*sfreq)
#     # Divide into eye status
#     EC_data = temp_arr[EC_index]
#     EO_data = temp_arr[EO_index]
#     # Divide into trials
#     EC_gap_idx = np.array([0]+list(gaps_trials_idx[i,0])+[len(EC_data)])
#     EO_gap_idx = np.array([0]+list(gaps_trials_idx[i,1])+[len(EO_data)])
    
#     EC_trial_data = []
#     EO_trial_data = []
#     for t in range(n_trials):
#         EC_trial_data.append(EC_data[EC_gap_idx[t]:EC_gap_idx[t+1]])
#         EO_trial_data.append(EO_data[EO_gap_idx[t]:EO_gap_idx[t+1]])
        
#     # Save data
#     H_data.append([EC_trial_data,EO_trial_data]) # output [subject][eye][trial][time,ch]

# # Calculate H for each subject, eye status, trial, freq and channel
# H_arr = np.zeros((n_subjects,n_eye_status,n_trials,n_channels,n_freq_bands))
# w_len = [len(ele) for ele in log_win_sizes]
# DFA_arr = np.empty((n_subjects,n_eye_status,n_trials,n_channels,n_freq_bands,2,np.max(w_len)))
# DFA_arr[:] = np.nan

# # Get current time
# c_time1 = time.localtime()
# c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1)
# print("Started",c_time1)

# # Nolds already uses all cores, so adding multiprocessing would make it slower
# # Warning occurs when R2 is estimated during detrending - but R2 is not used
# warnings.simplefilter("ignore")
# for i in range(n_subjects):
#     # Pre-allocate memory
#     DFA_temp = np.empty((n_eye_status,n_trials,n_channels,n_freq_bands,2,np.max(w_len)))
#     DFA_temp[:] = np.nan
#     H_temp = np.empty((n_eye_status,n_trials,n_channels,n_freq_bands))
#     for e in range(n_eye_status):
#         for trial in range(n_trials):
#             for c in range(n_channels):
#                 # Get the data
#                 signal = H_data[i][e][trial][:,c]
                
#                 counter = 0 # prepare counter
#                 for fmin, fmax in Freq_Bands.values():
#                     # Filter for each freq band
#                     signal_filtered = mne.filter.filter_data(signal, sfreq=sfreq, verbose=0,
#                                                   l_freq=fmin, h_freq=fmax)
#                     # Hilbert transform
#                     analytic_signal = scipy.signal.hilbert(signal_filtered)
#                     # Get Amplitude envelope
#                     # np.abs is the same as np.linalg.norm, i.e. the length for complex input which is the amplitude
#                     ampltude_envelope = np.abs(analytic_signal)
#                     # Perform DFA using predefined window sizes from simulation
#                     a, dfa_data = nolds.dfa(ampltude_envelope,
#                                             nvals=np.exp(log_win_sizes[counter]).astype("int"),
#                                             debug_data=True)
#                     # Save DFA results
#                     DFA_temp[e,trial,c,counter,:,0:w_len[counter]] = dfa_data[0:2]
#                     H_temp[e,trial,c,counter] = a
#                     # Update counter
#                     counter += 1

#     # Print run status
#     print("Finished {} out of {}".format(i+1,n_subjects))
#     # Save the results
#     H_arr[i] = H_temp
#     DFA_arr[i] = DFA_temp

# warnings.simplefilter("default")

# # Get current time
# c_time2 = time.localtime()
# c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2)
# print("Started", c_time1, "\nCurrent Time",c_time2)

# # Save the DFA analysis data 
# np.save(Feature_savepath+"DFA_arr.npy", DFA_arr)
# np.save(Feature_savepath+"H_arr.npy", H_arr)

# # Load
# DFA_arr = np.load(Feature_savepath+"DFA_arr.npy")
# H_arr = np.load(Feature_savepath+"H_arr.npy")

# # Average the Hurst Exponent across trials
# H_arr = np.mean(H_arr, axis=2)

# # Convert to Pandas dataframe (Hurst exponent)
# # The dimensions will each be a column with numbers and the last column will be the actual values
# arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, H_arr.shape), indexing="ij"))) + [H_arr.ravel()])
# H_data_df = pd.DataFrame(arr, columns = ["Subject_ID", "Eye_status", "Channel", "Freq_band", "Value"])
# # Change from numerical coding to actual values
# eye_status = list(final_epochs[0].event_id.keys())
# ch_name = final_epochs[0].info["ch_names"]

# index_values = [Subject_id,eye_status,ch_name,list(Freq_Bands.keys())]
# for col in range(len(index_values)):
#     col_name = H_data_df.columns[col]
#     for shape in range(H_arr.shape[col]): # notice this is the shape of original numpy array. Not shape of DF
#         H_data_df.loc[H_data_df.iloc[:,col] == shape,col_name]\
#         = index_values[col][shape]

# # Add group status
# Group_status = np.array(["CTRL"]*len(H_data_df["Subject_ID"]))
# Group_status[np.array([i in cases for i in H_data_df["Subject_ID"]])] = "PTSD"
# # Add to dataframe
# H_data_df.insert(2, "Group_status", Group_status)

# # Fix Freq_band categorical order
# H_data_df["Freq_band"] = H_data_df["Freq_band"].astype("category").\
#             cat.reorder_categories(list(Freq_Bands.keys()), ordered=True)

# # Global Hurst exponent
# H_data_df_global = H_data_df.groupby(["Subject_ID", "Eye_status", "Freq_band"]).mean().reset_index() # by default pandas mean skip nan
# # Add group status (cannot use group_by as each subject only has 1 group, not both)
# Group_status = np.array(["CTRL"]*len(H_data_df_global["Subject_ID"]))
# Group_status[np.array([i in cases for i in H_data_df_global["Subject_ID"]])] = "PTSD"
# # Add to dataframe
# H_data_df_global.insert(2, "Group_status", Group_status)
# # Add dummy variable for re-using plot code
# dummy_variable = ["Global Hurst Exponent"]*H_data_df_global.shape[0]
# H_data_df_global.insert(3, "Measurement", dummy_variable )

# # Save the data
# H_data_df.to_pickle(os.path.join(Feature_savepath,"H_data_df.pkl"))
# H_data_df_global.to_pickle(os.path.join(Feature_savepath,"H_data_global_df.pkl"))

# # %% Source localization of sensor data
# # Using non-interpolated channels
# # Even channels that were interpolated during preprocessing and visual inspection
# # are dropped

# # Prepare epochs for estimation of source connectivity
# source_epochs = [0]*n_subjects
# for i in range(n_subjects):
#     source_epochs[i] = final_epochs[i].copy()

# ### Make forward solutions
# # A forward solution is first made for all individuals with no dropped channels
# # Afterwards individual forward solutions are made for subjects with bad
# # channels that were interpolated in preprocessing and these are dropped
# # First forward operator is computed using a template MRI for each dataset
# fs_dir = "/home/glia/MNE-fsaverage-data/fsaverage"
# subjects_dir = os.path.dirname(fs_dir)
# trans = "fsaverage"
# src = os.path.join(fs_dir, "bem", "fsaverage-ico-5-src.fif")
# bem = os.path.join(fs_dir, "bem", "fsaverage-5120-5120-5120-bem-sol.fif")

# # Read the template sourcespace
# sourcespace = mne.read_source_spaces(src)

# temp_idx = 0 # Index with subject that had no bad channels
# subject_eeg = source_epochs[temp_idx].copy()
# subject_eeg.set_eeg_reference(projection=True) # needed for inverse modelling
# # Make forward solution
# fwd = mne.make_forward_solution(subject_eeg.info, trans=trans, src=src,
#                             bem=bem, eeg=True, mindist=5.0, n_jobs=1)
# # Save forward operator
# fname_fwd = "./Source_fwd/fsaverage-fwd.fif"
# mne.write_forward_solution(fname_fwd, fwd, overwrite=True)

# # A specific forward solution is also made for each subject with bad channels
# with open("./Preprocessing/bad_ch.pkl", "rb") as file:
#    bad_ch = pickle.load(file)

# All_bad_ch = bad_ch
# All_drop_epochs = dropped_epochs_df
# All_dropped_ch = []

# Bad_ch_idx = [idx for idx, item in enumerate(All_bad_ch) if item != 0]
# Bad_ch_subjects = All_drop_epochs["Subject_ID"][Bad_ch_idx]
# # For each subject with bad channels, drop the channels and make forward operator
# for n in range(len(Bad_ch_subjects)):
#     Subject = Bad_ch_subjects.iloc[n]
#     try:
#         Subject_idx = Subject_id.index(Subject)
#         # Get unique bad channels
#         Bad_ch0 = All_bad_ch[Bad_ch_idx[n]]
#         Bad_ch1 = []
#         for i2 in range(len(Bad_ch0)):
#             if type(Bad_ch0[i2]) == list:
#                 for i3 in range(len(Bad_ch0[i2])):
#                     Bad_ch1.append(Bad_ch0[i2][i3])
#             elif type(Bad_ch0[i2]) == str:
#                 Bad_ch1.append(Bad_ch0[i2])
#         Bad_ch1 = np.unique(Bad_ch1)
#         # Drop the bad channels
#         source_epochs[Subject_idx].drop_channels(Bad_ch1)
#         # Save the overview of dropped channels
#         All_dropped_ch.append([Subject,Subject_idx,Bad_ch1])
#         # Make forward operator
#         subject_eeg = source_epochs[Subject_idx].copy()
#         subject_eeg.set_eeg_reference(projection=True) # needed for inverse modelling
#         # Make forward solution
#         fwd = mne.make_forward_solution(subject_eeg.info, trans=trans, src=src,
#                                     bem=bem, eeg=True, mindist=5.0, n_jobs=1)
#         # Save forward operator
#         fname_fwd = "./Source_fwd/fsaverage_{}-fwd.fif".format(Subject)
#         mne.write_forward_solution(fname_fwd, fwd, overwrite=True)
#     except:
#         print(Subject,"was already dropped")

# with open("./Preprocessing/All_datasets_bad_ch.pkl", "wb") as filehandle:
#     pickle.dump(All_dropped_ch, filehandle)


# # %% Load forward operators
# # Re-use for all subjects without dropped channels
# fname_fwd = "./Source_fwd/fsaverage-fwd.fif"
# fwd = mne.read_forward_solution(fname_fwd)

# fwd_list = [fwd]*n_subjects

# # Use specific forward solutions for subjects with dropped channels
# with open("./Preprocessing/All_datasets_bad_ch.pkl", "rb") as file:
#    All_dropped_ch = pickle.load(file)

# for i in range(len(All_dropped_ch)):
#     Subject = All_dropped_ch[i][0]
#     Subject_idx = All_dropped_ch[i][1]
#     fname_fwd = "./Source_fwd/fsaverage_{}-fwd.fif".format(Subject)
#     fwd = mne.read_forward_solution(fname_fwd)
#     fwd_list[Subject_idx] = fwd

# # Check the correct number of channels are present in fwd
# random_point = int(np.random.randint(0,len(All_dropped_ch)-1,1))
# assert len(fwds[All_dropped_ch[random_point][1]].ch_names) == source_epochs[All_dropped_ch[random_point][1]].info["nchan"]

# # %% Make parcellation
# # After mapping to source space, I end up with 20484 vertices
# # but I wanted to map to fewer sources and not many more
# # Thus I need to perform parcellation
# # Get labels for FreeSurfer "aparc" cortical parcellation (example with 74 labels/hemi - Destrieux)
# labels_aparc = mne.read_labels_from_annot("fsaverage", parc="aparc.a2009s",
#                                     subjects_dir=subjects_dir)
# labels_aparc = labels_aparc[:-2] # remove unknowns

# labels_aparc_names = [label.name for label in labels_aparc]

# # Manually adding the 31 ROIs (14-lh/rh + 3 in midline) from Toll et al, 2020
# # Making function to take subset of a label
# def label_subset(label, subset, name="ROI_name"):
#     label_subset = mne.Label(label.vertices[subset], label.pos[subset,:],
#                          label.values[subset], label.hemi,
#                          name = "{}-{}".format(name,label.hemi),
#                          subject = label.subject, color = None)
#     return label_subset

# ### Visual area 1 (V1) and somatosensory cortex (BA1-3)
# label_filenames = ["lh.V1.label", "rh.V1.label",
#                    "lh.BA1.label", "rh.BA1.label",
#                    "lh.BA2.label", "rh.BA2.label",
#                    "lh.BA3a.label", "rh.BA3a.label",
#                    "lh.BA3b.label", "rh.BA3b.label"]
# labels0 = [0]*len(label_filenames)
# for i, filename in enumerate(label_filenames):
#     labels0[i] = mne.read_label(os.path.join(fs_dir, "label", filename), subject="fsaverage")
# # Add V1 to final label variable
# labels = labels0[:2]
# # Rename to remove redundant hemi information
# labels[0].name = "V1-{}".format(labels[0].hemi)
# labels[1].name = "V1-{}".format(labels[1].hemi)
# # Assign a color
# labels[0].color = matplotlib.colors.to_rgba("salmon")
# labels[1].color = matplotlib.colors.to_rgba("salmon")
# # Combine Brodmann Areas for SMC. Only use vertices once to avoid duplication error
# SMC_labels = labels0[2:]
# for hem in range(2):
#     SMC_p1 = SMC_labels[hem]
#     for i in range(1,len(SMC_labels)//2):
#         SMC_p2 = SMC_labels[hem+2*i]
#         p2_idx = np.isin(SMC_p2.vertices, SMC_p1.vertices, invert=True)
#         SMC_p21 = label_subset(SMC_p2, p2_idx, "SMC")
#         SMC_p1 = SMC_p1.__add__(SMC_p21)
#     SMC_p1.name = SMC_p21.name
#     # Assign a color
#     SMC_p1.color = matplotlib.colors.to_rgba("orange")
#     labels.append(SMC_p1)

# ### Inferior frontal junction
# # Located at junction between inferior frontal and inferior precentral sulcus
# label_aparc_names0 = ["S_front_inf","S_precentral-inf-part"]
# temp_labels = []
# for i in range(len(label_aparc_names0)):
#     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
#     for i2 in range(len(labels_aparc_idx)):
#         temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())

# pos1 = temp_labels[0].pos
# pos2 = temp_labels[2].pos
# distm = scipy.spatial.distance.cdist(pos1,pos2)
# # Find the closest points between the 2 ROIs
# l1_idx = np.unique(np.where(distm<np.quantile(distm, 0.001))[0]) # q chosen to correspond to around 10% of ROI
# l2_idx = np.unique(np.where(distm<np.quantile(distm, 0.0005))[1]) # q chosen to correspond to around 10% of ROI

# IFJ_label_p1 = label_subset(temp_labels[0], l1_idx, "IFJ")
# IFJ_label_p2 = label_subset(temp_labels[2], l2_idx, "IFJ")
# # Combine the 2 parts
# IFJ_label = IFJ_label_p1.__add__(IFJ_label_p2)
# IFJ_label.name = IFJ_label_p1.name
# # Assign a color
# IFJ_label.color = matplotlib.colors.to_rgba("chartreuse")
# # Append to final list
# labels.append(IFJ_label)

# # Do the same for the right hemisphere
# pos1 = temp_labels[1].pos
# pos2 = temp_labels[3].pos
# distm = scipy.spatial.distance.cdist(pos1,pos2)
# # Find the closest points between the 2 ROIs
# l1_idx = np.unique(np.where(distm<np.quantile(distm, 0.00075))[0]) # q chosen to correspond to around 10% of ROI
# l2_idx = np.unique(np.where(distm<np.quantile(distm, 0.0005))[1]) # q chosen to correspond to around 10% of ROI
# IFJ_label_p1 = label_subset(temp_labels[1], l1_idx, "IFJ")
# IFJ_label_p2 = label_subset(temp_labels[3], l2_idx, "IFJ")
# # Combine the 2 parts
# IFJ_label = IFJ_label_p1.__add__(IFJ_label_p2)
# IFJ_label.name = IFJ_label_p1.name
# # Assign a color
# IFJ_label.color = matplotlib.colors.to_rgba("chartreuse")
# # Append to final list
# labels.append(IFJ_label)

# ### Intraparietal sulcus
# label_aparc_names0 = ["S_intrapariet_and_P_trans"]
# labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[0])]
# for i in range(len(labels_aparc_idx)):
#     labels.append(labels_aparc[labels_aparc_idx[i]].copy())
#     labels[-1].name = "IPS-{}".format(labels[-1].hemi)

# ### Frontal eye field as intersection between middle frontal gyrus and precentral gyrus
# label_aparc_names0 = ["G_front_middle","G_precentral"]
# temp_labels = []
# for i in range(len(label_aparc_names0)):
#     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
#     for i2 in range(len(labels_aparc_idx)):
#         temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())

# # Take 10% of middle frontal gyrus closest to precentral gyrus (most posterior)
# temp_label0 = temp_labels[0]
# G_fm_y = temp_label0.pos[:,1]
# thres_G_fm_y = np.sort(G_fm_y)[len(G_fm_y)//10]
# idx_p1 = np.where(G_fm_y<thres_G_fm_y)[0]
# FEF_label_p1 = label_subset(temp_label0, idx_p1, "FEF")
# # Take 10% closest for precentral gyrus (most anterior)
# temp_label0 = temp_labels[2]
# # I cannot only use y (anterior/posterior) but also need to restrict z-position
# G_pre_cen_z = temp_label0.pos[:,2]
# thres_G_pre_cen_z = 0.04 # visually inspected threshold
# G_pre_cen_y = temp_label0.pos[:,1]
# thres_G_pre_cen_y = np.sort(G_pre_cen_y[G_pre_cen_z>thres_G_pre_cen_z])[-len(G_pre_cen_y)//10] # notice - for anterior
# idx_p2 = np.where((G_pre_cen_y>thres_G_pre_cen_y) & (G_pre_cen_z>thres_G_pre_cen_z))[0]
# FEF_label_p2 = label_subset(temp_label0, idx_p2, "FEF")
# # Combine the 2 parts
# FEF_label = FEF_label_p1.__add__(FEF_label_p2)
# FEF_label.name = FEF_label_p1.name
# # Assign a color
# FEF_label.color = matplotlib.colors.to_rgba("aqua")
# # Append to final list
# labels.append(FEF_label)

# # Do the same for the right hemisphere
# temp_label0 = temp_labels[1]
# G_fm_y = temp_label0.pos[:,1]
# thres_G_fm_y = np.sort(G_fm_y)[len(G_fm_y)//10]
# idx_p1 = np.where(G_fm_y<thres_G_fm_y)[0]
# FEF_label_p1 = label_subset(temp_label0, idx_p1, "FEF")

# temp_label0 = temp_labels[3]
# G_pre_cen_z = temp_label0.pos[:,2]
# thres_G_pre_cen_z = 0.04 # visually inspected threshold
# G_pre_cen_y = temp_label0.pos[:,1]
# thres_G_pre_cen_y = np.sort(G_pre_cen_y[G_pre_cen_z>thres_G_pre_cen_z])[-len(G_pre_cen_y)//10] # notice - for anterior
# idx_p2 = np.where((G_pre_cen_y>thres_G_pre_cen_y) & (G_pre_cen_z>thres_G_pre_cen_z))[0]
# FEF_label_p2 = label_subset(temp_label0, idx_p2, "FEF")
# # Combine the 2 parts
# FEF_label = FEF_label_p1.__add__(FEF_label_p2)
# FEF_label.name = FEF_label_p1.name
# # Assign a color
# FEF_label.color = matplotlib.colors.to_rgba("aqua")
# # Append to final list