diff --git a/Main.py b/Main.py new file mode 100644 index 0000000000000000000000000000000000000000..06df78e100f0cd9658078f95350125319dc96e68 --- /dev/null +++ b/Main.py @@ -0,0 +1,1925 @@ +# -*- coding: utf-8 -*- +""" +Updated Aug 7 2024 + +@author: Qianliang Li (glia@dtu.dk) + +This is the main Python file containing the code that support the findings of +https://doi.org/10.1101/2024.05.06.592342 + +The data used in this analysis was previously described and preprocessed +by Zimmermann, M., Lomoriello, A. S., and Konvalinka, I. +Intra-individual behavioural and neural signatures of audience effects and +interactions in a mirror-game paradigm. Royal Society Open Science, 9(2) 2022 +""" + +# %% Load libraries +import os +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +import mne +import pickle +import mat73 +import time +import seaborn as sns +import nolds +from tqdm import tqdm # progress bar + +# import Python script for microstates [von Wegner & Lauf, 2018] +# originally downloaded from https://github.com/Frederic-vW/eeg_microstates +# I modified the script for estimating two-brain microstates +# by defining kmeans_return_all and kmeans_dualmicro +from eeg_microstates3 import (kmeans_return_all, kmeans_dualmicro) +# import helper functions +from helper import (numpy_arr_to_pandas_df, time_now) + +from dualmicro_functions import (load_epoch_from_fieldtrip, prepare_1P_micro_arr, + plot_microstates, reorder_microstate_results, + single_micro_fit_all_feature_computation, + interbrain_microstate_feature_computation, + prepare_2P_micro_arr_collapsed_events, + plot_dualmicro, sign_swap_microstates, + dualmicro_fit_all_feature_computation, + load_microstate_arrays, + get_synch_events_from_pseudo_pairs, + combine_two_person_microstate_arrays, + pseudo_pair_dualmicro_backfitting, + dualmicro_fit_all_pseudo_pair_feature_computation, + compute_dualmicro_DFA, compute_dualmicro_DFA_pseudo, + shifted_interbrain_microstate_feature_computation) + +# Style for matplotlib/seaborn +plt.style.use('default') + +# Root for project +os.chdir("C:/Users/glia/Documents/MirrorGame") + +# Paths +data_path = "./data/external/EEG/" +mov_data_path = "./data/external/movement/" +fig_save_path = "./reports/figures/" +feat_save_path = "./data/features/" +microstate_save_path = "./data/features/microstates2/" +mov_save_path = "./data/features/movement/" + + +# %% Load preprocessed EEG data +# The data was originally preprocessed in Fieldtrip by Marius Zimmermann +# Get filenames for the EEG data +files = [] +for r, d, f in os.walk(data_path): + for file in f: + if (".mat" in file) & ("ppn" in file): + files.append(os.path.join(r, file)) +# Sort the filenames +files.sort() + +n_subjects = len(files) +# Get Subject_id +Subject_id = [0]*n_subjects +for i in range(n_subjects): + id_number = files[i].split("/")[-1].split(".")[0].split("pair")[-1].split("pair")[-1].replace("_ppn","") + Subject_id[i] = int(id_number)+1000 # add 1000 to keep the first 0 + +# There are data from 23 pairs +# Pair 21 and 25 were excluded in the original analysis +# After looking at the data, it seems pair 21, participant 1 and pair 25 +# participant 2 only had 1254 and 1440 epochs respectively. 
+# Their data also do not end with resting-state condition +# All the other EEG data have around 2400 1s epochs and start and ends with rest +bad_subjects = [1211, 1212, 1251, 1252] # the whole pair is dropped +good_subject_idx = [not i in bad_subjects for i in Subject_id] +# Update Subject_id and files +Subject_id = list(np.array(Subject_id)[good_subject_idx]) +n_subjects = len(Subject_id) +files = list(np.array(files)[good_subject_idx]) + +Pair_id = [0]*(n_subjects//2) +for i in range(n_subjects//2): + Pair_id[i] = int(str(Subject_id[2*i])[1:-1]) +# Add 100 to pair_id to fix sorting for 1 digit numbers, e.g. 03 +Pair_id = [ele+100 for ele in Pair_id] +n_pairs = len(Pair_id) + +# Save the IDs as environmental variables to be used in functions +# from dualmicro_functions.py +os.environ["Subject_id"] = Subject_id +os.environ["Pair_id"] = Pair_id + +event_id = {"rest":1, "uncoupled":2, "coupled": 3, "observe, actor": 4, + "observe, observer": 6, "imitate, leader": 5, "imitate, follower": 7, + "control": 8} + +# Clarification of the labels +# Cond4: ppn1 is observer, ppn2 is actor +# Cond6: ppn1 is actor, ppn2 is observer +# Cond5: ppn1 is follower, ppn2 is leader +# Cond5: ppn1 is leader, ppn2 is follower + +event_id_inv = {v: k for k, v in event_id.items()} + +# We collapsed condition 4 and 6 & 5 and 7 for two-brain microstates +# By swapping the EEG of ppn1 and ppn2 so ppn1 is always observer/follower and +# ppn2 actor/leader +collapsed_event_id = {"rest":1, "uncoupled":2, "coupled": 3, + "observer_actor": 4, "follower_leader": 5, "control": 8} +collapsed_event_id_inv = {v: k for k, v in collapsed_event_id.items()} + +# Load the first EEG to get info about sfreq and n_channels +i = 0 +epoch, trialinfo = load_epoch_from_fieldtrip(0, files, event_id) +n_channels = epoch.info["nchan"] +sfreq = int(epoch.info["sfreq"]) + +# Visualize the data +# epoch.plot(scalings=40e-6, n_channels=32) +# mne.viz.plot_events(epoch.events, sfreq = 1, event_id = event_id, first_samp=-3) # sfreq set to epoch length in s to reflect experiment time + +# We compute microstates for the three frequency ranges +alpha_range = [8.0, 13.0] +beta_range = [13.0, 30.0] +broadband_range = None # Data is already 1 to 40 Hz broadband filtered +freq_names = ["alpha","beta","broadband"] +all_freq_ranges = [alpha_range, beta_range, broadband_range] + +# %% Intrabrain microstates fit all data +# All subjects from all pairs are concatenated to find common microstates +single_brain_event_id = {"rest":1, "uncoupled":2, "coupled": 3, "observer": 4, + "actor": 6, "follower": 5, "leader": 7, "control": 8} +ppn2_correction = {6:4, 4:6, 7:5, 5:7} + +# Loop over frequencies +for f in len(all_freq_ranges): + ff = freq_names[f] + freq_range0 = all_freq_ranges[f] + # ========================================================================= + # First the microstate topographies are determined + # It might be an advantage to run the estimation of microstates on a HPC + # ========================================================================= + # Get data from all pairs before performing kmeans + np.random.seed(1234) + n_clusters=[3, 4, 5, 6, 7, 8, 9, 10] + n_runs = 100 # increased to 100 runs! 
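+    # ---------------------------------------------------------------------
+    # Illustrative sketch (not part of the pipeline and not the modified
+    # eeg_microstates3 implementation): microstate (modified) k-means assigns
+    # each GFP-peak sample to the map with the highest *squared* spatial
+    # correlation, so topography polarity is ignored. The helper below and
+    # its argument names are hypothetical stand-ins for the real data/maps.
+    def _sketch_polarity_invariant_labels(x, maps):
+        """x: (n_samples, n_ch) average-referenced EEG; maps: (n_maps, n_ch)."""
+        xz = x - x.mean(axis=1, keepdims=True)
+        mz = maps - maps.mean(axis=1, keepdims=True)
+        corr = (xz @ mz.T) / (np.linalg.norm(xz, axis=1)[:, None]
+                              * np.linalg.norm(mz, axis=1)[None, :])
+        return np.argmax(corr ** 2, axis=1)  # squaring makes the sign irrelevant
+    # ---------------------------------------------------------------------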
+ # Get current time + c_time1 = time_now(); print(c_time1) + # Save RAM by appending directly to array instead of making list and then array + sub_arr_indices = [0] + trialinfo_list = [] + for i in range(n_subjects): + tmp_data, trialinfo = prepare_1P_micro_arr(i, ppn2_correction, sfreq, + freq_range=freq_range0, standardize=True) + sub_arr_indices.append(len(tmp_data)) + trialinfo_list.append([Subject_id[i],trialinfo]) + if i == 0: # first run initiation + micro_data_all = tmp_data + else: + micro_data_all = np.append(micro_data_all, tmp_data, axis=0) + del tmp_data # clear up space + print(f"Finished preparing microstate data for pair {Subject_id[i]}") + + # Use cumulative sum to determine indices for each subjects's data + subject_indices = np.cumsum(sub_arr_indices) + + # Save the trialinfos from all subjects, for easier access in later steps + with open(f"{microstate_save_path}Intrabrain_microstate_fit_all_{ff}_trialinfos.pkl", "wb") as filehandle: + pickle.dump(trialinfo_list, filehandle) + + # # with args parser in hpc + # n_maps = n_clusters[(args.map_idx-1)] + # print(f"Running analysis for maps: {n_maps}") + # print("Memory used by the micro data array (GB):",micro_data_all.nbytes*9.31e-10) + + # Run Kmeans + for n_maps in n_clusters: # Don't use for loop on the HPC! + # Run the 100 runs in batches of 10 to save underway in case the job script terminates + best_cv_crit = 9999 # initialize unreasonably high value + for r in range(10): + microstate_results = list(kmeans_return_all(micro_data_all, n_maps, + n_runs=int(n_runs/10),maxiter=1000)) + # Overwrite the maps if a lower CV criterion was found for the initiation + if microstate_results[4] < best_cv_crit: + microstate_results.append(subject_indices) + # Save results + with open(f"{microstate_save_path}Intrabrain_microstate_fit_all_{ff}{n_maps}.pkl", "wb") as filehandle: + pickle.dump(microstate_results, filehandle) # [maps, L, gfp_peaks, gev, cv_min, Subject_id] + print(f"Updated the microstates. 
Previous best CV: {best_cv_crit}", + f"new best CV criterion : {microstate_results[4]}") + # Update best cv criterion value + best_cv_crit = microstate_results[4] + + print(f"Finished sub-run {r+1} out of 10") + + print(f"Finished microstate analysis for n_maps = {n_maps}") + print("Started", c_time1, "\nCurrent",time_now()) + + # ========================================================================= + # Evaluate microstates fitted to all data + # ========================================================================= + # Get summary results + microstate_summary_results = [] + + for n_maps in n_clusters: + with open(f"{microstate_save_path}Intrabrain_microstate_fit_all_{ff}{n_maps}.pkl", "rb") as file: + microstate_results = pickle.load(file) + # Also save summary results across n_maps + microstate_summary_results.append([microstate_results[0],microstate_results[3],microstate_results[4]]) + + # Use CV criterion to estimate best number of microstates + cv_gev_arr = np.zeros((len(n_clusters),2)) + for imap in range(len(n_clusters)): + gev = np.sum(microstate_summary_results[imap][1]) + cv = microstate_summary_results[imap][2] + cv_gev_arr[imap,:] = [cv, gev] + + # Convert to Pandas dataframe + col_names = ["n_Microstates", "Fit_Criteria", "Value"] + Fit_Criteria = ["CV Criterion", "Global Explained Variance"] + dtypes = [int,str,"float64"] + + cv_gev_df = numpy_arr_to_pandas_df(cv_gev_arr, col_names = col_names, + col_values = [n_clusters,Fit_Criteria], + dtypes = dtypes) + + # Evaluate optimal n_Microstates + h_order = Fit_Criteria + g = sns.FacetGrid(data=cv_gev_df,row=None, + margin_titles=True, height=8, aspect=1.2) + g = g.map(sns.pointplot,"n_Microstates", "Value", "Fit_Criteria", + dodge=0, capsize=0.18, errorbar=None, linestyles=["-", "-"], + markers=["o", "o"], hue_order=h_order, palette=sns.color_palette()) + g.add_legend() + plt.subplots_adjust(top=0.9, right=0.85, left=0.1) + g.fig.suptitle("Mean CV Criterion and GEV", fontsize=18) + g.set_axis_labels(x_var="Number of Microstates", + y_var="GEV and CV", + fontsize=14) + # The lower CV the better. Measure of residual variance + # But the higher GEV the better. 
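+    # ---------------------------------------------------------------------
+    # Illustrative sketch (an assumption about the usual definition, not the
+    # eeg_microstates3 code): global explained variance is typically the
+    # GFP-weighted squared spatial correlation between each sample and its
+    # assigned map, normalised by the total squared GFP. Helper and argument
+    # names are hypothetical.
+    def _sketch_gev(x, maps, labels):
+        """x: (n_samples, n_ch); maps: (n_maps, n_ch); labels: (n_samples,)."""
+        gfp = x.std(axis=1)                      # global field power per sample
+        xz = x - x.mean(axis=1, keepdims=True)
+        mz = maps - maps.mean(axis=1, keepdims=True)
+        corr = np.einsum("ij,ij->i", xz, mz[labels]) / (
+            np.linalg.norm(xz, axis=1) * np.linalg.norm(mz[labels], axis=1))
+        return np.sum((gfp * corr) ** 2) / np.sum(gfp ** 2)
+    # ---------------------------------------------------------------------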
+ # Save file + g.savefig(f"{fig_save_path}Microstates/Fit_all_{ff}/"+"Single_micro_fit_all_{ff}_CV_Criterion_GEV"+".png") + + # Count which number of microstates have the lowest cv criterion for each subject + min_idx = np.argmin(cv_gev_df.loc[cv_gev_df["Fit_Criteria"]=="CV Criterion","Value"]) + cv_gev_df.loc[cv_gev_df["Fit_Criteria"]=="CV Criterion"].iloc[min_idx] + + # Visualize all microstates prior to re-ordering + for ii in range(len(n_clusters)): + plot_microstates(n_clusters[ii], microstate_summary_results[ii][0], microstate_summary_results[ii][1], epoch.info) + + # ========================================================================= + # # Re-order intrabrain microstates + # ========================================================================= + # This is only run once, after microstates are created + # The optimal number of microstates were 5, with 56% GEV + n_maps = 5 + ii = n_clusters.index(n_maps) + + with open(f"{microstate_save_path}Intrabrain_microstate_fit_all_{ff}{n_maps}.pkl", "rb") as file: + microstate_results = pickle.load(file) + + maps, m_labels, gfp_peaks, gev, cv_min, sub_idx = microstate_results + + plot_microstates(n_maps, maps, gev) + + # Make dictionary with n_maps and new order + manual_reordering_template = {"5_alpha":[4,1,3,2,0], + "5_beta":[3,2,1,4,0], + "5_broadband":[3,2,4,1,0]} + new_order = manual_reordering_template[f"{n_maps}_{ff}"] + + # Re-order the microstates + maps, gev, m_labels = reorder_microstate_results(new_order, maps, gev, m_labels) + + # Plot again to check it worked + plot_microstates(n_maps, maps, gev, epoch.info) + + # Since neuronal activity is often oscillating, this causes polarity inversions + # Microstates ignores the sign, and hence the polarity in the map is arbitrary + # It is only the relative difference within the plot that is interesting + # depending on initiation. 
We can thus freely change the sign for visualization + # For two-person microstates, each person's map is sign-changed separately + manual_sign_correction = {"5_alpha":[1,-1,1,1,1], + "5_beta":[1,1,1,-1,-1], + "5_broadband":[-1,1,-1,1,1]} + sign_swap = manual_sign_correction[f"{n_maps}_{ff}"] + for m in range(n_maps): + maps[m] *= sign_swap[m] + + # Plot a final time for last confirmation + plot_microstates(n_maps, maps, gev, epoch.info) + + # Close all figures + plt.close("all") + + ### Save reordered results + n_maps = 5 + ii = n_clusters.index(n_maps) + + with open(f"{microstate_save_path}Intrabrain_microstate_fit_all_{ff}{n_maps}.pkl", "rb") as file: + microstate_results = pickle.load(file) + maps, m_labels, gfp_peaks, gev, cv, sub_indices = microstate_results + # Re-order + new_order = manual_reordering_template[str(n_maps)] + maps, gev, m_labels = reorder_microstate_results(new_order, maps, gev, m_labels) + # Sign swap + for m in range(n_maps): + maps[m] *= sign_swap[m] + # Overwrite variable + microstate_results = maps, m_labels, gfp_peaks, gev, cv, sub_indices + # Save to new file + with open(f"{microstate_save_path}Reordered/Intrabrain_microstate_fit_all_{ff}{n_maps}.pkl", "wb") as filehandle: + pickle.dump(microstate_results, filehandle) # [maps, L, gfp_peaks, gev, cv_min, sub_idx] + + # Save topomaps for the microstates + save_path = f"{fig_save_path}Microstates/Fit_all_{ff}/" + with open(f"{microstate_save_path}Reordered/Intrabrain_microstate_fit_all_{ff}{n_maps}.pkl", "rb") as file: + microstate_results = pickle.load(file) + maps, m_labels, gfp_peaks, gev, cv_min, sub_idx = microstate_results + fig = plot_microstates(n_maps, maps, gev, epoch.info) + fig.savefig(save_path+f"Intrabrain_fit_all_{ff}_maps{n_maps}"+".png") + # Save svg for Paper + fig.savefig(save_path+f"Intrabrain_fit_all_{ff}_maps{n_maps}"+".svg") + + # ========================================================================= + # # Estimate one-person microstate metrics/features + # # There might be a small error introduced due to gaps in the time series from + # # dropped segments, e.g. when calculating the transition probability as + # # the time series is discontinuous due to the gaps. But with the high sampling rate + # # only a very small fraction of the samples have discontinuous neighbors + # ========================================================================= + # The observer_actor and observer_observe conditions have been separated + # So there are observer and actor conditions. + # And the same for leader and follower. + """ + Overview of common (intrabrain) microstate features: + 1. Average duration a given microstate remains stable (Dur) + 2. Frequency occurrence, independent of individual duration (Occ) + Average number of times a microstate becomes dominant per second + 3. Ratio of total Time Covered (TCo) + 4. Transition probabilities (TMx) + 5. 
Ratio of shannon entropy relative to theoretical max chaos (Ent) + """ + # Hard-coded the optimal number of microstates based on CV criterion and GEV for dualmicro + n_maps = 5 + # Load all microstate results + with open(f"{microstate_save_path}Reordered/Intrabrain_microstate_fit_all_{ff}{n_maps}.pkl", "rb") as file: + microstate_results = pickle.load(file) + # Load all trialinfos + with open(f"{microstate_save_path}Intrabrain_microstate_fit_all_{ff}_trialinfos.pkl", "rb") as file: + trialinfo_list = pickle.load(file) + + Microstate_names = [chr(ele) for ele in range(65,65+n_maps)] + + m_labels = [0]*n_subjects + events = [0]*n_subjects + m_feats = [0]*n_subjects + + for i in range(n_subjects): + m_labels[i], events[i], m_feats[i] = single_micro_fit_all_feature_computation(i, + n_maps, microstate_results, trialinfo_list, sfreq, event_id, single_brain_event_id) + print(f"Finished computing microstate features for Subject {Subject_id[i]}") + + # Save the raw microstate features + with open(f"{microstate_save_path}/raw_features_single_micro_fit_all_{ff}_maps{n_maps}.pkl", "wb") as filehandle: + pickle.dump(m_feats, filehandle) # [Subject][Dur_arr,Occ_arr,TCo_arr,TMx_arr,Ent_arr] [Event, map*] + # * the feature is calculated for each map, where applicable. + # Transition matrix is calculated for each map -> map transition probability + + # with open(f"{microstate_save_path}/raw_features_single_micro_fit_all_{ff}_maps{n_maps}.pkl", "rb") as file: + # m_feats = pickle.load(file) # [Subject][Dur_arr,Occ_arr,TCo_arr,TMx_arr,Ent_arr] [Event, map*] + + ### Convert all features to dataframes for further processing + col_names = ["Subject_ID", "Event_ID", "Microstate", "Value"] + col_values = [Subject_id,list(single_brain_event_id.keys()),Microstate_names] + dtypes = ["int64",str,str,"float64"] + # Mean duration + Dur_arr = np.stack([ele[0] for ele in m_feats]) # [Subject, event, n_map] + Dur_df = numpy_arr_to_pandas_df(Dur_arr, col_names, col_values, dtypes) + # Add dummy variable to enabling combining of dataframes + measurement_id = ["Duration"]*len(Dur_df) + Dur_df.insert(2, "Measurement", measurement_id) + # Save df + Dur_df.to_pickle(os.path.join(microstate_save_path,f"Single_micro_fit_all_{ff}_maps{n_maps}_duration_df.pkl")) + + # Frequency of occurrence per sec + Occ_arr = np.stack([ele[1] for ele in m_feats]) # [Subject, event, n_map] + Occ_df = numpy_arr_to_pandas_df(Occ_arr, col_names, col_values, dtypes) + # Add dummy variable to enabling combining of dataframes + measurement_id = ["Occurrence"]*len(Occ_df) + Occ_df.insert(2, "Measurement", measurement_id) + # Save df + Occ_df.to_pickle(os.path.join(microstate_save_path,f"Single_micro_fit_all_{ff}_maps{n_maps}_occurrence_df.pkl")) + + # Ratio total Time Covered + TCo_arr = np.stack([ele[2] for ele in m_feats]) # [Subject, event, n_map] + TCo_df = numpy_arr_to_pandas_df(TCo_arr, col_names, col_values, dtypes) + # Add dummy variable to enabling combining of dataframes + measurement_id = ["Time_covered"]*len(TCo_df) + TCo_df.insert(2, "Measurement", measurement_id) + # Save df + TCo_df.to_pickle(os.path.join(microstate_save_path,f"Single_micro_fit_all_{ff}_maps{n_maps}_ratio_time_covered_df.pkl")) + + # Transition matrix should be read as probability of row to column + xi, xj = np.meshgrid(Microstate_names,Microstate_names) + _, arrow = np.meshgrid(Microstate_names,["->"]*n_maps) + + transition_info = np.char.add(np.char.add(xj,arrow),xi) + + TMx_arr = np.stack([ele[3] for ele in m_feats]) # [Subject, event, n_map, n_map] + TMx_arr = 
TMx_arr.reshape((n_subjects,len(single_brain_event_id),n_maps*n_maps)) # Flatten the maps to 1D + + col_names = ["Subject_ID", "Event_ID", "Transition", "Value"] + col_values = [Subject_id,list(single_brain_event_id.keys()),transition_info.flatten()] + TMx_df = numpy_arr_to_pandas_df(TMx_arr, col_names, col_values, dtypes) + # Add dummy variable to enabling combining of dataframes + measurement_id = ["Probability"]*len(TMx_df) + TMx_df.insert(2, "Measurement", measurement_id) + # Save df + TMx_df.to_pickle(os.path.join(microstate_save_path,f"Single_micro_fit_all_{ff}_maps{n_maps}_transition_df.pkl")) + + # Entropy + Ent_arr = np.stack([ele[4] for ele in m_feats]) # [Subject, event] + col_names = ["Subject_ID", "Event_ID", "Value"] + col_values = [Subject_id,list(single_brain_event_id.keys())] + dtypes = ["int64",str,"float64"] + Ent_df = numpy_arr_to_pandas_df(Ent_arr, col_names, col_values, dtypes) + # Add dummy variable to enabling combining of dataframes + measurement_id = ["Entropy"]*len(Ent_df) + Ent_df.insert(2, "Measurement", measurement_id) + # Save df + Ent_df.to_pickle(os.path.join(microstate_save_path,f"Single_micro_fit_all_{ff}_maps{n_maps}_ratio_entropy_df.pkl")) + +# ========================================================================= +# We also did it for 8 alpha microstates to use the same number as +# the two-brain microstates +# ========================================================================= +# This is only run once, after microstates are created +ff = "alpha" +n_maps = 8 +ii = n_clusters.index(n_maps) + +with open(f"{microstate_save_path}Intrabrain_microstate_fit_all_{ff}{n_maps}.pkl", "rb") as file: + microstate_results = pickle.load(file) + +maps, m_labels, gfp_peaks, gev, cv_min, sub_idx = microstate_results + +plot_microstates(n_maps, maps, gev) + +# Make dictionary with n_maps and new order +manual_reordering_template = {"8":[6,0,5,1,7,2,3,4]} +new_order = manual_reordering_template[str(n_maps)] + +# Re-order the microstates +maps, gev, m_labels = reorder_microstate_results(new_order, maps, gev, m_labels) + +# Plot again to check it worked +plot_microstates(n_maps, maps, gev, epoch.info) + +# Since neuronal activity is often oscillating, this causes polarity inversions +# Microstates ignores the sign, and hence the polarity in the map is arbitrary +# It is only the relative difference within the plot that is interesting +# depending on initiation. 
We can thus freely change the sign for visualization +# For two-person microstates, each person's map is sign-changed separately +manual_sign_correction = {"8":[-1,1,-1,1,1,1,-1,-1]} +sign_swap = manual_sign_correction[str(n_maps)] +for m in range(n_maps): + maps[m] *= sign_swap[m] + +# Plot a final time for last confirmation +plot_microstates(n_maps, maps, gev, epoch.info) + +# Close all figures +plt.close("all") + +### Save reordered results +n_maps = 8 +ii = n_clusters.index(n_maps) + +with open(f"{microstate_save_path}Intrabrain_microstate_fit_all_{ff}{n_maps}.pkl", "rb") as file: + microstate_results = pickle.load(file) +maps, m_labels, gfp_peaks, gev, cv, sub_indices = microstate_results +# Re-order +new_order = manual_reordering_template[str(n_maps)] +maps, gev, m_labels = reorder_microstate_results(new_order, maps, gev, m_labels) +# Sign swap +for m in range(n_maps): + maps[m] *= sign_swap[m] +# Overwrite variable +microstate_results = maps, m_labels, gfp_peaks, gev, cv, sub_indices +# Save to new file +with open(f"{microstate_save_path}Reordered/Intrabrain_microstate_fit_all_{ff}{n_maps}.pkl", "wb") as filehandle: + pickle.dump(microstate_results, filehandle) # [maps, L, gfp_peaks, gev, cv_min, sub_idx] + +# Save topomaps for the microstates +save_path = f"{fig_save_path}Microstates/Fit_all_{ff}/" +with open(f"{microstate_save_path}Reordered/Intrabrain_microstate_fit_all_{ff}{n_maps}.pkl", "rb") as file: + microstate_results = pickle.load(file) +maps, m_labels, gfp_peaks, gev, cv_min, sub_idx = microstate_results +fig = plot_microstates(n_maps, maps, gev, epoch.info) +fig.savefig(save_path+f"Intrabrain_fit_all_{ff}_maps{n_maps}"+".png") +# Save svg for Paper +fig.savefig(save_path+f"Intrabrain_fit_all_{ff}_maps{n_maps}"+".svg") + +# ========================================================================= +# # Estimate one-person microstate metrics/features +# # There might be a small error introduced due to gaps in the time series from +# # dropped segments, e.g. when calculating the transition probability as +# # the time series is discontinuous due to the gaps. But with the high sampling rate +# # only a very small fraction of the samples have discontinuous neighbors +# ========================================================================= +# The observer_actor and observer_observe conditions have been separated +# So there are observer and actor conditions. +# And the same for leader and follower. +""" +Overview of common (intrabrain) microstate features: + 1. Average duration a given microstate remains stable (Dur) + 2. Frequency occurrence, independent of individual duration (Occ) + Average number of times a microstate becomes dominant per second + 3. Ratio of total Time Covered (TCo) + 4. Transition probabilities (TMx) + 5. 
Ratio of shannon entropy relative to theoretical max chaos (Ent) +""" +# Hard-coded the optimal number of microstates based on CV criterion and GEV for dualmicro +n_maps = 8 +# Load all microstate results +with open(f"{microstate_save_path}Reordered/Intrabrain_microstate_fit_all_{ff}{n_maps}.pkl", "rb") as file: + microstate_results = pickle.load(file) +# Load all trialinfos +with open(f"{microstate_save_path}Intrabrain_microstate_fit_all_{ff}_trialinfos.pkl", "rb") as file: + trialinfo_list = pickle.load(file) + +Microstate_names = [chr(ele) for ele in range(65,65+n_maps)] + +m_labels = [0]*n_subjects +events = [0]*n_subjects +m_feats = [0]*n_subjects + +for i in range(n_subjects): + m_labels[i], events[i], m_feats[i] = single_micro_fit_all_feature_computation(i, + n_maps, microstate_results, trialinfo_list, sfreq, event_id, single_brain_event_id) + print(f"Finished computing microstate features for Subject {Subject_id[i]}") + +# Save the raw microstate features +with open(f"{microstate_save_path}/raw_features_single_micro_fit_all_{ff}_maps{n_maps}.pkl", "wb") as filehandle: + pickle.dump(m_feats, filehandle) # [Subject][Dur_arr,Occ_arr,TCo_arr,TMx_arr,Ent_arr] [Event, map*] + # * the feature is calculated for each map, where applicable. + # Transition matrix is calculated for each map -> map transition probability + +# with open(f"{microstate_save_path}/raw_features_single_micro_fit_all_{ff}_maps{n_maps}.pkl", "rb") as file: +# m_feats = pickle.load(file) # [Subject][Dur_arr,Occ_arr,TCo_arr,TMx_arr,Ent_arr] [Event, map*] + +### Convert all features to dataframes for further processing +col_names = ["Subject_ID", "Event_ID", "Microstate", "Value"] +col_values = [Subject_id,list(single_brain_event_id.keys()),Microstate_names] +dtypes = ["int64",str,str,"float64"] +# Mean duration +Dur_arr = np.stack([ele[0] for ele in m_feats]) # [Subject, event, n_map] +Dur_df = numpy_arr_to_pandas_df(Dur_arr, col_names, col_values, dtypes) +# Add dummy variable to enabling combining of dataframes +measurement_id = ["Duration"]*len(Dur_df) +Dur_df.insert(2, "Measurement", measurement_id) +# Save df +Dur_df.to_pickle(os.path.join(microstate_save_path,f"Single_micro_fit_all_{ff}_maps{n_maps}_duration_df.pkl")) + +# Frequency of occurrence per sec +Occ_arr = np.stack([ele[1] for ele in m_feats]) # [Subject, event, n_map] +Occ_df = numpy_arr_to_pandas_df(Occ_arr, col_names, col_values, dtypes) +# Add dummy variable to enabling combining of dataframes +measurement_id = ["Occurrence"]*len(Occ_df) +Occ_df.insert(2, "Measurement", measurement_id) +# Save df +Occ_df.to_pickle(os.path.join(microstate_save_path,f"Single_micro_fit_all_{ff}_maps{n_maps}_occurrence_df.pkl")) + +# Ratio total Time Covered +TCo_arr = np.stack([ele[2] for ele in m_feats]) # [Subject, event, n_map] +TCo_df = numpy_arr_to_pandas_df(TCo_arr, col_names, col_values, dtypes) +# Add dummy variable to enabling combining of dataframes +measurement_id = ["Time_covered"]*len(TCo_df) +TCo_df.insert(2, "Measurement", measurement_id) +# Save df +TCo_df.to_pickle(os.path.join(microstate_save_path,f"Single_micro_fit_all_{ff}_maps{n_maps}_ratio_time_covered_df.pkl")) + +# Transition matrix should be read as probability of row to column +xi, xj = np.meshgrid(Microstate_names,Microstate_names) +_, arrow = np.meshgrid(Microstate_names,["->"]*n_maps) + +transition_info = np.char.add(np.char.add(xj,arrow),xi) + +TMx_arr = np.stack([ele[3] for ele in m_feats]) # [Subject, event, n_map, n_map] +TMx_arr = 
TMx_arr.reshape((n_subjects,len(single_brain_event_id),n_maps*n_maps)) # Flatten the maps to 1D + +col_names = ["Subject_ID", "Event_ID", "Transition", "Value"] +col_values = [Subject_id,list(single_brain_event_id.keys()),transition_info.flatten()] +TMx_df = numpy_arr_to_pandas_df(TMx_arr, col_names, col_values, dtypes) +# Add dummy variable to enabling combining of dataframes +measurement_id = ["Probability"]*len(TMx_df) +TMx_df.insert(2, "Measurement", measurement_id) +# Save df +TMx_df.to_pickle(os.path.join(microstate_save_path,f"Single_micro_fit_all_{ff}_maps{n_maps}_transition_df.pkl")) + +# Entropy +Ent_arr = np.stack([ele[4] for ele in m_feats]) # [Subject, event] +col_names = ["Subject_ID", "Event_ID", "Value"] +col_values = [Subject_id,list(single_brain_event_id.keys())] +dtypes = ["int64",str,"float64"] +Ent_df = numpy_arr_to_pandas_df(Ent_arr, col_names, col_values, dtypes) +# Add dummy variable to enabling combining of dataframes +measurement_id = ["Entropy"]*len(Ent_df) +Ent_df.insert(2, "Measurement", measurement_id) +# Save df +Ent_df.to_pickle(os.path.join(microstate_save_path,f"Single_micro_fit_all_{ff}_maps{n_maps}_ratio_entropy_df.pkl")) + +# %% Inter-brain microstates fit all data +# Based on the microstate topographies estimated on single-brian data +""" +Interbrain features: + 1. Average duration of common interbrain microstates (IBDur) + 2. Frequency occurrence of common interbrain microstates in the pair (IBOcc) + 3. Ratio of total time covered by interbrain common microstates in the pair (IBCov) + 4. Transition probability towards common interbrain microstates in the pair (IBTMx) + 5. Ratio of joint shannon entropy relative to theoretical max chaos (IBEnt) +""" + +for f in len(all_freq_ranges): + ff = freq_names[f] + # Hard-coded the optimal number of microstates based on CV criterion and GEV + n_maps = 5 + # Load all microstate results + with open(f"{microstate_save_path}Reordered/Intrabrain_microstate_fit_all_{ff}{n_maps}.pkl", "rb") as file: + microstate_results = pickle.load(file) + # Load all trialinfos + with open(f"{microstate_save_path}Intrabrain_microstate_fit_all_{ff}_trialinfos.pkl", "rb") as file: + trialinfo_list = pickle.load(file) + + Microstate_names = [chr(ele) for ele in range(65,65+n_maps)] + # Insert Z as the symbol for non common microstate + Microstate_names.insert(0,"Z") + + m_labels = [0]*(n_subjects//2) + events = [0]*(n_subjects//2) + m_feats = [0]*(n_subjects//2) + Pair_id = [0]*(n_subjects//2) + + for i in range(n_subjects//2): + m_labels[i], events[i], m_feats[i] = interbrain_microstate_feature_computation(i, + n_maps, microstate_results, trialinfo_list, sfreq, event_id, collapsed_event_id) + Pair_id[i] = int(str(Subject_id[2*i])[1:-1]) + print(f"Finished computing interbrain microstate features for pair {Pair_id[i]}") + + Pair_id = [ele+100 for ele in Pair_id] + + # Save the raw microstate features + with open(f"{microstate_save_path}/raw_interbrain_single_micro_fit_all_{ff}_maps{n_maps}.pkl", "wb") as filehandle: + pickle.dump([Pair_id, m_feats], filehandle) # [Subject][Dur_arr,Occ_arr,TCo_arr,TMx_arr,Ent_arr] [Event, map*] + # * the feature is calculated for each map, where applicable. + # Transition matrix is calculated for each map -> map transition probability + # The first row and column correspond to the non common microstate, i.e. 
+ # there is a different microstate in the pair + + # with open(f"{microstate_save_path}/raw_interbrain_single_micro_fit_all_{ff}_maps.pkl", "rb") as file: + # Pair_id, m_feats = pickle.load(file) # [Subject][Dur_arr,Occ_arr,TCo_arr,TMx_arr,Ent_arr] [Event, map*] + + n_pairs = len(Pair_id) + + ### Convert all features to dataframes for further processing + col_names = ["Pair_ID", "Event_ID", "Microstate", "Value"] + col_values = [Pair_id,list(collapsed_event_id.keys()),Microstate_names] + dtypes = [int,str,str,"float64"] + # Mean duration + Dur_arr = np.stack([ele[0] for ele in m_feats]) # [Subject, event, n_map] + Dur_df = numpy_arr_to_pandas_df(Dur_arr, col_names, col_values, dtypes) + # Add dummy variable to enabling combining of dataframes + measurement_id = ["Duration"]*len(Dur_df) + Dur_df.insert(2, "Measurement", measurement_id) + # Save df + Dur_df.to_pickle(os.path.join(microstate_save_path,f"IB_Single_micro_fit_all_{ff}_maps{n_maps}_duration_df.pkl")) + + # Frequency of occurrence per sec + Occ_arr = np.stack([ele[1] for ele in m_feats]) # [Subject, event, n_map] + Occ_df = numpy_arr_to_pandas_df(Occ_arr, col_names, col_values, dtypes) + # Add dummy variable to enabling combining of dataframes + measurement_id = ["Occurrence"]*len(Occ_df) + Occ_df.insert(2, "Measurement", measurement_id) + # Save df + Occ_df.to_pickle(os.path.join(microstate_save_path,f"IB_Single_micro_fit_all_{ff}_maps{n_maps}_occurrence_df.pkl")) + + # Ratio total Time Covered + TCo_arr = np.stack([ele[2] for ele in m_feats]) # [Subject, event, n_map] + TCo_df = numpy_arr_to_pandas_df(TCo_arr, col_names, col_values, dtypes) + # Add dummy variable to enabling combining of dataframes + measurement_id = ["Time_covered"]*len(TCo_df) + TCo_df.insert(2, "Measurement", measurement_id) + # Save df + TCo_df.to_pickle(os.path.join(microstate_save_path,f"IB_Single_micro_fit_all_{ff}_maps{n_maps}_ratio_time_covered_df.pkl")) + + # Transition matrix should be read as probability of row to column + xi, xj = np.meshgrid(Microstate_names,Microstate_names) + _, arrow = np.meshgrid(Microstate_names,["->"]*(n_maps+1)) + + transition_info = np.char.add(np.char.add(xj,arrow),xi) + + TMx_arr = np.stack([ele[3] for ele in m_feats]) # [Subject, event, n_map, n_map] + TMx_arr = TMx_arr.reshape((n_pairs,len(collapsed_event_id),(n_maps+1)*(n_maps+1))) # Flatten the maps to 1D + + col_names = ["Pair_ID", "Event_ID", "Transition", "Value"] + col_values = [Pair_id,list(collapsed_event_id.keys()),transition_info.flatten()] + TMx_df = numpy_arr_to_pandas_df(TMx_arr, col_names, col_values, dtypes) + # Add dummy variable to enabling combining of dataframes + measurement_id = ["Probability"]*len(TMx_df) + TMx_df.insert(2, "Measurement", measurement_id) + # Save df + TMx_df.to_pickle(os.path.join(microstate_save_path,f"IB_Single_micro_fit_all_{ff}_maps{n_maps}_transition_df.pkl")) + + # Entropy + Ent_arr = np.stack([ele[4] for ele in m_feats]) # [Subject, event] + col_names = ["Pair_ID", "Event_ID", "Value"] + col_values = [Pair_id,list(collapsed_event_id.keys())] + dtypes = [int, str, "float64"] + Ent_df = numpy_arr_to_pandas_df(Ent_arr, col_names, col_values, dtypes) + # Add dummy variable to enabling combining of dataframes + measurement_id = ["Entropy"]*len(Ent_df) + Ent_df.insert(2, "Measurement", measurement_id) + # Save df + Ent_df.to_pickle(os.path.join(microstate_save_path,f"IB_Single_micro_fit_all_{ff}_maps{n_maps}_ratio_joint_entropy_df.pkl")) + +# ========================================================================= +# Repeat for 
8 alpha microstates +# ========================================================================= +ff = "alpha" +n_maps = 8 +# Load all microstate results +with open(f"{microstate_save_path}Reordered/Intrabrain_microstate_fit_all_{ff}{n_maps}.pkl", "rb") as file: + microstate_results = pickle.load(file) +# Load all trialinfos +with open(f"{microstate_save_path}Intrabrain_microstate_fit_all_{ff}_trialinfos.pkl", "rb") as file: + trialinfo_list = pickle.load(file) + +Microstate_names = [chr(ele) for ele in range(65,65+n_maps)] +# Insert Z as the symbol for non common microstate +Microstate_names.insert(0,"Z") + +m_labels = [0]*(n_subjects//2) +events = [0]*(n_subjects//2) +m_feats = [0]*(n_subjects//2) +Pair_id = [0]*(n_subjects//2) + +for i in range(n_subjects//2): + m_labels[i], events[i], m_feats[i] = interbrain_microstate_feature_computation(i, + n_maps, microstate_results, trialinfo_list, sfreq, event_id, collapsed_event_id) + Pair_id[i] = int(str(Subject_id[2*i])[1:-1]) + print(f"Finished computing interbrain microstate features for pair {Pair_id[i]}") + +Pair_id = [ele+100 for ele in Pair_id] + +# Save the raw microstate features +with open(f"{microstate_save_path}/raw_interbrain_single_micro_fit_all_{ff}_maps{n_maps}.pkl", "wb") as filehandle: + pickle.dump([Pair_id, m_feats], filehandle) # [Subject][Dur_arr,Occ_arr,TCo_arr,TMx_arr,Ent_arr] [Event, map*] + # * the feature is calculated for each map, where applicable. + # Transition matrix is calculated for each map -> map transition probability + # The first row and column correspond to the non common microstate, i.e. + # there is a different microstate in the pair + +# with open(f"{microstate_save_path}/raw_interbrain_single_micro_fit_all_{ff}_maps.pkl", "rb") as file: +# Pair_id, m_feats = pickle.load(file) # [Subject][Dur_arr,Occ_arr,TCo_arr,TMx_arr,Ent_arr] [Event, map*] + +n_pairs = len(Pair_id) + +### Convert all features to dataframes for further processing +col_names = ["Pair_ID", "Event_ID", "Microstate", "Value"] +col_values = [Pair_id,list(collapsed_event_id.keys()),Microstate_names] +dtypes = [int,str,str,"float64"] +# Mean duration +Dur_arr = np.stack([ele[0] for ele in m_feats]) # [Subject, event, n_map] +Dur_df = numpy_arr_to_pandas_df(Dur_arr, col_names, col_values, dtypes) +# Add dummy variable to enabling combining of dataframes +measurement_id = ["Duration"]*len(Dur_df) +Dur_df.insert(2, "Measurement", measurement_id) +# Save df +Dur_df.to_pickle(os.path.join(microstate_save_path,f"IB_Single_micro_fit_all_{ff}_maps{n_maps}_duration_df.pkl")) + +# Frequency of occurrence per sec +Occ_arr = np.stack([ele[1] for ele in m_feats]) # [Subject, event, n_map] +Occ_df = numpy_arr_to_pandas_df(Occ_arr, col_names, col_values, dtypes) +# Add dummy variable to enabling combining of dataframes +measurement_id = ["Occurrence"]*len(Occ_df) +Occ_df.insert(2, "Measurement", measurement_id) +# Save df +Occ_df.to_pickle(os.path.join(microstate_save_path,f"IB_Single_micro_fit_all_{ff}_maps{n_maps}_occurrence_df.pkl")) + +# Ratio total Time Covered +TCo_arr = np.stack([ele[2] for ele in m_feats]) # [Subject, event, n_map] +TCo_df = numpy_arr_to_pandas_df(TCo_arr, col_names, col_values, dtypes) +# Add dummy variable to enabling combining of dataframes +measurement_id = ["Time_covered"]*len(TCo_df) +TCo_df.insert(2, "Measurement", measurement_id) +# Save df +TCo_df.to_pickle(os.path.join(microstate_save_path,f"IB_Single_micro_fit_all_{ff}_maps{n_maps}_ratio_time_covered_df.pkl")) + +# Transition matrix should be read as probability of row 
to column +xi, xj = np.meshgrid(Microstate_names,Microstate_names) +_, arrow = np.meshgrid(Microstate_names,["->"]*(n_maps+1)) + +transition_info = np.char.add(np.char.add(xj,arrow),xi) + +TMx_arr = np.stack([ele[3] for ele in m_feats]) # [Subject, event, n_map, n_map] +TMx_arr = TMx_arr.reshape((n_pairs,len(collapsed_event_id),(n_maps+1)*(n_maps+1))) # Flatten the maps to 1D + +col_names = ["Pair_ID", "Event_ID", "Transition", "Value"] +col_values = [Pair_id,list(collapsed_event_id.keys()),transition_info.flatten()] +TMx_df = numpy_arr_to_pandas_df(TMx_arr, col_names, col_values, dtypes) +# Add dummy variable to enabling combining of dataframes +measurement_id = ["Probability"]*len(TMx_df) +TMx_df.insert(2, "Measurement", measurement_id) +# Save df +TMx_df.to_pickle(os.path.join(microstate_save_path,f"IB_Single_micro_fit_all_{ff}_maps{n_maps}_transition_df.pkl")) + +# Entropy +Ent_arr = np.stack([ele[4] for ele in m_feats]) # [Subject, event] +col_names = ["Pair_ID", "Event_ID", "Value"] +col_values = [Pair_id,list(collapsed_event_id.keys())] +dtypes = [int, str, "float64"] +Ent_df = numpy_arr_to_pandas_df(Ent_arr, col_names, col_values, dtypes) +# Add dummy variable to enabling combining of dataframes +measurement_id = ["Entropy"]*len(Ent_df) +Ent_df.insert(2, "Measurement", measurement_id) +# Save df +Ent_df.to_pickle(os.path.join(microstate_save_path,f"IB_Single_micro_fit_all_{ff}_maps{n_maps}_ratio_joint_entropy_df.pkl")) + + +# %% Two-brain microstates fit all data +""" +The two observe and imitate conditions are collapesed +Instead of having ppn1 being observer/follower in 8 trials and actor/leader +in 8 trials, we will fix the topomap from "ppn1, top row" to always be +observer and follower. This means for condition 6 and 7, ppn2 will be treated +as ppn1 so the first topomap is still being fitted to the observer/follower! +So the first microstate (top row) will always correspond to the Observer and Follower +And the 2nd paired microstate (bot row) will always correspond to Actor and Leader + +Additionally we compute features for 8 trials and then take the average instead +of all 16. This is done in order to compute it for the asymmetrical trials +without flipping, as the flip itself can create artefacts. + +And the same process is repeated for the symmetrical conditions to be consistent,, +although it shouldn't have a big impact for those trials +""" + +# Compute two-person microstates for each pair, fitted for all data +# We will concatenate the pairs along the channel axis +# Loop over frequencies +for f in len(all_freq_ranges): + ff = freq_names[f] + freq_range0 = all_freq_ranges[f] + # ========================================================================= + # First the microstate topographies are determined + # It might be an advantage to run the estimation of microstates on a HPC + # ========================================================================= + # Get data from all pairs before performing kmeans + np.random.seed(1234) + n_clusters=[3, 4, 5, 6, 7, 8, 9, 10] + n_runs = 100 # increased to 100 runs! 
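+    # ---------------------------------------------------------------------
+    # Illustrative sketch (hypothetical helper, not
+    # prepare_2P_micro_arr_collapsed_events): as described in the block
+    # comment above, a two-brain sample stacks the two participants'
+    # channels, swapping the order for conditions 6 and 7 so that the first
+    # n_channels always belong to the observer/follower.
+    def _sketch_two_brain_sample(x1, x2, cond):
+        """x1, x2: (n_samples, n_ch) synchronised epochs; cond: condition code."""
+        if cond in (6, 7):               # ppn1 was actor/leader -> put ppn2 first
+            x1, x2 = x2, x1
+        return np.concatenate([x1, x2], axis=1)   # (n_samples, 2 * n_ch)
+    # ---------------------------------------------------------------------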
+ # Get current time + c_time1 = time_now(); print(c_time1) + # Save RAM by appending directly to array instead of making list and then array + pair_arr_indices = [0] + trialinfo_list = [] + events_list = [] + for i in range(n_pairs): + tmp_data, tmp_trialinfo, tmp_events = prepare_2P_micro_arr_collapsed_events(i, sfreq, event_id, freq_range=freq_range0, standardize=True) + pair_arr_indices.append(len(tmp_data)) + trialinfo_list.append(tmp_trialinfo) + events_list.append(tmp_events) + if i == 0: # first run initiation + micro_data_all = tmp_data + else: + micro_data_all = np.append(micro_data_all,tmp_data, axis=0) + del tmp_data # clear up space + print(f"Finished preparing microstate data for pair {Pair_id[i]}") + + # Use cumulative sum to determine indices for each pair's data + pair_indices = np.cumsum(pair_arr_indices) + + # Save the trialinfos and events from all pairs, for easier access in later steps + with open(f"{microstate_save_path}Dualmicro_fit_all_{ff}_trial_events_infos.pkl", "wb") as filehandle: + pickle.dump([Pair_id,trialinfo_list,events_list], filehandle) # [maps, L, gfp_peaks, gev, cv_min, pair_idx] + + + # # with args parser in hpc + # n_maps = n_clusters[(args.map_idx-1)] + # print(f"Running analysis for maps: {n_maps}") + # print("Memory used by the micro data array (GB):",micro_data_all.nbytes*9.31e-10) + + for n_maps in n_clusters: # Don't use for loop on the HPC! + # Run the 100 runs in batches of 10 to save underway in case the job script terminates + best_cv_crit = 9999 # initialize unreasonably high value + for r in range(10): + microstate_results = list(kmeans_dualmicro(micro_data_all, n_maps, + n_runs=int(n_runs/10),maxiter=1000)) + # Overwrite the maps if a lower CV criterion was found for the initiation + if microstate_results[4] < best_cv_crit: + microstate_results.append(pair_indices) + # Save results + with open(f"{microstate_save_path}Dualmicro_fit_all_{ff}_data_maps{n_maps}.pkl", "wb") as filehandle: + pickle.dump(microstate_results, filehandle) # [maps, L, gfp_peaks, gev, cv_min, pair_idx] + print(f"Updated the microstates. 
Previous best CV: {best_cv_crit}", + f"new best CV criterion : {microstate_results[4]}") + # Update best cv criterion value + best_cv_crit = microstate_results[4] + + print(f"Finished sub-run {r+1} out of 10") + + print(f"Finished microstate analysis for n_maps = {n_maps}") + print("Started", c_time1, "\nCurrent",time_now()) + + # ========================================================================= + # # Evaluate microstates fitted to all data + # ========================================================================= + # Get summary results + microstate_summary_results = [] + + for n_maps in n_clusters: + with open(f"{microstate_save_path}Dualmicro_fit_all_{ff}_data_maps{n_maps}.pkl", "rb") as file: + microstate_results = pickle.load(file) + # Also save summary results across n_maps + microstate_summary_results.append([microstate_results[0],microstate_results[3],microstate_results[4]]) + + # Use CV criterion to estimate best number of microstates + cv_gev_arr = np.zeros((len(n_clusters),2)) + for imap in range(len(n_clusters)): + gev = np.sum(microstate_summary_results[imap][1]) + cv = microstate_summary_results[imap][2] + cv_gev_arr[imap,:] = [cv, gev] + + # Convert to Pandas dataframe + col_names = ["n_Microstates", "Fit_Criteria", "Value"] + Fit_Criteria = ["CV Criterion", "Global Explained Variance"] + dtypes = [int,str,"float64"] + + cv_gev_df = numpy_arr_to_pandas_df(cv_gev_arr, col_names = col_names, col_values = [n_clusters,Fit_Criteria], + dtypes = dtypes) + # Evaluate optimal n_Microstates + h_order = Fit_Criteria + g = sns.FacetGrid(data=cv_gev_df,row=None, + margin_titles=True, height=8, aspect=1.5) + g = g.map(sns.pointplot,"n_Microstates", "Value", "Fit_Criteria", + dodge=0, capsize=0.18, errorbar=None, linestyles=["-", "-"], + markers=["o", "o"], hue_order=h_order, palette=sns.color_palette()) + g.add_legend() + plt.subplots_adjust(top=0.9, right=0.85, left=0.1) + g.fig.suptitle("Mean CV Criterion and GEV", fontsize=18) + g.set_axis_labels(x_var="Number of Microstates", + y_var="GEV and CV", + fontsize=14) + # The lower CV the better. Measure of residual variance + # But the higher GEV the better. 
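+    # ---------------------------------------------------------------------
+    # Illustrative sketch (hypothetical helper, not the feature functions in
+    # dualmicro_functions.py): the duration, occurrence and time-coverage
+    # features used throughout this script can be derived from a label
+    # sequence by collapsing it into runs of identical labels.
+    def _sketch_dur_occ_tco(labels, n_maps, sfreq):
+        """labels: 1D int array of microstate labels sampled at sfreq Hz."""
+        change = np.flatnonzero(np.diff(labels)) + 1
+        starts = np.concatenate(([0], change))
+        ends = np.concatenate((change, [len(labels)]))
+        run_labels, run_lengths = labels[starts], ends - starts
+        total_time = len(labels) / sfreq
+        dur, occ, tco = np.zeros(n_maps), np.zeros(n_maps), np.zeros(n_maps)
+        for m in range(n_maps):
+            runs_m = run_lengths[run_labels == m]
+            dur[m] = runs_m.mean() / sfreq if runs_m.size else 0.0  # mean duration (s)
+            occ[m] = runs_m.size / total_time                       # times dominant per second
+            tco[m] = runs_m.sum() / len(labels)                     # ratio of time covered
+        return dur, occ, tco
+    # ---------------------------------------------------------------------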
+ # Save file + g.savefig(f"{fig_save_path}Microstates/Fit_all_{ff}/"+"Dualmicro_fit_all_{ff}_CV_Criterion_GEV"+".png") + + # Count which number of microstates have the lowest cv criterion for each subject + min_idx = np.argmin(cv_gev_df.loc[cv_gev_df["Fit_Criteria"]=="CV Criterion","Value"]) + cv_gev_df.loc[cv_gev_df["Fit_Criteria"]=="CV Criterion"].iloc[min_idx] + + # Visualize the microstates + # Prior to re-ordering + for ii in range(len(n_clusters)): + plot_dualmicro(n_clusters[ii], microstate_summary_results[ii][0], microstate_summary_results[ii][1], epoch.info) + + # ========================================================================= + # # Re-order two-person microstates + # # This is only run once, after microstates are created + # # We only do it for 8 microstates, which was the optimal number + # ========================================================================= + n_maps = 8 + ii = n_clusters.index(n_maps) + + with open(f"{microstate_save_path}Dualmicro_fit_all_{ff}_data_maps{n_maps}.pkl", "rb") as file: + microstate_results = pickle.load(file) + + maps, m_labels, gfp_peaks, gev, cv_min, pair_idx = microstate_results + + plot_dualmicro(n_maps, maps, gev, epoch.info) + + # Make dictionary with n_maps and new order + # All 4 top row consecutively, followed by 4 bot row + manual_reordering_template = {"8_alpha":[5,2,7,0,1,4,6,3], + "8_beta":[4,1,3,6,7,5,0,2], + "8_broadband":[6,3,4,0,2,1,5,7]} + + new_order = manual_reordering_template[f"{n_maps}_{ff}"] + + maps, gev, m_labels = reorder_microstate_results(new_order, maps, gev, m_labels) + + # Plot again to check it worked + plot_dualmicro(n_maps, maps, gev, epoch.info) + + # Since neuronal activity is often oscillating, this causes polarity inversions + # Microstates ignores the sign, and hence the polarity in the map is arbitrary + # It is only the relative difference within the plot that is interesting + # depending on initiation. 
We can thus freely change the sign for visualization + # For two-person microstates, each person's map is sign-changed separately + manual_sign_correction = {"8_alpha":[[-1,-1,-1,1,-1,1,1,1],[1,1,1,-1,-1,1,1,1]], + "8_beta":[[-1,-1,-1,-1,-1,1,1,1],[-1,-1,1,-1,-1,1,1,-1]], + "8_broadband":[[1,1,-1,-1,1,-1,-1,-1],[-1,1,-1,-1,1,-1,-1,-1]]} + sign_swap = manual_sign_correction[f"{n_maps}_{ff}"] + + maps = sign_swap_microstates(sign_swap, maps, n_maps, n_channels) + + # Plot a final time for last confirmation + plot_dualmicro(n_maps, maps, gev, epoch.info) + + # Close all figures and repeat by changing n_maps + plt.close("all") + + ### Save reordered results + n_maps = 8 + ii = n_clusters.index(n_maps) + + with open(f"{microstate_save_path}Dualmicro_fit_all_{ff}_data_maps{n_maps}.pkl", "rb") as file: + microstate_results = pickle.load(file) + maps, m_labels, gfp_peaks, gev, cv_min, pair_idx = microstate_results + # Re-order + new_order = manual_reordering_template[str(n_maps)] + maps, gev, m_labels = reorder_microstate_results(new_order, maps, gev, m_labels) + # Sign alignment + maps = sign_swap_microstates(sign_swap, maps, n_maps, n_channels) + # Overwrite variable + microstate_results = maps, m_labels, gfp_peaks, gev, cv_min, pair_idx + # Save to new file + with open(f"{microstate_save_path}Reordered/Dualmicro_fit_all_{ff}_data_maps{n_maps}.pkl", "wb") as filehandle: + pickle.dump(microstate_results, filehandle) # [maps, L, gfp_peaks, gev, cv_min, pair_idx] + + # Save topomaps for the microstates + save_path = f"{fig_save_path}Microstates/Fit_all_{ff}/" + with open(f"{microstate_save_path}Reordered/Dualmicro_fit_all_{ff}_data_maps{n_maps}.pkl", "rb") as file: + microstate_results = pickle.load(file) + maps, m_labels, gfp_peaks, gev, cv_min, pair_idx = microstate_results + fig = plot_dualmicro(n_maps, maps, gev, epoch.info) + fig.savefig(save_path+f"Dualmicro_fit_all_{ff}_maps{n_maps}"+".png") + + # Save svg for Paper + fig.savefig(save_path+f"Dualmicro_fit_all_{ff}_maps{n_maps}"+".svg") + + ### Save svg with fixed color scales across all microstates + vlims = (np.min(maps), np.max(maps)) + + fig = plot_dualmicro(n_maps, maps, gev, vlims, epoch.info, vlims) + + fig.savefig(save_path+f"Dualmicro_fit_all_{ff}_fixed_colorscale_maps{n_maps}"+".png") + fig.savefig(save_path+f"Dualmicro_fit_all_{ff}_fixed_colorscale_maps{n_maps}"+".svg") + + # ========================================================================= + # # Estimate two-person microstate metrics/features + # # There might be a small error introduced due to gaps in the time series from + # # dropped segments, e.g. when calculating the transition probability as + # # the time series is discontinuous due to the gaps. But with the high sampling rate + # # only a very small fraction of the samples have discontinuous neighbors + # ========================================================================= + """ + Overview of common microstate features: + 1. Average duration a given microstate remains stable (Dur) + 2. Frequency occurrence, independent of individual duration (Occ) + Average number of times a microstate becomes dominant per second + 3. Ratio of total Time Covered (TCo) + 4. Transition probabilities (TMx) + 5. 
Ratio of shannon entropy relative to theoretical max chaos (Ent) + """ + # Hard-coded the optimal number of microstates based on CV criterion and GEV + n_maps = 8 + # Load all microstate results + with open(f"{microstate_save_path}Reordered/Dualmicro_fit_all_{ff}_data_maps{n_maps}.pkl", "rb") as file: + microstate_results = pickle.load(file) + # Load all trialinfos + with open(f"{microstate_save_path}Dualmicro_fit_all_{ff}_trial_events_infos.pkl", "rb") as file: + trialinfo_list = pickle.load(file) + + Microstate_names = [chr(ele) for ele in range(65,65+n_maps)] + + m_labels = [0]*n_pairs + events = [0]*n_pairs + m_feats = [0]*n_pairs + + for i in range(n_pairs): + m_labels[i], events[i], m_feats[i] = dualmicro_fit_all_feature_computation(i) + print(f"Finished computing microstate features for pair {Pair_id[i]}") + + # Save the raw microstate features + with open(f"{microstate_save_path}/raw_dualmicro_fit_all_{ff}_features_maps{n_maps}.pkl", "wb") as filehandle: + pickle.dump(m_feats, filehandle) # [Subject][Dur_arr,Occ_arr,TCo_arr,TMx_arr,Ent_arr] [Event, map*] + # * the feature is calculated for each map, where applicable. + # Transition matrix is calculated for each map -> map transition probability + + # with open(f"{microstate_save_path}/raw_computed_dualmicro_fit_all_{ff}_features.pkl", "rb") as file: + # m_feats = pickle.load(file) # [Subject][Dur_arr,Occ_arr,TCo_arr,TMx_arr,Ent_arr] [Event, map*] + + ### Convert all features to dataframes for further processing + col_names = ["Pair_ID", "Event_ID", "Microstate", "Value"] + col_values = [Pair_id,list(collapsed_event_id.keys()),Microstate_names] + dtypes = [int,str,str,"float64"] + # Mean duration + Dur_arr = np.stack([ele[0] for ele in m_feats]) # [Subject, event, n_map] + Dur_df = numpy_arr_to_pandas_df(Dur_arr, col_names, col_values, dtypes) + # Add dummy variable to enabling combining of dataframes + measurement_id = ["Duration"]*len(Dur_df) + Dur_df.insert(2, "Measurement", measurement_id) + # Save df + Dur_df.to_pickle(os.path.join(microstate_save_path,"Dualmicro_fit_all_{ff}_duration_df.pkl")) + + # Frequency of occurrence per sec + Occ_arr = np.stack([ele[1] for ele in m_feats]) # [Subject, event, n_map] + Occ_df = numpy_arr_to_pandas_df(Occ_arr, col_names, col_values, dtypes) + # Add dummy variable to enabling combining of dataframes + measurement_id = ["Occurrence"]*len(Occ_df) + Occ_df.insert(2, "Measurement", measurement_id) + # Save df + Occ_df.to_pickle(os.path.join(microstate_save_path,"Dualmicro_fit_all_{ff}_occurrence_df.pkl")) + + # Ratio total Time Covered + TCo_arr = np.stack([ele[2] for ele in m_feats]) # [Subject, event, n_map] + TCo_df = numpy_arr_to_pandas_df(TCo_arr, col_names, col_values, dtypes) + # Add dummy variable to enabling combining of dataframes + measurement_id = ["Time_covered"]*len(TCo_df) + TCo_df.insert(2, "Measurement", measurement_id) + # Save df + TCo_df.to_pickle(os.path.join(microstate_save_path,"Dualmicro_fit_all_{ff}_ratio_time_covered_df.pkl")) + + # Transition matrix should be read as probability of row to column + xi, xj = np.meshgrid(Microstate_names,Microstate_names) + _, arrow = np.meshgrid(Microstate_names,["->"]*n_maps) + + transition_info = np.char.add(np.char.add(xj,arrow),xi) + + TMx_arr = np.stack([ele[3] for ele in m_feats]) # [Subject, event, n_map, n_map] + TMx_arr = TMx_arr.reshape((n_pairs,len(collapsed_event_id),n_maps*n_maps)) # Flatten the maps to 1D + + col_names = ["Pair_ID", "Event_ID", "Transition", "Value"] + col_values = 
[Pair_id,list(collapsed_event_id.keys()),transition_info.flatten()] + TMx_df = numpy_arr_to_pandas_df(TMx_arr, col_names, col_values, dtypes) + # Add dummy variable to enabling combining of dataframes + measurement_id = ["Probability"]*len(TMx_df) + TMx_df.insert(2, "Measurement", measurement_id) + # Save df + TMx_df.to_pickle(os.path.join(microstate_save_path,"Dualmicro_fit_all_{ff}_transition_df.pkl")) + + # Entropy + Ent_arr = np.stack([ele[4] for ele in m_feats]) # [Subject, event] + col_names = ["Pair_ID", "Event_ID", "Value"] + col_values = [Pair_id,list(collapsed_event_id.keys())] + dtypes = [int, str, "float64"] + Ent_df = numpy_arr_to_pandas_df(Ent_arr, col_names, col_values, dtypes) + # Add dummy variable to enabling combining of dataframes + measurement_id = ["Entropy"]*len(Ent_df) + Ent_df.insert(2, "Measurement", measurement_id) + # Save df + Ent_df.to_pickle(os.path.join(microstate_save_path,"Dualmicro_fit_all_{ff}_ratio_entropy_df.pkl")) + +# %% Backfit two-person microstates to pseudo-pairs +# The pseudo-pairs are created for all participants except the real pair. +# This is fine for symmetrical tasks, e.g. rest and coupled. +# But not for assymmetrical tasks like observation and leader. +# We might have a leader - leader pseudo-pair. +# Hence we only look at ppn1 with ppn2 from different pairs and exclude +# ppn1 with ppn1 or ppn2 with ppn2 + +for f in len(all_freq_ranges): + ff = freq_names[f] + freq_range0 = all_freq_ranges[f] + # ========================================================================= + # It might be an advantage to run the backfitting of microstates on a HPC + # ========================================================================= + # To save time and prevent reloading the same EEG over and over, I divided + # the prepare array function into a load and combine function + # By loading all into memory, I can skip loading for every combination + # but this requires a very high memory, which is fortunately not a problem on the hpc + + # I am limiting the pseudo-pairs to be where ppn1 ends with 1 and ppn2 with 2 + # Which means we have 21 * 20 options + n_pseudo_pairs = n_pairs*(n_pairs-1) + + # To not load data 420 times for two participants, we preload all EEG data to ram + c_time1 = time_now(); print("Starting load",c_time1) + all_micro_data = [0]*n_subjects + all_trial_data = [0]*n_subjects + for i in range(n_subjects): + all_micro_data[i], all_trial_data[i] = load_microstate_arrays(i) + print("Load finished", time_now()) + + # Get the prototypical alpha maps + n_maps = 8 + with open(f"{microstate_save_path}Reordered/Dualmicro_fit_all_{ff}_data_maps{n_maps}.pkl", "rb") as file: + microstate_results = pickle.load(file) + + prototype_map = microstate_results[0] + + # Start the backfitting + m_labels = [0]*n_pseudo_pairs + events = [0]*n_pseudo_pairs + GEVs = [0]*n_pseudo_pairs + counter = 0 + pseudo_pair_id = [] + for i in range(n_subjects): + for j in range(n_subjects): + # Skip if the subject is the same + if np.abs(Subject_id[i]-Subject_id[j]) == 0: + continue + # Skip if the subject are from the same pair + if np.abs(Subject_id[i]-Subject_id[j]) == 1: + continue + # Skip if ppn1 is not ending on 1, and ppn2 not ending on 2 + if not (str(Subject_id[i])[-1] == "1") & (str(Subject_id[j])[-1] == "2"): + continue + # A valid pseudo pair + else: + # Get the synchronized events + event0 = get_synch_events_from_pseudo_pairs(all_trial_data[i],all_trial_data[j]) + # Get the preloaded micro data + micro_data1 = all_micro_data[i] + micro_data2 = 
+# %% Backfit two-person microstates to pseudo-pairs
+# The pseudo-pairs are created for all participants except the real pair.
+# This is fine for symmetrical tasks, e.g. rest and coupled,
+# but not for asymmetrical tasks like observation and leader,
+# where we might end up with a leader - leader pseudo-pair.
+# Hence we only pair ppn1 with ppn2 from different pairs and exclude
+# ppn1 with ppn1 or ppn2 with ppn2
+
+for f in range(len(all_freq_ranges)):
+    ff = freq_names[f]
+    freq_range0 = all_freq_ranges[f]
+    # =========================================================================
+    # It might be an advantage to run the backfitting of microstates on a HPC
+    # =========================================================================
+    # To save time and prevent reloading the same EEG over and over, I divided
+    # the prepare array function into a load and a combine function.
+    # By loading all data into memory, I can skip loading for every combination,
+    # but this requires a lot of memory, which is fortunately not a problem on the HPC
+
+    # I am limiting the pseudo-pairs to those where ppn1 ends with 1 and ppn2 with 2,
+    # which means we have 21 * 20 = 420 options
+    n_pseudo_pairs = n_pairs*(n_pairs-1)
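+    # Quick sanity check of the pairing rule above (illustrative, uncomment to run):
+    # assert len([(a, b) for a in Subject_id for b in Subject_id
+    #             if str(a)[-1] == "1" and str(b)[-1] == "2" and abs(a - b) != 1]) == n_pseudo_pairs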
+    # To avoid loading the data 420 times for two participants, we preload all EEG data to RAM
+    c_time1 = time_now(); print("Starting load",c_time1)
+    all_micro_data = [0]*n_subjects
+    all_trial_data = [0]*n_subjects
+    for i in range(n_subjects):
+        all_micro_data[i], all_trial_data[i] = load_microstate_arrays(i)
+    print("Load finished", time_now())
+
+    # Get the prototypical maps for this frequency range
+    n_maps = 8
+    with open(f"{microstate_save_path}Reordered/Dualmicro_fit_all_{ff}_data_maps{n_maps}.pkl", "rb") as file:
+        microstate_results = pickle.load(file)
+
+    prototype_map = microstate_results[0]
+
+    # Start the backfitting
+    m_labels = [0]*n_pseudo_pairs
+    events = [0]*n_pseudo_pairs
+    GEVs = [0]*n_pseudo_pairs
+    counter = 0
+    pseudo_pair_id = []
+    for i in range(n_subjects):
+        for j in range(n_subjects):
+            # Skip if the subject is the same
+            if np.abs(Subject_id[i]-Subject_id[j]) == 0:
+                continue
+            # Skip if the subjects are from the same pair
+            if np.abs(Subject_id[i]-Subject_id[j]) == 1:
+                continue
+            # Skip if ppn1 is not ending on 1, and ppn2 not ending on 2
+            if not (str(Subject_id[i])[-1] == "1") & (str(Subject_id[j])[-1] == "2"):
+                continue
+            # A valid pseudo pair
+            else:
+                # Get the synchronized events
+                event0 = get_synch_events_from_pseudo_pairs(all_trial_data[i],all_trial_data[j])
+                # Get the preloaded micro data
+                micro_data1 = all_micro_data[i]
+                micro_data2 = all_micro_data[j]
+                # Get the synchronized and concatenated micro data in the current frequency range
+                micro_data0 = combine_two_person_microstate_arrays(micro_data1, micro_data2, event0, sfreq, freq_range=freq_range0)
+                # Backfit and get the labels
+                L, GEV = pseudo_pair_dualmicro_backfitting(micro_data0, prototype_map, event0, n_maps, sfreq)
+                # Save the results
+                m_labels[counter], GEVs[counter], events[counter] = L, GEV, event0
+                pseudo_pair_id.append(f"{Subject_id[i]}-{Subject_id[j]}")
+                # Move counter
+                counter += 1
+                print(f"Finished backfitting for pseudo pair {pseudo_pair_id[-1]}")
+                print("Started", c_time1, "\nCurrent",time_now())
+
+    backfit_results = [pseudo_pair_id, m_labels, GEVs, events]
+    # Save the results from all pseudo pairs
+    with open(f"{microstate_save_path}Reordered/Backfitting/Dualmicro_fit_all_{ff}_data_maps{n_maps}.pkl", "wb") as filehandle:
+        pickle.dump(backfit_results, filehandle) # [pseudo_pair_id, L, GEV, events]
+
+    # =========================================================================
+    # Estimate two-person microstate metrics/features
+    # There might be a small error introduced due to gaps in the time series from
+    # dropped segments, e.g. when calculating the transition probability, as
+    # the time series is discontinuous due to the gaps. But with the high sampling rate
+    # only a very small fraction of the samples have discontinuous neighbors
+    # =========================================================================
+
+    """
+    Overview of common microstate features:
+        1. Average duration a given microstate remains stable (Dur)
+        2. Frequency of occurrence, independent of individual duration (Occ)
+           Average number of times a microstate becomes dominant per second
+        3. Ratio of total Time Covered (TCo)
+        4. Transition probabilities (TMx)
+        5. Ratio of Shannon entropy relative to theoretical max chaos (Ent)
+    """
+    # Hard-coded the optimal number of microstates based on CV criterion and GEV
+    n_maps = 8
+    # Load all the backfit pseudo-pair results
+    with open(f"{microstate_save_path}Reordered/Backfitting/Dualmicro_fit_all_{ff}_data_maps{n_maps}.pkl", "rb") as file:
+        backfit_results = pickle.load(file) # [pseudo_pair_id, L, GEV, events]
+
+    Microstate_names = [chr(ele) for ele in range(65,65+n_maps)]
+
+    pseudo_pair_id = backfit_results[0]
+    n_pseudo_pairs = len(pseudo_pair_id)
+
+    m_labels = [0]*n_pseudo_pairs
+    events = [0]*n_pseudo_pairs
+    m_feats = [0]*n_pseudo_pairs
+
+    for i in range(n_pseudo_pairs):
+        m_labels[i], events[i], m_feats[i] = dualmicro_fit_all_pseudo_pair_feature_computation(i,
+            n_maps, backfit_results, sfreq, event_id, collapsed_event_id)
+        print(f"Finished computing microstate features for pseudo pair {pseudo_pair_id[i]}")
+
+    # Save the raw microstate features
+    with open(f"{microstate_save_path}/raw_dualmicro_fit_all_{ff}_pseudo_pairs_features_maps{n_maps}.pkl", "wb") as filehandle:
+        pickle.dump(m_feats, filehandle) # [Subject][Dur_arr,Occ_arr,TCo_arr,TMx_arr,Ent_arr] [Event, map*]
+        # * the feature is calculated for each map, where applicable.
+        # Transition matrix is calculated for each map -> map transition probability
+
+    # with open(f"{microstate_save_path}/raw_dualmicro_fit_all_{ff}_pseudo_pairs_features_maps{n_maps}.pkl", "rb") as file:
+    #     m_feats = pickle.load(file) # [Subject][Dur_arr,Occ_arr,TCo_arr,TMx_arr,Ent_arr] [Event, map*]
+
+    ### Convert all features to dataframes for further processing
+    col_names = ["Pseudo_Pair_ID", "Event_ID", "Microstate", "Value"]
+    col_values = [pseudo_pair_id,list(collapsed_event_id.keys()),Microstate_names]
+    dtypes = [str,str,str,"float64"]
+    # Mean duration
+    Dur_arr = np.stack([ele[0] for ele in m_feats]) # [Subject, event, n_map]
+    Dur_df = numpy_arr_to_pandas_df(Dur_arr, col_names, col_values, dtypes)
+    # Add dummy variable to enable combining of dataframes
+    measurement_id = ["Duration"]*len(Dur_df)
+    Dur_df.insert(2, "Measurement", measurement_id)
+    # Save df
+    Dur_df.to_pickle(os.path.join(microstate_save_path,f"Dualmicro_fit_all_{ff}_pseudo_pairs_duration_df.pkl"))
+
+    # Frequency of occurrence per sec
+    Occ_arr = np.stack([ele[1] for ele in m_feats]) # [Subject, event, n_map]
+    Occ_df = numpy_arr_to_pandas_df(Occ_arr, col_names, col_values, dtypes)
+    # Add dummy variable to enable combining of dataframes
+    measurement_id = ["Occurrence"]*len(Occ_df)
+    Occ_df.insert(2, "Measurement", measurement_id)
+    # Save df
+    Occ_df.to_pickle(os.path.join(microstate_save_path,f"Dualmicro_fit_all_{ff}_pseudo_pairs_occurrence_df.pkl"))
+
+    # Ratio total Time Covered
+    TCo_arr = np.stack([ele[2] for ele in m_feats]) # [Subject, event, n_map]
+    TCo_df = numpy_arr_to_pandas_df(TCo_arr, col_names, col_values, dtypes)
+    # Add dummy variable to enable combining of dataframes
+    measurement_id = ["Time_covered"]*len(TCo_df)
+    TCo_df.insert(2, "Measurement", measurement_id)
+    # Save df
+    TCo_df.to_pickle(os.path.join(microstate_save_path,f"Dualmicro_fit_all_{ff}_pseudo_pairs_ratio_time_covered_df.pkl"))
+
+    # Transition matrix should be read as probability of row to column
+    xi, xj = np.meshgrid(Microstate_names,Microstate_names)
+    _, arrow = np.meshgrid(Microstate_names,["->"]*n_maps)
+
+    transition_info = np.char.add(np.char.add(xj,arrow),xi)
+
+    TMx_arr = np.stack([ele[3] for ele in m_feats]) # [Subject, event, n_map, n_map]
+    TMx_arr = TMx_arr.reshape((n_pseudo_pairs,len(collapsed_event_id),n_maps*n_maps)) # Flatten the maps to 1D
+
+    col_names = ["Pseudo_Pair_ID", "Event_ID", "Transition", "Value"]
+    col_values = [pseudo_pair_id,list(collapsed_event_id.keys()),transition_info.flatten()]
+    TMx_df = numpy_arr_to_pandas_df(TMx_arr, col_names, col_values, dtypes)
+    # Add dummy variable to enable combining of dataframes
+    measurement_id = ["Probability"]*len(TMx_df)
+    TMx_df.insert(2, "Measurement", measurement_id)
+    # Save df
+    TMx_df.to_pickle(os.path.join(microstate_save_path,f"Dualmicro_fit_all_{ff}_pseudo_pairs_transition_df.pkl"))
+
+    # Entropy
+    Ent_arr = np.stack([ele[4] for ele in m_feats]) # [Subject, event]
+    col_names = ["Pseudo_Pair_ID", "Event_ID", "Value"]
+    col_values = [pseudo_pair_id,list(collapsed_event_id.keys())]
+    dtypes = [str,str,"float64"]
+    Ent_df = numpy_arr_to_pandas_df(Ent_arr, col_names, col_values, dtypes)
+    # Add dummy variable to enable combining of dataframes
+    measurement_id = ["Entropy"]*len(Ent_df)
+    Ent_df.insert(2, "Measurement", measurement_id)
+    # Save df
+    Ent_df.to_pickle(os.path.join(microstate_save_path,f"Dualmicro_fit_all_{ff}_pseudo_pairs_ratio_entropy_df.pkl"))
+
+# %% eLORETA on Intrabrain microstates
+### Make forward solutions
+# Computed using the fsaverage
template MRI + +# # First time setup will need to download fsaverage templates +# mne.datasets.fetch_fsaverage() + +fs_dir = "C:/Users/glia/mne_data/MNE-fsaverage-data/fsaverage" + +subjects_dir = os.path.dirname(fs_dir) +trans = "fsaverage" +src = os.path.join(fs_dir, "bem", "fsaverage-ico-5-src.fif") +bem = os.path.join(fs_dir, "bem", "fsaverage-5120-5120-5120-bem-sol.fif") + +# Read the template sourcespace +sourcespace = mne.read_source_spaces(src) + +# Since I use a template, I only need to make the forward operator once +# As we assume the channel positions are fixed approximately the same +# for all subjects using the same caps +subject_eeg = epoch.copy() + +subject_eeg.set_eeg_reference(projection=True) # needed for inverse modelling + +# Make forward solution +fwd = mne.make_forward_solution(subject_eeg.info, trans=trans, src=src, + bem=bem, eeg=True, mindist=5.0, n_jobs=1) + +# # Save forward operator +# fname_fwd = "./Source_fwd/fsaverage_{}-fwd.fif".format(study_order[i]) +# mne.write_forward_solution(fname_fwd, fwd, overwrite=True) + +# # Check the alignment looks correct between EEG sensors and the template +# mne.viz.plot_alignment( +# subject_eeg.info, trans, src=src, fwd=fwd, dig=True, +# meg=["helmet", "sensors"], subjects_dir=subjects_dir, surfaces="auto") + +### Load Parcellation +# Desikan-Killiany atlas (34 ROI from both hemispheres = 68 ROIs) +# Named aparc.annot in MNE python fsaverage folder +labels = mne.read_labels_from_annot("fsaverage", parc="aparc", + subjects_dir=subjects_dir) +labels = labels[:-1] # remove unknowns +label_names = [label.name for label in labels] +n_roi = len(labels) + +# Prepare brain lobe information +Frontal_rois = ['superiorfrontal-lh','superiorfrontal-rh', + 'rostralmiddlefrontal-lh','rostralmiddlefrontal-rh', + 'caudalmiddlefrontal-lh','caudalmiddlefrontal-rh', + 'parsopercularis-lh','parsopercularis-rh', + 'parstriangularis-lh','parstriangularis-rh', + 'parsorbitalis-lh','parsorbitalis-rh', + 'lateralorbitofrontal-lh','lateralorbitofrontal-rh', + 'medialorbitofrontal-lh','medialorbitofrontal-rh', + 'precentral-lh','precentral-rh', + 'paracentral-lh','paracentral-rh', + 'frontalpole-lh','frontalpole-rh'] +Parietal_rois = ['superiorparietal-lh','superiorparietal-rh', + 'inferiorparietal-lh','inferiorparietal-rh', + 'supramarginal-lh','supramarginal-rh', + 'postcentral-lh','postcentral-rh', + 'precuneus-lh','precuneus-rh'] +Temporal_rois = ['superiortemporal-lh','superiortemporal-rh', + 'middletemporal-lh','middletemporal-rh', + 'inferiortemporal-lh','inferiortemporal-rh', + 'bankssts-lh','bankssts-rh', + 'fusiform-lh','fusiform-rh', + 'transversetemporal-lh','transversetemporal-rh', + 'entorhinal-lh','entorhinal-rh', + 'temporalpole-lh','temporalpole-rh', + 'parahippocampal-lh','parahippocampal-rh'] +Occipital_rois = ['lateraloccipital-lh','lateraloccipital-rh', + 'lingual-lh','lingual-rh', + 'cuneus-lh','cuneus-rh', + 'pericalcarine-lh','pericalcarine-rh'] +Cingulate_rois = ['rostralanteriorcingulate-lh','rostralanteriorcingulate-rh', + 'caudalanteriorcingulate-lh','caudalanteriorcingulate-rh', + 'posteriorcingulate-lh','posteriorcingulate-rh', + 'isthmuscingulate-lh','isthmuscingulate-rh'] +Insular_rois = ['insula-lh','insula-rh'] + +Lobes = [Frontal_rois,Parietal_rois,Temporal_rois,Occipital_rois,Cingulate_rois,Insular_rois] + +Brain_region_labels = ["Frontal","Parietal","Temporal","Occipital","Cingulate","Insular"] +Brain_region_hemi_labels = np.repeat(Brain_region_labels,2).astype("<U12") +Brain_region_hemi_labels[::2] = [ele+"-lh" 
+for ele in Brain_region_labels]
+Brain_region_hemi_labels[1::2] = [ele+"-rh" for ele in Brain_region_labels]
+
+Brain_region = np.array(label_names, dtype = "<U32")
+for l in range(len(Lobes)):
+    Brain_region[np.array([i in Lobes[l] for i in Brain_region])] = Brain_region_labels[l]
+
+### Concatenate the microstates into one Raw object to apply the inverse on it
+n_maps = 8
+Microstate_names = [chr(ele) for ele in range(65,65+n_maps)]
+
+for f in range(len(all_freq_ranges)):
+    ff = freq_names[f]
+
+    with open(f"{microstate_save_path}Reordered/Intrabrain_microstate_fit_all_{ff}{n_maps}.pkl", "rb") as file:
+        microstate_results = pickle.load(file)
+
+    # Get the microstates and reshape to have channels in the first dim
+    maps = microstate_results[0]
+    maps = maps.transpose()
+
+    raw_maps = mne.io.RawArray(maps,subject_eeg.info)
+    raw_maps._filenames = [""] # Fix error with NoneType for "filename" for raw created with RawArray
+    raw_maps.set_eeg_reference(projection=True) # needed for inverse modelling
+
+    # Assuming equal variance and no correlations, use a diagonal matrix as the noise covariance
+    noise_cov = mne.make_ad_hoc_cov(subject_eeg.info, None)
+
+    # Make inverse operator
+    # Using default depth parameter = 0.8 and free orientation (loose = 1)
+    inverse_operator = mne.minimum_norm.make_inverse_operator(subject_eeg.info,
+                                                              fwd, noise_cov,
+                                                              loose = 1, depth = 0.8,
+                                                              verbose = 0)
+    src_inv = inverse_operator["src"]
+    # Compute inverse solution and retrieve the source localized microstate activities for each label
+    # Define regularization
+    snr = 3 # Default setting
+
+    # Use eLORETA and only keep the activity normal to the cortical surface
+    stc = mne.minimum_norm.apply_inverse_raw(raw_maps,inverse_operator,
+                                             lambda2 = 1/(snr**2),
+                                             pick_ori = "normal",
+                                             method = "eLORETA", verbose = 2)
+
+    # Get the source activity in the ROIs
+    label_activity = mne.extract_label_time_course(stc, labels, src_inv, mode="mean_flip",
+                                                   return_generator=False, verbose=0)
+
+    # Visualize the microstates in source space
+    # This way of plotting makes the color scale fixed across microstates
+    brain = stc.plot(
+        hemi="lh",
+        subjects_dir=subjects_dir,
+        smoothing_steps=1,
+    )
+
+    ### Convert Label Activity to Pandas DataFrame
+    # With ROI names and then add Brain Region label
+    col_names = ["Microstate", "ROI", "Value"]
+    col_val = [Microstate_names, label_names]
+
+    # Create the source microstate activity dataframe
+    sMicro_df = numpy_arr_to_pandas_df(label_activity.T, col_names = col_names, col_values = col_val)
+
+    assert sMicro_df.loc[(sMicro_df["ROI"]==label_names[4])&
+                         (sMicro_df["Microstate"]==Microstate_names[3]),
+                         "Value"].iloc[0] == label_activity[4,3]
+
+    # Add brain region information
+    sMicro_df.insert(2, "Brain_region", np.tile(Brain_region,int(sMicro_df.shape[0]/n_roi)))
+    sMicro_df["Brain_region"] = sMicro_df["Brain_region"].astype("category").\
+        cat.reorder_categories(Brain_region_labels, ordered=True)
+
+    # Add hemisphere information
+    sMicro_df.insert(3, "Hemisphere", [ele[-2:] for ele in sMicro_df["ROI"]])
+
+    # Add a column that combines brain region and hemisphere for plotting
+    sMicro_df.insert(4, "Brain_region_hemi", [b+"-"+h for b, h in zip(sMicro_df["Brain_region"],sMicro_df["Hemisphere"])])
+    sMicro_df["Brain_region_hemi"] = sMicro_df["Brain_region_hemi"].astype("category").\
+        cat.reorder_categories(Brain_region_hemi_labels, ordered=True)
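+    # Illustrative only: the long format makes region-level summaries straightforward,
+    # e.g. mean source activity per brain region for each microstate (uncomment to inspect):
+    # sMicro_df.groupby(["Microstate", "Brain_region"], observed=True)["Value"].mean()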
+    # Save the dataframe
+    sMicro_df.to_pickle(os.path.join(microstate_save_path,f"Single_micro_{ff}_source_activity_df.pkl"))
+
+# %% eLORETA on two-brain microstates
+# Continued based on the fwd operator and template loaded for the intrabrain analysis
+n_maps = 8
+Microstate_names = [chr(ele) for ele in range(65,65+n_maps)]
+
+for f in range(len(all_freq_ranges)):
+    ff = freq_names[f]
+
+    with open(f"{microstate_save_path}Reordered/Dualmicro_fit_all_{ff}_data_maps{n_maps}.pkl", "rb") as file:
+        microstate_results = pickle.load(file)
+
+    # Get the microstates
+    maps = microstate_results[0]
+    maps = maps.reshape(2*n_maps,n_channels)
+
+    # # Check the maps were split properly
+    # plot_microstates(n_maps, maps[:8], microstate_results[3])
+    # plot_microstates(n_maps, maps[8:], microstate_results[3])
+    # Maps are ordered as: ppn1 A, ppn2 A, ppn1 B, ppn2 B etc
+
+    # Transpose to have channels in the first dim
+    maps = maps.transpose()
+
+    raw_maps = mne.io.RawArray(maps,subject_eeg.info)
+    raw_maps._filenames = [""] # Fix error with NoneType for "filename" for raw created with RawArray
+    raw_maps.set_eeg_reference(projection=True) # needed for inverse modelling
+
+    # Assuming equal variance and no correlations, use a diagonal matrix as the noise covariance
+    noise_cov = mne.make_ad_hoc_cov(subject_eeg.info, None)
+
+    # Make inverse operator
+    # Using default depth parameter = 0.8 and free orientation (loose = 1)
+    inverse_operator = mne.minimum_norm.make_inverse_operator(subject_eeg.info,
+                                                              fwd, noise_cov,
+                                                              loose = 1, depth = 0.8,
+                                                              verbose = 0)
+    src_inv = inverse_operator["src"]
+    # Compute inverse solution and retrieve the source localized microstate activities for each label
+    # Define regularization
+    snr = 3 # Default setting
+
+    # Use eLORETA and only keep the activity normal to the cortical surface
+    stc = mne.minimum_norm.apply_inverse_raw(raw_maps,inverse_operator,
+                                             lambda2 = 1/(snr**2),
+                                             pick_ori = "normal",
+                                             method = "eLORETA", verbose = 2)
+
+    # Get the source activity in the ROIs
+    label_activity = mne.extract_label_time_course(stc, labels, src_inv, mode="mean_flip",
+                                                   return_generator=False, verbose=0)
+
+    # Visualize the microstates in source space
+    # This way of plotting makes the color scale fixed across microstates
+    brain = stc.plot(
+        hemi="lh",
+        subjects_dir=subjects_dir,
+        smoothing_steps=1,
+    )
+
+    # Visualize with different color scales for each microstate
+    Microstate_names2 = np.repeat(Microstate_names,2).astype("<U2")
+    Microstate_names2[::2] = [ele+"1" for ele in Microstate_names]
+    Microstate_names2[1::2] = [ele+"2" for ele in Microstate_names]
+
+    # Save source activations for each microstate
+    # Lateral and medial for each hemisphere + dorsal + flatmaps
+    save_path = f"{fig_save_path}Microstates/SourceDualmicroPrototypes/"
+
+    hemis = ["lh","rh"]
+    views = ["lateral","medial"]
+
+    for i in range(len(Microstate_names2)):
+        times0 = np.linspace(0,1,sfreq+1)[:2*n_maps+1]
+        stc0 = stc.copy().crop(times0[i],times0[i+1],include_tmax=False)
+        # Color bar limits: full saturation (yellow/teal) at the top 1% of absolute values,
+        # middle boundary at 5% (alpha = 1, progressively closer to yellow/teal),
+        # lower boundary at 10% (red/blue but with decreased transparency)
+        clim_max = -(np.sort(-np.abs(stc0.data),axis=0)[stc0.shape[0]//100])[0]
+        clim_mid = -(np.sort(-np.abs(stc0.data),axis=0)[stc0.shape[0]//20])[0]
+        clim_min = -(np.sort(-np.abs(stc0.data),axis=0)[stc0.shape[0]//10])[0]
+        clim0 = {"kind":"value","pos_lims":[clim_min,clim_mid,clim_max]}
+        # Lateral and medial
+        for h in range(len(hemis)):
+            hh = hemis[h]
+            brain = stc0.plot(
+                hemi=hh,
+                subjects_dir=subjects_dir,
+                smoothing_steps=10, # spatial smoothing
+                colorbar=False,
+                background="white",
+                cortex="classic",
+                size=800,
+                transparent=True,
+                views=views[0],
+                clim=clim0,
+            )
+            brain.save_image(os.path.join(save_path, f"Dualmicro_source_{Microstate_names2[i]}_{hh}_{views[0]}"+".png"))
+            brain.show_view(views[1])
+            brain.save_image(os.path.join(save_path, f"Dualmicro_source_{Microstate_names2[i]}_{hh}_{views[1]}"+".png"))
+
+        # Dorsal map
+        brain = stc0.plot(
+            hemi="both",
+            subjects_dir=subjects_dir,
+            smoothing_steps=10, # spatial smoothing
+            colorbar=True,
+            background="white",
+            cortex="classic",
+            size=1500,
+            transparent=True,
+            views="dorsal",
+            clim=clim0,
+        )
+        brain.save_image(os.path.join(save_path, f"Dualmicro_source_{Microstate_names2[i]}_dorsal"+".png"))
+
+        # Flat map
+        brain = stc0.plot(
+            hemi="both",
+            surface="flat",
+            subjects_dir=subjects_dir,
+            smoothing_steps=10, # spatial smoothing
+            colorbar=False,
+            background="white",
+            cortex="classic",
+            size=1500,
+            transparent=True,
+            views="flat",
+            clim=clim0,
+        )
+        brain.save_image(os.path.join(save_path, f"Dualmicro_source_{Microstate_names2[i]}_flat"+".png"))
+        # Close all figures
+        mne.viz.close_all_3d_figures()
+
+    # Mean
+    brain = stc.mean().plot(
+        hemi="lh",
+        subjects_dir=subjects_dir,
+        smoothing_steps=10,
+    )
+
+    ### Convert Label Activity to Pandas DataFrame
+    # With ROI names and then add Brain Region label
+    col_names = ["Microstate", "ROI", "Value"]
+    col_val = [Microstate_names2, label_names]
+    dtypes = [str, str, "float64"]
+
+    # Create the source microstate activity dataframe
+    sMicro_df = numpy_arr_to_pandas_df(label_activity.T, col_names, col_val, dtypes)
+
+    assert sMicro_df.loc[(sMicro_df["ROI"]==label_names[4])&
+                         (sMicro_df["Microstate"]==Microstate_names2[3]),
+                         "Value"].iloc[0] == label_activity[4,3]
+
+    # Add brain region information
+    sMicro_df.insert(2, "Brain_region", np.tile(Brain_region,int(sMicro_df.shape[0]/n_roi)))
+    sMicro_df["Brain_region"] = sMicro_df["Brain_region"].astype("category").\
+        cat.reorder_categories(Brain_region_labels, ordered=True)
+
+    # Add hemisphere information
+    sMicro_df.insert(3, "Hemisphere", [ele[-2:] for ele in sMicro_df["ROI"]])
+
+    # Add a column that combines brain region and hemisphere for plotting
+    sMicro_df.insert(4, "Brain_region_hemi", [b+"-"+h for b, h in zip(sMicro_df["Brain_region"],sMicro_df["Hemisphere"])])
+    sMicro_df["Brain_region_hemi"] = sMicro_df["Brain_region_hemi"].astype("category").\
+        cat.reorder_categories(Brain_region_hemi_labels, ordered=True)
+
+    # Save the dataframe
+    sMicro_df.to_pickle(os.path.join(microstate_save_path,f"Dualmicro_{ff}_source_activity_df.pkl"))
+
+# %% LRTC with DFA on Two-person microstate label time series
+# Using Detrended Fluctuation Analysis (DFA)
+# Adapted from Python Implementation by Arthur-Ervin Avramiea <a.e.avramiea@vu.nl>
+# From NBT2 toolbox
+"""
+See Hardstone et al, 2012 for more info
+Perform DFA
+    1 Compute cumulative sum of time series to create signal profile
+    2 Define set of window sizes (see below)
+    3 Remove the linear trend using least-squares for each window
+    4 Calculate standard deviation for each window and take the mean
+    5 Plot the fluctuation function (standard deviation) as a function of
+      window size, on a double logarithmic scale
+    6 The DFA exponent alpha corresponds to the Hurst exponent:
+      f(L) = sd = L^alpha (alpha is the slope of the linear fit in the log-log plot)
+
+If 0 < alpha < 0.5: The process exhibits anti-correlations
+If 0.5 < alpha < 1: The process exhibits positive correlations
+If alpha = 0.5: The process is indistinguishable from a random process
+If 1.0 < alpha < 2.0: The process is non-stationary. H = alpha - 1
+
+Window sizes should be equally spaced on a logarithmic scale
+Sizes should be at least 4 samples and up to 10% of total signal length
+
+### Specific for our microstate DFA analysis
+We have 8 microstates, but to compute the random walk we will partition
+the microstate sequence into two classes (see reference on microstate Hurst
+https://pubmed.ncbi.nlm.nih.gov/20921381/)
+
+A/B/C/D will be assigned the positive direction, while E/F/G/H will be
+assigned the negative direction, corresponding to whether ppn1 or ppn2
+is in one of the canonical microstates, while the other has a non-specific
+(average) topography.
+
+Each 25s trial is too short to estimate LRTC on, so I will concatenate all
+the trials corresponding to each condition.
+
+This should yield up to 25s * 16 trials = 400s of data for each condition,
+except rest, which is up to 120s * 2 trials = 240s
+
+DFA is computed from 8 trials at a time and then averaged, to avoid the
+problem of flipping in the asymmetric trials. We change the window sizes to 5-20s.
+To ensure consistency the same procedure is applied to the symmetric trials
+"""
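+# Minimal illustration of the idea above (not part of the pipeline, uncomment to run):
+# a dummy label sequence is turned into the +1/-1 class-based step signal described
+# in the docstring and its DFA exponent is estimated with nolds; an uncorrelated
+# sequence should give an exponent of roughly 0.5. Names prefixed with "_demo" are
+# hypothetical and only used here.
+# _rng = np.random.default_rng(0)
+# _demo_labels = _rng.integers(0, 8, size=10*sfreq)     # hypothetical microstate labels 0-7
+# _demo_steps = np.where(_demo_labels < 4, 1, -1)       # A-D -> +1, E-H -> -1
+# print("Demo DFA exponent:", nolds.dfa(_demo_steps))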
+# Window sizes
+compute_interval = [5,20] # the window sizes should be between 5 s and 20 s
+# Compute DFA window sizes for the given interval
+window_sizes = np.floor(np.logspace(-1,3,40) * sfreq).astype(int) # logspace from 0.1 second (10^-1) to 1000 seconds (10^3)
+window_sizes = window_sizes[(window_sizes >= compute_interval[0]*sfreq) & \
+                            (window_sizes <= compute_interval[1]*sfreq)]
+
+for f in range(len(all_freq_ranges)):
+    ff = freq_names[f]
+    # nolds already uses all cores, so multiprocessing with concurrent.futures makes it slower
+    n_maps = 8
+
+    with open(f"{microstate_save_path}Reordered/Dualmicro_fit_all_{ff}_data_maps{n_maps}.pkl", "rb") as file:
+        microstate_results = pickle.load(file)
+    # Load all trialinfos
+    with open(f"{microstate_save_path}Dualmicro_fit_all_{ff}_trial_events_infos.pkl", "rb") as file:
+        trialinfo_list = pickle.load(file)
+
+    # Pre-allocate memory
+    DFA_arr = np.zeros((n_pairs,len(collapsed_event_id)))
+    Fluctuation_arr = np.zeros((n_pairs,len(collapsed_event_id),len(window_sizes)))
+
+    # Get start time
+    c_time1 = time.localtime()
+    c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1)
+    print("Started {}".format(c_time1))
+    # nolds already uses all cores, so concurrent futures would make it slower
+    for i in range(n_pairs):
+        # Compute DFA
+        dfa_temp, fluc_temp = compute_dualmicro_DFA(i, microstate_results,
+            trialinfo_list, sfreq, window_sizes, event_id, collapsed_event_id, True)
+        # Save to array
+        DFA_arr[i] = dfa_temp
+        Fluctuation_arr[i] = fluc_temp
+        print("Finished {} out of {} pairs".format(i+1,n_pairs))
+
+    # Get ending time
+    c_time2 = time.localtime()
+    c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2)
+    print(("Started {} \nEnded Time {}".format(c_time1,c_time2)))
+
+    # Save the raw DFA analysis data for this frequency range
+    np.save(microstate_save_path+f"DFA_arr_{ff}.npy", DFA_arr)
+    np.save(microstate_save_path+f"Fluctuation_arr_{ff}.npy", Fluctuation_arr)
+
+    # Convert to Pandas dataframe (DFA exponent)
+    col_names = ["Pair_ID", "Event_ID", "Value"]
+    col_values = [Pair_id,list(collapsed_event_id.keys())]
+    dtypes = ["int64",str,"float64"]
+
+    DFA_df = numpy_arr_to_pandas_df(DFA_arr, col_names, col_values, dtypes)
+
+    # Add dummy variable to enable combining of dataframes
+    measurement_id = ["DFA"]*len(DFA_df)
+    DFA_df.insert(2, "Measurement", measurement_id)
+    # Save df
+    DFA_df.to_pickle(os.path.join(microstate_save_path,f"Dualmicro_{ff}_DFA_exponent_df.pkl"))
+# %% DFA in pseudo-pairs
+for f in range(len(all_freq_ranges)):
+    ff = freq_names[f]
+    # nolds already uses all cores, so multiprocessing with concurrent.futures makes it slower
+    n_maps = 8
+
+    # Load all the backfit pseudo-pair results
+    with open(f"{microstate_save_path}Reordered/Backfitting/Dualmicro_fit_all_{ff}_data_maps{n_maps}.pkl", "rb") as file:
+        backfit_results = pickle.load(file) # [pseudo_pair_id, L, GEV, events]
+
+    # Pre-allocate memory
+    DFA_arr = np.zeros((n_pairs,len(collapsed_event_id)))
+    Fluctuation_arr = np.zeros((n_pairs,len(collapsed_event_id),len(window_sizes)))
+
+    # Get start time
+    c_time1 = time.localtime()
+    c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1)
+    print("Started {}".format(c_time1))
+    # nolds already uses all cores, so concurrent futures would make it slower
+    for i in range(n_pairs):
+        # Compute DFA
+        dfa_temp, fluc_temp = compute_dualmicro_DFA_pseudo(i, backfit_results,
+            sfreq, window_sizes, event_id, collapsed_event_id, True)
+        # Save to array
+        DFA_arr[i] = dfa_temp
+        Fluctuation_arr[i] = fluc_temp
+        print("Finished {} out of {} pairs".format(i+1,n_pairs))
+
+    # Get ending time
+    c_time2 = time.localtime()
+    c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2)
+    print(("Started {} \nEnded Time {}".format(c_time1,c_time2)))
+
+    # Save the raw DFA analysis data for the pseudo-pairs
+    np.save(microstate_save_path+f"DFA_arr_pseudo_{ff}.npy", DFA_arr)
+    np.save(microstate_save_path+f"Fluctuation_arr_pseudo_{ff}.npy", Fluctuation_arr)
+
+    # Convert to Pandas dataframe (DFA exponent)
+    col_names = ["Pair_ID", "Event_ID", "Value"]
+    col_values = [Pair_id,list(collapsed_event_id.keys())]
+    dtypes = ["int64",str,"float64"]
+
+    DFA_df = numpy_arr_to_pandas_df(DFA_arr, col_names, col_values, dtypes)
+
+    # Add dummy variable to enable combining of dataframes
+    measurement_id = ["DFA"]*len(DFA_df)
+    DFA_df.insert(2, "Measurement", measurement_id)
+    # Save df
+    DFA_df.to_pickle(os.path.join(microstate_save_path,f"Dualmicro_{ff}_DFA_exponent_pseudo_df.pkl"))
+
+# %% Time-lagged inter-brain microstate synchrony
+# Hard-coded the optimal number of microstates based on CV criterion and GEV
+n_maps = 5
+
+# The lag (number of samples) we will iterate over to find the greatest time-lagged interbrain microstate synchrony
+lag_search_range = sfreq # 1 second in both directions
+lag_interval = np.linspace(-lag_search_range,lag_search_range,lag_search_range*2+1).astype(int)
+
+Microstate_names = [chr(ele) for ele in range(65,65+n_maps)]
+# Insert Z as the symbol for the non-common microstate
+Microstate_names.insert(0,"Z")
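+# Illustrative sketch only (this is not the implementation in dualmicro_functions.py):
+# time-lagged label agreement between two microstate sequences could be screened as the
+# fraction of samples with identical labels after shifting one sequence, e.g.
+# (np.roll wraps around, which is acceptable for a rough illustration; names prefixed
+# with "_demo" are hypothetical)
+# _demo_lab1 = np.random.randint(0, n_maps, 5*sfreq)
+# _demo_lab2 = np.random.randint(0, n_maps, 5*sfreq)
+# _demo_sync = [np.mean(np.roll(_demo_lab2, lag) == _demo_lab1) for lag in lag_interval]
+# _demo_best_lag = lag_interval[int(np.argmax(_demo_sync))]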
+# Loop over frequencies
+for f in range(len(all_freq_ranges)):
+    ff = freq_names[f]
+
+    # Load all microstate results
+    with open(f"{microstate_save_path}Reordered/Intrabrain_microstate_fit_all_{ff}{n_maps}.pkl", "rb") as file:
+        microstate_results = pickle.load(file)
+    # Load all trialinfos
+    with open(f"{microstate_save_path}Intrabrain_microstate_fit_all_{ff}_trialinfos.pkl", "rb") as file:
+        trialinfo_list = pickle.load(file)
+
+    m_labels = [0]*(n_subjects//2)
+    events = [0]*(n_subjects//2)
+    m_feats = [0]*(n_subjects//2)
+    shift_info = [0]*(n_subjects//2)
+    Pair_id = [0]*(n_subjects//2)
+
+    for i in tqdm(range(n_subjects//2)):
+        m_labels[i], events[i], m_feats[i], shift_info[i] = shifted_interbrain_microstate_feature_computation(i,
+            n_maps, microstate_results, trialinfo_list, sfreq,
+            event_id, collapsed_event_id, lag_search_range, lag_interval)
+        Pair_id[i] = int(str(Subject_id[2*i])[1:-1])
+        print(f"Finished computing interbrain microstate features for pair {Pair_id[i]}")
+
+    Pair_id = [ele+100 for ele in Pair_id]
+
+    # Save the raw microstate features
+    with open(f"{microstate_save_path}/raw_shifted_interbrain_single_micro_fit_all_{ff}_maps{n_maps}.pkl", "wb") as filehandle:
+        pickle.dump([Pair_id, m_feats, shift_info], filehandle) # [Subject][Dur_arr,Occ_arr,TCo_arr,TMx_arr,Ent_arr][Event, map*]
+        # * the feature is calculated for each map, where applicable.
+        # Transition matrix is calculated for each map -> map transition probability
+        # The first row and column correspond to the non-common microstate, i.e.
+        # when the two participants are in different microstates
+
+    # with open(f"{microstate_save_path}/raw_shifted_interbrain_single_micro_fit_all_{ff}_maps{n_maps}.pkl", "rb") as file:
+    #     Pair_id, m_feats, shift_info = pickle.load(file) # [Subject][Dur_arr,Occ_arr,TCo_arr,TMx_arr,Ent_arr] [Event, map*]
+
+    n_pairs = len(Pair_id)
+
+    ### Convert all features to dataframes for further processing
+    col_names = ["Pair_ID", "Event_ID", "Microstate", "Value"]
+    col_values = [Pair_id,list(collapsed_event_id.keys()),Microstate_names]
+    dtypes = [int,str,str,"float64"]
+
+    # Ratio total Time Covered
+    TCo_arr = np.stack([ele[2] for ele in m_feats]) # [Subject, event, n_map]
+    TCo_df = numpy_arr_to_pandas_df(TCo_arr, col_names, col_values, dtypes)
+    # Add dummy variable to enable combining of dataframes
+    measurement_id = ["Time_covered"]*len(TCo_df)
+    TCo_df.insert(2, "Measurement", measurement_id)
+    # Save df
+    TCo_df.to_pickle(os.path.join(microstate_save_path,f"Shifted_IB_Single_micro_fit_all_{ff}_maps{n_maps}_ratio_time_covered_df.pkl"))
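+# Illustrative only: the saved long-format dataframes can be reloaded for downstream
+# statistics, e.g. (uncomment to inspect; "_tco" is a hypothetical name):
+# _tco = pd.read_pickle(os.path.join(microstate_save_path,
+#     f"Shifted_IB_Single_micro_fit_all_{ff}_maps{n_maps}_ratio_time_covered_df.pkl"))
+# _tco.groupby(["Event_ID", "Microstate"])["Value"].mean()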