From d1ecc0df6f9bf21f2b83cbd28afcf81b53341e82 Mon Sep 17 00:00:00 2001 From: glia <glia@dtu.dk> Date: Tue, 13 Aug 2024 15:09:31 +0200 Subject: [PATCH] Upload dualmicro_functions.py --- dualmicro_functions.py | 1796 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1796 insertions(+) create mode 100644 dualmicro_functions.py diff --git a/dualmicro_functions.py b/dualmicro_functions.py new file mode 100644 index 0000000..4bc819c --- /dev/null +++ b/dualmicro_functions.py @@ -0,0 +1,1796 @@ +# -*- coding: utf-8 -*- +""" +Functions for the analysis of two-brain microstates + +@author: Qianliang Li (glia@dtu.dk) +""" +import os +import numpy as np +import pandas as pd +import mne +import mat73 +import matplotlib.pyplot as plt +import nolds + +from eeg_microstates3 import (locmax, T_empirical, H_1, H_2) + +def load_epoch_from_fieldtrip(i, files, event_id): + """Loads the preprocessed EEG data """ + # Get environmental variables + Subject_id = os.environ.get('Subject_id') + # Load the file + tmp_dict = mat73.loadmat(files[i]) + s_id = Subject_id[i] + eeg = tmp_dict["eeg_ppn"+str(s_id)[-1]+"_seg"] + # Get the epoched time series data (epoch, ch, time) + data = np.stack(eeg["trial"]) + # Re-scale data + data *= 1e-6 # micro Volt to Volt + # Create info instance + ch_names = [ele[0] for ele in eeg["hdr"]["label"]] + sfreq = int(eeg["fsample"]) + eeg_info = mne.create_info(ch_names, sfreq,ch_types="eeg") + # Update filter info + mne.filter._filt_update_info(eeg_info, True, 1.0, 40) + # Make events array (epoch number, _, event id) + events = np.zeros((len(data),3)).astype(int) + events[:,0] = np.arange(0,len(data)) + events[:,2] = eeg["trialinfo"][:,0] + # Save trialinfo object for synchronization of pairs based on timings + trialinfo = eeg["trialinfo"][:,0:3] + # Create the Epochs + epoch = mne.EpochsArray(data, eeg_info, events, 0, event_id) + # Set montage + epoch.set_montage("biosemi64") + return epoch, trialinfo + +# The function takes the data as a numpy array (n_t, n_ch) +def prepare_1P_micro_arr(i, ppn2_correction, sfreq, freq_range=None, standardize:bool=True): + """ Prepare single-brain EEG data for microstate analysis """ + # Get environmental variables + Subject_id = os.environ.get('Subject_id') + # Load epochs + epoch, trialinfo = load_epoch_from_fieldtrip(i) + # Ensure it is averaged referenced + epoch = epoch.set_eeg_reference("average") + # Get numpy arrays + micro_data = epoch.get_data() # get data + if standardize == True: + # Standardize data - will make EEG amplitudes comparable between + micro_data = micro_data - micro_data.mean(axis=1, keepdims=True) + micro_data /= micro_data.std(axis=1, keepdims=True) + # Correct the event_id for ppn2 in each pair, to swap all 6 with 4 + # and 7 with 5 and vice versa + if str(Subject_id[i])[-1]=="2": + trialinfo_df = pd.DataFrame(trialinfo.copy()) + trialinfo_df.iloc[:,0].replace(ppn2_correction, inplace=True) + trialinfo = trialinfo_df.to_numpy() + + # Transform data to correct shape + arr_shape = micro_data.shape # get shape + micro_data = micro_data.swapaxes(1,2) # swap ch and time axis + micro_data = micro_data.reshape(arr_shape[0]*arr_shape[2],arr_shape[1]) # reshape by combining epochs and times + + # Filter the data + if freq_range == None: + micro_data_filtered = micro_data # do not perform filtering + elif not freq_range == None: + # Notice it is done only after combining epochs and time, as filter length + # would be too long for 1s epochs. 
The filter function wants time on the last axis + micro_data = micro_data.transpose() + micro_data_filtered = mne.filter.filter_data(micro_data, sfreq, freq_range[0], freq_range[1]) + # Reverse the shape + micro_data_filtered = micro_data_filtered.transpose() + + return micro_data_filtered, trialinfo + +def plot_microstates(n_maps, maps, gev, epoch_info): + """ Plot the microstates """ + fig, axarr = plt.subplots(1, n_maps, figsize=(5*n_maps,5)) + fig.patch.set_facecolor('white') + for imap in range(n_maps): + mne.viz.plot_topomap(maps[imap,:], pos = epoch_info, axes = axarr[imap]) # plot + axarr[imap].set_title("GEV: {:.2f}".format(gev[imap]), fontsize=16, fontweight="bold") # title + fig.suptitle("Microstates: {}".format(n_maps), fontsize=20, fontweight="bold") + plt.tight_layout() + return fig + +def reorder_microstate_results(new_order, maps, gev, m_labels): + """ Function to re-order microstates based on manual order """ + reordered_maps = maps[new_order,:] + reordered_gev = gev[new_order] + # Make directory to find and replace map labels + dic0 = {value:key for key, value in enumerate(new_order)} + reordered_m_labels = np.array([dic0.get(n, n) for n in m_labels]) # re-order labels + return reordered_maps, reordered_gev, reordered_m_labels + +def microstate_run_length_encoding(m_labels): + """ Take a 1D stream of microstates and returns the label and duration """ + # Adapted from RLE Matlab code by Abdulrahman Ikram Siddiq, 2011 + # Initialize + counter = 0 + label = [9999] * len(m_labels) + duration = [9999] * len(m_labels) + # Starting the lists + label[counter] = m_labels[counter] # first element + duration[counter] = 1 # first element + for i in range(1,len(m_labels)): + # Add 1 to duration, if the next element is the same as current microstate + if m_labels[i-1] == m_labels[i]: + duration[counter] += 1 + # If it is not the same, get the new microstate label and restart duration + else: + counter += 1 + label[counter] = m_labels[i] + duration[counter] = 1 + # Trim the unused parts of the list + label = label[:counter+1] + duration = duration[:counter+1] + assert not any(np.array(label) == 9999) # all fillers removed + assert not any(np.array(duration) == 9999) # all fillers removed + + return np.array(label), np.array(duration) + +def single_micro_fit_all_feature_computation(i, n_maps, microstate_results, trialinfo_list, sfreq, event_id, + single_brain_event_id): + """ + Estimates the common (intrabrain) microstate features: + 1. Average duration a given microstate remains stable (Dur) + 2. Frequency occurrence, independent of individual duration (Occ) + Average number of times a microstate becomes dominant per second + 3. Ratio of total Time Covered (TCo) + 4. Transition probabilities (TMx) + 5. Ratio of shannon entropy relative to theoretical max chaos (Ent) + + Parameters + ---------- + i : int + The index. + n_maps : int + The number of maps (clusters) used. + microstate_results : list + The estimated microstates. + trialinfo_list : list + List with trial informations. + sfreq : int + The sampling frequency. + event_id : dict + The mappings between event id and condition. + single_brain_event_id : dict + The renamed mappings between event id and condition for single-brain + analysis. + + Returns + ------- + m_labels : np.array + The microstate sequence (labels) time series in the format (epoch, time). + events : pd.DataFrame + The events corresponding to each epoch. + MFeatures : list + List of arrays of each microstate feature. 
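+
+    Notes
+    -----
+    A minimal usage sketch (illustrative only; the inputs are assumed to be
+    prepared as in the surrounding pipeline):
+
+    >>> m_labels, events, MFeatures = single_micro_fit_all_feature_computation(
+    ...     0, n_maps, microstate_results, trialinfo_list, sfreq,
+    ...     event_id, single_brain_event_id)
+    >>> Dur, Occ, TCo, TMx, Ent = MFeatures
+    >>> Dur.shape   # (n_conditions, n_maps), mean durations in ms
+    >>> TMx.shape   # (n_conditions, n_maps, n_maps), transition probabilities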
+ + """ + # Get environmental variables + Subject_id = os.environ.get('Subject_id') + # Get the microstate labels + sub_idx = microstate_results[5] + subject_indices = sub_idx[i], sub_idx[i+1] + m_labels = microstate_results[1][subject_indices[0]:subject_indices[1]] + + # Get the trialinfo with conditions + # Notice that ppn2 events have been corrected, so we are using separate + # observer and actor conditions + # and also separate follower and leader conditions + Subject0, trialinfo = trialinfo_list[i] + assert Subject_id[i] == Subject0 + assert len(trialinfo) == len(m_labels)/sfreq + # Convert to dataframe + event_id_inv = {v: k for k, v in event_id.items()} # Inverse the event id + events = pd.DataFrame(trialinfo,columns=["Event_id","Trial_number","Trial_start_time"]) + events["Event_label"] = events["Event_id"].replace(event_id_inv) + events = events.reset_index().rename(columns={"index":"Epoch_idx"}) + + # Reshape m_labels to (epoch, time) + m_labels = m_labels.reshape(len(trialinfo),sfreq) + + # Remove pre-trial epochs + pre_trial_epochs = events["Trial_start_time"] < 0 + m_labels = m_labels[np.invert(pre_trial_epochs)] + events = events.loc[np.invert(pre_trial_epochs)].reset_index(drop=True) + events["Epoch_idx"] = events.index + + # Initialize arrays + Dur_arr = np.zeros((len(single_brain_event_id),n_maps)); Dur_arr.fill(np.nan) + Occ_arr = np.zeros((len(single_brain_event_id),n_maps)); Occ_arr.fill(np.nan) + TCo_arr = np.zeros((len(single_brain_event_id),n_maps)); TCo_arr.fill(np.nan) + TMx_arr = np.zeros((len(single_brain_event_id),n_maps,n_maps)) + Ent_arr = np.zeros(len(single_brain_event_id)) + for e in range(len(single_brain_event_id)): + ev_idx = list(single_brain_event_id.values())[e] + ep_idx = events["Epoch_idx"][events["Event_id"] == ev_idx] + trial_numbers0 = np.unique(events["Trial_number"][ep_idx]) + if len(trial_numbers0) > 8: + # Compute the first 8 trials and the last 8 separately, before + # taking the average to be consistent with asymmetric trials + Dur_arr0 = np.zeros((2,n_maps)) + Occ_arr0 = np.zeros((2,n_maps)) + TCo_arr0 = np.zeros((2,n_maps)) + TMx_arr0 = np.zeros((2,n_maps,n_maps)) + Ent_arr0 = np.zeros(2) + trial_numbers_split = np.array_split(trial_numbers0, 2) + # Array_split is used in cases where a trial might have been + # dropped, hence the split is only 8/7 + for s in range(len(trial_numbers_split)): + ep_idx0 = events.loc[(events["Event_id"] == ev_idx)& + (events["Trial_number"].isin(trial_numbers_split[s])), + "Epoch_idx"] + m_labels0 = m_labels[ep_idx0,:] + m_labels0_flat = m_labels0.reshape(m_labels0.shape[0]*m_labels0.shape[1]) + # Compute duration, occurrence and time covered + l_, d_ = microstate_run_length_encoding(m_labels0_flat) + # For each microstate + for ii in range(n_maps): + if np.isnan(np.nanmean(d_[l_==ii])): + # The specific microstate did not occur at all for this event + Dur_arr0[s,ii] = 0 + Occ_arr0[s,ii] = 0 + TCo_arr0[s,ii] = 0 + else: + Dur_arr0[s,ii] = np.mean(d_[l_==ii]) * 1000/sfreq # convert to ms + Occ_arr0[s,ii] = len(d_[l_==ii])/len(d_) * sfreq + TCo_arr0[s,ii] = np.sum(d_[l_==ii])/np.sum(d_) + # Compute transition matrix + TMx_arr0[s] = T_empirical(m_labels0_flat, n_maps) + # Compute Shannon Entropy relative to max + Ent_arr0[s] = H_1(m_labels0_flat, n_maps)/np.log(float(n_maps)) + # Average over the splits + Dur_arr[e] = np.mean(Dur_arr0, axis=0) + Occ_arr[e] = np.mean(Occ_arr0, axis=0) + TCo_arr[e] = np.mean(TCo_arr0, axis=0) + TMx_arr[e] = np.mean(TMx_arr0, axis=0) + Ent_arr[e] = np.mean(Ent_arr0, axis=0) + + 
else: + m_labels0 = m_labels[ep_idx,:] + m_labels0_flat = m_labels0.reshape(m_labels0.shape[0]*m_labels0.shape[1]) + # Compute duration, occurrence and time covered + l_, d_ = microstate_run_length_encoding(m_labels0_flat) + # For each microstate + for ii in range(n_maps): + if np.isnan(np.nanmean(d_[l_==ii])): + # The specific microstate did not occur at all for this event + Dur_arr[e,ii] = 0 + Occ_arr[e,ii] = 0 + TCo_arr[e,ii] = 0 + else: + Dur_arr[e,ii] = np.mean(d_[l_==ii]) * 1000/sfreq # convert to ms + Occ_arr[e,ii] = len(d_[l_==ii])/len(d_) * sfreq + TCo_arr[e,ii] = np.sum(d_[l_==ii])/np.sum(d_) + # Compute transition matrix + TMx_arr[e] = T_empirical(m_labels0_flat, n_maps) + # Compute Shannon Entropy relative to max + Ent_arr[e] = H_1(m_labels0_flat, n_maps)/np.log(float(n_maps)) + # Save all microstate features together + MFeatures = [Dur_arr,Occ_arr,TCo_arr,TMx_arr,Ent_arr] + return m_labels, events, MFeatures + +def interbrain_microstate_run_length_encoding(m_labels1, m_labels2): + """ + Takes two 1D streams of microstate labels and returns the label + and duration for the common microstate. If there are no common + microstate, the label -1 is returned with a corresponding duration. + + Adapted from RLE Matlab code by Abdulrahman Ikram Siddiq, 2011 + """ + assert len(m_labels1) == len(m_labels2), "The provided microstate labels do not have equal length" + # Initialize + counter = 0 + label = [9999] * len(m_labels1) + duration = [9999] * len(m_labels1) + # Starting the lists + if m_labels1[counter] == m_labels2[counter]: + label[counter] = m_labels1[counter] + else: + label[counter] = -1 + duration[counter] = 1 # first element + for i in range(1,len(m_labels1)): + # If the previous timepoint was not common microstate, then add + # 1 to the duration if the next timepoint is also not common + if label[counter] == -1: + if m_labels1[i] != m_labels2[i]: + duration[counter] += 1 + # If it becomes a common microstate, then get new microstate + # label and restart duration + elif m_labels1[i] == m_labels2[i]: + counter += 1 + label[counter] = m_labels1[i] + duration[counter] = 1 + # If the previous time point was a common microstate, then add + # 1 to duration if the next time point is also common and the same label + elif label[counter] != -1: + if m_labels1[i] == m_labels2[i]: + if label[counter] == m_labels1[i]: + duration[counter] += 1 + # If still common, but a new label for both timeseries + # then restart with new label and duration + else: + counter += 1 + label[counter] = m_labels1[i] + duration[counter] = 1 + # If the two timeseries have different microstates, then + # set lable to -1 and restart duration + elif m_labels1[i] != m_labels2[i]: + counter += 1 + label[counter] = -1 + duration[counter] = 1 + + # Trim the unused parts of the list + label = label[:counter+1] + duration = duration[:counter+1] + assert not any(np.array(label) == 9999) # all fillers removed + assert not any(np.array(duration) == 9999) # all fillers removed + + return np.array(label), np.array(duration) + +def interbrain_T_matrix(m_labels1, m_labels2, n_maps): + """ + Takes two 1D microstate labels and returns the transition matrix + The first row and column correspond to label -1, which indicates + no common microstate + + Adapted from von Wegner F and Laufs H, 2018 + """ + assert len(m_labels1) == len(m_labels2), "The provided microstate labels do not have equal length" + T = np.zeros((n_maps+1, n_maps+1)) + n = len(m_labels1) + for i in range(n-1): + # If common microstate, then use that label + if 
m_labels1[i] == m_labels2[i]: + row_idx = m_labels1[i]+1 + # Or set row as 0, the not common microstate + else: + row_idx = 0 + # If next state is also common, set it to that state + if m_labels1[i+1] == m_labels2[i+1]: + col_idx = m_labels1[i+1]+1 + # Or set col as 0, if it is not common microstate + else: + col_idx = 0 + # Add 1 count to the transition matrix + T[row_idx,col_idx] += 1.0 + assert n - np.sum(T) == 1 # check whether all transitions have been counted + # Normalize row sums to 1 + T /= T.sum(axis=1, keepdims=True) + + return T + +def interbrain_microstate_feature_computation(i, n_maps, microstate_results, trialinfo_list, sfreq, event_id, + collapsed_event_id): + """ + Interbrain features: + 1. Average duration of common interbrain microstates (IBDur) + 2. Frequency occurrence of common interbrain microstates in the pair (IBOcc) + 3. Ratio of total time covered by interbrain common microstates in the pair (IBCov) + 4. Transition probability towards common interbrain microstates in the pair (IBTMx) + 5. Ratio of joint shannon entropy relative to theoretical max chaos (IBEnt) + + Parameters + ---------- + i : int + The index. + n_maps : int + The number of maps (clusters) used. + microstate_results : list + The estimated microstates. + trialinfo_list : list + List with trial informations. + sfreq : int + The sampling frequency. + event_id : dict + The mappings between event id and condition. + collapsed_brain_event_id : dict + The renamed mappings between event id and collapsed conditions + + Returns + ------- + m_labels : list of two np.array + The microstate sequence (labels) time series in the format (epoch, time). + events : list of two pd.DataFrame + The events corresponding to each epoch. + MFeatures : list + List of arrays of each microstate feature. 
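+
+    Notes
+    -----
+    A minimal sketch of the returned feature arrays (illustrative; inputs are
+    assumed to be prepared as in the surrounding pipeline):
+
+    >>> labels, events, MFeatures = interbrain_microstate_feature_computation(
+    ...     0, n_maps, microstate_results, trialinfo_list, sfreq,
+    ...     event_id, collapsed_event_id)
+    >>> Dur, Occ, TCo, TMx, Ent = MFeatures
+    >>> Dur.shape   # (n_collapsed_conditions, n_maps + 1)
+
+    Column 0 of Dur/Occ/TCo (and row/column 0 of TMx) corresponds to the
+    "no common microstate" label (-1).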
+ + """ + # Here i refers to the pair in range(n_subjects//2) + sub_idx = microstate_results[5] + + # Get the microstate labels and events for participant 1 + subject_indices1 = sub_idx[2*i], sub_idx[2*i+1] + m_labels1 = microstate_results[1][subject_indices1[0]:subject_indices1[1]] + # Get the trialinfo with conditions + Subject1, trialinfo1 = trialinfo_list[2*i] + # Convert to dataframe + event_id_inv = {v: k for k, v in event_id.items()} # Inverse the event id + + events1 = pd.DataFrame(trialinfo1,columns=["Event_id","Trial_number","Trial_start_time"]) + events1["Event_label"] = events1["Event_id"].replace(event_id_inv) + events1 = events1.reset_index().rename(columns={"index":"Epoch_idx"}) + # Reshape m_labels to (epoch, time) + m_labels1 = m_labels1.reshape(len(trialinfo1),sfreq) + + # Get the microstate labels and events for participant 2 + subject_indices2 = sub_idx[2*i+1], sub_idx[2*i+2] + m_labels2 = microstate_results[1][subject_indices2[0]:subject_indices2[1]] + # Get the trialinfo with conditions + Subject2, trialinfo2 = trialinfo_list[2*i+1] + # Convert to dataframe + events2 = pd.DataFrame(trialinfo2,columns=["Event_id","Trial_number","Trial_start_time"]) + events2["Event_label"] = events2["Event_id"].replace(event_id_inv) + events2 = events2.reset_index().rename(columns={"index":"Epoch_idx"}) + # Reshape m_labels to (epoch, time) + m_labels2 = m_labels2.reshape(len(trialinfo2),sfreq) + + # check that the participants from a pair was loaded + assert Subject2-1 == Subject1 + # Check that the same amount of event types are present + assert trialinfo1[-1,1] == trialinfo2[-1,1] + # Synchronize the events from the pair based on timing info + # By trimming the epochs to only include the epochs that are present + # in both participants of the pair + # Initialize with an array filled with a unique number not in use + sync_m_labels1 = np.zeros_like(m_labels1); sync_m_labels1.fill(9999) + sync_m_labels2 = np.zeros_like(m_labels2); sync_m_labels2.fill(9999) + for t in np.unique(trialinfo1[:,1]): + t_idx1 = np.where(trialinfo1[:,1] == t)[0] + t_idx2 = np.where(trialinfo2[:,1] == t)[0] + # Get the timings for the epochs for the specific trial t + t_timings1 = trialinfo1[t_idx1,2] + t_timings2 = trialinfo2[t_idx2,2] + timings_intersect = np.intersect1d(t_timings1,t_timings2) + # Get the indices where the timings matches + t_idx_match1 = t_idx1[pd.Series(t_timings1).isin(timings_intersect)] + t_idx_match2 = t_idx2[pd.Series(t_timings2).isin(timings_intersect)] + # Get the actual values from the synchronized epochs + sync_m_labels1[t_idx_match1] = m_labels1[t_idx_match1] + sync_m_labels2[t_idx_match2] = m_labels2[t_idx_match2] + # Find the epochs that were asynchronous, which have to be trimmed + asynch_epochs1 = np.unique(np.where(sync_m_labels1==9999)[0]) + asynch_epochs2 = np.unique(np.where(sync_m_labels2==9999)[0]) + # Trim/delete the asynchronous epochs + sync_m_labels1 = np.delete(sync_m_labels1,asynch_epochs1,axis=0) + sync_m_labels2 = np.delete(sync_m_labels2,asynch_epochs2,axis=0) + assert len(sync_m_labels1) == len(sync_m_labels2) # check the amount of synchronized epochs are equal + # Fix events + sync_events1 = events1.drop(asynch_epochs1,axis=0).reset_index(drop=True) + sync_events2 = events2.drop(asynch_epochs2,axis=0).reset_index(drop=True) + + # Since they are synchronized already, just take the first and fix + # epoch idx column with the new idx for the synchronized labels + events = sync_events1 + events["Epoch_idx"] = events.index + # Notice that for the intrabrain fit 
all alpha v6 I only corrected + # ppn2. So by taking ppn1 I get the original event labels. + + # Update in v7 + # # Collapse event_id 6 and 7 to 4 and 5 + # events.loc[events["Event_id"] == 6,["Event_id","Event_label"]] = [4, "observe, actor"] + # events.loc[events["Event_id"] == 7,["Event_id","Event_label"]] = [5, "imitate, leader"] + + # Due to the hard constraint for both participants being in the same + # microstate. It does not matter who will be treated as ppn1 and ppn2 + # since the prototypical microstate topographies are exactly the same! + + # Remove pre-trial epochs + pre_trial_epochs = events["Trial_start_time"] < 0 + sync_m_labels1 = sync_m_labels1[np.invert(pre_trial_epochs)] + sync_m_labels2 = sync_m_labels2[np.invert(pre_trial_epochs)] + events = events.loc[np.invert(pre_trial_epochs)].reset_index(drop=True) + events["Epoch_idx"] = events.index + + # Pre-allocate memory + Dur_arr = np.zeros((len(event_id),n_maps+1)); Dur_arr.fill(np.nan) + Occ_arr = np.zeros((len(event_id),n_maps+1)); Occ_arr.fill(np.nan) + TCo_arr = np.zeros((len(event_id),n_maps+1)); TCo_arr.fill(np.nan) + TMx_arr = np.zeros((len(event_id),n_maps+1,n_maps+1)) + Ent_arr = np.zeros(len(event_id)) + for e in range(len(event_id)): + ev_idx = list(event_id.values())[e] + ep_idx = events["Epoch_idx"][events["Event_id"] == ev_idx] + trial_numbers0 = np.unique(events["Trial_number"][ep_idx]) + if len(trial_numbers0) > 8: + # Compute the first 8 trials and the last 8 separately, before + # taking the average to be consistent with asymmetric trials + Dur_arr0 = np.zeros((2,n_maps+1)) + Occ_arr0 = np.zeros((2,n_maps+1)) + TCo_arr0 = np.zeros((2,n_maps+1)) + TMx_arr0 = np.zeros((2,n_maps+1,n_maps+1)) + Ent_arr0 = np.zeros(2) + trial_numbers_split = np.array_split(trial_numbers0, 2) + # Array_split is used in cases where a trial might have been + # dropped, hence the split is only 8/7 + for s in range(len(trial_numbers_split)): + ep_idx0 = events.loc[(events["Event_id"] == ev_idx)& + (events["Trial_number"].isin(trial_numbers_split[s])), + "Epoch_idx"] + # Get the microstate labels + m_labels10 = sync_m_labels1[ep_idx0,:] + m_labels20 = sync_m_labels2[ep_idx0,:] + # Flatten the labels + m_labels10_flat = m_labels10.reshape(m_labels10.shape[0]*m_labels10.shape[1]) + m_labels20_flat = m_labels20.reshape(m_labels20.shape[0]*m_labels20.shape[1]) + # Compute average duration of common microstate + # Output: label and duration of common microstate. 
Label -1 is used + # for not common microstate + l_, d_ = interbrain_microstate_run_length_encoding(m_labels10_flat,m_labels20_flat) + # For each microstate + for ii in range(n_maps+1): + if np.isnan(np.nanmean(d_[l_==ii-1])): + # The specific microstate did not occur at all for this event + Dur_arr0[s,ii] = 0 + Occ_arr0[s,ii] = 0 + TCo_arr0[s,ii] = 0 + else: + Dur_arr0[s,ii] = np.mean(d_[l_==ii-1]) * 1000/sfreq # convert to ms + Occ_arr0[s,ii] = len(d_[l_==ii-1])/len(d_) * sfreq + TCo_arr0[s,ii] = np.sum(d_[l_==ii-1])/np.sum(d_) + # Compute transition matrix + TMx_arr0[s] = interbrain_T_matrix(m_labels10_flat,m_labels20_flat,n_maps) + # Compute Joint Shannon Entropy relative to max + # Max is the sum of max individual entropies + Ent_arr0[s] = H_2(m_labels10_flat,m_labels20_flat,n_maps)/(2*np.log(float(n_maps))) + # Average over the splits + Dur_arr[e] = np.mean(Dur_arr0, axis=0) + Occ_arr[e] = np.mean(Occ_arr0, axis=0) + TCo_arr[e] = np.mean(TCo_arr0, axis=0) + TMx_arr[e] = np.mean(TMx_arr0, axis=0) + Ent_arr[e] = np.mean(Ent_arr0, axis=0) + + else: + # Get the microstate labels + m_labels10 = sync_m_labels1[ep_idx,:] + m_labels20 = sync_m_labels2[ep_idx,:] + # Flatten the labels + m_labels10_flat = m_labels10.reshape(m_labels10.shape[0]*m_labels10.shape[1]) + m_labels20_flat = m_labels20.reshape(m_labels20.shape[0]*m_labels20.shape[1]) + # Compute average duration of common microstate + # Output: label and duration of common microstate. Label -1 is used + # for not common microstate + l_, d_ = interbrain_microstate_run_length_encoding(m_labels10_flat,m_labels20_flat) + # For each microstate + for ii in range(n_maps+1): + if np.isnan(np.nanmean(d_[l_==ii-1])): + # The specific microstate did not occur at all for this event + Dur_arr[e,ii] = 0 + Occ_arr[e,ii] = 0 + TCo_arr[e,ii] = 0 + else: + Dur_arr[e,ii] = np.mean(d_[l_==ii-1]) * 1000/sfreq # convert to ms + Occ_arr[e,ii] = len(d_[l_==ii-1])/len(d_) * sfreq + TCo_arr[e,ii] = np.sum(d_[l_==ii-1])/np.sum(d_) + # Compute transition matrix + TMx_arr[e] = interbrain_T_matrix(m_labels10_flat,m_labels20_flat,n_maps) + # Compute Joint Shannon Entropy relative to max + # Max is the sum of max individual entropies + Ent_arr[e] = H_2(m_labels10_flat,m_labels20_flat,n_maps)/(2*np.log(float(n_maps))) + + # Combine all features in a list + MFeatures = [Dur_arr,Occ_arr,TCo_arr,TMx_arr,Ent_arr] + + # Update in v7 - Collapse after computations in single events + Dur_arr2 = np.zeros((len(collapsed_event_id),n_maps+1)) + Occ_arr2 = np.zeros((len(collapsed_event_id),n_maps+1)) + TCo_arr2 = np.zeros((len(collapsed_event_id),n_maps+1)) + TMx_arr2 = np.zeros((len(collapsed_event_id),n_maps+1,n_maps+1)) + Ent_arr2 = np.zeros(len(collapsed_event_id)) + MFeatures2 = [Dur_arr2,Occ_arr2,TCo_arr2,TMx_arr2,Ent_arr2] + + for f in range(len(MFeatures2)): + tmp_feat = MFeatures[f] + tmp_feat2 = MFeatures2[f] + for e in range(len(collapsed_event_id)): + ee = list(collapsed_event_id.keys())[e] + if (ee == 'observer_actor'): + old_ev_idx1 = list(event_id.keys()).index("observe, actor") + old_ev_idx2 = list(event_id.keys()).index("observe, observer") + tmp_feat2[e] = np.mean([tmp_feat[old_ev_idx1],tmp_feat[old_ev_idx2]], axis=0) + elif ee == 'follower_leader': + old_ev_idx1 = list(event_id.keys()).index("imitate, leader") + old_ev_idx2 = list(event_id.keys()).index("imitate, follower") + tmp_feat2[e] = np.mean([tmp_feat[old_ev_idx1],tmp_feat[old_ev_idx2]], axis=0) + else: + new_ev_idx = list(collapsed_event_id.values())[e] + old_ev_idx = 
list(event_id.values()).index(new_ev_idx) + tmp_feat2[e] = tmp_feat[old_ev_idx] + + return [sync_m_labels1, sync_m_labels2], [sync_events1, sync_events2], MFeatures2 + + +def get_synch_events_from_pairs(i, trialinfo1, trialinfo2, event_id): + """ Get the synchronized events for each pair """ + # Get environmental variables + Subject_id = os.environ.get('Subject_id') + # Check that the participants from a pair was loaded + assert Subject_id[2*i+1]-1 == Subject_id[2*i] + # Synchronize the events from the pair based on timing info + # By trimming the epochs to only include the epochs that are present + # in both participants of the pair + event_id_inv = {v: k for k, v in event_id.items()} # Inverse the event id + events1 = pd.DataFrame(trialinfo1,columns=["Event_id","Trial_number","Trial_start_time"]) + events1["Event_label"] = events1["Event_id"].replace(event_id_inv) + events1 = events1.reset_index().rename(columns={"index":"Epoch_idx"}) + + events2 = pd.DataFrame(trialinfo2,columns=["Event_id","Trial_number","Trial_start_time"]) + events2["Event_label"] = events2["Event_id"].replace(event_id_inv) + events2 = events2.reset_index().rename(columns={"index":"Epoch_idx"}) + # Concatenate all the synched events + sync_events1 = np.zeros((len(events1))) + sync_events2 = np.zeros((len(events2))) + for t in np.unique(trialinfo1[:,1]): + t_idx1 = np.where(trialinfo1[:,1] == t)[0] + t_idx2 = np.where(trialinfo2[:,1] == t)[0] + # Get the timings for the epochs for the specific trial t + t_timings1 = trialinfo1[t_idx1,2] + t_timings2 = trialinfo2[t_idx2,2] + timings_intersect = np.intersect1d(t_timings1,t_timings2) + # Get the indices where the timings matches + t_idx_match1 = t_idx1[pd.Series(t_timings1).isin(timings_intersect)] + t_idx_match2 = t_idx2[pd.Series(t_timings2).isin(timings_intersect)] + # Save the trials that should be kept + sync_events1[t_idx_match1] = 1 + sync_events2[t_idx_match2] = 1 + # Find the epochs that were asynchronous, which have to be trimmed + asynch_epochs1 = np.where(sync_events1==0)[0] + asynch_epochs2 = np.where(sync_events2==0)[0] + # Trim/delete the asynchronous epochs + events1 = events1.drop(asynch_epochs1,axis=0).reset_index(drop=True) + events2 = events2.drop(asynch_epochs2,axis=0).reset_index(drop=True) + assert len(events1) == len(events2) # check the amount of synchronized epochs are equal + return events1, events2 + +def prepare_2P_micro_arr_collapsed_events(i, sfreq, event_id, freq_range=None, standardize:bool=True): + # Here i refers to the pair in range(n_subjects//2) + # Load epochs + epoch1, trialinfo1 = load_epoch_from_fieldtrip(2*i) + epoch2, trialinfo2 = load_epoch_from_fieldtrip(2*i+1) + # Ensure it is averaged referenced + epoch1 = epoch1.set_eeg_reference("average") + epoch2 = epoch2.set_eeg_reference("average") + # Get numpy arrays + micro_data1 = epoch1.get_data() # get data + micro_data2 = epoch2.get_data() # get data + # Get only the synchronized epochs + events = get_synch_events_from_pairs(i, trialinfo1, trialinfo2, event_id) + micro_data1 = micro_data1[events[0]["Epoch_idx"]] + micro_data2 = micro_data2[events[1]["Epoch_idx"]] + assert micro_data1.shape == micro_data2.shape + # Get event labels for each epoch + # Since they are already synchronized I just take the first + event_labels0 = events[0]["Event_label"] + if standardize == True: + # Standardize data - will make EEG amplitudes comparable between + # recordings/trials. 
Standardized along the ch axis, meaning the mean + # and std of the topomap at each timepoint is normalized + # It will be normalized separately for each participant + # in order to ensure both participants are treated equally by the + # kmeans + micro_data10 = micro_data1 - micro_data1.mean(axis=1, keepdims=True) + micro_data10 /= micro_data10.std(axis=1, keepdims=True) + micro_data20 = micro_data2 - micro_data2.mean(axis=1, keepdims=True) + micro_data20 /= micro_data20.std(axis=1, keepdims=True) + elif standardize == False: + micro_data10 = micro_data1 + micro_data20 = micro_data2 + # Concatenate along the channel axis, to treat the paired EEG as one entity + micro_data = np.concatenate([micro_data10,micro_data20],axis=1) + + # Flip the micro_data between ppn1 and ppn2 to fix first microstate + # always being observer/follower, and second microstate being actor/leader + cond6_idx = event_labels0 == "observe, observer" + cond7_idx = event_labels0 == "imitate, follower" + cond67_idx = cond6_idx+cond7_idx + micro_data[cond67_idx] = np.concatenate([micro_data20[cond67_idx], + micro_data10[cond67_idx]],axis=1) + # Transform data to correct shape + arr_shape = micro_data.shape # get shape + micro_data = micro_data.swapaxes(1,2) # swap ch and time axis + micro_data = micro_data.reshape(arr_shape[0]*arr_shape[2],arr_shape[1]) # reshape by combining epochs and times + + # Filter the data + if freq_range == None: + micro_data_filtered = micro_data # do not perform filtering + elif not freq_range == None: + # Notice it is done only after combining epochs and time, as filter length + # would be too long for 1s epochs. The filter function wants time on the last axis + micro_data = micro_data.transpose() + micro_data_filtered = mne.filter.filter_data(micro_data, sfreq, freq_range[0], freq_range[1]) + # Reverse the shape + micro_data_filtered = micro_data_filtered.transpose() + + return micro_data_filtered, [trialinfo1, trialinfo2], events + +def plot_dualmicro(n_maps, maps, gev, epoch_info, vlims=(None, None)): + # Split the map from the two participants + n_channels = epoch_info["nchan"] + maps1, maps2, _ = np.split(maps, [n_channels,2*n_channels], axis=1) + fig, axarr = plt.subplots(2, n_maps, figsize=(20,10)) + fig.patch.set_facecolor('white') + for imap in range(n_maps): + mne.viz.plot_topomap(maps1[imap,:], pos = epoch_info, vlim = vlims, axes = axarr[0,imap]) # plot + axarr[0,imap].set_title("GEV: {:.2f}".format(gev[imap]), fontsize=16, fontweight="bold") # title + mne.viz.plot_topomap(maps2[imap,:], pos = epoch_info, vlim = vlims, axes = axarr[1,imap]) + fig.suptitle("Microstates: {}".format(n_maps), fontsize=20, fontweight="bold") + plt.tight_layout() + return fig + +def sign_swap_microstates(sign_swap, maps, n_maps, n_channels): + # Split maps + maps1, maps2, _ = np.split(maps, [n_channels,2*n_channels], axis=1) + for m in range(n_maps): + # Sign swap each + maps1[m] *= sign_swap[0][m] + maps2[m] *= sign_swap[1][m] + # Concatenate maps + maps = np.concatenate([maps1,maps2],axis=1) + return maps + +def dualmicro_fit_all_feature_computation(i, n_maps, microstate_results, trialinfo_list, sfreq, event_id, + collapsed_event_id): + """ + Overview of common microstate features: + 1. Average duration a given microstate remains stable (Dur) + 2. Frequency occurrence, independent of individual duration (Occ) + Average number of times a microstate becomes dominant per second + 3. Ratio of total Time Covered (TCo) + 4. Transition probabilities (TMx) + 5. 
Ratio of shannon entropy relative to theoretical max chaos (Ent) + + Parameters + ---------- + i : int + The index. + n_maps : int + The number of maps (clusters) used. + microstate_results : list + The estimated microstates. + trialinfo_list : list + List with trial informations. + sfreq : int + The sampling frequency. + event_id : dict + The mappings between event id and condition. + collapsed_brain_event_id : dict + The renamed mappings between event id and collapsed conditions + + Returns + ------- + m_labels : list of two np.array + The microstate sequence (labels) time series in the format (epoch, time). + events : list of two pd.DataFrame + The events corresponding to each epoch. + MFeatures : list + List of arrays of each microstate feature. + + """ + + pair_idx = microstate_results[5] + pair_indices = pair_idx[i], pair_idx[i+1] + m_labels = microstate_results[1][pair_indices[0]:pair_indices[1]] + + events = trialinfo_list[2][i] + # Since they are synchronized already, just take the first and fix + # epoch idx column with the new idx for the synchronized labels + events = events[0] + + # Update in v7 + # # Collapse event_id 6 and 7 to 4 and 5 + # events.loc[events["Event_id"] == 6,["Event_id","Event_label"]] = [4, "observe, actor"] + # events.loc[events["Event_id"] == 7,["Event_id","Event_label"]] = [5, "imitate, leader"] + # The microstate clustering was performed on flipped (collapsed) events, + # but I will compute the features on the 8 trials to avoid the flipping + # effect and then collapse by averaging afterwards + + # Reshape m_labels to (epoch, time) + m_labels = m_labels.reshape(len(events),sfreq) + + # Remove pre-trial epochs + pre_trial_epochs = events["Trial_start_time"] < 0 + m_labels = m_labels[np.invert(pre_trial_epochs)] + events = events.loc[np.invert(pre_trial_epochs)].reset_index(drop=True) + events["Epoch_idx"] = events.index + + # Pre-allocate memory + Dur_arr = np.zeros((len(event_id),n_maps)); Dur_arr.fill(np.nan) + Occ_arr = np.zeros((len(event_id),n_maps)); Occ_arr.fill(np.nan) + TCo_arr = np.zeros((len(event_id),n_maps)); TCo_arr.fill(np.nan) + TMx_arr = np.zeros((len(event_id),n_maps,n_maps)) + Ent_arr = np.zeros(len(event_id)) + for e in range(len(event_id)): + ev_idx = list(event_id.values())[e] + ep_idx = events["Epoch_idx"][events["Event_id"] == ev_idx] + trial_numbers0 = np.unique(events["Trial_number"][ep_idx]) + if len(trial_numbers0) > 8: + # Compute the first 8 trials and the last 8 separately, before + # taking the average to be consistent with asymmetric trials + Dur_arr0 = np.zeros((2,n_maps)) + Occ_arr0 = np.zeros((2,n_maps)) + TCo_arr0 = np.zeros((2,n_maps)) + TMx_arr0 = np.zeros((2,n_maps,n_maps)) + Ent_arr0 = np.zeros(2) + trial_numbers_split = np.array_split(trial_numbers0, 2) + # Array_split is used in cases where a trial might have been + # dropped, hence the split is only 8/7 + for s in range(len(trial_numbers_split)): + ep_idx0 = events.loc[(events["Event_id"] == ev_idx)& + (events["Trial_number"].isin(trial_numbers_split[s])), + "Epoch_idx"] + m_labels0 = m_labels[ep_idx0,:] + m_labels0_flat = m_labels0.reshape(m_labels0.shape[0]*m_labels0.shape[1]) + # Compute duration, occurrence and time covered + l_, d_ = microstate_run_length_encoding(m_labels0_flat) + # For each microstate + for ii in range(n_maps): + if np.isnan(np.nanmean(d_[l_==ii])): + # The specific microstate did not occur at all for this event + Dur_arr0[s,ii] = 0 + Occ_arr0[s,ii] = 0 + TCo_arr0[s,ii] = 0 + else: + Dur_arr0[s,ii] = np.mean(d_[l_==ii]) * 1000/sfreq # 
convert to ms + Occ_arr0[s,ii] = len(d_[l_==ii])/len(d_) * sfreq + TCo_arr0[s,ii] = np.sum(d_[l_==ii])/np.sum(d_) + # Compute transition matrix + TMx_arr0[s] = T_empirical(m_labels0_flat, n_maps) + # Compute Shannon Entropy relative to max + Ent_arr0[s] = H_1(m_labels0_flat, n_maps)/np.log(float(n_maps)) + # Average over the splits + Dur_arr[e] = np.mean(Dur_arr0, axis=0) + Occ_arr[e] = np.mean(Occ_arr0, axis=0) + TCo_arr[e] = np.mean(TCo_arr0, axis=0) + TMx_arr[e] = np.mean(TMx_arr0, axis=0) + Ent_arr[e] = np.mean(Ent_arr0, axis=0) + + else: + m_labels0 = m_labels[ep_idx,:] + m_labels0_flat = m_labels0.reshape(m_labels0.shape[0]*m_labels0.shape[1]) + # Compute duration, occurrence and time covered + l_, d_ = microstate_run_length_encoding(m_labels0_flat) + # For each microstate + for ii in range(n_maps): + if np.isnan(np.nanmean(d_[l_==ii])): + # The specific microstate did not occur at all for this event + Dur_arr[e,ii] = 0 + Occ_arr[e,ii] = 0 + TCo_arr[e,ii] = 0 + else: + Dur_arr[e,ii] = np.mean(d_[l_==ii]) * 1000/sfreq # convert to ms + Occ_arr[e,ii] = len(d_[l_==ii])/len(d_) * sfreq + TCo_arr[e,ii] = np.sum(d_[l_==ii])/np.sum(d_) + # Compute transition matrix + TMx_arr[e] = T_empirical(m_labels0_flat, n_maps) + # Compute Shannon Entropy relative to max + Ent_arr[e] = H_1(m_labels0_flat, n_maps)/np.log(float(n_maps)) + # Combine all features in a list + MFeatures = [Dur_arr,Occ_arr,TCo_arr,TMx_arr,Ent_arr] + + # Update in v7 - Collapse after computations in single events + Dur_arr2 = np.zeros((len(collapsed_event_id),n_maps)) + Occ_arr2 = np.zeros((len(collapsed_event_id),n_maps)) + TCo_arr2 = np.zeros((len(collapsed_event_id),n_maps)) + TMx_arr2 = np.zeros((len(collapsed_event_id),n_maps,n_maps)) + Ent_arr2 = np.zeros(len(collapsed_event_id)) + MFeatures2 = [Dur_arr2,Occ_arr2,TCo_arr2,TMx_arr2,Ent_arr2] + + for f in range(len(MFeatures2)): + tmp_feat = MFeatures[f] + tmp_feat2 = MFeatures2[f] + for e in range(len(collapsed_event_id)): + ee = list(collapsed_event_id.keys())[e] + if (ee == 'observer_actor'): + old_ev_idx1 = list(event_id.keys()).index("observe, actor") + old_ev_idx2 = list(event_id.keys()).index("observe, observer") + tmp_feat2[e] = np.mean([tmp_feat[old_ev_idx1],tmp_feat[old_ev_idx2]], axis=0) + elif ee == 'follower_leader': + old_ev_idx1 = list(event_id.keys()).index("imitate, leader") + old_ev_idx2 = list(event_id.keys()).index("imitate, follower") + tmp_feat2[e] = np.mean([tmp_feat[old_ev_idx1],tmp_feat[old_ev_idx2]], axis=0) + else: + new_ev_idx = list(collapsed_event_id.values())[e] + old_ev_idx = list(event_id.values()).index(new_ev_idx) + tmp_feat2[e] = tmp_feat[old_ev_idx] + + return m_labels, events, MFeatures2 + +def load_microstate_arrays(i, standardize:bool=True): + """ Loads the EEG data and convert to np.array """ + epoch1, trialinfo1 = load_epoch_from_fieldtrip(i) + # Ensure it is averaged referenced + epoch1 = epoch1.set_eeg_reference("average") + # Get numpy arrays + micro_data1 = epoch1.get_data() # get data + if standardize == True: + # Standardize data - will make EEG amplitudes comparable between + # recordings/trials. 
Standardized along the ch axis, meaning the mean + # and std of the topomap at each timepoint is normalized + # It will be normalized separately for each participant + micro_data10 = micro_data1 - micro_data1.mean(axis=1, keepdims=True) + micro_data10 /= micro_data10.std(axis=1, keepdims=True) + elif standardize == False: + micro_data10 = micro_data1 + return micro_data10, trialinfo1 + +def get_synch_events_from_pseudo_pairs(trialinfo1, trialinfo2, event_id): + """ + Takes two subject indices from a pseudo pair and align their events (based + on when they occur, e.g. first coupled in ppn1 with first coupled in ppn2) + After their microstates have been synchronized, the microstate features + are computed to serve as baseline as a non-interacting pair + """ + # Synchronize the events from the pair based on timing info + # By trimming the epochs to only include the epochs that are present + # in both participants of the pair + event_id_inv = {v: k for k, v in event_id.items()} # Inverse the event id + events1 = pd.DataFrame(trialinfo1,columns=["Event_id","Trial_number","Trial_start_time"]) + events1["Event_label"] = events1["Event_id"].replace(event_id_inv) + events1 = events1.reset_index().rename(columns={"index":"Epoch_idx"}) + + events2 = pd.DataFrame(trialinfo2,columns=["Event_id","Trial_number","Trial_start_time"]) + events2["Event_label"] = events2["Event_id"].replace(event_id_inv) + events2 = events2.reset_index().rename(columns={"index":"Epoch_idx"}) + + # I should just make sure to align the timings of the trials for each event type + counter_idx = 0 + # Initialize events as dataframe filled with unique number + sync_events1 = events1.applymap(lambda x: 9999) + sync_events2 = events2.applymap(lambda x: 9999) + + for e in range(len(event_id)): + # Get event number for the specific event + ev_idx = list(event_id.values())[e] + # Get the indices corresponding to the event for each participant + ep_idx1 = events1["Epoch_idx"][events1["Event_id"] == ev_idx] + ep_idx2 = events2["Epoch_idx"][events2["Event_id"] == ev_idx] + # Get the first trial from ppn1 and ppn2 that correspond to the event + event_trial_idx1 = np.unique(trialinfo1[ep_idx1][:,1]) + event_trial_idx2 = np.unique(trialinfo2[ep_idx2][:,1]) + if len(event_trial_idx1) != len(event_trial_idx2): + print(f"Not equal number of {list(event_id.keys())[e]} trials") + min_trials = np.min([len(event_trial_idx1),len(event_trial_idx2)]) + print(f"Using the first {min_trials} trials") + event_trial_idx1 = event_trial_idx1[:min_trials] + event_trial_idx2 = event_trial_idx2[:min_trials] + + for t1, t2 in zip(event_trial_idx1,event_trial_idx2): + t_idx1 = np.where(trialinfo1[:,1] == t1)[0] + t_idx2 = np.where(trialinfo2[:,1] == t2)[0] + # Get the timings for the epochs for the specific trial t + t_timings1 = trialinfo1[t_idx1,2] + t_timings2 = trialinfo2[t_idx2,2] + timings_intersect = np.intersect1d(t_timings1,t_timings2) + # Get the indices where the timings matches + t_idx_match1 = t_idx1[pd.Series(t_timings1).isin(timings_intersect)] + t_idx_match2 = t_idx2[pd.Series(t_timings2).isin(timings_intersect)] + # Save the event info + sync_events1.iloc[counter_idx:(counter_idx+len(timings_intersect))] = events1.iloc[t_idx_match1] + sync_events2.iloc[counter_idx:(counter_idx+len(timings_intersect))] = events2.iloc[t_idx_match2] + # Move counter + counter_idx += len(timings_intersect) + # Find the epochs that were asynchronous, which have to be trimmed + asynch_epochs1 = np.unique(np.where(sync_events1==9999)[0]) + asynch_epochs2 = 
np.unique(np.where(sync_events2==9999)[0]) + # Fix events + sync_events1 = sync_events1.drop(asynch_epochs1,axis=0).reset_index(drop=True) + sync_events2 = sync_events2.drop(asynch_epochs2,axis=0).reset_index(drop=True) + assert len(sync_events1) == len(sync_events2) # check the amount of synchronized epochs are equal + return [sync_events1, sync_events2] + +def combine_two_person_microstate_arrays(micro_data1, micro_data2, events, sfreq, freq_range=None): + """ Combine two EEG array data into one array for microstate estimation """ + # Get the synchronized epochs + micro_data1 = micro_data1[events[0]["Epoch_idx"]] + micro_data2 = micro_data2[events[1]["Epoch_idx"]] + # Get event labels for each epoch + # Since they are already synchronized I just take the first + event_labels0 = events[0]["Event_label"] + # Concatenate along the channel axis, to treat the paired EEG as one entity + micro_data = np.concatenate([micro_data1,micro_data2],axis=1) + + ### Update in v4.1 + # Flip the micro_data between ppn1 and ppn2 to fix first microstate + # always being observer/follower, and second microstate being actor/leader + cond6_idx = event_labels0 == "observe, observer" + cond7_idx = event_labels0 == "imitate, follower" + cond67_idx = cond6_idx+cond7_idx + micro_data[cond67_idx] = np.concatenate([micro_data2[cond67_idx], + micro_data1[cond67_idx]],axis=1) + # Transform data to correct shape + arr_shape = micro_data.shape # get shape + micro_data = micro_data.swapaxes(1,2) # swap ch and time axis + micro_data = micro_data.reshape(arr_shape[0]*arr_shape[2],arr_shape[1]) # reshape by combining epochs and times + # Filter the data in alpha + # Notice it is done only after combining epochs and time, as filter length + # would be too long for 1s epochs. The filter function wants time on the last axis + micro_data = micro_data.transpose() + micro_data_alpha = mne.filter.filter_data(micro_data, sfreq, freq_range[0], freq_range[1]) + # Reverse the shape + micro_data_alpha = micro_data_alpha.transpose() + + return micro_data_alpha + +def pseudo_pair_dualmicro_backfitting(micro_data, prototype_map, events, n_maps, sfreq): + """ Backfit microstate labels based on prototype map previously determined """ + assert prototype_map.shape[0] == n_maps, "Template map size is not equal to n_maps" + # Load micro data + micro_data0 = micro_data + n_ch = micro_data.shape[1] + # Estimate global explained variance (GEV) by the averaged template maps + # on the global field potential peaks in our data + # Find GFPs + gfp = np.std(micro_data0, axis=1) + gfp_peaks = locmax(gfp) + gfp_values = gfp[gfp_peaks] + gfp2 = np.sum(gfp_values**2) # normalizing constant in GEV + # Calculate spatial correlation + # Using absolute value of the topographies to obtain polarity invariance + # Since we are working on two-person microstates, we also need to test + # the different configurations. 
There are 4 in total, but we only need to try 2 + tmp_maps = np.split(prototype_map, [n_ch//2,n_ch], axis=1) + all_polarity_combinations = [np.concatenate([tmp_maps[0],tmp_maps[1]],axis=1), + np.concatenate([tmp_maps[0],-tmp_maps[1]],axis=1)] + C_arr = np.zeros((micro_data0.shape[0],n_maps,len(all_polarity_combinations))) + for p in range(len(all_polarity_combinations)): + C_arr[:,:,p] = np.dot(micro_data0, all_polarity_combinations[p].T) + # rescale cov + C_arr[:,:,p] /= (n_ch*np.outer(gfp, np.std(prototype_map, axis=1))) + # Get C as the highest correlation independent of polarity + # Take max for all polarity configurations and then argmax to find label + C = np.sqrt(np.max(C_arr**2,axis=2)) # notice sign is lost here, but it is only used as C^2 later so it is fine + L = np.argmax(C**2, axis=1) + + L_gfp = L[gfp_peaks] + C_gfp = C[gfp_peaks] + + gev = np.zeros(n_maps) + for k in range(n_maps): + r = L_gfp==k + gev[k] = np.sum(gfp_values[r]**2 * C_gfp[r,k]**2)/gfp2 + + # Reshape labels back to epoch, time + L_reshaped = L.reshape(len(events[0]),sfreq) + + return L_reshaped, gev + +def dualmicro_fit_all_pseudo_pair_feature_computation(i, n_maps, backfit_results, sfreq, + event_id, collapsed_event_id): + """ + Overview of common microstate features: + 1. Average duration a given microstate remains stable (Dur) + 2. Frequency occurrence, independent of individual duration (Occ) + Average number of times a microstate becomes dominant per second + 3. Ratio of total Time Covered (TCo) + 4. Transition probabilities (TMx) + 5. Ratio of shannon entropy relative to theoretical max chaos (Ent) + + Parameters + ---------- + i : int + The index. + n_maps : int + The number of maps (clusters) used. + backfit_results : list + The estimated back-fitted microstates. + sfreq : int + The sampling frequency. + event_id : dict + The mappings between event id and condition. + collapsed_brain_event_id : dict + The renamed mappings between event id and collapsed conditions + + Returns + ------- + m_labels : list of two np.array + The microstate sequence (labels) time series in the format (epoch, time). + events : list of two pd.DataFrame + The events corresponding to each epoch. + MFeatures : list + List of arrays of each microstate feature. 
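+
+    Notes
+    -----
+    Minimal usage sketch (illustrative; `backfit_results` is assumed to be the
+    output of the pseudo-pair back-fitting step):
+
+    >>> m_labels, events, MFeatures = dualmicro_fit_all_pseudo_pair_feature_computation(
+    ...     0, n_maps, backfit_results, sfreq, event_id, collapsed_event_id)
+    >>> Dur, Occ, TCo, TMx, Ent = MFeatures  # one entry per collapsed condition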
+ + """ + m_labels = backfit_results[1][i] + events = backfit_results[3][i] + + # Since they are synchronized already, just take the first and fix + # epoch idx column with the new idx for the synchronized labels + events = events[0] + + # Update in v7 + # # Collapse event_id 6 and 7 to 4 and 5 + # events.loc[events["Event_id"] == 6,["Event_id","Event_label"]] = [4, "observe, actor"] + # events.loc[events["Event_id"] == 7,["Event_id","Event_label"]] = [5, "imitate, leader"] + # The microstate clustering was performed on flipped (collapsed) events, + # but I will compute the features on the 8 trials to avoid the flipping + # effect and then collapse by averaging afterwards + + # Reshape m_labels to (epoch, time) + m_labels = m_labels.reshape(len(events),sfreq) + + # Remove pre-trial epochs + pre_trial_epochs = events["Trial_start_time"] < 0 + m_labels = m_labels[np.invert(pre_trial_epochs)] + events = events.loc[np.invert(pre_trial_epochs)].reset_index(drop=True) + events["Epoch_idx"] = events.index + + # Pre-allocate memory + Dur_arr = np.zeros((len(event_id),n_maps)); Dur_arr.fill(np.nan) + Occ_arr = np.zeros((len(event_id),n_maps)); Occ_arr.fill(np.nan) + TCo_arr = np.zeros((len(event_id),n_maps)); TCo_arr.fill(np.nan) + TMx_arr = np.zeros((len(event_id),n_maps,n_maps)) + Ent_arr = np.zeros(len(event_id)) + for e in range(len(event_id)): + ev_idx = list(event_id.values())[e] + ep_idx = events["Epoch_idx"][events["Event_id"] == ev_idx] + trial_numbers0 = np.unique(events["Trial_number"][ep_idx]) + if len(trial_numbers0) > 8: + # Compute the first 8 trials and the last 8 separately, before + # taking the average to be consistent with asymmetric trials + Dur_arr0 = np.zeros((2,n_maps)) + Occ_arr0 = np.zeros((2,n_maps)) + TCo_arr0 = np.zeros((2,n_maps)) + TMx_arr0 = np.zeros((2,n_maps,n_maps)) + Ent_arr0 = np.zeros(2) + trial_numbers_split = np.array_split(trial_numbers0, 2) + # Array_split is used in cases where a trial might have been + # dropped, hence the split is only 8/7 + for s in range(len(trial_numbers_split)): + ep_idx0 = events.loc[(events["Event_id"] == ev_idx)& + (events["Trial_number"].isin(trial_numbers_split[s])), + "Epoch_idx"] + m_labels0 = m_labels[ep_idx0,:] + m_labels0_flat = m_labels0.reshape(m_labels0.shape[0]*m_labels0.shape[1]) + # Compute duration, occurrence and time covered + l_, d_ = microstate_run_length_encoding(m_labels0_flat) + # For each microstate + for ii in range(n_maps): + if np.isnan(np.nanmean(d_[l_==ii])): + # The specific microstate did not occur at all for this event + Dur_arr0[s,ii] = 0 + Occ_arr0[s,ii] = 0 + TCo_arr0[s,ii] = 0 + else: + Dur_arr0[s,ii] = np.mean(d_[l_==ii]) * 1000/sfreq # convert to ms + Occ_arr0[s,ii] = len(d_[l_==ii])/len(d_) * sfreq + TCo_arr0[s,ii] = np.sum(d_[l_==ii])/np.sum(d_) + # Compute transition matrix + TMx_arr0[s] = T_empirical(m_labels0_flat, n_maps) + # Compute Shannon Entropy relative to max + Ent_arr0[s] = H_1(m_labels0_flat, n_maps)/np.log(float(n_maps)) + # Average over the splits + Dur_arr[e] = np.mean(Dur_arr0, axis=0) + Occ_arr[e] = np.mean(Occ_arr0, axis=0) + TCo_arr[e] = np.mean(TCo_arr0, axis=0) + TMx_arr[e] = np.mean(TMx_arr0, axis=0) + Ent_arr[e] = np.mean(Ent_arr0, axis=0) + + else: + m_labels0 = m_labels[ep_idx,:] + m_labels0_flat = m_labels0.reshape(m_labels0.shape[0]*m_labels0.shape[1]) + # Compute duration, occurrence and time covered + l_, d_ = microstate_run_length_encoding(m_labels0_flat) + # For each microstate + for ii in range(n_maps): + if np.isnan(np.nanmean(d_[l_==ii])): + # The specific 
microstate did not occur at all for this event + Dur_arr[e,ii] = 0 + Occ_arr[e,ii] = 0 + TCo_arr[e,ii] = 0 + else: + Dur_arr[e,ii] = np.mean(d_[l_==ii]) * 1000/sfreq # convert to ms + Occ_arr[e,ii] = len(d_[l_==ii])/len(d_) * sfreq + TCo_arr[e,ii] = np.sum(d_[l_==ii])/np.sum(d_) + # Compute transition matrix + TMx_arr[e] = T_empirical(m_labels0_flat, n_maps) + # Compute Shannon Entropy relative to max + Ent_arr[e] = H_1(m_labels0_flat, n_maps)/np.log(float(n_maps)) + # Combine all features in a list + MFeatures = [Dur_arr,Occ_arr,TCo_arr,TMx_arr,Ent_arr] + + # Update in v7 - Collapse after computations in single events + Dur_arr2 = np.zeros((len(collapsed_event_id),n_maps)) + Occ_arr2 = np.zeros((len(collapsed_event_id),n_maps)) + TCo_arr2 = np.zeros((len(collapsed_event_id),n_maps)) + TMx_arr2 = np.zeros((len(collapsed_event_id),n_maps,n_maps)) + Ent_arr2 = np.zeros(len(collapsed_event_id)) + MFeatures2 = [Dur_arr2,Occ_arr2,TCo_arr2,TMx_arr2,Ent_arr2] + + for f in range(len(MFeatures2)): + tmp_feat = MFeatures[f] + tmp_feat2 = MFeatures2[f] + for e in range(len(collapsed_event_id)): + ee = list(collapsed_event_id.keys())[e] + if (ee == 'observer_actor'): + old_ev_idx1 = list(event_id.keys()).index("observe, actor") + old_ev_idx2 = list(event_id.keys()).index("observe, observer") + tmp_feat2[e] = np.mean([tmp_feat[old_ev_idx1],tmp_feat[old_ev_idx2]], axis=0) + elif ee == 'follower_leader': + old_ev_idx1 = list(event_id.keys()).index("imitate, leader") + old_ev_idx2 = list(event_id.keys()).index("imitate, follower") + tmp_feat2[e] = np.mean([tmp_feat[old_ev_idx1],tmp_feat[old_ev_idx2]], axis=0) + else: + new_ev_idx = list(collapsed_event_id.values())[e] + old_ev_idx = list(event_id.values()).index(new_ev_idx) + tmp_feat2[e] = tmp_feat[old_ev_idx] + + return m_labels, events, MFeatures2 + +def label_to_walk_direction(labels): + directions = np.zeros(len(labels)) + directions[pd.Series(labels).isin([0,1,2,3])] = 1 # A/B/C/D assigned to 1 + directions[pd.Series(labels).isin([4,5,6,7])] = -1 # E/F/G/H assigned to -1 + assert np.sum(directions==0)==0 + return directions + +def compute_dualmicro_DFA(i, microstate_results, trialinfo_list, sfreq, window_sizes, event_id, collapsed_event_id, overlap=True): + """ + See Hardstone et al, 2012 for more info + Perform DFA + 1 Compute cumulative sum of time series to create signal profile + 2 Define set of window sizes (see below) + 3 Remove the linear trend using least-squares for each window + 4 Calculate standard deviation for each window and take the mean + 5 Plot fluctuation function (Standard deviation) as function + for all window sizes, on double logarithmic scale + 6 The DFA exponent alpha correspond to Hurst exponent + f(L) = sd = L^alpha (with alpha as linear coefficient in log plot) + + Parameters + ---------- + i : int + The index. + microstate_results : list + The estimated microstates. + trialinfo_list : list + List with trial informations. + sfreq : int + The sampling frequency. + window_sizes : np.array + Window sizes to compute DFA over. + event_id : dict + The mappings between event id and condition. + collapsed_brain_event_id : dict + The renamed mappings between event id and collapsed conditions + overlap : bool, optional + Boolean to determine whether to use overlapping windows. The default is True. + + Returns + ------- + dfa_array : np.array + The Hurst Exponents. + fluctuations : np.array + The fluctuations estimated at different window sizes. 
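+
+    Notes
+    -----
+    A minimal sketch of a call (the window sizes below are illustrative
+    assumptions; in practice they should be chosen to fit the trial lengths):
+
+    >>> window_sizes = np.unique(np.logspace(np.log10(sfreq // 4),
+    ...                                      np.log10(8 * sfreq), 20).astype(int))
+    >>> dfa, fluct = compute_dualmicro_DFA(0, microstate_results, trialinfo_list,
+    ...                                    sfreq, window_sizes, event_id,
+    ...                                    collapsed_event_id)
+    >>> dfa.shape    # one DFA (Hurst) exponent per collapsed condition
+    >>> fluct.shape  # (n_collapsed_conditions, len(window_sizes))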
+ + """ + # Load all microstate results + pair_idx = microstate_results[5] + pair_indices = pair_idx[i], pair_idx[i+1] + m_labels = microstate_results[1][pair_indices[0]:pair_indices[1]] + + events = trialinfo_list[2][i] + # Since they are synchronized already, just take the first and fix + # epoch idx column with the new idx for the synchronized labels + events = events[0] + + # Reshape m_labels to (epoch, time) + m_labels = m_labels.reshape(len(events),sfreq) + + # Remove pre-trial epochs + pre_trial_epochs = events["Trial_start_time"] < 0 + m_labels = m_labels[np.invert(pre_trial_epochs)] + events = events.loc[np.invert(pre_trial_epochs)].reset_index(drop=True) + events["Epoch_idx"] = events.index + + # Pre-allocate memory + dfa_array = np.zeros((len(event_id))) + dfa_array[:] = np.nan + fluctuations = np.zeros((len(event_id),len(window_sizes))) + fluctuations[:] = np.nan + # As I am working with linear regression on log-values, I can just take + # the avg of the x, y coordinates and the coefficients will correspond + # to the avg for each separate linear regression + + for e in range(len(event_id)): + ev_idx = list(event_id.values())[e] + ep_idx = events["Epoch_idx"][events["Event_id"] == ev_idx] + trial_numbers0 = np.unique(events["Trial_number"][ep_idx]) + if len(trial_numbers0) > 8: + # Compute the first 8 trials and the last 8 separately, before + # taking the average to be consistent with asymmetric trials + dfa0 = [] + fluct0 = np.zeros((2,len(window_sizes))) + trial_numbers_split = np.array_split(trial_numbers0, 2) + # Array_split is used in cases where a trial might have been + # dropped, hence the split is only 8/7 + for s in range(len(trial_numbers_split)): + ep_idx0 = events.loc[(events["Event_id"] == ev_idx)& + (events["Trial_number"].isin(trial_numbers_split[s])), + "Epoch_idx"] + m_labels0 = m_labels[ep_idx0,:] + m_labels0_flat = m_labels0.reshape(m_labels0.shape[0]*m_labels0.shape[1]) + # Convert the labels to the directons for computation of cumulative + # sum in order to get the signal profile + data = label_to_walk_direction(m_labels0_flat) + # plt.plot(np.arange(1,len(data)+1,1),np.cumsum(data)) # plot signal profile + # Estimate and save DFA + dfa, fluctuations_2d = nolds.dfa(data,nvals=window_sizes,overlap=overlap,debug_data=True) + dfa0.append(dfa) + fluct0[s] = fluctuations_2d[1] + # Average the two DFA estimations + dfa_array[e] = np.mean(dfa0) + fluctuations[e,:] = np.mean(fluct0, axis=0) + + else: + m_labels0 = m_labels[ep_idx,:] + m_labels0_flat = m_labels0.reshape(m_labels0.shape[0]*m_labels0.shape[1]) + # Convert the labels to the directons for computation of cumulative + # sum in order to get the signal profile + data = label_to_walk_direction(m_labels0_flat) + # Estimate and save DFA + dfa_array[e],fluctuations_2d = nolds.dfa(data,nvals=window_sizes,overlap=overlap,debug_data=True) + fluctuations[e,:] = fluctuations_2d[1] + + # Update in DFA v2 - Collapse after computation of DFA + dfa_array2 = np.zeros((len(collapsed_event_id))) + dfa_array2[:] = np.nan + fluctuations2 = np.zeros((len(collapsed_event_id),len(window_sizes))) + fluctuations2[:] = np.nan + for e in range(len(collapsed_event_id)): + ee = list(collapsed_event_id.keys())[e] + if ee == 'observer_actor': + old_ev_idx1 = list(event_id.keys()).index("observe, actor") + old_ev_idx2 = list(event_id.keys()).index("observe, observer") + dfa_array2[e] = np.mean([dfa_array[old_ev_idx1],dfa_array[old_ev_idx2]]) + fluctuations2[e] = np.mean([fluctuations[old_ev_idx1],fluctuations[old_ev_idx2]], axis=0) + 
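+        # The remaining branches mirror the collapsing used for the microstate
+        # features: asymmetric role conditions are averaged into one collapsed
+        # condition (e.g. "observe, actor" and "observe, observer" ->
+        # 'observer_actor'), while symmetric conditions are copied over
+        # unchanged via their event id.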
+
+
+def compute_dualmicro_DFA_pseudo(i, backfit_results, sfreq, window_sizes, event_id, collapsed_event_id, overlap=True):
+    """
+    See Hardstone et al., 2012 for more info
+    Perform DFA:
+        1. Compute the cumulative sum of the time series to create the signal profile
+        2. Define a set of window sizes (see below)
+        3. Remove the linear trend using least-squares for each window
+        4. Calculate the standard deviation for each window and take the mean
+        5. Plot the fluctuation function (standard deviation) as a function of
+           window size, on a double logarithmic scale
+        6. The DFA exponent alpha corresponds to the Hurst exponent:
+           F(L) = sd = L^alpha (alpha is the linear coefficient in the log-log plot)
+
+    Parameters
+    ----------
+    i : int
+        The index.
+    backfit_results : list
+        The estimated back-fitted microstates.
+    sfreq : int
+        The sampling frequency.
+    window_sizes : np.array
+        Window sizes to compute DFA over.
+    event_id : dict
+        The mappings between event id and condition.
+    collapsed_event_id : dict
+        The renamed mappings between event id and collapsed conditions.
+    overlap : bool, optional
+        Whether to use overlapping windows. The default is True.
+
+    Returns
+    -------
+    dfa_array : np.array
+        The Hurst exponents.
+    fluctuations : np.array
+        The fluctuations estimated at different window sizes.
+
+    """
+    m_labels = backfit_results[1][i]
+    events = backfit_results[3][i]
+
+    # Since they are synchronized already, just take the first and fix
+    # epoch idx column with the new idx for the synchronized labels
+    events = events[0]
+
+    # Update in DFA v2
+    # # Collapse event_id 6 and 7 to 4 and 5
+    # events.loc[events["Event_id"] == 6,["Event_id","Event_label"]] = [4, "observe, actor"]
+    # events.loc[events["Event_id"] == 7,["Event_id","Event_label"]] = [5, "imitate, leader"]
+    # The microstate clustering was performed on flipped (collapsed) events,
+    # but I will compute the DFA on the 8 trials to avoid the flipping
+    # effect on the linear detrending, and then collapse by averaging
+    # after DFA computation
+
+    # Reshape m_labels to (epoch, time)
+    m_labels = m_labels.reshape(len(events),sfreq)
+
+    # Remove pre-trial epochs
+    pre_trial_epochs = events["Trial_start_time"] < 0
+    m_labels = m_labels[np.invert(pre_trial_epochs)]
+    events = events.loc[np.invert(pre_trial_epochs)].reset_index(drop=True)
+    events["Epoch_idx"] = events.index
+
+    # Pre-allocate memory
+    dfa_array = np.zeros((len(event_id)))
+    dfa_array[:] = np.nan
+    fluctuations = np.zeros((len(event_id),len(window_sizes)))
+    fluctuations[:] = np.nan
+    # As I am working with linear regression on log-values, I can just take
+    # the avg of the x, y coordinates and the coefficients will correspond
+    # to the avg for each separate linear regression
+
+    for e in range(len(event_id)):
+        ev_idx = list(event_id.values())[e]
+        ep_idx = events["Epoch_idx"][events["Event_id"] == ev_idx]
+        trial_numbers0 = np.unique(events["Trial_number"][ep_idx])
+        if len(trial_numbers0) > 8:
+            # Compute the first 8 trials and the last 8 separately, before
+            # taking the average to be consistent with asymmetric trials
+            dfa0 = []
+            fluct0 = np.zeros((2,len(window_sizes)))
+            trial_numbers_split = np.array_split(trial_numbers0, 2)
+            # Array_split is used in cases where a trial might have been
+            # dropped, hence the split is only 8/7
+            for s in range(len(trial_numbers_split)):
+                ep_idx0 = events.loc[(events["Event_id"] == ev_idx)&
+                                     (events["Trial_number"].isin(trial_numbers_split[s])),
+                                     "Epoch_idx"]
+                m_labels0 = m_labels[ep_idx0,:]
+                m_labels0_flat = m_labels0.reshape(m_labels0.shape[0]*m_labels0.shape[1])
+                # Convert the labels to walk directions for computation of the cumulative
+                # sum in order to get the signal profile
+                data = label_to_walk_direction(m_labels0_flat)
+                # plt.plot(np.arange(1,len(data)+1,1),np.cumsum(data)) # plot signal profile
+                # Estimate and save DFA
+                dfa, fluctuations_2d = nolds.dfa(data,nvals=window_sizes,overlap=overlap,debug_data=True)
+                dfa0.append(dfa)
+                fluct0[s] = fluctuations_2d[1]
+            # Average the two DFA estimations
+            dfa_array[e] = np.mean(dfa0)
+            fluctuations[e,:] = np.mean(fluct0, axis=0)
+
+        else:
+            m_labels0 = m_labels[ep_idx,:]
+            m_labels0_flat = m_labels0.reshape(m_labels0.shape[0]*m_labels0.shape[1])
+            # Convert the labels to walk directions for computation of the cumulative
+            # sum in order to get the signal profile
+            data = label_to_walk_direction(m_labels0_flat)
+            # Estimate and save DFA
+            dfa_array[e],fluctuations_2d = nolds.dfa(data,nvals=window_sizes,overlap=overlap,debug_data=True)
+            fluctuations[e,:] = fluctuations_2d[1]
+
+    # Update in DFA v2 - Collapse after computation of DFA
+    dfa_array2 = np.zeros((len(collapsed_event_id)))
+    dfa_array2[:] = np.nan
+    fluctuations2 = np.zeros((len(collapsed_event_id),len(window_sizes)))
+    fluctuations2[:] = np.nan
+    for e in range(len(collapsed_event_id)):
+        ee = list(collapsed_event_id.keys())[e]
+        if ee == 'observer_actor':
+            old_ev_idx1 = list(event_id.keys()).index("observe, actor")
+            old_ev_idx2 = list(event_id.keys()).index("observe, observer")
+            dfa_array2[e] = np.mean([dfa_array[old_ev_idx1],dfa_array[old_ev_idx2]])
+            fluctuations2[e] = np.mean([fluctuations[old_ev_idx1],fluctuations[old_ev_idx2]], axis=0)
+        elif ee == 'follower_leader':
+            old_ev_idx1 = list(event_id.keys()).index("imitate, leader")
+            old_ev_idx2 = list(event_id.keys()).index("imitate, follower")
+            dfa_array2[e] = np.mean([dfa_array[old_ev_idx1],dfa_array[old_ev_idx2]])
+            fluctuations2[e] = np.mean([fluctuations[old_ev_idx1],fluctuations[old_ev_idx2]], axis=0)
+        else:
+            new_ev_idx = list(collapsed_event_id.values())[e]
+            old_ev_idx = list(event_id.values()).index(new_ev_idx)
+            dfa_array2[e] = dfa_array[old_ev_idx]
+            fluctuations2[e] = fluctuations[old_ev_idx]
+
+    return dfa_array2, fluctuations2
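+
+
+# Illustrative usage sketch: how the DFA function above might be called for one pair
+# (i=0). The log-spaced window sizes are an arbitrary example choice; backfit_results,
+# event_id and collapsed_event_id are assumed to come from the earlier back-fitting
+# step and the experiment-specific event dictionaries.
+def _dualmicro_dfa_usage_example(backfit_results, sfreq, event_id, collapsed_event_id):
+    # Example: ~15 log-spaced window sizes between 1 s and 10 s (in samples)
+    window_sizes = np.unique(np.floor(np.logspace(np.log10(sfreq),
+                                                  np.log10(10 * sfreq), 15))).astype(int)
+    dfa_alpha, log_flucts = compute_dualmicro_DFA_pseudo(0, backfit_results, sfreq,
+                                                         window_sizes, event_id,
+                                                         collapsed_event_id, overlap=True)
+    # dfa_alpha: one DFA (Hurst) exponent per collapsed condition
+    # log_flucts: shape (n_collapsed_conditions, len(window_sizes))
+    return dfa_alpha, log_flucts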
+
+def similar_interbrain_microstates(x, y):
+    """ Ratio of time points where the two label time series are in the same microstate """
+    assert len(x) == len(y)
+    ratio_in_similar_state = sum(x == y)/len(x)
+    return ratio_in_similar_state
+
+def shift_label_time_series(x, y, shift):
+    """ Shift the label time series y by shift samples and trim the resulting NaN from both series """
+    assert len(x) == len(y)
+    x = pd.Series(x)
+    y = pd.Series(y)
+    # Shift the data
+    y = y.shift(shift)
+    # Drop the NaN
+    drop_idx = np.where(y.isna())[0]
+    y = y.drop(drop_idx)
+    x = x.drop(drop_idx)
+    assert len(y) == len(x)
+    assert sum(y.isna()) == 0
+    return x.to_numpy().astype(int), y.to_numpy().astype(int)
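+
+# Illustrative sketch of the two helpers above: scan a range of lags and pick the one
+# that maximizes the ratio of time points spent in the same microstate. The toy label
+# sequences and the lag range are arbitrary choices for demonstration only.
+def _lag_scan_toy_example():
+    x = np.array([0, 0, 1, 1, 2, 2, 3, 3, 0, 0, 1, 1])  # toy labels, participant 1
+    y = np.roll(x, 2)                                    # participant 2 trails by 2 samples
+    lags = np.arange(-3, 4)
+    similarity = [similar_interbrain_microstates(*shift_label_time_series(x, y, int(lag)))
+                  for lag in lags]
+    # With this shifting convention the best alignment is found at lag = -2,
+    # i.e. participant 2's labels shifted back by two samples
+    opt_lag = lags[int(np.argmax(similarity))]
+    return opt_lag, similarity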
+
+def shifted_interbrain_microstate_feature_computation(i, n_maps, microstate_results,
+                                                      trialinfo_list, sfreq, event_id, collapsed_event_id, lag_search_range, lag_interval):
+    """
+    Compute inter-brain features, but for time-lagged microstate label time series.
+    Similar to cross-correlation, but maximizing the amount of synchronized
+    (similar) microstates at a given lag.
+
+    Parameters
+    ----------
+    i : int
+        The index.
+    n_maps : int
+        The number of maps (clusters) used.
+    microstate_results : list
+        The estimated microstates.
+    trialinfo_list : list
+        List with trial information.
+    sfreq : int
+        The sampling frequency.
+    event_id : dict
+        The mappings between event id and condition.
+    collapsed_event_id : dict
+        The renamed mappings between event id and collapsed conditions.
+    lag_search_range : int
+        The maximum lag, in samples, that the data is shifted in each direction.
+    lag_interval : np.array
+        The sample lags over which the data is shifted.
+
+    Returns
+    -------
+    m_labels : list of two np.array
+        The microstate sequence (labels) time series in the format (epoch, time).
+    events : list of two pd.DataFrame
+        The events corresponding to each epoch.
+    MFeatures : list
+        List of arrays of each microstate feature.
+    shift_info : list
+        List containing the information regarding the shift, e.g. the cross-
+        similarity at different time lags and the optimal time lag.
+
+    """
+
+    # Here i refers to the pair in range(n_subjects//2)
+    sub_idx = microstate_results[5]
+
+    # Get the microstate labels and events for participant 1
+    subject_indices1 = sub_idx[2*i], sub_idx[2*i+1]
+    m_labels1 = microstate_results[1][subject_indices1[0]:subject_indices1[1]]
+    # Get the trialinfo with conditions
+    Subject1, trialinfo1 = trialinfo_list[2*i]
+    # Convert to dataframe
+    event_id_inv = {v: k for k, v in event_id.items()} # Invert the event id
+    events1 = pd.DataFrame(trialinfo1,columns=["Event_id","Trial_number","Trial_start_time"])
+    events1["Event_label"] = events1["Event_id"].replace(event_id_inv)
+    events1 = events1.reset_index().rename(columns={"index":"Epoch_idx"})
+    # Reshape m_labels to (epoch, time)
+    m_labels1 = m_labels1.reshape(len(trialinfo1),sfreq)
+
+    # Get the microstate labels and events for participant 2
+    subject_indices2 = sub_idx[2*i+1], sub_idx[2*i+2]
+    m_labels2 = microstate_results[1][subject_indices2[0]:subject_indices2[1]]
+    # Get the trialinfo with conditions
+    Subject2, trialinfo2 = trialinfo_list[2*i+1]
+    # Convert to dataframe
+    events2 = pd.DataFrame(trialinfo2,columns=["Event_id","Trial_number","Trial_start_time"])
+    events2["Event_label"] = events2["Event_id"].replace(event_id_inv)
+    events2 = events2.reset_index().rename(columns={"index":"Epoch_idx"})
+    # Reshape m_labels to (epoch, time)
+    m_labels2 = m_labels2.reshape(len(trialinfo2),sfreq)
+
+    # Check that both participants from the pair were loaded
+    assert Subject2-1 == Subject1
+    # Check that the same number of trials is present
+    assert trialinfo1[-1,1] == trialinfo2[-1,1]
+    # Synchronize the events from the pair based on timing info
+    # by trimming the epochs to only include the epochs that are present
+    # in both participants of the pair
+    # Initialize with an array filled with a unique number not in use
+    sync_m_labels1 = np.zeros_like(m_labels1); sync_m_labels1.fill(9999)
+    sync_m_labels2 = np.zeros_like(m_labels2); sync_m_labels2.fill(9999)
+    for t in np.unique(trialinfo1[:,1]):
+        t_idx1 = np.where(trialinfo1[:,1] == t)[0]
+        t_idx2 = np.where(trialinfo2[:,1] == t)[0]
+        # Get the timings for the epochs for the specific trial t
+        t_timings1 = trialinfo1[t_idx1,2]
+        t_timings2 = trialinfo2[t_idx2,2]
+        timings_intersect = np.intersect1d(t_timings1,t_timings2)
+        # Get the indices where the timings match
+        t_idx_match1 = t_idx1[pd.Series(t_timings1).isin(timings_intersect)]
+        t_idx_match2 = t_idx2[pd.Series(t_timings2).isin(timings_intersect)]
+        # Get the actual values from the synchronized epochs
+        sync_m_labels1[t_idx_match1] = m_labels1[t_idx_match1]
+        sync_m_labels2[t_idx_match2] = m_labels2[t_idx_match2]
+    # Find the epochs that were asynchronous, which have to be trimmed
+    asynch_epochs1 = np.unique(np.where(sync_m_labels1==9999)[0])
+    asynch_epochs2 = np.unique(np.where(sync_m_labels2==9999)[0])
+    # Trim/delete the asynchronous epochs
+    sync_m_labels1 = np.delete(sync_m_labels1,asynch_epochs1,axis=0)
+    sync_m_labels2 = np.delete(sync_m_labels2,asynch_epochs2,axis=0)
+    assert len(sync_m_labels1) == len(sync_m_labels2) # check that the number of synchronized epochs is equal
+    # Fix events
+    sync_events1 = events1.drop(asynch_epochs1,axis=0).reset_index(drop=True)
+    sync_events2 = events2.drop(asynch_epochs2,axis=0).reset_index(drop=True)
+
+    # Since they are synchronized already, just take the first and fix
+    # epoch idx column with the new idx for the synchronized labels
+    events = sync_events1
+
events["Epoch_idx"] = events.index + # Notice that for the intrabrain fit all alpha v6 I only corrected + # ppn2. So by taking ppn1 I get the original event labels. + + # Remove pre-trial epochs + pre_trial_epochs = events["Trial_start_time"] < 0 + sync_m_labels1 = sync_m_labels1[np.invert(pre_trial_epochs)] + sync_m_labels2 = sync_m_labels2[np.invert(pre_trial_epochs)] + events = events.loc[np.invert(pre_trial_epochs)].reset_index(drop=True) + events["Epoch_idx"] = events.index + + # Determine the optimal lag for highest interbrain state synchrony + # For each condition - using the collapsed dictionary + # To get one value of optimal lag for e.g. follower-leader and observer-actor + # Collapse event_id 6 and 7 to 4 and 5 + events_collapsed = events.copy() + events_collapsed.loc[events_collapsed["Event_id"] == 6,["Event_id","Event_label"]] = [4, "observe, actor"] + events_collapsed.loc[events_collapsed["Event_id"] == 7,["Event_id","Event_label"]] = [5, "imitate, leader"] + normal_events_to_collapsed_map = {"1":1, "2":2, "3":3, "4":4, "5":5, "6":4, "7":5, "8":8} + + shift_info = [0]*len(collapsed_event_id) + for e in range(len(collapsed_event_id)): + ev_idx = list(collapsed_event_id.values())[e] + ep_idx = events_collapsed["Epoch_idx"][events_collapsed["Event_id"] == ev_idx] + trial_numbers0 = np.unique(events_collapsed["Trial_number"][ep_idx]) + + lts1 = sync_m_labels1[ep_idx,:] + lts2 = sync_m_labels2[ep_idx,:] + # Flatten the labels + lts1_flat = pd.Series(lts1.ravel()) + lts2_flat = pd.Series(lts2.ravel()) + # Compute cross similarity + cross_similarity = [lts1_flat.corr(lts2_flat.shift(lag), method=similar_interbrain_microstates) for lag in lag_interval] + # Original ratio of time in similar states (time = 0) + t_zero_ratio_in_similar_microstate = cross_similarity[lag_search_range] + # The optimal lag + opt_lag = lag_interval[np.argmax(cross_similarity)] + opt_ratio_in_similar_microstate = cross_similarity[np.argmax(cross_similarity)] + # Shift the label time series with the optimal lag prior to computation of interbrain features + # Save the features + shift_info[e] = [t_zero_ratio_in_similar_microstate, opt_lag, opt_ratio_in_similar_microstate, cross_similarity] + + # Pre-allocate memory + Dur_arr = np.zeros((len(event_id),n_maps+1)); Dur_arr.fill(np.nan) + Occ_arr = np.zeros((len(event_id),n_maps+1)); Occ_arr.fill(np.nan) + TCo_arr = np.zeros((len(event_id),n_maps+1)); TCo_arr.fill(np.nan) + TMx_arr = np.zeros((len(event_id),n_maps+1,n_maps+1)) + Ent_arr = np.zeros(len(event_id)) + for e in range(len(event_id)): + ev_idx = list(event_id.values())[e] + ep_idx = events["Epoch_idx"][events["Event_id"] == ev_idx] + trial_numbers0 = np.unique(events["Trial_number"][ep_idx]) + # Get the opt_lag from collapsed events + ev_collapsed_idx = normal_events_to_collapsed_map[str(ev_idx)] + shift_info_idx = list(collapsed_event_id.values()).index(ev_collapsed_idx) + opt_lag = shift_info[shift_info_idx][1] + + if len(trial_numbers0) > 8: + # Compute the first 8 trials and the last 8 separately, before + # taking the average to be consistent with asymmetric trials + Dur_arr0 = np.zeros((2,n_maps+1)) + Occ_arr0 = np.zeros((2,n_maps+1)) + TCo_arr0 = np.zeros((2,n_maps+1)) + TMx_arr0 = np.zeros((2,n_maps+1,n_maps+1)) + Ent_arr0 = np.zeros(2) + trial_numbers_split = np.array_split(trial_numbers0, 2) + # Array_split is used in cases where a trial might have been + # dropped, hence the split is only 8/7 + for s in range(len(trial_numbers_split)): + ep_idx0 = events.loc[(events["Event_id"] == ev_idx)& + 
(events["Trial_number"].isin(trial_numbers_split[s])), + "Epoch_idx"] + # Get the microstate labels + m_labels10 = sync_m_labels1[ep_idx0,:] + m_labels20 = sync_m_labels2[ep_idx0,:] + # Flatten the labels + m_labels10_flat = m_labels10.reshape(m_labels10.shape[0]*m_labels10.shape[1]) + m_labels20_flat = m_labels20.reshape(m_labels20.shape[0]*m_labels20.shape[1]) + # Shift the label time-series to maximize interbrain synchrony + m_labels10_shifted, m_labels20_shifted = shift_label_time_series(m_labels10_flat, m_labels20_flat, opt_lag) + + # Compute average duration of common microstate + # Output: label and duration of common microstate. Label -1 is used + # for not common microstate + l_, d_ = interbrain_microstate_run_length_encoding(m_labels10_shifted,m_labels20_shifted) + # For each microstate + for ii in range(n_maps+1): + if np.isnan(np.nanmean(d_[l_==ii-1])): + # The specific microstate did not occur at all for this event + Dur_arr0[s,ii] = 0 + Occ_arr0[s,ii] = 0 + TCo_arr0[s,ii] = 0 + else: + Dur_arr0[s,ii] = np.mean(d_[l_==ii-1]) * 1000/sfreq # convert to ms + Occ_arr0[s,ii] = len(d_[l_==ii-1])/len(d_) * sfreq + TCo_arr0[s,ii] = np.sum(d_[l_==ii-1])/np.sum(d_) + # Compute transition matrix + TMx_arr0[s] = interbrain_T_matrix(m_labels10_shifted,m_labels20_shifted,n_maps) + # Compute Joint Shannon Entropy relative to max + # Max is the sum of max individual entropies + Ent_arr0[s] = H_2(m_labels10_shifted,m_labels20_shifted,n_maps)/(2*np.log(float(n_maps))) + # Average over the splits + Dur_arr[e] = np.mean(Dur_arr0, axis=0) + Occ_arr[e] = np.mean(Occ_arr0, axis=0) + TCo_arr[e] = np.mean(TCo_arr0, axis=0) + TMx_arr[e] = np.mean(TMx_arr0, axis=0) + Ent_arr[e] = np.mean(Ent_arr0, axis=0) + + else: + # Get the microstate labels + m_labels10 = sync_m_labels1[ep_idx,:] + m_labels20 = sync_m_labels2[ep_idx,:] + # Flatten the labels + m_labels10_flat = m_labels10.reshape(m_labels10.shape[0]*m_labels10.shape[1]) + m_labels20_flat = m_labels20.reshape(m_labels20.shape[0]*m_labels20.shape[1]) + # Shift the label time-series to maximize interbrain synchrony + m_labels10_shifted, m_labels20_shifted = shift_label_time_series(m_labels10_flat, m_labels20_flat, opt_lag) + + # Compute average duration of common microstate + # Output: label and duration of common microstate. 
Label -1 is used + # for not common microstate + l_, d_ = interbrain_microstate_run_length_encoding(m_labels10_shifted,m_labels20_shifted) + # For each microstate + for ii in range(n_maps+1): + if np.isnan(np.nanmean(d_[l_==ii-1])): + # The specific microstate did not occur at all for this event + Dur_arr[e,ii] = 0 + Occ_arr[e,ii] = 0 + TCo_arr[e,ii] = 0 + else: + Dur_arr[e,ii] = np.mean(d_[l_==ii-1]) * 1000/sfreq # convert to ms + Occ_arr[e,ii] = len(d_[l_==ii-1])/len(d_) * sfreq + TCo_arr[e,ii] = np.sum(d_[l_==ii-1])/np.sum(d_) + # Compute transition matrix + TMx_arr[e] = interbrain_T_matrix(m_labels10_shifted,m_labels20_shifted,n_maps) + # Compute Joint Shannon Entropy relative to max + # Max is the sum of max individual entropies + Ent_arr[e] = H_2(m_labels10_shifted,m_labels20_shifted,n_maps)/(2*np.log(float(n_maps))) + + # Combine all features in a list + MFeatures = [Dur_arr,Occ_arr,TCo_arr,TMx_arr,Ent_arr] + + # Update in v7 - Collapse after computations in single events + Dur_arr2 = np.zeros((len(collapsed_event_id),n_maps+1)) + Occ_arr2 = np.zeros((len(collapsed_event_id),n_maps+1)) + TCo_arr2 = np.zeros((len(collapsed_event_id),n_maps+1)) + TMx_arr2 = np.zeros((len(collapsed_event_id),n_maps+1,n_maps+1)) + Ent_arr2 = np.zeros(len(collapsed_event_id)) + MFeatures2 = [Dur_arr2,Occ_arr2,TCo_arr2,TMx_arr2,Ent_arr2] + + for f in range(len(MFeatures2)): + tmp_feat = MFeatures[f] + tmp_feat2 = MFeatures2[f] + for e in range(len(collapsed_event_id)): + ee = list(collapsed_event_id.keys())[e] + if (ee == 'observer_actor'): + old_ev_idx1 = list(event_id.keys()).index("observe, actor") + old_ev_idx2 = list(event_id.keys()).index("observe, observer") + tmp_feat2[e] = np.mean([tmp_feat[old_ev_idx1],tmp_feat[old_ev_idx2]], axis=0) + elif ee == 'follower_leader': + old_ev_idx1 = list(event_id.keys()).index("imitate, leader") + old_ev_idx2 = list(event_id.keys()).index("imitate, follower") + tmp_feat2[e] = np.mean([tmp_feat[old_ev_idx1],tmp_feat[old_ev_idx2]], axis=0) + else: + new_ev_idx = list(collapsed_event_id.values())[e] + old_ev_idx = list(event_id.values()).index(new_ev_idx) + tmp_feat2[e] = tmp_feat[old_ev_idx] + + return [sync_m_labels1, sync_m_labels2], [sync_events1, sync_events2], MFeatures2, shift_info \ No newline at end of file -- GitLab