Main.py

    # Interactive check of the two-person microstate maps for one n_maps/ff
    # combination: plot, reorder, plot, sign-correct, plot again.
    # n_maps, maps, gev, m_labels, ff, epoch and n_channels are defined earlier
    # in the enclosing loop body.
    plot_dualmicro(n_maps, maps, gev, epoch.info)
    
    # Make dictionary with n_maps and new order
    # Keys are "<n_maps>_<frequency name>"; the value is handed to
    # reorder_microstate_results as the new order of the maps
    # All 4 top row consecutively, followed by 4 bot row
    manual_reordering_template = {"8_alpha":[5,2,7,0,1,4,6,3],
                                  "8_beta":[4,1,3,6,7,5,0,2],
                                  "8_broadband":[6,3,4,0,2,1,5,7]} 
    
    # Raises KeyError if no manual order was defined for this n_maps/ff combination
    new_order = manual_reordering_template[f"{n_maps}_{ff}"]
    
    maps, gev, m_labels = reorder_microstate_results(new_order, maps, gev, m_labels)
    
    # Plot again to check it worked
    plot_dualmicro(n_maps, maps, gev, epoch.info)
    
    # Since neuronal activity is often oscillating, this causes polarity inversions.
    # Microstate analysis ignores the sign, and hence the polarity of a map is
    # arbitrary and depends on the initialization.
    # It is only the relative difference within the plot that is interesting.
    # We can thus freely change the sign for visualization.
    # For two-person microstates, each person's map is sign-changed separately:
    # every entry holds two lists (one per person) with one +1/-1 factor per map
    manual_sign_correction = {"8_alpha":[[-1,-1,-1,1,-1,1,1,1],[1,1,1,-1,-1,1,1,1]],
                              "8_beta":[[-1,-1,-1,-1,-1,1,1,1],[-1,-1,1,-1,-1,1,1,-1]],
                              "8_broadband":[[1,1,-1,-1,1,-1,-1,-1],[-1,1,-1,-1,1,-1,-1,-1]]}
    sign_swap = manual_sign_correction[f"{n_maps}_{ff}"]

    maps = sign_swap_microstates(sign_swap, maps, n_maps, n_channels)
    
    # Plot a final time for last confirmation
    plot_dualmicro(n_maps, maps, gev, epoch.info)
    
    # Close all figures and repeat by changing n_maps
    plt.close("all")
    
    ### Save reordered results
    # Reload the original fit for the chosen number of maps, apply the manual
    # reordering and sign correction, and store the result under Reordered/
    n_maps = 8
    ii = n_clusters.index(n_maps)
    
    # NOTE: pickle.load is only safe because this file is produced by this pipeline
    with open(f"{microstate_save_path}Dualmicro_fit_all_{ff}_data_maps{n_maps}.pkl", "rb") as file:
        microstate_results = pickle.load(file)
    maps, m_labels, gfp_peaks, gev, cv_min, pair_idx = microstate_results
    # Re-order
    # The templates are keyed by "<n_maps>_<frequency name>" (e.g. "8_alpha"),
    # so looking up str(n_maps) alone would raise a KeyError
    template_key = f"{n_maps}_{ff}"
    new_order = manual_reordering_template[template_key]
    maps, gev, m_labels = reorder_microstate_results(new_order, maps, gev, m_labels)
    # Sign alignment
    # Look the signs up again so they always match the n_maps set above,
    # even if the interactive check was last run with a different n_maps
    sign_swap = manual_sign_correction[template_key]
    maps = sign_swap_microstates(sign_swap, maps, n_maps, n_channels)
    # Overwrite variable
    microstate_results = maps, m_labels, gfp_peaks, gev, cv_min, pair_idx
    # Save to new file
    with open(f"{microstate_save_path}Reordered/Dualmicro_fit_all_{ff}_data_maps{n_maps}.pkl", "wb") as filehandle:
        pickle.dump(microstate_results, filehandle) # [maps, L, gfp_peaks, gev, cv_min, pair_idx]
    
    # Save topomaps for the microstates
    # The reordered results are reloaded from disk so the figures reflect
    # exactly what was saved
    save_path = f"{fig_save_path}Microstates/Fit_all_{ff}/"
    with open(f"{microstate_save_path}Reordered/Dualmicro_fit_all_{ff}_data_maps{n_maps}.pkl", "rb") as file:
        microstate_results = pickle.load(file)
    maps, m_labels, gfp_peaks, gev, cv_min, pair_idx = microstate_results
    fig = plot_dualmicro(n_maps, maps, gev, epoch.info)
    fig.savefig(save_path+f"Dualmicro_fit_all_{ff}_maps{n_maps}"+".png")
    
    # Save svg for Paper
    fig.savefig(save_path+f"Dualmicro_fit_all_{ff}_maps{n_maps}"+".svg")
    
    ### Save png and svg with fixed color scales across all microstates
    # One common (min, max) range over all maps, so colors are comparable
    vlims = (np.min(maps), np.max(maps))
    
    # The fourth argument is the measurement info in every other call; the
    # color limits follow it. Previously vlims was passed both in the info slot
    # and as a sixth argument.
    # NOTE(review): assumes vlims is the fifth positional parameter of
    # plot_dualmicro - confirm against its definition
    fig = plot_dualmicro(n_maps, maps, gev, epoch.info, vlims)
    
    fig.savefig(save_path+f"Dualmicro_fit_all_{ff}_fixed_colorscale_maps{n_maps}"+".png")
    fig.savefig(save_path+f"Dualmicro_fit_all_{ff}_fixed_colorscale_maps{n_maps}"+".svg")
    
    # =========================================================================
    # Estimate two-person microstate metrics/features
    # There might be a small error introduced due to gaps in the time series from
    # dropped segments, e.g. when calculating the transition probability as
    # the time series is discontinuous due to the gaps. But with the high sampling rate
    # only a very small fraction of the samples have discontinuous neighbors
    # =========================================================================
    """
    Overview of common microstate features:
        1. Average duration a given microstate remains stable (Dur)
        2. Frequency occurrence, independent of individual duration (Occ)
            Average number of times a microstate becomes dominant per second
        3. Ratio of total Time Covered (TCo)
        4. Transition probabilities (TMx)
        5. Ratio of shannon entropy relative to theoretical max chaos (Ent)
    """
    # Hard-coded the optimal number of microstates based on CV criterion and GEV
    n_maps = 8
    # Load all microstate results (the reordered and sign-corrected version)
    with open(f"{microstate_save_path}Reordered/Dualmicro_fit_all_{ff}_data_maps{n_maps}.pkl", "rb") as file:
        microstate_results = pickle.load(file)
    # Load all trialinfos
    with open(f"{microstate_save_path}Dualmicro_fit_all_{ff}_trial_events_infos.pkl", "rb") as file:
        trialinfo_list = pickle.load(file)
    
    # Name the microstates with capital letters: A, B, C, ... (chr(65) == "A")
    Microstate_names = [chr(ele) for ele in range(65,65+n_maps)]
    
    # Preallocate one slot per pair; filled by index in the loop below
    m_labels = [0]*n_pairs
    events = [0]*n_pairs
    m_feats = [0]*n_pairs
    
    # NOTE(review): the helper only receives the pair index, so it presumably
    # reads microstate_results, trialinfo_list etc. from module scope - confirm
    for i in range(n_pairs):
        m_labels[i], events[i], m_feats[i] = dualmicro_fit_all_feature_computation(i)
        print(f"Finished computing microstate features for pair {Pair_id[i]}")
    
    # Save the raw microstate features
    with open(f"{microstate_save_path}/raw_dualmicro_fit_all_{ff}_features_maps{n_maps}.pkl", "wb") as filehandle:
        pickle.dump(m_feats, filehandle) # [Subject][Dur_arr,Occ_arr,TCo_arr,TMx_arr,Ent_arr] [Event, map*]
        # * the feature is calculated for each map, where applicable.
        # Transition matrix is calculated for each map -> map transition probability
    
    # with open(f"{microstate_save_path}/raw_computed_dualmicro_fit_all_{ff}_features.pkl", "rb") as file:
    #     m_feats = pickle.load(file) # [Subject][Dur_arr,Occ_arr,TCo_arr,TMx_arr,Ent_arr] [Event, map*]
    
    ### Convert all features to dataframes for further processing
    # Every feature array is turned into a dataframe, tagged with a
    # "Measurement" column so the dataframes can be combined later, and pickled.
    # The file names must be f-strings: without the prefix "{ff}" is written
    # literally and every frequency band overwrites the same file.
    # NOTE(review): code that loads these files must use the same formatted names
    col_names = ["Pair_ID", "Event_ID", "Microstate", "Value"]
    col_values = [Pair_id,list(collapsed_event_id.keys()),Microstate_names]
    dtypes = [int,str,str,"float64"]
    # Mean duration
    Dur_arr = np.stack([ele[0] for ele in m_feats]) # [Subject, event, n_map]
    Dur_df = numpy_arr_to_pandas_df(Dur_arr, col_names, col_values, dtypes)
    # Add dummy variable to enabling combining of dataframes
    measurement_id = ["Duration"]*len(Dur_df)
    Dur_df.insert(2, "Measurement", measurement_id)
    # Save df
    Dur_df.to_pickle(os.path.join(microstate_save_path,f"Dualmicro_fit_all_{ff}_duration_df.pkl"))
    
    # Frequency of occurrence per sec
    Occ_arr = np.stack([ele[1] for ele in m_feats]) # [Subject, event, n_map]
    Occ_df = numpy_arr_to_pandas_df(Occ_arr, col_names, col_values, dtypes)
    # Add dummy variable to enabling combining of dataframes
    measurement_id = ["Occurrence"]*len(Occ_df)
    Occ_df.insert(2, "Measurement", measurement_id)
    # Save df
    Occ_df.to_pickle(os.path.join(microstate_save_path,f"Dualmicro_fit_all_{ff}_occurrence_df.pkl"))
    
    # Ratio total Time Covered
    TCo_arr = np.stack([ele[2] for ele in m_feats]) # [Subject, event, n_map]
    TCo_df = numpy_arr_to_pandas_df(TCo_arr, col_names, col_values, dtypes)
    # Add dummy variable to enabling combining of dataframes
    measurement_id = ["Time_covered"]*len(TCo_df)
    TCo_df.insert(2, "Measurement", measurement_id)
    # Save df
    TCo_df.to_pickle(os.path.join(microstate_save_path,f"Dualmicro_fit_all_{ff}_ratio_time_covered_df.pkl"))
    
    # Transition matrix should be read as probability of row to column
    # Build the "A->B" style labels for every (row, column) combination
    xi, xj = np.meshgrid(Microstate_names,Microstate_names)
    _, arrow = np.meshgrid(Microstate_names,["->"]*n_maps)
    
    transition_info = np.char.add(np.char.add(xj,arrow),xi)
    
    TMx_arr = np.stack([ele[3] for ele in m_feats]) # [Subject, event, n_map, n_map]
    TMx_arr = TMx_arr.reshape((n_pairs,len(collapsed_event_id),n_maps*n_maps)) # Flatten the maps to 1D
    
    col_names = ["Pair_ID", "Event_ID", "Transition", "Value"]
    col_values = [Pair_id,list(collapsed_event_id.keys()),transition_info.flatten()]
    TMx_df = numpy_arr_to_pandas_df(TMx_arr, col_names, col_values, dtypes)
    # Add dummy variable to enabling combining of dataframes
    measurement_id = ["Probability"]*len(TMx_df)
    TMx_df.insert(2, "Measurement", measurement_id)
    # Save df
    TMx_df.to_pickle(os.path.join(microstate_save_path,f"Dualmicro_fit_all_{ff}_transition_df.pkl"))
    
    # Entropy (one value per pair and event, hence no microstate column)
    Ent_arr = np.stack([ele[4] for ele in m_feats]) # [Subject, event]
    col_names = ["Pair_ID", "Event_ID", "Value"]
    col_values = [Pair_id,list(collapsed_event_id.keys())]
    dtypes = [int, str, "float64"]
    Ent_df = numpy_arr_to_pandas_df(Ent_arr, col_names, col_values, dtypes)
    # Add dummy variable to enabling combining of dataframes
    measurement_id = ["Entropy"]*len(Ent_df)
    Ent_df.insert(2, "Measurement", measurement_id)
    # Save df
    Ent_df.to_pickle(os.path.join(microstate_save_path,f"Dualmicro_fit_all_{ff}_ratio_entropy_df.pkl"))
    
# %% Backfit two-person microstates to pseudo-pairs
# The pseudo-pairs are created for all participants except the real pair.
# This is fine for symmetrical tasks, e.g. rest and coupled.
# But not for asymmetrical tasks like observation and leader.
# We might have a leader - leader pseudo-pair.
# Hence we only look at ppn1 with ppn2 from different pairs and exclude
# ppn1 with ppn1 or ppn2 with ppn2

# Backfit the two-person prototype maps to every valid pseudo-pair, once per
# frequency range. range() is required here: iterating over the bare integer
# returned by len() raises "TypeError: 'int' object is not iterable".
for f in range(len(all_freq_ranges)):
    ff = freq_names[f]
    freq_range0 = all_freq_ranges[f]
    # =========================================================================
    # It might be an advantage to run the backfitting of microstates on a HPC
    # =========================================================================
    # To save time and prevent reloading the same EEG over and over, I divided
    # the prepare array function into a load and combine function
    # By loading all into memory, I can skip loading for every combination
    # but this requires a very high memory, which is fortunately not a problem on the hpc
    
    # I am limiting the pseudo-pairs to be where ppn1 ends with 1 and ppn2 with 2
    # Which means we have 21 * 20 options
    n_pseudo_pairs = n_pairs*(n_pairs-1)
    
    # To not load data 420 times for two participants, we preload all EEG data to ram
    c_time1 = time_now()
    print("Starting load",c_time1)
    all_micro_data = [0]*n_subjects
    all_trial_data = [0]*n_subjects
    for i in range(n_subjects):
        all_micro_data[i], all_trial_data[i] = load_microstate_arrays(i)
    print("Load finished", time_now())
    
    # Get the prototypical maps for the current frequency range
    n_maps = 8
    with open(f"{microstate_save_path}Reordered/Dualmicro_fit_all_{ff}_data_maps{n_maps}.pkl", "rb") as file:
        microstate_results = pickle.load(file)
    
    prototype_map = microstate_results[0]
    
    # Start the backfitting
    # Results are preallocated for the expected number of pseudo-pairs and
    # filled at position `counter`; pseudo_pair_id grows with each valid pair
    m_labels = [0]*n_pseudo_pairs
    events = [0]*n_pseudo_pairs
    GEVs = [0]*n_pseudo_pairs
    counter = 0
    pseudo_pair_id = []
    for i in range(n_subjects):
        for j in range(n_subjects):
            # Skip if the subject is the same
            if np.abs(Subject_id[i]-Subject_id[j]) == 0:
                continue
            # Skip if the subject are from the same pair
            if np.abs(Subject_id[i]-Subject_id[j]) == 1:
                continue
            # Skip unless ppn1 ends on 1 AND ppn2 ends on 2
            first_is_ppn1 = str(Subject_id[i])[-1] == "1"
            second_is_ppn2 = str(Subject_id[j])[-1] == "2"
            if not (first_is_ppn1 and second_is_ppn2):
                continue
            # A valid pseudo pair
            # Get the synchronized events
            event0 = get_synch_events_from_pseudo_pairs(all_trial_data[i],all_trial_data[j])
            # Get the preloaded micro data
            micro_data1 = all_micro_data[i]
            micro_data2 = all_micro_data[j]
            # Get the synchronized and concatenated micro data in the current frequency range
            micro_data0 = combine_two_person_microstate_arrays(micro_data1, micro_data2, event0, sfreq, freq_range=freq_range0)
            # Backfit and get the labels
            L, GEV = pseudo_pair_dualmicro_backfitting(micro_data0, prototype_map, event0, n_maps, sfreq)
            # Save the results
            m_labels[counter], GEVs[counter], events[counter] = L, GEV, event0
            pseudo_pair_id.append(f"{Subject_id[i]}-{Subject_id[j]}")
            # Move counter
            counter += 1
            print(f"Finished backfitting for pseudo pair {pseudo_pair_id[-1]}")
            print("Started", c_time1, "\nCurrent",time_now())
    
    backfit_results = [pseudo_pair_id, m_labels, GEVs, events]
    # Save the results from all pseudo pairs
    with open(f"{microstate_save_path}Reordered/Backfitting/Dualmicro_fit_all_{ff}_data_maps{n_maps}.pkl", "wb") as filehandle:
        pickle.dump(backfit_results, filehandle) # [pseudo_pair_id, L, GEV, events]
    
    # =========================================================================
    # Estimate two-person microstate metrics/features
    # There might be a small error introduced due to gaps in the time series from
    # dropped segments, e.g. when calculating the transition probability as
    # the time series is discontinuous due to the gaps. But with the high sampling rate
    # only a very small fraction of the samples have discontinuous neighbors
    # =========================================================================
    
    """
    Overview of common microstate features:
        1. Average duration a given microstate remains stable (Dur)
        2. Frequency occurrence, independent of individual duration (Occ)
            Average number of times a microstate becomes dominant per second
        3. Ratio of total Time Covered (TCo)
        4. Transition probabilities (TMx)
        5. Ratio of shannon entropy relative to theoretical max chaos (Ent)
    """
    n_maps = 8
    # Load all the backfit pseudo-pair results
    with open(f"{microstate_save_path}Reordered/Backfitting/Dualmicro_fit_all_{ff}_data_maps{n_maps}.pkl", "rb") as file:
        backfit_results = pickle.load(file) # [pseudo_pair_id, L, GEV, events]
    
    # Hard-coded the optimal number of microstates based on CV criterion and GEV
    n_maps = 8
    # Name the microstates with capital letters: A, B, C, ... (chr(65) == "A")
    Microstate_names = [chr(ele) for ele in range(65,65+n_maps)]
    
    # The number of pseudo-pairs is taken from the ids that were actually saved
    pseudo_pair_id = backfit_results[0]
    n_pseudo_pairs = len(pseudo_pair_id)
    
    m_labels = [0]*n_pseudo_pairs
    events = [0]*n_pseudo_pairs
    m_feats = [0]*n_pseudo_pairs
    
    for i in range(n_pseudo_pairs):
        m_labels[i], events[i], m_feats[i] = dualmicro_fit_all_pseudo_pair_feature_computation(
            i, n_maps, backfit_results, sfreq, event_id, collapsed_event_id)
        print(f"Finished computing microstate features for psuedo pair {pseudo_pair_id[i]}")
    
    # Save the raw microstate features
    with open(f"{microstate_save_path}/raw_dualmicro_fit_all_{ff}_pseudo_pairs_features_maps{n_maps}.pkl", "wb") as filehandle:
        pickle.dump(m_feats, filehandle) # [Subject][Dur_arr,Occ_arr,TCo_arr,TMx_arr,Ent_arr] [Event, map*]
        # * the feature is calculated for each map, where applicable.
        # Transition matrix is calculated for each map -> map transition probability
    
    # with open(f"{microstate_save_path}/raw_computed_dualmicro_fit_all_{ff}_features.pkl", "rb") as file:
    #     m_feats = pickle.load(file) # [Subject][Dur_arr,Occ_arr,TCo_arr,TMx_arr,Ent_arr] [Event, map*]
    
    ### Convert all features to dataframes for further processing
    # The file names must be f-strings: without the prefix "{ff}" is written
    # literally and every frequency band overwrites the same file.
    # NOTE(review): code that loads these files must use the same formatted names
    col_names = ["Pseudo_Pair_ID", "Event_ID", "Microstate", "Value"]
    col_values = [pseudo_pair_id,list(collapsed_event_id.keys()),Microstate_names]
    dtypes = [str,str,str,"float64"]
    # Mean duration
    Dur_arr = np.stack([ele[0] for ele in m_feats]) # [Subject, event, n_map]
    Dur_df = numpy_arr_to_pandas_df(Dur_arr, col_names, col_values, dtypes)
    # Add dummy variable to enabling combining of dataframes
    measurement_id = ["Duration"]*len(Dur_df)
    Dur_df.insert(2, "Measurement", measurement_id)
    # Save df
    Dur_df.to_pickle(os.path.join(microstate_save_path,f"Dualmicro_fit_all_{ff}_pseudo_pairs_duration_df.pkl"))
    
    # Frequency of occurrence per sec
    Occ_arr = np.stack([ele[1] for ele in m_feats]) # [Subject, event, n_map]
    Occ_df = numpy_arr_to_pandas_df(Occ_arr, col_names, col_values, dtypes)
    # Add dummy variable to enabling combining of dataframes
    measurement_id = ["Occurrence"]*len(Occ_df)
    Occ_df.insert(2, "Measurement", measurement_id)
    # Save df
    Occ_df.to_pickle(os.path.join(microstate_save_path,f"Dualmicro_fit_all_{ff}_pseudo_pairs_occurrence_df.pkl"))
    
    # Ratio total Time Covered
    TCo_arr = np.stack([ele[2] for ele in m_feats]) # [Subject, event, n_map]
    TCo_df = numpy_arr_to_pandas_df(TCo_arr, col_names, col_values, dtypes)
    # Add dummy variable to enabling combining of dataframes
    measurement_id = ["Time_covered"]*len(TCo_df)
    TCo_df.insert(2, "Measurement", measurement_id)
    # Save df
    TCo_df.to_pickle(os.path.join(microstate_save_path,f"Dualmicro_fit_all_{ff}_pseudo_pairs_ratio_time_covered_df.pkl"))
    
    # Transition matrix should be read as probability of row to column
    # Build the "A->B" style labels for every (row, column) combination
    xi, xj = np.meshgrid(Microstate_names,Microstate_names)
    _, arrow = np.meshgrid(Microstate_names,["->"]*n_maps)
    
    transition_info = np.char.add(np.char.add(xj,arrow),xi)
    
    TMx_arr = np.stack([ele[3] for ele in m_feats]) # [Subject, event, n_map, n_map]
    TMx_arr = TMx_arr.reshape((n_pseudo_pairs,len(collapsed_event_id),n_maps*n_maps)) # Flatten the maps to 1D
    
    col_names = ["Pseudo_Pair_ID", "Event_ID", "Transition", "Value"]
    col_values = [pseudo_pair_id,list(collapsed_event_id.keys()),transition_info.flatten()]
    TMx_df = numpy_arr_to_pandas_df(TMx_arr, col_names, col_values, dtypes)
    # Add dummy variable to enabling combining of dataframes
    measurement_id = ["Probability"]*len(TMx_df)
    TMx_df.insert(2, "Measurement", measurement_id)
    # Save df
    TMx_df.to_pickle(os.path.join(microstate_save_path,f"Dualmicro_fit_all_{ff}_pseudo_pairs_transition_df.pkl"))
    
    # Entropy (one value per pseudo-pair and event, hence no microstate column)
    Ent_arr = np.stack([ele[4] for ele in m_feats]) # [Subject, event]
    col_names = ["Pseudo_Pair_ID", "Event_ID", "Value"]
    col_values = [pseudo_pair_id,list(collapsed_event_id.keys())]
    dtypes = [str,str,"float64"]
    Ent_df = numpy_arr_to_pandas_df(Ent_arr, col_names, col_values, dtypes)
    # Add dummy variable to enabling combining of dataframes
    measurement_id = ["Entropy"]*len(Ent_df)
    Ent_df.insert(2, "Measurement", measurement_id)
    # Save df
    Ent_df.to_pickle(os.path.join(microstate_save_path,f"Dualmicro_fit_all_{ff}_pseudo_pairs_ratio_entropy_df.pkl"))

# %% eLORETA on Intrabrain microstates
### Make forward solutions
# Computed using the fsaverage template MRI

# # First time setup will need to download fsaverage templates
# mne.datasets.fetch_fsaverage()

# Location of the fsaverage template on this machine
fs_dir = "C:/Users/glia/mne_data/MNE-fsaverage-data/fsaverage"
subjects_dir = os.path.dirname(fs_dir)

# Template transformation, source space and BEM solution
trans = "fsaverage"
bem_dir = os.path.join(fs_dir, "bem")
src = os.path.join(bem_dir, "fsaverage-ico-5-src.fif")
bem = os.path.join(bem_dir, "fsaverage-5120-5120-5120-bem-sol.fif")

# Read the template sourcespace
sourcespace = mne.read_source_spaces(src)

# A single forward operator is enough when working with a template: the
# channel positions are assumed to be approximately identical for all
# subjects, since they all wore the same caps
subject_eeg = epoch.copy()

# The average reference projection is needed for inverse modelling
subject_eeg.set_eeg_reference(projection=True)

# Make forward solution
fwd = mne.make_forward_solution(
    subject_eeg.info,
    trans=trans,
    src=src,
    bem=bem,
    eeg=True,
    mindist=5.0,
    n_jobs=1,
)

# # Save forward operator
# fname_fwd = "./Source_fwd/fsaverage_{}-fwd.fif".format(study_order[i])
# mne.write_forward_solution(fname_fwd, fwd, overwrite=True)

# # Check the alignment looks correct between EEG sensors and the template
# mne.viz.plot_alignment(
#     subject_eeg.info, trans, src=src, fwd=fwd, dig=True,
#     meg=["helmet", "sensors"], subjects_dir=subjects_dir, surfaces="auto")

### Load Parcellation
# Desikan-Killiany atlas (34 ROI from both hemispheres = 68 ROIs)
# Named aparc.annot in MNE python fsaverage folder
# The last label ("unknown") is dropped directly after reading
labels = mne.read_labels_from_annot(
    "fsaverage", parc="aparc", subjects_dir=subjects_dir
)[:-1]
label_names = [roi.name for roi in labels]
n_roi = len(label_names)

# Prepare brain lobe information
# Every lobe lists its ROIs as "<roi>-lh", "<roi>-rh" pairs, so the lists are
# generated from the bare ROI names
Frontal_rois = [f"{roi}-{hemi}" for roi in (
    "superiorfrontal", "rostralmiddlefrontal", "caudalmiddlefrontal",
    "parsopercularis", "parstriangularis", "parsorbitalis",
    "lateralorbitofrontal", "medialorbitofrontal",
    "precentral", "paracentral", "frontalpole",
) for hemi in ("lh", "rh")]
Parietal_rois = [f"{roi}-{hemi}" for roi in (
    "superiorparietal", "inferiorparietal", "supramarginal",
    "postcentral", "precuneus",
) for hemi in ("lh", "rh")]
Temporal_rois = [f"{roi}-{hemi}" for roi in (
    "superiortemporal", "middletemporal", "inferiortemporal",
    "bankssts", "fusiform", "transversetemporal",
    "entorhinal", "temporalpole", "parahippocampal",
) for hemi in ("lh", "rh")]
Occipital_rois = [f"{roi}-{hemi}" for roi in (
    "lateraloccipital", "lingual", "cuneus", "pericalcarine",
) for hemi in ("lh", "rh")]
Cingulate_rois = [f"{roi}-{hemi}" for roi in (
    "rostralanteriorcingulate", "caudalanteriorcingulate",
    "posteriorcingulate", "isthmuscingulate",
) for hemi in ("lh", "rh")]
Insular_rois = [f"insula-{hemi}" for hemi in ("lh", "rh")]

Lobes = [Frontal_rois,Parietal_rois,Temporal_rois,Occipital_rois,Cingulate_rois,Insular_rois]

Brain_region_labels = ["Frontal","Parietal","Temporal","Occipital","Cingulate","Insular"]
# Region names with hemisphere suffix: "Frontal-lh", "Frontal-rh", "Parietal-lh", ...
Brain_region_hemi_labels = np.array(
    [f"{region}-{hemi}" for region in Brain_region_labels for hemi in ("lh", "rh")],
    dtype="<U12")

# Start from the ROI names and overwrite each one with the lobe it belongs to
Brain_region = np.array(label_names, dtype = "<U32")
for l, lobe_rois in enumerate(Lobes):
    belongs_to_lobe = np.isin(Brain_region, lobe_rois)
    Brain_region[belongs_to_lobe] = Brain_region_labels[l]

### Concatenate the microstates into one Raw Object to apply inverse on it
n_maps = 8
Microstate_names = [chr(ele) for ele in range(65,65+n_maps)]

# The noise covariance and the inverse operator only depend on the sensor info
# and the template forward model, not on the frequency range, so they are
# built once instead of once per loop iteration
# Using assumption about equal variance and no correlations I make a diagonal matrix as cov
noise_cov = mne.make_ad_hoc_cov(subject_eeg.info, None)

# Make inverse operator
# Using default depth parameter = 0.8 and free orientation (loose = 1)
inverse_operator = mne.minimum_norm.make_inverse_operator(subject_eeg.info,
                                                          fwd, noise_cov,
                                                          loose = 1, depth = 0.8,
                                                          verbose = 0)
src_inv = inverse_operator["src"]
# Define regularization
snr = 3 # Default setting

# range() is required here: iterating over the bare integer returned by len()
# raises "TypeError: 'int' object is not iterable"
for f in range(len(all_freq_ranges)):
    ff = freq_names[f]

    with open(f"{microstate_save_path}Reordered/Intrabrain_microstate_fit_all_{ff}{n_maps}.pkl", "rb") as file:
        microstate_results = pickle.load(file)
    
    # Get the microstates and reshape to have channels in the first dim
    maps = microstate_results[0]
    maps = maps.transpose()
    
    # Each microstate map becomes one "time sample" of the Raw object
    raw_maps = mne.io.RawArray(maps,subject_eeg.info)
    raw_maps._filenames = [""] # Fix error with NoneType for "filename" for raw created with RawArray
    raw_maps.set_eeg_reference(projection=True) # needed for inverse modelling
    
    # Compute inverse solution and retrieve the source localized microstate activities for each label
    # Use eLORETA and only keep the activity normal to the cortical surface
    stc = mne.minimum_norm.apply_inverse_raw(raw_maps,inverse_operator,
                                                lambda2 = 1/(snr**2),
                                                pick_ori = "normal",
                                                method = "eLORETA", verbose = 2)
    
    # Get the source activity in the ROIs
    label_activity = mne.extract_label_time_course(stc, labels, src_inv, mode="mean_flip",
                                     return_generator=False, verbose=0)
    
    # Visualize the microstates in source space
    # This way of plotting makes the color scale fixed across microstates
    brain = stc.plot(
        hemi="lh",
        subjects_dir=subjects_dir,
        smoothing_steps=1,
    )
    
    ### Convert Label Activity to Pandas DataFrame
    # With ROI names and then add Brain Region label
    # label_activity is indexed [ROI, microstate]; it is transposed below, so
    # the microstate column comes first
    col_names = ["Microstate", "ROI", "Value"]
    col_val = [Microstate_names, label_names]
    
    # Create the source microstate activity dataframe
    sMicro_df = numpy_arr_to_pandas_df(label_activity.T, col_names = col_names, col_values = col_val)
    
    # Sanity check that the conversion kept the values aligned with their labels
    assert sMicro_df.loc[(sMicro_df["ROI"]==label_names[4])&
                             (sMicro_df["Microstate"]==Microstate_names[3]),
                             "Value"].iloc[0] == label_activity[4,3]
    
    # Add brain region information
    # Brain_region has one entry per ROI and is repeated for every microstate
    sMicro_df.insert(2, "Brain_region", np.tile(Brain_region,sMicro_df.shape[0]//n_roi))
    sMicro_df["Brain_region"] = sMicro_df["Brain_region"].astype("category").\
                cat.reorder_categories(Brain_region_labels, ordered=True)
    
    # Add hemisphere information (the last two characters of the ROI name)
    sMicro_df.insert(3, "Hemisphere", [ele[-2:] for ele in sMicro_df["ROI"]])
    
    # Add a colum that combines brain region and hemisphere for plotting
    sMicro_df.insert(4, "Brain_region_hemi", [b+"-"+h for b, h in zip(sMicro_df["Brain_region"],sMicro_df["Hemisphere"])])
    sMicro_df["Brain_region_hemi"] = sMicro_df["Brain_region_hemi"].astype("category").\
                cat.reorder_categories(Brain_region_hemi_labels, ordered=True)
    
    # Save the dataframe
    sMicro_df.to_pickle(os.path.join(microstate_save_path,f"Single_micro_{ff}_source_activity_df.pkl"))

# %% eLORETA on two-brain microstates
# Continued based on fwd operator and template loaded for intrabrain
n_maps = 8
Microstate_names = [chr(ele) for ele in range(65,65+n_maps)]

# The noise covariance and the inverse operator only depend on the sensor info
# and the template forward model, not on the frequency range, so they are
# built once instead of once per loop iteration
# Using assumption about equal variance and no correlations I make a diagonal matrix as cov
noise_cov = mne.make_ad_hoc_cov(subject_eeg.info, None)

# Make inverse operator
# Using default depth parameter = 0.8 and free orientation (loose = 1)
inverse_operator = mne.minimum_norm.make_inverse_operator(subject_eeg.info,
                                                          fwd, noise_cov,
                                                          loose = 1, depth = 0.8,
                                                          verbose = 0)
src_inv = inverse_operator["src"]
# Define regularization
snr = 3 # Default setting

# range() is required here: iterating over the bare integer returned by len()
# raises "TypeError: 'int' object is not iterable"
for f in range(len(all_freq_ranges)):
    ff = freq_names[f]

    with open(f"{microstate_save_path}Reordered/Dualmicro_fit_all_{ff}_data_maps{n_maps}.pkl", "rb") as file:
        microstate_results = pickle.load(file)
    
    # Get the microstates
    # Each two-person map is split into its two single-person maps
    maps = microstate_results[0]
    maps = maps.reshape(2*n_maps,n_channels)
    
    # # Check the maps were split properly
    # plot_microstates(n_maps, maps[:8], microstate_results[3])
    # plot_microstates(n_maps, maps[8:], microstate_results[3])
    # Maps are ordered as: ppn1 A, ppn2 A, ppn1 B, ppn2 B etc
    
    # Transpose to have channels in the first dim
    maps = maps.transpose()
    
    # Each single-person map becomes one "time sample" of the Raw object
    raw_maps = mne.io.RawArray(maps,subject_eeg.info)
    raw_maps._filenames = [""] # Fix error with NoneType for "filename" for raw created with RawArray
    raw_maps.set_eeg_reference(projection=True) # needed for inverse modelling
    
    # Compute inverse solution and retrieve the source localized microstate activities for each label
    # Use eLORETA and only keep the activity normal to the cortical surface
    stc = mne.minimum_norm.apply_inverse_raw(raw_maps,inverse_operator,
                                                lambda2 = 1/(snr**2),
                                                pick_ori = "normal",
                                                method = "eLORETA", verbose = 2)
    
    # Get the source activity in the ROIs
    label_activity = mne.extract_label_time_course(stc, labels, src_inv, mode="mean_flip",
                                     return_generator=False, verbose=0)
    
    # Visualize the microstates in source space
    # This way of plotting makes the color scale fixed across microstates
    brain = stc.plot(
        hemi="lh",
        subjects_dir=subjects_dir,
        smoothing_steps=1,
    )
    
    # Visualize with different color scales for each microstate
    Microstate_names2 = np.repeat(Microstate_names,2).astype("<U2")
    Microstate_names2[::2] = [ele+"1" for ele in Microstate_names]
    # Odd slots of the interleaved name list hold the "2"-suffixed names
    Microstate_names2[1::2] = [name+"2" for name in Microstate_names]
    
    # Export source activation images for every microstate:
    # lateral and medial view per hemisphere, plus a dorsal view and a flat map
    save_path = f"{fig_save_path}Microstates/SourceDualmicroPrototypes/"
    
    hemis = ["lh","rh"]
    views = ["lateral","medial"]
    
    # Crop boundaries spaced 1/sfreq apart; microstate i is cut from
    # times0[i] up to (but excluding) times0[i+1]. Identical for every i.
    times0 = np.linspace(0,1,sfreq+1)[:2*n_maps+1]
    # Keyword arguments shared by every stc0.plot call below
    shared_plot_kw = dict(
        subjects_dir=subjects_dir,
        smoothing_steps=10, # spatial smoothing
        background="white",
        cortex="classic",
        transparent=True,
    )
    
    for i, micro_name in enumerate(Microstate_names2):
        stc0 = stc.copy().crop(times0[i],times0[i+1],include_tmax=False)
        # Color bar limits are read from the absolute activations sorted in
        # descending order (first column of the cropped data):
        # - max saturation (yellow or teal) at the top 1% value
        # - middle at the top 5% value, i.e. alpha = 1 and progressively
        #   closer to yellow or teal
        # - lower boundary at the top 10% value, i.e. red/blue but with
        #   decreased transparency
        n_sources = stc0.shape[0]
        abs_descending = -np.sort(-np.abs(stc0.data),axis=0)
        clim_max = abs_descending[n_sources//100][0]
        clim_mid = abs_descending[n_sources//20][0]
        clim_min = abs_descending[n_sources//10][0]
        clim0 = {"kind":"value","pos_lims":[clim_min,clim_mid,clim_max]}
        
        # Common prefix of all image files written for this microstate
        img_prefix = f"Dualmicro_source_{micro_name}"
        lateral_view, medial_view = views
        
        # Lateral and medial: one figure per hemisphere, re-used for both views
        for h, hh in enumerate(hemis):
            brain = stc0.plot(
                hemi=hh,
                colorbar=False,
                size=800,
                views=lateral_view,
                clim=clim0,
                **shared_plot_kw,
            )
            brain.save_image(os.path.join(save_path, f"{img_prefix}_{hh}_{lateral_view}.png"))
            # Rotate the same figure to the medial side and save again
            brain.show_view(medial_view)
            brain.save_image(os.path.join(save_path, f"{img_prefix}_{hh}_{medial_view}.png"))
        
        # Dorsal map (both hemispheres, the only figure that keeps the colorbar)
        brain = stc0.plot(
            hemi="both",
            colorbar=True,
            size=1500,
            views="dorsal",
            clim=clim0,
            **shared_plot_kw,
        )
        brain.save_image(os.path.join(save_path, f"{img_prefix}_dorsal.png"))
        
        # Flat map (both hemispheres on the flattened surface)
        brain = stc0.plot(
            hemi="both",
            surface="flat",
            colorbar=False,
            size=1500,
            views="flat",
            clim=clim0,
            **shared_plot_kw,
        )
        brain.save_image(os.path.join(save_path, f"{img_prefix}_flat.png"))
        # Close all 3D figures before moving on to the next microstate
        mne.viz.close_all_3d_figures()
    
    # Plot the mean of stc on the left hemisphere
    stc_mean = stc.mean()
    brain = stc_mean.plot(hemi="lh", subjects_dir=subjects_dir, smoothing_steps=10)
    
    ### Convert Label Activity to Pandas DataFrame
    # Long format: one row per (Microstate, ROI) combination with its value,
    # followed by brain region and hemisphere annotations
    col_names = ["Microstate", "ROI", "Value"]
    col_val = [Microstate_names2, label_names]
    dtypes = [str, str, "float64"]
    
    # Create the source microstate activity dataframe
    # label_activity is indexed [ROI, microstate] (see the check below), so it
    # is transposed to match the column order above
    sMicro_df = numpy_arr_to_pandas_df(label_activity.T, col_names, col_val, dtypes)
    
    # Sanity check: one arbitrary entry must map back to the same array element
    assert sMicro_df.loc[(sMicro_df["ROI"]==label_names[4])&
                             (sMicro_df["Microstate"]==Microstate_names2[3]),
                             "Value"].iloc[0] == label_activity[4,3]
    
    # Add brain region information
    # Brain_region is repeated once per block of n_roi rows
    # NOTE(review): assumes Brain_region has one entry per ROI, in label_names order - confirm
    sMicro_df.insert(2, "Brain_region", np.tile(Brain_region, sMicro_df.shape[0]//n_roi))
    sMicro_df["Brain_region"] = sMicro_df["Brain_region"].astype("category").\
                cat.reorder_categories(Brain_region_labels, ordered=True)
    
    # Add hemisphere information (the last two characters of the ROI name)
    sMicro_df.insert(3, "Hemisphere", [ele[-2:] for ele in sMicro_df["ROI"]])
    
    # Add a column that combines brain region and hemisphere for plotting
    sMicro_df.insert(4, "Brain_region_hemi", [b+"-"+h for b, h in zip(sMicro_df["Brain_region"],sMicro_df["Hemisphere"])])
    sMicro_df["Brain_region_hemi"] = sMicro_df["Brain_region_hemi"].astype("category").\
                cat.reorder_categories(Brain_region_hemi_labels, ordered=True)
    
    # Save the dataframe
    # The f-prefix is required: without it every frequency band is written to
    # the same literal file name "Dualmicro_{ff}_source_activity_df.pkl"
    sMicro_df.to_pickle(os.path.join(microstate_save_path,f"Dualmicro_{ff}_source_activity_df.pkl"))

# %% LRTC with DFA on Two-person microstate label time series
# Using Detrended Fluctuation Analysis (DFA)
# Adapted from Python Implementation by Arthur-Ervin Avramiea <a.e.avramiea@vu.nl>
# From NBT2 toolbox
"""
See Hardstone et al, 2012 for more info
Perform DFA
    1 Compute cumulative sum of time series to create signal profile
    2 Define set of window sizes (see below)
    3 Remove the linear trend using least-squares for each window
    4 Calculate standard deviation for each window and take the mean
    5 Plot fluctuation function (Standard deviation) as function
      for all window sizes, on double logarithmic scale
    6 The DFA exponent alpha corresponds to the Hurst exponent
      f(L) = sd = L^alpha (with alpha as the slope in the log-log plot)

If 0 < alpha < 0.5: The process exhibits anti-correlations
If 0.5 < alpha < 1: The process exhibits positive correlations
If alpha = 0.5: The process is indistinguishable from a random process
If 1.0 < alpha < 2.0: The process is non-stationary. H = alpha - 1

Window sizes should be equally spaced on a logarithmic scale
Sizes should be at least 4 samples and up to 10% of total signal length

### Specific for our microstate DFA analysis
We have 8 microstates, but to compute the random walk we will partition
the microstate sequence into two classes (see reference on microstate Hurst
https://pubmed.ncbi.nlm.nih.gov/20921381/)

A/B/C/D will be assigned the positive direction, while E/F/G/H will be
assigned the negative direction, corresponding to whether ppn1 or ppn2
is in one of the canonical microstates while the other has a non-specific
(average) topography.

Each 25s trial is too short to estimate LRTC on, so I will concatenate all
the trials corresponding to each condition.

This should yield up to 25s * 16 trials = 400s of data for each condition,
except rest which is up to 120s * 2 trials = 240s

DFA is computed from 8 trials and then averaged, to avoid the
problem of flipping in the asymmetric trials. We change the window sizes to 5-20s.
To ensure consistency, the same procedure is applied to the symmetric trials.

"""

# Window sizes
# Only windows lasting between 5s and 20s are used for the DFA fit
compute_interval = [5,20]
# Candidate sizes: 40 log-spaced durations from 0.1 second (10^-1) to
# 1000 seconds (10^3), converted to a whole number of samples
candidate_sizes = np.floor(np.logspace(-1,3,40) * sfreq).astype(int)
# Interval bounds expressed in samples
min_samples = compute_interval[0]*sfreq
max_samples = compute_interval[1]*sfreq
# Keep the candidates that fall inside the interval (bounds included)
within_interval = (candidate_sizes >= min_samples) & (candidate_sizes <= max_samples)
window_sizes = candidate_sizes[within_interval]

# For every frequency band: compute the DFA exponent and fluctuation function
# for each pair and condition, then store them as arrays and as a dataframe
# range() is required here: iterating over the int returned by len() raises TypeError
for f in range(len(all_freq_ranges)):
    ff = freq_names[f]
    # Nolds are already using all cores so multiprocessing with concurrent makes it slower
    n_maps = 8
    
    # Load the reordered two-person microstate fit for this band
    with open(f"{microstate_save_path}Reordered/Dualmicro_fit_all_{ff}_data_maps{n_maps}.pkl", "rb") as file:
        microstate_results = pickle.load(file)
    # Load all trialinfos
    with open(f"{microstate_save_path}Dualmicro_fit_all_{ff}_trial_events_infos.pkl", "rb") as file:
        trialinfo_list = pickle.load(file)
    
    # Pre-allocate memory: [pair, condition] and [pair, condition, window size]
    DFA_arr = np.zeros((n_pairs,len(collapsed_event_id)))
    Fluctuation_arr = np.zeros((n_pairs,len(collapsed_event_id),len(window_sizes)))
    
    # Get start time
    c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", time.localtime())
    print("Started {}".format(c_time1))
    # Nolds are already using all cores so concurrent futures would make it slower
    for i in range(n_pairs):
        # Compute DFA
        dfa_temp, fluc_temp = compute_dualmicro_DFA(i, microstate_results, 
           trialinfo_list, sfreq, window_sizes, event_id, collapsed_event_id, True)
        # Save to array
        DFA_arr[i] = dfa_temp
        Fluctuation_arr[i] = fluc_temp
        print("Finished {} out of {} pairs".format(i+1,n_pairs))
    
    # Get ending time
    c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", time.localtime())
    print(("Started {} \nEnded Time {}".format(c_time1,c_time2)))
    
    # Save the raw DFA analysis data
    # NOTE(review): these two file names do not contain the frequency band, so
    # every iteration overwrites the previous one and only the last band is
    # kept. Names are left unchanged in case other code loads them - confirm.
    np.save(microstate_save_path+"DFA_arr.npy", DFA_arr)
    np.save(microstate_save_path+"Fluctuation_arr.npy", Fluctuation_arr)
    
    # Convert to Pandas dataframe (DFA exponent)
    col_names = ["Pair_ID", "Event_ID", "Value"]
    col_values = [Pair_id,list(collapsed_event_id.keys())]
    dtypes = ["int64",str,"float64"]
    
    DFA_df = numpy_arr_to_pandas_df(DFA_arr, col_names, col_values, dtypes)
    
    # Add dummy variable to enable combining of dataframes
    measurement_id = ["DFA"]*len(DFA_df)
    DFA_df.insert(2, "Measurement", measurement_id)
    # Save df
    DFA_df.to_pickle(os.path.join(microstate_save_path,f"Dualmicro_{ff}_DFA_exponent_df.pkl"))

# %% DFA in pseudo-pairs
# For every frequency band: compute the DFA exponent and fluctuation function
# from the backfitted pseudo-pair results
# range() is required here: iterating over the int returned by len() raises TypeError
for f in range(len(all_freq_ranges)):
    ff = freq_names[f]
    # Nolds are already using all cores so multiprocessing with concurrent makes it slower
    n_maps = 8
    
    # Load all the backfit pseudo-pair results
    with open(f"{microstate_save_path}Reordered/Backfitting/Dualmicro_fit_all_{ff}_data_maps{n_maps}.pkl", "rb") as file:
        backfit_results = pickle.load(file) # [pseudo_pair_id, L, GEV, events]
        
    # Pre-allocate memory: [pair, condition] and [pair, condition, window size]
    # NOTE(review): sized with n_pairs and labelled with Pair_id below - confirm
    # these describe the pseudo-pairs and not the real pairs
    DFA_arr = np.zeros((n_pairs,len(collapsed_event_id)))
    Fluctuation_arr = np.zeros((n_pairs,len(collapsed_event_id),len(window_sizes)))
    
    # Get start time
    c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", time.localtime())
    print("Started {}".format(c_time1))
    # Nolds are already using all cores so concurrent futures would make it slower
    for i in range(n_pairs):
        # Compute DFA
        dfa_temp, fluc_temp = compute_dualmicro_DFA_pseudo(i, backfit_results,
           sfreq, window_sizes, event_id, collapsed_event_id, True)
        # Save to array
        DFA_arr[i] = dfa_temp
        Fluctuation_arr[i] = fluc_temp
        print("Finished {} out of {} pairs".format(i+1,n_pairs))
    
    # Get ending time
    c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", time.localtime())
    print(("Started {} \nEnded Time {}".format(c_time1,c_time2)))
    
    # Save the raw DFA analysis data
    # NOTE(review): the .npy names carry no frequency band (each iteration
    # overwrites the last), and all three output files in this cell have the
    # same names as those written by the real-pair DFA cell, so running this
    # cell replaces the real-pair results. Names are left unchanged in case
    # other code loads them - confirm the intended file names.
    np.save(microstate_save_path+"DFA_arr.npy", DFA_arr)
    np.save(microstate_save_path+"Fluctuation_arr.npy", Fluctuation_arr)
    
    # Convert to Pandas dataframe (DFA exponent)
    col_names = ["Pair_ID", "Event_ID", "Value"]
    col_values = [Pair_id,list(collapsed_event_id.keys())]
    dtypes = ["int64",str,"float64"]
    
    DFA_df = numpy_arr_to_pandas_df(DFA_arr, col_names, col_values, dtypes)
    
    # Add dummy variable to enable combining of dataframes
    measurement_id = ["DFA"]*len(DFA_df)
    DFA_df.insert(2, "Measurement", measurement_id)
    # Save df
    DFA_df.to_pickle(os.path.join(microstate_save_path,f"Dualmicro_{ff}_DFA_exponent_df.pkl"))

# %% Time-lagged inter-brain microstate synchrony
# Number of microstates is hard-coded to the optimum found with the CV criterion and GEV
n_maps = 5

# Lags (in samples) that are searched for the greatest time-lagged interbrain
# microstate synchrony: every integer lag within 1 second in both directions
lag_search_range = sfreq
n_lags = lag_search_range*2+1
lag_interval = np.linspace(-lag_search_range,lag_search_range,n_lags).astype(int)

# "Z" is the symbol for the non common microstate, followed by one capital
# letter (starting at "A", code point 65) per map
Microstate_names = ["Z"] + [chr(65+map_idx) for map_idx in range(n_maps)]

# Loop over frequencies
# range() is required here: iterating over the int returned by len() raises TypeError
for f in range(len(all_freq_ranges)):
    ff = freq_names[f]

    # Load all microstate results
    with open(f"{microstate_save_path}Reordered/Intrabrain_microstate_fit_all_{ff}{n_maps}.pkl", "rb") as file:
        microstate_results = pickle.load(file)
    # Load all trialinfos
    with open(f"{microstate_save_path}Intrabrain_microstate_fit_all_{ff}_trialinfos.pkl", "rb") as file:
        trialinfo_list = pickle.load(file)
    
    # One slot per pair; pair i is looked up through Subject_id[2*i] below
    m_labels = [0]*(n_subjects//2)
    events = [0]*(n_subjects//2)
    m_feats = [0]*(n_subjects//2)
    shift_info = [0]*(n_subjects//2)
    Pair_id = [0]*(n_subjects//2)
    
    for i in tqdm(range(n_subjects//2)):
        m_labels[i], events[i], m_feats[i], shift_info[i] = shifted_interbrain_microstate_feature_computation(i,
               n_maps, microstate_results, trialinfo_list, sfreq,
               event_id, collapsed_event_id, lag_search_range, lag_interval)
        # Pair number = subject id without its first and last character
        Pair_id[i] = int(str(Subject_id[2*i])[1:-1])
        print(f"Finished computing interbrain microstate features for pair {Pair_id[i]}")
    
    # Offset every pair number by 100
    Pair_id = [ele+100 for ele in Pair_id]
    
    # Save the raw microstate features
    with open(f"{microstate_save_path}/raw_shifted_interbrain_single_micro_fit_all_{ff}_maps{n_maps}.pkl", "wb") as filehandle:
        pickle.dump([Pair_id, m_feats, shift_info], filehandle) # [Subject][Dur_arr,Occ_arr,TCo_arr,TMx_arr,Ent_arr][Event, map*]
        # * the feature is calculated for each map, where applicable.
        # Transition matrix is calculated for each map -> map transition probability
        # The first row and column correspond to the non common microstate, i.e.
        # there is a different microstate in the pair
    
    # with open(f"{microstate_save_path}/raw_shifted_interbrain_single_micro_fit_all_{ff}_maps{n_maps}.pkl", "rb") as file:
    #     Pair_id, m_feats, shift_info = pickle.load(file) # [Subject][Dur_arr,Occ_arr,TCo_arr,TMx_arr,Ent_arr] [Event, map*]
    
    n_pairs = len(Pair_id)
    
    ### Convert all features to dataframes for further processing
    col_names = ["Pair_ID", "Event_ID", "Microstate", "Value"]
    col_values = [Pair_id,list(collapsed_event_id.keys()),Microstate_names]
    dtypes = [int,str,str,"float64"]
    
    # Ratio total Time Covered (third entry of each pair's feature list)
    TCo_arr = np.stack([ele[2] for ele in m_feats]) # [Subject, event, n_map]
    TCo_df = numpy_arr_to_pandas_df(TCo_arr, col_names, col_values, dtypes)
    # Add dummy variable to enable combining of dataframes
    measurement_id = ["Time_covered"]*len(TCo_df)
    TCo_df.insert(2, "Measurement", measurement_id)
    # Save df
    TCo_df.to_pickle(os.path.join(microstate_save_path,f"Shifted_IB_Single_micro_fit_all_{ff}_maps{n_maps}_ratio_time_covered_df.pkl"))