diff --git a/FeatureEstimation.py b/FeatureEstimation.py
index 612ce0d02737451720cc3cc12515e505a3817c94..10e0e55f36a396d6b22aaba3ea9877c519bd4bde 100644
--- a/FeatureEstimation.py
+++ b/FeatureEstimation.py
@@ -255,16 +255,27 @@ frontal_theta_mean_subject = power_df_sub1[power_df_sub1["Freq_band"] == "theta"
 # Calculate average frontal power beta
 frontal_beta_mean_subject = power_df_sub1[power_df_sub1["Freq_band"] == "beta"].\
     groupby(["Subject_ID","Group_status","Eye_status"]).mean().reset_index()
+# Extract all values
+frontal_beta_subject_values = power_df_sub1[power_df_sub1["Freq_band"] == "beta"].copy()
 
 # Calculate average frontal, midline power theta
 frontal_midline_theta_mean_subject = power_df_sub2[power_df_sub2["Freq_band"] == "theta"].\
     groupby(["Subject_ID","Group_status","Eye_status"]).mean().reset_index()
+# Extract all values
+frontal_midline_theta_subject_values = power_df_sub2[power_df_sub2["Freq_band"] == "theta"].copy()
 
 
 # Convert from dB to raw power
 frontal_theta_mean_subject["PSD"] = 10**(frontal_theta_mean_subject["PSD"]/10)
 frontal_beta_mean_subject["PSD"] = 10**(frontal_beta_mean_subject["PSD"]/10)
 frontal_midline_theta_mean_subject["PSD"] = 10**(frontal_midline_theta_mean_subject["PSD"]/10)
+frontal_beta_subject_values["PSD"] = 10**(frontal_beta_subject_values["PSD"]/10)
+frontal_midline_theta_subject_values["PSD"] = 10**(frontal_midline_theta_subject_values["PSD"]/10)
+
+frontal_beta_mean_subject.to_pickle(os.path.join(Feature_savepath,"fBMS_df.pkl"))
+frontal_midline_theta_mean_subject.to_pickle(os.path.join(Feature_savepath,"fMTMS_df.pkl"))
+frontal_beta_subject_values.to_pickle(os.path.join(Feature_savepath,"fBSV_df.pkl"))
+frontal_midline_theta_subject_values.to_pickle(os.path.join(Feature_savepath,"fMTSV_df.pkl"))
 
 # Calculate mean for each group and take ratio for whole group
 # To confirm trend observed in PSD plots
@@ -285,7 +296,7 @@ frontal_theta_beta_ratio.rename(columns={"PSD":"TBR"},inplace=True)
 dummy_variable = ["Frontal Theta Beta Ratio"]*frontal_theta_beta_ratio.shape[0]
 frontal_theta_beta_ratio.insert(3, "Measurement", dummy_variable )
 
-frontal_theta_beta_ratio.to_pickle(os.path.join(Feature_savepath,"fTBR_df.pkl"))
+# frontal_theta_beta_ratio.to_pickle(os.path.join(Feature_savepath,"fTBR_df.pkl"))
 
 """
 # %% Frequency bands asymmetry
@@ -922,8 +933,12 @@ for i in range(n_subjects):
     temp_EC_p_hat = p_empirical(micro_labels[i][0], n_maps)
     temp_EO_p_hat = p_empirical(micro_labels[i][1], n_maps)
     # Calculate transition matrix
+    """
     temp_EC_T_hat = T_empirical(micro_labels[i][0], n_maps, gaps_idx[i][0])
     temp_EO_T_hat = T_empirical(micro_labels[i][1], n_maps, gaps_idx[i][1])
+    """
+    temp_EC_T_hat = T_empirical(micro_labels[i][0], n_maps)
+    temp_EO_T_hat = T_empirical(micro_labels[i][1], n_maps)
     # Calculate Shannon entropy
     temp_EC_h_hat = H_1(micro_labels[i][0], n_maps)
     temp_EO_h_hat = H_1(micro_labels[i][1], n_maps)
@@ -942,10 +957,11 @@ np.save(Feature_savepath+"microstate_transition_data.npy", microstate_transition
 # Transition matrix should be read as probability of row to column
 microstate_transition_data_arr =\
     microstate_transition_data.reshape((n_subjects,n_eye_status,n_maps*n_maps)) # flatten 4 x 4 matrix to 1D
-transition_info = ["M1->M1", "M1->M2", "M1->M3", "M1->M4",
-                   "M2->M1", "M2->M2", "M2->M3", "M2->M4",
-                   "M3->M1", "M3->M2", "M3->M3", "M3->M4",
-                   "M4->M1", "M4->M2", "M4->M3", "M4->M4"]
+transition_info = ["M1->M1", "M1->M2", "M1->M3", "M1->M4", "M1->M5",
+                   "M2->M1", "M2->M2", "M2->M3", "M2->M4", "M2->M5",
+                   "M3->M1", "M3->M2", "M3->M3", "M3->M4", "M3->M5",
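With `n_maps` now at 5 there are 25 hand-written `transition_info` entries to keep in sync. A minimal sketch of generating the labels programmatically, plus a toy row-to-column transition-probability matrix, follows; it assumes `T_empirical` from the microstates module behaves roughly like the `transition_matrix` helper shown (an assumption for illustration only) and is not part of the patch.

```python
# Illustrative sketch (not part of the patch): build the 25 transition labels
# programmatically and estimate a row -> column transition-probability matrix
# from a microstate label sequence. T_empirical presumably does the equivalent;
# the helper here is only an assumption for illustration.
import numpy as np

n_maps = 5
transition_info = ["M{}->M{}".format(i + 1, j + 1)
                   for i in range(n_maps) for j in range(n_maps)]  # row-major, matches the reshape order

def transition_matrix(labels, n_states):
    """Empirical transition probabilities; entry [i, j] is P(next = j | current = i)."""
    T = np.zeros((n_states, n_states))
    for a, b in zip(labels[:-1], labels[1:]):
        T[a, b] += 1
    row_sums = T.sum(axis=1, keepdims=True)
    return T / np.where(row_sums == 0, 1, row_sums)

# Example: a short label sequence with states coded 0..4
example = np.array([0, 0, 1, 2, 2, 4, 3, 3, 0])
T_hat = transition_matrix(example, n_maps)
assert np.allclose(T_hat.sum(axis=1)[T_hat.sum(axis=1) > 0], 1.0)  # each occupied row sums to 1
```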
+ "M4->M1", "M4->M2", "M4->M3", "M4->M4", "M4->M5", + "M5->M1", "M5->M2", "M5->M3", "M5->M4", "M5->M5"] arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, microstate_transition_data_arr.shape), indexing="ij"))) + [microstate_transition_data_arr.ravel()]) microstate_transition_data_df = pd.DataFrame(arr, columns = ["Subject_ID", "Eye_status", "Transition", "Value"]) @@ -974,7 +990,7 @@ arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, microstate_ microstate_time_df = pd.DataFrame(arr, columns = ["Subject_ID", "Eye_status", "Microstate", "Value"]) # Change from numerical coding to actual values eye_status = list(final_epochs[0].event_id.keys()) -microstates = [1,2,3,4] +microstates = [1,2,3,4,5] index_values = [Subject_id,eye_status,microstates] for col in range(len(index_values)): @@ -1032,1724 +1048,1724 @@ microstate_entropy_df.insert(3, "Measurement", dummy_variable) # Save df microstate_entropy_df.to_pickle(os.path.join(Feature_savepath,"microstate_entropy_df.pkl")) -# %% Long-range temporal correlations (LRTC) -""" -See Hardstone et al, 2012 -Hurst exponent estimation steps: - 1. Preprocess - 2. Band-pass filter for frequency band of interest - 3. Hilbert transform to obtain amplitude envelope - 4. Perform DFA - 4.1 Compute cumulative sum of time series to create signal profile - 4.2 Define set of window sizes (see below) - 4.3 Remove the linear trend using least-squares for each window - 4.4 Calculate standard deviation for each window and take the mean - 4.5 Plot fluctuation function (Standard deviation) as function - for all window sizes, on double logarithmic scale - 4.6 The DFA exponent alpha correspond to Hurst exponent - f(L) = sd = L^alpha (with alpha as linear coefficient in log plot) - -If 0 < alpha < 0.5: The process exhibits anti-correlations -If 0.5 < alpha < 1: The process exhibits positive correlations -If alpha = 0.5: The process is indistinguishable from a random process -If 1.0 < alpha < 2.0: The process is non-stationary. 
H = alpha - 1 - -Window sizes should be equally spaced on a logarithmic scale -Sizes should be at least 4 samples and up to 10% of total signal length -Filters can influence neighboring samples, thus filters should be tested -on white noise to estimate window sizes that are unaffected by filters - -filter_length=str(2*1/fmin)+"s" # cannot be used with default transition bandwidth - -""" -# From simulations with white noise I determined window size thresholds for the 5 frequency bands: -thresholds = [7,7,7,6.5,6.5] -# And their corresponding log step sizes -with open("LRTC_log_win_sizes.pkl", "rb") as filehandle: - log_win_sizes = pickle.load(filehandle) - -# Variables for the the different conditions -# Sampling frequency -sfreq = final_epochs[0].info["sfreq"] -# Channels -ch_names = final_epochs[0].info["ch_names"] -n_channels = len(ch_names) -# Frequency -Freq_Bands = {"delta": [1.25, 4.0], - "theta": [4.0, 8.0], - "alpha": [8.0, 13.0], - "beta": [13.0, 30.0], - "gamma": [30.0, 49.0]} -n_freq_bands = len(Freq_Bands) -# Eye status -eye_status = list(final_epochs[0].event_id.keys()) -n_eye_status = len(eye_status) - -### Estimating Hurst exponent for the data -# The data should be re-referenced to common average (Already done) - -# Data are transformed to numpy arrays -# Then divided into EO and EC and further into each of the 5 trials -# So DFA is estimated for each trial separately, which was concluded from simulations -gaps_trials_idx = np.load("Gaps_trials_idx.npy") # re-used from microstate analysis -n_trials = 5 - -H_data = [] -for i in range(n_subjects): - # Transform data to correct shape - temp_arr = final_epochs[i].get_data() # get data - arr_shape = temp_arr.shape # get shape - temp_arr = temp_arr.swapaxes(1,2) # swap ch and time axis - temp_arr = temp_arr.reshape(arr_shape[0]*arr_shape[2],arr_shape[1]) # reshape by combining epochs and times - # Get indices for eyes open and closed - EC_index = final_epochs[i].events[:,2] == 1 - EO_index = final_epochs[i].events[:,2] == 2 - # Repeat with 4s * sample frequency to correct for concatenation of times and epochs - EC_index = np.repeat(EC_index,4*sfreq) - EO_index = np.repeat(EO_index,4*sfreq) - # Divide into eye status - EC_data = temp_arr[EC_index] - EO_data = temp_arr[EO_index] - # Divide into trials - EC_gap_idx = np.array([0]+list(gaps_trials_idx[i,0])+[len(EC_data)]) - EO_gap_idx = np.array([0]+list(gaps_trials_idx[i,1])+[len(EO_data)]) +# # %% Long-range temporal correlations (LRTC) +# """ +# See Hardstone et al, 2012 +# Hurst exponent estimation steps: +# 1. Preprocess +# 2. Band-pass filter for frequency band of interest +# 3. Hilbert transform to obtain amplitude envelope +# 4. Perform DFA +# 4.1 Compute cumulative sum of time series to create signal profile +# 4.2 Define set of window sizes (see below) +# 4.3 Remove the linear trend using least-squares for each window +# 4.4 Calculate standard deviation for each window and take the mean +# 4.5 Plot fluctuation function (Standard deviation) as function +# for all window sizes, on double logarithmic scale +# 4.6 The DFA exponent alpha correspond to Hurst exponent +# f(L) = sd = L^alpha (with alpha as linear coefficient in log plot) + +# If 0 < alpha < 0.5: The process exhibits anti-correlations +# If 0.5 < alpha < 1: The process exhibits positive correlations +# If alpha = 0.5: The process is indistinguishable from a random process +# If 1.0 < alpha < 2.0: The process is non-stationary. 
H = alpha - 1 + +# Window sizes should be equally spaced on a logarithmic scale +# Sizes should be at least 4 samples and up to 10% of total signal length +# Filters can influence neighboring samples, thus filters should be tested +# on white noise to estimate window sizes that are unaffected by filters + +# filter_length=str(2*1/fmin)+"s" # cannot be used with default transition bandwidth + +# """ +# # From simulations with white noise I determined window size thresholds for the 5 frequency bands: +# thresholds = [7,7,7,6.5,6.5] +# # And their corresponding log step sizes +# with open("LRTC_log_win_sizes.pkl", "rb") as filehandle: +# log_win_sizes = pickle.load(filehandle) + +# # Variables for the the different conditions +# # Sampling frequency +# sfreq = final_epochs[0].info["sfreq"] +# # Channels +# ch_names = final_epochs[0].info["ch_names"] +# n_channels = len(ch_names) +# # Frequency +# Freq_Bands = {"delta": [1.25, 4.0], +# "theta": [4.0, 8.0], +# "alpha": [8.0, 13.0], +# "beta": [13.0, 30.0], +# "gamma": [30.0, 49.0]} +# n_freq_bands = len(Freq_Bands) +# # Eye status +# eye_status = list(final_epochs[0].event_id.keys()) +# n_eye_status = len(eye_status) + +# ### Estimating Hurst exponent for the data +# # The data should be re-referenced to common average (Already done) + +# # Data are transformed to numpy arrays +# # Then divided into EO and EC and further into each of the 5 trials +# # So DFA is estimated for each trial separately, which was concluded from simulations +# gaps_trials_idx = np.load("Gaps_trials_idx.npy") # re-used from microstate analysis +# n_trials = 5 + +# H_data = [] +# for i in range(n_subjects): +# # Transform data to correct shape +# temp_arr = final_epochs[i].get_data() # get data +# arr_shape = temp_arr.shape # get shape +# temp_arr = temp_arr.swapaxes(1,2) # swap ch and time axis +# temp_arr = temp_arr.reshape(arr_shape[0]*arr_shape[2],arr_shape[1]) # reshape by combining epochs and times +# # Get indices for eyes open and closed +# EC_index = final_epochs[i].events[:,2] == 1 +# EO_index = final_epochs[i].events[:,2] == 2 +# # Repeat with 4s * sample frequency to correct for concatenation of times and epochs +# EC_index = np.repeat(EC_index,4*sfreq) +# EO_index = np.repeat(EO_index,4*sfreq) +# # Divide into eye status +# EC_data = temp_arr[EC_index] +# EO_data = temp_arr[EO_index] +# # Divide into trials +# EC_gap_idx = np.array([0]+list(gaps_trials_idx[i,0])+[len(EC_data)]) +# EO_gap_idx = np.array([0]+list(gaps_trials_idx[i,1])+[len(EO_data)]) - EC_trial_data = [] - EO_trial_data = [] - for t in range(n_trials): - EC_trial_data.append(EC_data[EC_gap_idx[t]:EC_gap_idx[t+1]]) - EO_trial_data.append(EO_data[EO_gap_idx[t]:EO_gap_idx[t+1]]) +# EC_trial_data = [] +# EO_trial_data = [] +# for t in range(n_trials): +# EC_trial_data.append(EC_data[EC_gap_idx[t]:EC_gap_idx[t+1]]) +# EO_trial_data.append(EO_data[EO_gap_idx[t]:EO_gap_idx[t+1]]) - # Save data - H_data.append([EC_trial_data,EO_trial_data]) # output [subject][eye][trial][time,ch] - -# Calculate H for each subject, eye status, trial, freq and channel -H_arr = np.zeros((n_subjects,n_eye_status,n_trials,n_channels,n_freq_bands)) -w_len = [len(ele) for ele in log_win_sizes] -DFA_arr = np.empty((n_subjects,n_eye_status,n_trials,n_channels,n_freq_bands,2,np.max(w_len))) -DFA_arr[:] = np.nan - -# Get current time -c_time1 = time.localtime() -c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1) -print("Started",c_time1) - -# Nolds are already using all cores so multiprocessing with make it slower -# 
Warning occurs when R2 is estimated during detrending - but R2 is not used -warnings.simplefilter("ignore") -for i in range(n_subjects): - # Pre-allocate memory - DFA_temp = np.empty((n_eye_status,n_trials,n_channels,n_freq_bands,2,np.max(w_len))) - DFA_temp[:] = np.nan - H_temp = np.empty((n_eye_status,n_trials,n_channels,n_freq_bands)) - for e in range(n_eye_status): - for trial in range(n_trials): - for c in range(n_channels): - # Get the data - signal = H_data[i][e][trial][:,c] +# # Save data +# H_data.append([EC_trial_data,EO_trial_data]) # output [subject][eye][trial][time,ch] + +# # Calculate H for each subject, eye status, trial, freq and channel +# H_arr = np.zeros((n_subjects,n_eye_status,n_trials,n_channels,n_freq_bands)) +# w_len = [len(ele) for ele in log_win_sizes] +# DFA_arr = np.empty((n_subjects,n_eye_status,n_trials,n_channels,n_freq_bands,2,np.max(w_len))) +# DFA_arr[:] = np.nan + +# # Get current time +# c_time1 = time.localtime() +# c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1) +# print("Started",c_time1) + +# # Nolds are already using all cores so multiprocessing with make it slower +# # Warning occurs when R2 is estimated during detrending - but R2 is not used +# warnings.simplefilter("ignore") +# for i in range(n_subjects): +# # Pre-allocate memory +# DFA_temp = np.empty((n_eye_status,n_trials,n_channels,n_freq_bands,2,np.max(w_len))) +# DFA_temp[:] = np.nan +# H_temp = np.empty((n_eye_status,n_trials,n_channels,n_freq_bands)) +# for e in range(n_eye_status): +# for trial in range(n_trials): +# for c in range(n_channels): +# # Get the data +# signal = H_data[i][e][trial][:,c] - counter = 0 # prepare counter - for fmin, fmax in Freq_Bands.values(): - # Filter for each freq band - signal_filtered = mne.filter.filter_data(signal, sfreq=sfreq, verbose=0, - l_freq=fmin, h_freq=fmax) - # Hilbert transform - analytic_signal = scipy.signal.hilbert(signal_filtered) - # Get Amplitude envelope - # np.abs is the same as np.linalg.norm, i.e. the length for complex input which is the amplitude - ampltude_envelope = np.abs(analytic_signal) - # Perform DFA using predefined window sizes from simulation - a, dfa_data = nolds.dfa(ampltude_envelope, - nvals=np.exp(log_win_sizes[counter]).astype("int"), - debug_data=True) - # Save DFA results - DFA_temp[e,trial,c,counter,:,0:w_len[counter]] = dfa_data[0:2] - H_temp[e,trial,c,counter] = a - # Update counter - counter += 1 - - # Print run status - print("Finished {} out of {}".format(i+1,n_subjects)) - # Save the results - H_arr[i] = H_temp - DFA_arr[i] = DFA_temp - -warnings.simplefilter("default") - -# Get current time -c_time2 = time.localtime() -c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2) -print("Started", c_time1, "\nCurrent Time",c_time2) - -# Save the DFA analysis data -np.save(Feature_savepath+"DFA_arr.npy", DFA_arr) -np.save(Feature_savepath+"H_arr.npy", H_arr) - -# Load -DFA_arr = np.load(Feature_savepath+"DFA_arr.npy") -H_arr = np.load(Feature_savepath+"H_arr.npy") +# counter = 0 # prepare counter +# for fmin, fmax in Freq_Bands.values(): +# # Filter for each freq band +# signal_filtered = mne.filter.filter_data(signal, sfreq=sfreq, verbose=0, +# l_freq=fmin, h_freq=fmax) +# # Hilbert transform +# analytic_signal = scipy.signal.hilbert(signal_filtered) +# # Get Amplitude envelope +# # np.abs is the same as np.linalg.norm, i.e. 
the length for complex input which is the amplitude +# ampltude_envelope = np.abs(analytic_signal) +# # Perform DFA using predefined window sizes from simulation +# a, dfa_data = nolds.dfa(ampltude_envelope, +# nvals=np.exp(log_win_sizes[counter]).astype("int"), +# debug_data=True) +# # Save DFA results +# DFA_temp[e,trial,c,counter,:,0:w_len[counter]] = dfa_data[0:2] +# H_temp[e,trial,c,counter] = a +# # Update counter +# counter += 1 + +# # Print run status +# print("Finished {} out of {}".format(i+1,n_subjects)) +# # Save the results +# H_arr[i] = H_temp +# DFA_arr[i] = DFA_temp + +# warnings.simplefilter("default") + +# # Get current time +# c_time2 = time.localtime() +# c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2) +# print("Started", c_time1, "\nCurrent Time",c_time2) + +# # Save the DFA analysis data +# np.save(Feature_savepath+"DFA_arr.npy", DFA_arr) +# np.save(Feature_savepath+"H_arr.npy", H_arr) -# Average the Hurst Exponent across trials -H_arr = np.mean(H_arr, axis=2) - -# Convert to Pandas dataframe (Hurst exponent) -# The dimensions will each be a column with numbers and the last column will be the actual values -arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, H_arr.shape), indexing="ij"))) + [H_arr.ravel()]) -H_data_df = pd.DataFrame(arr, columns = ["Subject_ID", "Eye_status", "Channel", "Freq_band", "Value"]) -# Change from numerical coding to actual values -eye_status = list(final_epochs[0].event_id.keys()) -ch_name = final_epochs[0].info["ch_names"] - -index_values = [Subject_id,eye_status,ch_name,list(Freq_Bands.keys())] -for col in range(len(index_values)): - col_name = H_data_df.columns[col] - for shape in range(H_arr.shape[col]): # notice this is the shape of original numpy array. Not shape of DF - H_data_df.loc[H_data_df.iloc[:,col] == shape,col_name]\ - = index_values[col][shape] - -# Add group status -Group_status = np.array(["CTRL"]*len(H_data_df["Subject_ID"])) -Group_status[np.array([i in cases for i in H_data_df["Subject_ID"]])] = "PTSD" -# Add to dataframe -H_data_df.insert(2, "Group_status", Group_status) - -# Fix Freq_band categorical order -H_data_df["Freq_band"] = H_data_df["Freq_band"].astype("category").\ - cat.reorder_categories(list(Freq_Bands.keys()), ordered=True) - -# Global Hurst exponent -H_data_df_global = H_data_df.groupby(["Subject_ID", "Eye_status", "Freq_band"]).mean().reset_index() # by default pandas mean skip nan -# Add group status (cannot use group_by as each subject only have 1 group, not both) -Group_status = np.array(["CTRL"]*len(H_data_df_global["Subject_ID"])) -Group_status[np.array([i in cases for i in H_data_df_global["Subject_ID"]])] = "PTSD" -# Add to dataframe -H_data_df_global.insert(2, "Group_status", Group_status) -# Add dummy variable for re-using plot code -dummy_variable = ["Global Hurst Exponent"]*H_data_df_global.shape[0] -H_data_df_global.insert(3, "Measurement", dummy_variable ) - -# Save the data -H_data_df.to_pickle(os.path.join(Feature_savepath,"H_data_df.pkl")) -H_data_df_global.to_pickle(os.path.join(Feature_savepath,"H_data_global_df.pkl")) - -# %% Source localization of sensor data -# Using non-interpolated channels -# Even interpolated channels during preprocessing and visual inspection -# are dropped +# # Load +# DFA_arr = np.load(Feature_savepath+"DFA_arr.npy") +# H_arr = np.load(Feature_savepath+"H_arr.npy") + +# # Average the Hurst Exponent across trials +# H_arr = np.mean(H_arr, axis=2) + +# # Convert to Pandas dataframe (Hurst exponent) +# # The dimensions will each be a 
column with numbers and the last column will be the actual values +# arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, H_arr.shape), indexing="ij"))) + [H_arr.ravel()]) +# H_data_df = pd.DataFrame(arr, columns = ["Subject_ID", "Eye_status", "Channel", "Freq_band", "Value"]) +# # Change from numerical coding to actual values +# eye_status = list(final_epochs[0].event_id.keys()) +# ch_name = final_epochs[0].info["ch_names"] + +# index_values = [Subject_id,eye_status,ch_name,list(Freq_Bands.keys())] +# for col in range(len(index_values)): +# col_name = H_data_df.columns[col] +# for shape in range(H_arr.shape[col]): # notice this is the shape of original numpy array. Not shape of DF +# H_data_df.loc[H_data_df.iloc[:,col] == shape,col_name]\ +# = index_values[col][shape] + +# # Add group status +# Group_status = np.array(["CTRL"]*len(H_data_df["Subject_ID"])) +# Group_status[np.array([i in cases for i in H_data_df["Subject_ID"]])] = "PTSD" +# # Add to dataframe +# H_data_df.insert(2, "Group_status", Group_status) + +# # Fix Freq_band categorical order +# H_data_df["Freq_band"] = H_data_df["Freq_band"].astype("category").\ +# cat.reorder_categories(list(Freq_Bands.keys()), ordered=True) + +# # Global Hurst exponent +# H_data_df_global = H_data_df.groupby(["Subject_ID", "Eye_status", "Freq_band"]).mean().reset_index() # by default pandas mean skip nan +# # Add group status (cannot use group_by as each subject only have 1 group, not both) +# Group_status = np.array(["CTRL"]*len(H_data_df_global["Subject_ID"])) +# Group_status[np.array([i in cases for i in H_data_df_global["Subject_ID"]])] = "PTSD" +# # Add to dataframe +# H_data_df_global.insert(2, "Group_status", Group_status) +# # Add dummy variable for re-using plot code +# dummy_variable = ["Global Hurst Exponent"]*H_data_df_global.shape[0] +# H_data_df_global.insert(3, "Measurement", dummy_variable ) + +# # Save the data +# H_data_df.to_pickle(os.path.join(Feature_savepath,"H_data_df.pkl")) +# H_data_df_global.to_pickle(os.path.join(Feature_savepath,"H_data_global_df.pkl")) + +# # %% Source localization of sensor data +# # Using non-interpolated channels +# # Even interpolated channels during preprocessing and visual inspection +# # are dropped + +# # Prepare epochs for estimation of source connectivity +# source_epochs = [0]*n_subjects +# for i in range(n_subjects): +# source_epochs[i] = final_epochs[i].copy() + +# ### Make forward solutions +# # A forward solution is first made for all individuals with no dropped channels +# # Afterwards individual forward solutions are made for subjects with bad +# # channels that were interpolated in preprocessing and these are dropped +# # First forward operator is computed using a template MRI for each dataset +# fs_dir = "/home/glia/MNE-fsaverage-data/fsaverage" +# subjects_dir = os.path.dirname(fs_dir) +# trans = "fsaverage" +# src = os.path.join(fs_dir, "bem", "fsaverage-ico-5-src.fif") +# bem = os.path.join(fs_dir, "bem", "fsaverage-5120-5120-5120-bem-sol.fif") + +# # Read the template sourcespace +# sourcespace = mne.read_source_spaces(src) + +# temp_idx = 0 # Index with subject that had no bad channels +# subject_eeg = source_epochs[temp_idx].copy() +# subject_eeg.set_eeg_reference(projection=True) # needed for inverse modelling +# # Make forward solution +# fwd = mne.make_forward_solution(subject_eeg.info, trans=trans, src=src, +# bem=bem, eeg=True, mindist=5.0, n_jobs=1) +# # Save forward operator +# fname_fwd = "./Source_fwd/fsaverage-fwd.fif" +# 
mne.write_forward_solution(fname_fwd, fwd, overwrite=True) + +# # A specific forward solution is also made for each subject with bad channels +# with open("./Preprocessing/bad_ch.pkl", "rb") as file: +# bad_ch = pickle.load(file) + +# All_bad_ch = bad_ch +# All_drop_epochs = dropped_epochs_df +# All_dropped_ch = [] + +# Bad_ch_idx = [idx for idx, item in enumerate(All_bad_ch) if item != 0] +# Bad_ch_subjects = All_drop_epochs["Subject_ID"][Bad_ch_idx] +# # For each subject with bad channels, drop the channels and make forward operator +# for n in range(len(Bad_ch_subjects)): +# Subject = Bad_ch_subjects.iloc[n] +# try: +# Subject_idx = Subject_id.index(Subject) +# # Get unique bad channels +# Bad_ch0 = All_bad_ch[Bad_ch_idx[n]] +# Bad_ch1 = [] +# for i2 in range(len(Bad_ch0)): +# if type(Bad_ch0[i2]) == list: +# for i3 in range(len(Bad_ch0[i2])): +# Bad_ch1.append(Bad_ch0[i2][i3]) +# elif type(Bad_ch0[i2]) == str: +# Bad_ch1.append(Bad_ch0[i2]) +# Bad_ch1 = np.unique(Bad_ch1) +# # Drop the bad channels +# source_epochs[Subject_idx].drop_channels(Bad_ch1) +# # Save the overview of dropped channels +# All_dropped_ch.append([Subject,Subject_idx,Bad_ch1]) +# # Make forward operator +# subject_eeg = source_epochs[Subject_idx].copy() +# subject_eeg.set_eeg_reference(projection=True) # needed for inverse modelling +# # Make forward solution +# fwd = mne.make_forward_solution(subject_eeg.info, trans=trans, src=src, +# bem=bem, eeg=True, mindist=5.0, n_jobs=1) +# # Save forward operator +# fname_fwd = "./Source_fwd/fsaverage_{}-fwd.fif".format(Subject) +# mne.write_forward_solution(fname_fwd, fwd, overwrite=True) +# except: +# print(Subject,"was already dropped") + +# with open("./Preprocessing/All_datasets_bad_ch.pkl", "wb") as filehandle: +# pickle.dump(All_dropped_ch, filehandle) + + +# # %% Load forward operators +# # Re-use for all subjects without dropped channels +# fname_fwd = "./Source_fwd/fsaverage-fwd.fif" +# fwd = mne.read_forward_solution(fname_fwd) + +# fwd_list = [fwd]*n_subjects + +# # Use specific forward solutions for subjects with dropped channels +# with open("./Preprocessing/All_datasets_bad_ch.pkl", "rb") as file: +# All_dropped_ch = pickle.load(file) + +# for i in range(len(All_dropped_ch)): +# Subject = All_dropped_ch[i][0] +# Subject_idx = All_dropped_ch[i][1] +# fname_fwd = "./Source_fwd/fsaverage_{}-fwd.fif".format(Subject) +# fwd = mne.read_forward_solution(fname_fwd) +# fwd_list[Subject_idx] = fwd + +# # Check the correct number of channels are present in fwd +# random_point = int(np.random.randint(0,len(All_dropped_ch)-1,1)) +# assert len(fwds[All_dropped_ch[random_point][1]].ch_names) == source_epochs[All_dropped_ch[random_point][1]].info["nchan"] + +# # %% Make parcellation +# # After mapping to source space, I end up with 20484 vertices +# # but I wanted to map to fewer sources and not many more +# # Thus I need to perform parcellation +# # Get labels for FreeSurfer "aparc" cortical parcellation (example with 74 labels/hemi - Destriuex) +# labels_aparc = mne.read_labels_from_annot("fsaverage", parc="aparc.a2009s", +# subjects_dir=subjects_dir) +# labels_aparc = labels_aparc[:-2] # remove unknowns + +# labels_aparc_names = [label.name for label in labels_aparc] + +# # Manually adding the 31 ROIs (14-lh/rh + 3 in midline) from Toll et al, 2020 +# # Making fuction to take subset of a label +# def label_subset(label, subset, name="ROI_name"): +# label_subset = mne.Label(label.vertices[subset], label.pos[subset,:], +# label.values[subset], label.hemi, +# name = 
"{}-{}".format(name,label.hemi), +# subject = label.subject, color = None) +# return label_subset + +# ### Visual area 1 (V1 and somatosensory cortex BA1-3) +# label_filenames = ["lh.V1.label", "rh.V1.label", +# "lh.BA1.label", "rh.BA1.label", +# "lh.BA2.label", "rh.BA2.label", +# "lh.BA3a.label", "rh.BA3a.label", +# "lh.BA3b.label", "rh.BA3b.label"] +# labels0 = [0]*len(label_filenames) +# for i, filename in enumerate(label_filenames): +# labels0[i] = mne.read_label(os.path.join(fs_dir, "label", filename), subject="fsaverage") +# # Add V1 to final label variable +# labels = labels0[:2] +# # Rename to remove redundant hemi information +# labels[0].name = "V1-{}".format(labels[0].hemi) +# labels[1].name = "V1-{}".format(labels[1].hemi) +# # Assign a color +# labels[0].color = matplotlib.colors.to_rgba("salmon") +# labels[1].color = matplotlib.colors.to_rgba("salmon") +# # Combine Brodmann Areas for SMC. Only use vertices ones to avoid duplication error +# SMC_labels = labels0[2:] +# for hem in range(2): +# SMC_p1 = SMC_labels[hem] +# for i in range(1,len(SMC_labels)//2): +# SMC_p2 = SMC_labels[hem+2*i] +# p2_idx = np.isin(SMC_p2.vertices, SMC_p1.vertices, invert=True) +# SMC_p21 = label_subset(SMC_p2, p2_idx, "SMC") +# SMC_p1 = SMC_p1.__add__(SMC_p21) +# SMC_p1.name = SMC_p21.name +# # Assign a color +# SMC_p1.color = matplotlib.colors.to_rgba("orange") +# labels.append(SMC_p1) + +# ### Inferior frontal junction +# # Located at junction between inferior frontal and inferior precentral sulcus +# label_aparc_names0 = ["S_front_inf","S_precentral-inf-part"] +# temp_labels = [] +# for i in range(len(label_aparc_names0)): +# labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])] +# for i2 in range(len(labels_aparc_idx)): +# temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy()) + +# pos1 = temp_labels[0].pos +# pos2 = temp_labels[2].pos +# distm = scipy.spatial.distance.cdist(pos1,pos2) +# # Find the closest points between the 2 ROIs +# l1_idx = np.unique(np.where(distm<np.quantile(distm, 0.001))[0]) # q chosen to correspond to around 10% of ROI +# l2_idx = np.unique(np.where(distm<np.quantile(distm, 0.0005))[1]) # q chosen to correspond to around 10% of ROI + +# IFJ_label_p1 = label_subset(temp_labels[0], l1_idx, "IFJ") +# IFJ_label_p2 = label_subset(temp_labels[2], l2_idx, "IFJ") +# # Combine the 2 parts +# IFJ_label = IFJ_label_p1.__add__(IFJ_label_p2) +# IFJ_label.name = IFJ_label_p1.name +# # Assign a color +# IFJ_label.color = matplotlib.colors.to_rgba("chartreuse") +# # Append to final list +# labels.append(IFJ_label) + +# # Do the same for the right hemisphere +# pos1 = temp_labels[1].pos +# pos2 = temp_labels[3].pos +# distm = scipy.spatial.distance.cdist(pos1,pos2) +# # Find the closest points between the 2 ROIs +# l1_idx = np.unique(np.where(distm<np.quantile(distm, 0.00075))[0]) # q chosen to correspond to around 10% of ROI +# l2_idx = np.unique(np.where(distm<np.quantile(distm, 0.0005))[1]) # q chosen to correspond to around 10% of ROI +# IFJ_label_p1 = label_subset(temp_labels[1], l1_idx, "IFJ") +# IFJ_label_p2 = label_subset(temp_labels[3], l2_idx, "IFJ") +# # Combine the 2 parts +# IFJ_label = IFJ_label_p1.__add__(IFJ_label_p2) +# IFJ_label.name = IFJ_label_p1.name +# # Assign a color +# IFJ_label.color = matplotlib.colors.to_rgba("chartreuse") +# # Append to final list +# labels.append(IFJ_label) + +# ### Intraparietal sulcus +# label_aparc_names0 = ["S_intrapariet_and_P_trans"] +# labels_aparc_idx = 
[labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[0])] +# for i in range(len(labels_aparc_idx)): +# labels.append(labels_aparc[labels_aparc_idx[i]].copy()) +# labels[-1].name = "IPS-{}".format(labels[-1].hemi) + +# ### Frontal eye field as intersection between middle frontal gyrus and precentral gyrus +# label_aparc_names0 = ["G_front_middle","G_precentral"] +# temp_labels = [] +# for i in range(len(label_aparc_names0)): +# labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])] +# for i2 in range(len(labels_aparc_idx)): +# temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy()) + +# # Take 10% of middle frontal gyrus closest to precentral gyrus (most posterior) +# temp_label0 = temp_labels[0] +# G_fm_y = temp_label0.pos[:,1] +# thres_G_fm_y = np.sort(G_fm_y)[len(G_fm_y)//10] +# idx_p1 = np.where(G_fm_y<thres_G_fm_y)[0] +# FEF_label_p1 = label_subset(temp_label0, idx_p1, "FEF") +# # Take 10% closest for precentral gyrus (most anterior) +# temp_label0 = temp_labels[2] +# # I cannot only use y (anterior/posterior) but also need to restrict z-position +# G_pre_cen_z = temp_label0.pos[:,2] +# thres_G_pre_cen_z = 0.04 # visually inspected threshold +# G_pre_cen_y = temp_label0.pos[:,1] +# thres_G_pre_cen_y = np.sort(G_pre_cen_y[G_pre_cen_z>thres_G_pre_cen_z])[-len(G_pre_cen_y)//10] # notice - for anterior +# idx_p2 = np.where((G_pre_cen_y>thres_G_pre_cen_y) & (G_pre_cen_z>thres_G_pre_cen_z))[0] +# FEF_label_p2 = label_subset(temp_label0, idx_p2, "FEF") +# # Combine the 2 parts +# FEF_label = FEF_label_p1.__add__(FEF_label_p2) +# FEF_label.name = FEF_label_p1.name +# # Assign a color +# FEF_label.color = matplotlib.colors.to_rgba("aqua") +# # Append to final list +# labels.append(FEF_label) + +# # Do the same for the right hemisphere +# temp_label0 = temp_labels[1] +# G_fm_y = temp_label0.pos[:,1] +# thres_G_fm_y = np.sort(G_fm_y)[len(G_fm_y)//10] +# idx_p1 = np.where(G_fm_y<thres_G_fm_y)[0] +# FEF_label_p1 = label_subset(temp_label0, idx_p1, "FEF") + +# temp_label0 = temp_labels[3] +# G_pre_cen_z = temp_label0.pos[:,2] +# thres_G_pre_cen_z = 0.04 # visually inspected threshold +# G_pre_cen_y = temp_label0.pos[:,1] +# thres_G_pre_cen_y = np.sort(G_pre_cen_y[G_pre_cen_z>thres_G_pre_cen_z])[-len(G_pre_cen_y)//10] # notice - for anterior +# idx_p2 = np.where((G_pre_cen_y>thres_G_pre_cen_y) & (G_pre_cen_z>thres_G_pre_cen_z))[0] +# FEF_label_p2 = label_subset(temp_label0, idx_p2, "FEF") +# # Combine the 2 parts +# FEF_label = FEF_label_p1.__add__(FEF_label_p2) +# FEF_label.name = FEF_label_p1.name +# # Assign a color +# FEF_label.color = matplotlib.colors.to_rgba("aqua") +# # Append to final list +# labels.append(FEF_label) + +# ### Supplementary eye fields +# # Located at caudal end of frontal gyrus and upper part of paracentral sulcus +# label_aparc_names0 = ["G_and_S_paracentral","G_front_sup"] +# temp_labels = [] +# for i in range(len(label_aparc_names0)): +# labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])] +# for i2 in range(len(labels_aparc_idx)): +# temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy()) + +# pos1 = temp_labels[0].pos +# pos2 = temp_labels[2].pos +# distm = scipy.spatial.distance.cdist(pos1,pos2) +# # Find the closest points between the 2 ROIs +# l1_idx = np.unique(np.where(distm<np.quantile(distm, 0.0005))[0]) # q chosen to correspond to around 15% of ROI +# l2_idx = 
np.unique(np.where(distm<np.quantile(distm, 0.005))[1]) # q chosen to correspond to around 10% of ROI +# # Notice that superior frontal gyrus is around 4 times bigger than paracentral +# len(l1_idx)/pos1.shape[0] +# len(l2_idx)/pos2.shape[0] +# # Only use upper part +# z_threshold = 0.06 # visually inspected +# l1_idx = l1_idx[pos1[l1_idx,2] > z_threshold] +# l2_idx = l2_idx[pos2[l2_idx,2] > z_threshold] + +# SEF_label_p1 = label_subset(temp_labels[0], l1_idx, "SEF") +# SEF_label_p2 = label_subset(temp_labels[2], l2_idx, "SEF") +# # Combine the 2 parts +# SEF_label = SEF_label_p1.__add__(SEF_label_p2) +# SEF_label.name = SEF_label_p1.name +# # Assign a color +# SEF_label.color = matplotlib.colors.to_rgba("royalblue") +# # Append to final list +# labels.append(SEF_label) + +# # Do the same for the right hemisphere +# pos1 = temp_labels[1].pos +# pos2 = temp_labels[3].pos +# distm = scipy.spatial.distance.cdist(pos1,pos2) +# # Find the closest points between the 2 ROIs +# l1_idx = np.unique(np.where(distm<np.quantile(distm, 0.0005))[0]) # q chosen to correspond to around 15% of ROI +# l2_idx = np.unique(np.where(distm<np.quantile(distm, 0.005))[1]) # q chosen to correspond to around 10% of ROI +# # Notice that superior frontal gyrus is around 4 times bigger than paracentral +# len(l1_idx)/pos1.shape[0] +# len(l2_idx)/pos2.shape[0] +# # Only use upper part +# z_threshold = 0.06 # visually inspected +# l1_idx = l1_idx[pos1[l1_idx,2] > z_threshold] +# l2_idx = l2_idx[pos2[l2_idx,2] > z_threshold] + +# SEF_label_p1 = label_subset(temp_labels[1], l1_idx, "SEF") +# SEF_label_p2 = label_subset(temp_labels[3], l2_idx, "SEF") +# # Combine the 2 parts +# SEF_label = SEF_label_p1.__add__(SEF_label_p2) +# SEF_label.name = SEF_label_p1.name +# # Assign a color +# SEF_label.color = matplotlib.colors.to_rgba("royalblue") +# # Append to final list +# labels.append(SEF_label) + +# ### Posterior cingulate cortex +# label_aparc_names0 = ["G_cingul-Post-dorsal", "G_cingul-Post-ventral"] +# temp_labels = [] +# for i in range(len(label_aparc_names0)): +# labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])] +# for i2 in range(len(labels_aparc_idx)): +# temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy()) +# labels0 = [] +# for hem in range(2): +# PCC_p1 = temp_labels[hem] +# for i in range(1,len(temp_labels)//2): +# PCC_p2 = temp_labels[hem+2*i] +# PCC_p1 = PCC_p1.__add__(PCC_p2) +# PCC_p1.name = "PCC-{}".format(PCC_p1.hemi) +# labels0.append(PCC_p1) +# # Combine the 2 hemisphere in 1 label +# labels.append(labels0[0].__add__(labels0[1])) + +# ### Medial prefrontal cortex +# # From their schematic it looks like rostral 1/4 of superior frontal gyrus +# label_aparc_names0 = ["G_front_sup"] +# temp_labels = [] +# for i in range(len(label_aparc_names0)): +# labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])] +# for i2 in range(len(labels_aparc_idx)): +# temp_labels0 = labels_aparc[labels_aparc_idx[i2]].copy() +# temp_labels0 = temp_labels0.split(4, subjects_dir=subjects_dir)[3] +# temp_labels0.name = "mPFC-{}".format(temp_labels0.hemi) +# temp_labels.append(temp_labels0) +# # Combine the 2 hemisphere in 1 label +# labels.append(temp_labels[0].__add__(temp_labels[1])) + +# ### Angular gyrus +# label_aparc_names0 = ["G_pariet_inf-Angular"] +# for i in range(len(label_aparc_names0)): +# labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if 
l.startswith(label_aparc_names0[i])] +# for i2 in range(len(labels_aparc_idx)): +# temp_labels = labels_aparc[labels_aparc_idx[i2]].copy() +# temp_labels.name = "ANG-{}".format(temp_labels.hemi) +# labels.append(temp_labels) + +# ### Posterior middle frontal gyrus +# label_aparc_names0 = ["G_front_middle"] +# for i in range(len(label_aparc_names0)): +# labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])] +# for i2 in range(len(labels_aparc_idx)): +# temp_labels = labels_aparc[labels_aparc_idx[i2]].copy() +# temp_labels = temp_labels.split(2, subjects_dir=subjects_dir)[0] +# temp_labels.name = "PMFG-{}".format(temp_labels.hemi) +# labels.append(temp_labels) + +# ### Inferior parietal lobule +# # From their parcellation figure seems to be rostral angular gyrus and posterior supramarginal gyrus +# label_aparc_names0 = ["G_pariet_inf-Angular","G_pariet_inf-Supramar"] +# temp_labels = [] +# for i in range(len(label_aparc_names0)): +# labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])] +# for i2 in range(len(labels_aparc_idx)): +# temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy()) +# # Split angular in 2 and get rostral part +# temp_labels[0] = temp_labels[0].split(2, subjects_dir=subjects_dir)[1] +# temp_labels[1] = temp_labels[1].split(2, subjects_dir=subjects_dir)[1] +# # Split supramarginal in 2 and get posterior part +# temp_labels[2] = temp_labels[2].split(2, subjects_dir=subjects_dir)[0] +# temp_labels[3] = temp_labels[3].split(2, subjects_dir=subjects_dir)[0] + +# for hem in range(2): +# PCC_p1 = temp_labels[hem] +# for i in range(1,len(temp_labels)//2): +# PCC_p2 = temp_labels[hem+2*i] +# PCC_p1 = PCC_p1.__add__(PCC_p2) +# PCC_p1.name = "IPL-{}".format(PCC_p1.hemi) +# labels.append(PCC_p1) + +# ### Orbital gyrus +# # From their figure it seems to correspond to orbital part of inferior frontal gyrus +# label_aparc_names0 = ["G_front_inf-Orbital"] +# for i in range(len(label_aparc_names0)): +# labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])] +# for i2 in range(len(labels_aparc_idx)): +# temp_labels = labels_aparc[labels_aparc_idx[i2]].copy() +# temp_labels.name = "ORB-{}".format(temp_labels.hemi) +# labels.append(temp_labels) + +# ### Middle temporal gyrus +# # From their figure it seems to only be 1/4 of MTG at the 2nd to last caudal part +# label_aparc_names0 = ["G_temporal_middle"] +# for i in range(len(label_aparc_names0)): +# labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])] +# for i2 in range(len(labels_aparc_idx)): +# temp_labels = labels_aparc[labels_aparc_idx[i2]].copy() +# temp_labels = temp_labels.split(4, subjects_dir=subjects_dir)[1] +# temp_labels.name = "MTG-{}".format(temp_labels.hemi) +# labels.append(temp_labels) + +# ### Anterior middle frontal gyrus +# label_aparc_names0 = ["G_front_middle"] +# for i in range(len(label_aparc_names0)): +# labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])] +# for i2 in range(len(labels_aparc_idx)): +# temp_labels = labels_aparc[labels_aparc_idx[i2]].copy() +# temp_labels = temp_labels.split(2, subjects_dir=subjects_dir)[1] +# temp_labels.name = "AMFG-{}".format(temp_labels.hemi) +# labels.append(temp_labels) + +# ### Insula +# label_aparc_names0 = ["G_Ins_lg_and_S_cent_ins","G_insular_short"] +# temp_labels 
= [] +# for i in range(len(label_aparc_names0)): +# labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])] +# for i2 in range(len(labels_aparc_idx)): +# temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy()) +# for hem in range(2): +# PCC_p1 = temp_labels[hem] +# for i in range(1,len(temp_labels)//2): +# PCC_p2 = temp_labels[hem+2*i] +# PCC_p1 = PCC_p1.__add__(PCC_p2) +# PCC_p1.name = "INS-{}".format(PCC_p1.hemi) +# labels.append(PCC_p1) + +# ### (Dorsal) Anterior Cingulate Cortex +# label_aparc_names0 = ["G_and_S_cingul-Ant"] +# temp_labels = [] +# for i in range(len(label_aparc_names0)): +# labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])] +# for i2 in range(len(labels_aparc_idx)): +# temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy()) +# temp_labels[-1].name = "ACC-{}".format(temp_labels[-1].hemi) +# # Combine the 2 hemisphere in 1 label +# labels.append(temp_labels[0].__add__(temp_labels[1])) + +# ### Supramarginal Gyrus +# label_aparc_names0 = ["G_pariet_inf-Supramar"] +# for i in range(len(label_aparc_names0)): +# labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])] +# for i2 in range(len(labels_aparc_idx)): +# temp_labels = labels_aparc[labels_aparc_idx[i2]].copy() +# temp_labels.name = "SUP-{}".format(temp_labels.hemi) +# labels.append(temp_labels) + +# print("{} ROIs have been defined".format(len(labels))) + +# # # Visualize positions +# # fig = plt.figure() +# # ax = fig.add_subplot(111, projection="3d") +# # for i in range(0,3): +# # temp_pos = temp_labels[i].pos +# # ax.scatter(temp_pos[:,0],temp_pos[:,1],temp_pos[:,2], marker="o", alpha=0.1) +# # # Add to plot +# # ax.scatter(labels[-1].pos[:,0],labels[-1].pos[:,1],labels[-1].pos[:,2], marker="o") + +# # # Visualize the labels +# # # temp_l = labels_aparc[labels_aparc_idx[0]] +# # temp_l = labels[-2] +# # l_stc = stc[100].in_label(temp_l) +# # l_stc.vertices + +# # l_stc.plot(**surfer_kwargs) + +# # Save the annotation file +# with open("custom_aparc2009_Li_et_al_2022.pkl", "wb") as file: +# pickle.dump(labels, file) + +# # %% Calculate orthogonalized power envelope connectivity in source space +# # In non-interpolated channels +# # Updated 22/1 - 2021 to use delta = 1/81 and assumption +# # about non-correlated and equal variance noise covariance matrix for channels -# Prepare epochs for estimation of source connectivity -source_epochs = [0]*n_subjects -for i in range(n_subjects): - source_epochs[i] = final_epochs[i].copy() - -### Make forward solutions -# A forward solution is first made for all individuals with no dropped channels -# Afterwards individual forward solutions are made for subjects with bad -# channels that were interpolated in preprocessing and these are dropped -# First forward operator is computed using a template MRI for each dataset -fs_dir = "/home/glia/MNE-fsaverage-data/fsaverage" -subjects_dir = os.path.dirname(fs_dir) -trans = "fsaverage" -src = os.path.join(fs_dir, "bem", "fsaverage-ico-5-src.fif") -bem = os.path.join(fs_dir, "bem", "fsaverage-5120-5120-5120-bem-sol.fif") - -# Read the template sourcespace -sourcespace = mne.read_source_spaces(src) - -temp_idx = 0 # Index with subject that had no bad channels -subject_eeg = source_epochs[temp_idx].copy() -subject_eeg.set_eeg_reference(projection=True) # needed for inverse modelling -# Make forward solution -fwd = 
mne.make_forward_solution(subject_eeg.info, trans=trans, src=src, - bem=bem, eeg=True, mindist=5.0, n_jobs=1) -# Save forward operator -fname_fwd = "./Source_fwd/fsaverage-fwd.fif" -mne.write_forward_solution(fname_fwd, fwd, overwrite=True) - -# A specific forward solution is also made for each subject with bad channels -with open("./Preprocessing/bad_ch.pkl", "rb") as file: - bad_ch = pickle.load(file) - -All_bad_ch = bad_ch -All_drop_epochs = dropped_epochs_df -All_dropped_ch = [] - -Bad_ch_idx = [idx for idx, item in enumerate(All_bad_ch) if item != 0] -Bad_ch_subjects = All_drop_epochs["Subject_ID"][Bad_ch_idx] -# For each subject with bad channels, drop the channels and make forward operator -for n in range(len(Bad_ch_subjects)): - Subject = Bad_ch_subjects.iloc[n] - try: - Subject_idx = Subject_id.index(Subject) - # Get unique bad channels - Bad_ch0 = All_bad_ch[Bad_ch_idx[n]] - Bad_ch1 = [] - for i2 in range(len(Bad_ch0)): - if type(Bad_ch0[i2]) == list: - for i3 in range(len(Bad_ch0[i2])): - Bad_ch1.append(Bad_ch0[i2][i3]) - elif type(Bad_ch0[i2]) == str: - Bad_ch1.append(Bad_ch0[i2]) - Bad_ch1 = np.unique(Bad_ch1) - # Drop the bad channels - source_epochs[Subject_idx].drop_channels(Bad_ch1) - # Save the overview of dropped channels - All_dropped_ch.append([Subject,Subject_idx,Bad_ch1]) - # Make forward operator - subject_eeg = source_epochs[Subject_idx].copy() - subject_eeg.set_eeg_reference(projection=True) # needed for inverse modelling - # Make forward solution - fwd = mne.make_forward_solution(subject_eeg.info, trans=trans, src=src, - bem=bem, eeg=True, mindist=5.0, n_jobs=1) - # Save forward operator - fname_fwd = "./Source_fwd/fsaverage_{}-fwd.fif".format(Subject) - mne.write_forward_solution(fname_fwd, fwd, overwrite=True) - except: - print(Subject,"was already dropped") - -with open("./Preprocessing/All_datasets_bad_ch.pkl", "wb") as filehandle: - pickle.dump(All_dropped_ch, filehandle) - - -# %% Load forward operators -# Re-use for all subjects without dropped channels -fname_fwd = "./Source_fwd/fsaverage-fwd.fif" -fwd = mne.read_forward_solution(fname_fwd) - -fwd_list = [fwd]*n_subjects - -# Use specific forward solutions for subjects with dropped channels -with open("./Preprocessing/All_datasets_bad_ch.pkl", "rb") as file: - All_dropped_ch = pickle.load(file) - -for i in range(len(All_dropped_ch)): - Subject = All_dropped_ch[i][0] - Subject_idx = All_dropped_ch[i][1] - fname_fwd = "./Source_fwd/fsaverage_{}-fwd.fif".format(Subject) - fwd = mne.read_forward_solution(fname_fwd) - fwd_list[Subject_idx] = fwd - -# Check the correct number of channels are present in fwd -random_point = int(np.random.randint(0,len(All_dropped_ch)-1,1)) -assert len(fwds[All_dropped_ch[random_point][1]].ch_names) == source_epochs[All_dropped_ch[random_point][1]].info["nchan"] - -# %% Make parcellation -# After mapping to source space, I end up with 20484 vertices -# but I wanted to map to fewer sources and not many more -# Thus I need to perform parcellation -# Get labels for FreeSurfer "aparc" cortical parcellation (example with 74 labels/hemi - Destriuex) -labels_aparc = mne.read_labels_from_annot("fsaverage", parc="aparc.a2009s", - subjects_dir=subjects_dir) -labels_aparc = labels_aparc[:-2] # remove unknowns - -labels_aparc_names = [label.name for label in labels_aparc] - -# Manually adding the 31 ROIs (14-lh/rh + 3 in midline) from Toll et al, 2020 -# Making fuction to take subset of a label -def label_subset(label, subset, name="ROI_name"): - label_subset = 
mne.Label(label.vertices[subset], label.pos[subset,:], - label.values[subset], label.hemi, - name = "{}-{}".format(name,label.hemi), - subject = label.subject, color = None) - return label_subset - -### Visual area 1 (V1 and somatosensory cortex BA1-3) -label_filenames = ["lh.V1.label", "rh.V1.label", - "lh.BA1.label", "rh.BA1.label", - "lh.BA2.label", "rh.BA2.label", - "lh.BA3a.label", "rh.BA3a.label", - "lh.BA3b.label", "rh.BA3b.label"] -labels0 = [0]*len(label_filenames) -for i, filename in enumerate(label_filenames): - labels0[i] = mne.read_label(os.path.join(fs_dir, "label", filename), subject="fsaverage") -# Add V1 to final label variable -labels = labels0[:2] -# Rename to remove redundant hemi information -labels[0].name = "V1-{}".format(labels[0].hemi) -labels[1].name = "V1-{}".format(labels[1].hemi) -# Assign a color -labels[0].color = matplotlib.colors.to_rgba("salmon") -labels[1].color = matplotlib.colors.to_rgba("salmon") -# Combine Brodmann Areas for SMC. Only use vertices ones to avoid duplication error -SMC_labels = labels0[2:] -for hem in range(2): - SMC_p1 = SMC_labels[hem] - for i in range(1,len(SMC_labels)//2): - SMC_p2 = SMC_labels[hem+2*i] - p2_idx = np.isin(SMC_p2.vertices, SMC_p1.vertices, invert=True) - SMC_p21 = label_subset(SMC_p2, p2_idx, "SMC") - SMC_p1 = SMC_p1.__add__(SMC_p21) - SMC_p1.name = SMC_p21.name - # Assign a color - SMC_p1.color = matplotlib.colors.to_rgba("orange") - labels.append(SMC_p1) - -### Inferior frontal junction -# Located at junction between inferior frontal and inferior precentral sulcus -label_aparc_names0 = ["S_front_inf","S_precentral-inf-part"] -temp_labels = [] -for i in range(len(label_aparc_names0)): - labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])] - for i2 in range(len(labels_aparc_idx)): - temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy()) - -pos1 = temp_labels[0].pos -pos2 = temp_labels[2].pos -distm = scipy.spatial.distance.cdist(pos1,pos2) -# Find the closest points between the 2 ROIs -l1_idx = np.unique(np.where(distm<np.quantile(distm, 0.001))[0]) # q chosen to correspond to around 10% of ROI -l2_idx = np.unique(np.where(distm<np.quantile(distm, 0.0005))[1]) # q chosen to correspond to around 10% of ROI - -IFJ_label_p1 = label_subset(temp_labels[0], l1_idx, "IFJ") -IFJ_label_p2 = label_subset(temp_labels[2], l2_idx, "IFJ") -# Combine the 2 parts -IFJ_label = IFJ_label_p1.__add__(IFJ_label_p2) -IFJ_label.name = IFJ_label_p1.name -# Assign a color -IFJ_label.color = matplotlib.colors.to_rgba("chartreuse") -# Append to final list -labels.append(IFJ_label) - -# Do the same for the right hemisphere -pos1 = temp_labels[1].pos -pos2 = temp_labels[3].pos -distm = scipy.spatial.distance.cdist(pos1,pos2) -# Find the closest points between the 2 ROIs -l1_idx = np.unique(np.where(distm<np.quantile(distm, 0.00075))[0]) # q chosen to correspond to around 10% of ROI -l2_idx = np.unique(np.where(distm<np.quantile(distm, 0.0005))[1]) # q chosen to correspond to around 10% of ROI -IFJ_label_p1 = label_subset(temp_labels[1], l1_idx, "IFJ") -IFJ_label_p2 = label_subset(temp_labels[3], l2_idx, "IFJ") -# Combine the 2 parts -IFJ_label = IFJ_label_p1.__add__(IFJ_label_p2) -IFJ_label.name = IFJ_label_p1.name -# Assign a color -IFJ_label.color = matplotlib.colors.to_rgba("chartreuse") -# Append to final list -labels.append(IFJ_label) - -### Intraparietal sulcus -label_aparc_names0 = ["S_intrapariet_and_P_trans"] -labels_aparc_idx = [labels_aparc_names.index(l) for l in 
labels_aparc_names if l.startswith(label_aparc_names0[0])] -for i in range(len(labels_aparc_idx)): - labels.append(labels_aparc[labels_aparc_idx[i]].copy()) - labels[-1].name = "IPS-{}".format(labels[-1].hemi) - -### Frontal eye field as intersection between middle frontal gyrus and precentral gyrus -label_aparc_names0 = ["G_front_middle","G_precentral"] -temp_labels = [] -for i in range(len(label_aparc_names0)): - labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])] - for i2 in range(len(labels_aparc_idx)): - temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy()) - -# Take 10% of middle frontal gyrus closest to precentral gyrus (most posterior) -temp_label0 = temp_labels[0] -G_fm_y = temp_label0.pos[:,1] -thres_G_fm_y = np.sort(G_fm_y)[len(G_fm_y)//10] -idx_p1 = np.where(G_fm_y<thres_G_fm_y)[0] -FEF_label_p1 = label_subset(temp_label0, idx_p1, "FEF") -# Take 10% closest for precentral gyrus (most anterior) -temp_label0 = temp_labels[2] -# I cannot only use y (anterior/posterior) but also need to restrict z-position -G_pre_cen_z = temp_label0.pos[:,2] -thres_G_pre_cen_z = 0.04 # visually inspected threshold -G_pre_cen_y = temp_label0.pos[:,1] -thres_G_pre_cen_y = np.sort(G_pre_cen_y[G_pre_cen_z>thres_G_pre_cen_z])[-len(G_pre_cen_y)//10] # notice - for anterior -idx_p2 = np.where((G_pre_cen_y>thres_G_pre_cen_y) & (G_pre_cen_z>thres_G_pre_cen_z))[0] -FEF_label_p2 = label_subset(temp_label0, idx_p2, "FEF") -# Combine the 2 parts -FEF_label = FEF_label_p1.__add__(FEF_label_p2) -FEF_label.name = FEF_label_p1.name -# Assign a color -FEF_label.color = matplotlib.colors.to_rgba("aqua") -# Append to final list -labels.append(FEF_label) - -# Do the same for the right hemisphere -temp_label0 = temp_labels[1] -G_fm_y = temp_label0.pos[:,1] -thres_G_fm_y = np.sort(G_fm_y)[len(G_fm_y)//10] -idx_p1 = np.where(G_fm_y<thres_G_fm_y)[0] -FEF_label_p1 = label_subset(temp_label0, idx_p1, "FEF") - -temp_label0 = temp_labels[3] -G_pre_cen_z = temp_label0.pos[:,2] -thres_G_pre_cen_z = 0.04 # visually inspected threshold -G_pre_cen_y = temp_label0.pos[:,1] -thres_G_pre_cen_y = np.sort(G_pre_cen_y[G_pre_cen_z>thres_G_pre_cen_z])[-len(G_pre_cen_y)//10] # notice - for anterior -idx_p2 = np.where((G_pre_cen_y>thres_G_pre_cen_y) & (G_pre_cen_z>thres_G_pre_cen_z))[0] -FEF_label_p2 = label_subset(temp_label0, idx_p2, "FEF") -# Combine the 2 parts -FEF_label = FEF_label_p1.__add__(FEF_label_p2) -FEF_label.name = FEF_label_p1.name -# Assign a color -FEF_label.color = matplotlib.colors.to_rgba("aqua") -# Append to final list -labels.append(FEF_label) - -### Supplementary eye fields -# Located at caudal end of frontal gyrus and upper part of paracentral sulcus -label_aparc_names0 = ["G_and_S_paracentral","G_front_sup"] -temp_labels = [] -for i in range(len(label_aparc_names0)): - labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])] - for i2 in range(len(labels_aparc_idx)): - temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy()) - -pos1 = temp_labels[0].pos -pos2 = temp_labels[2].pos -distm = scipy.spatial.distance.cdist(pos1,pos2) -# Find the closest points between the 2 ROIs -l1_idx = np.unique(np.where(distm<np.quantile(distm, 0.0005))[0]) # q chosen to correspond to around 15% of ROI -l2_idx = np.unique(np.where(distm<np.quantile(distm, 0.005))[1]) # q chosen to correspond to around 10% of ROI -# Notice that superior frontal gyrus is around 4 times bigger than paracentral 
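The distance-quantile construction (take the vertices of one label closest to another, optionally restricted to an upper z range) is repeated for the IFJ, FEF and SEF ROIs, and the bare `len(l1_idx)/pos1.shape[0]` expressions just below evaluate the coverage without reporting it. A possible refactor is sketched here with a hypothetical helper; the printed coverage check is illustrative only and not part of the patch.

```python
# Illustrative sketch (not part of the patch): a reusable helper for the
# "vertices of label A closest to label B" pattern used for the IFJ/FEF/SEF
# ROIs, reporting how much of the label the chosen distance quantile selects.
# The helper name is hypothetical.
import numpy as np
from scipy.spatial.distance import cdist

def closest_vertex_idx(pos_a, pos_b, quantile, z_threshold=None):
    """Indices into pos_a within the given distance quantile of pos_b."""
    distm = cdist(pos_a, pos_b)
    idx = np.unique(np.where(distm < np.quantile(distm, quantile))[0])
    if z_threshold is not None:  # optionally keep only the upper part
        idx = idx[pos_a[idx, 2] > z_threshold]
    print("Selected {:.1%} of the label".format(len(idx) / pos_a.shape[0]))
    return idx

# e.g. l1_idx = closest_vertex_idx(pos1, pos2, 0.0005, z_threshold=0.06)
```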
-len(l1_idx)/pos1.shape[0] -len(l2_idx)/pos2.shape[0] -# Only use upper part -z_threshold = 0.06 # visually inspected -l1_idx = l1_idx[pos1[l1_idx,2] > z_threshold] -l2_idx = l2_idx[pos2[l2_idx,2] > z_threshold] - -SEF_label_p1 = label_subset(temp_labels[0], l1_idx, "SEF") -SEF_label_p2 = label_subset(temp_labels[2], l2_idx, "SEF") -# Combine the 2 parts -SEF_label = SEF_label_p1.__add__(SEF_label_p2) -SEF_label.name = SEF_label_p1.name -# Assign a color -SEF_label.color = matplotlib.colors.to_rgba("royalblue") -# Append to final list -labels.append(SEF_label) - -# Do the same for the right hemisphere -pos1 = temp_labels[1].pos -pos2 = temp_labels[3].pos -distm = scipy.spatial.distance.cdist(pos1,pos2) -# Find the closest points between the 2 ROIs -l1_idx = np.unique(np.where(distm<np.quantile(distm, 0.0005))[0]) # q chosen to correspond to around 15% of ROI -l2_idx = np.unique(np.where(distm<np.quantile(distm, 0.005))[1]) # q chosen to correspond to around 10% of ROI -# Notice that superior frontal gyrus is around 4 times bigger than paracentral -len(l1_idx)/pos1.shape[0] -len(l2_idx)/pos2.shape[0] -# Only use upper part -z_threshold = 0.06 # visually inspected -l1_idx = l1_idx[pos1[l1_idx,2] > z_threshold] -l2_idx = l2_idx[pos2[l2_idx,2] > z_threshold] - -SEF_label_p1 = label_subset(temp_labels[1], l1_idx, "SEF") -SEF_label_p2 = label_subset(temp_labels[3], l2_idx, "SEF") -# Combine the 2 parts -SEF_label = SEF_label_p1.__add__(SEF_label_p2) -SEF_label.name = SEF_label_p1.name -# Assign a color -SEF_label.color = matplotlib.colors.to_rgba("royalblue") -# Append to final list -labels.append(SEF_label) - -### Posterior cingulate cortex -label_aparc_names0 = ["G_cingul-Post-dorsal", "G_cingul-Post-ventral"] -temp_labels = [] -for i in range(len(label_aparc_names0)): - labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])] - for i2 in range(len(labels_aparc_idx)): - temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy()) -labels0 = [] -for hem in range(2): - PCC_p1 = temp_labels[hem] - for i in range(1,len(temp_labels)//2): - PCC_p2 = temp_labels[hem+2*i] - PCC_p1 = PCC_p1.__add__(PCC_p2) - PCC_p1.name = "PCC-{}".format(PCC_p1.hemi) - labels0.append(PCC_p1) -# Combine the 2 hemisphere in 1 label -labels.append(labels0[0].__add__(labels0[1])) - -### Medial prefrontal cortex -# From their schematic it looks like rostral 1/4 of superior frontal gyrus -label_aparc_names0 = ["G_front_sup"] -temp_labels = [] -for i in range(len(label_aparc_names0)): - labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])] - for i2 in range(len(labels_aparc_idx)): - temp_labels0 = labels_aparc[labels_aparc_idx[i2]].copy() - temp_labels0 = temp_labels0.split(4, subjects_dir=subjects_dir)[3] - temp_labels0.name = "mPFC-{}".format(temp_labels0.hemi) - temp_labels.append(temp_labels0) -# Combine the 2 hemisphere in 1 label -labels.append(temp_labels[0].__add__(temp_labels[1])) - -### Angular gyrus -label_aparc_names0 = ["G_pariet_inf-Angular"] -for i in range(len(label_aparc_names0)): - labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])] - for i2 in range(len(labels_aparc_idx)): - temp_labels = labels_aparc[labels_aparc_idx[i2]].copy() - temp_labels.name = "ANG-{}".format(temp_labels.hemi) - labels.append(temp_labels) - -### Posterior middle frontal gyrus -label_aparc_names0 = ["G_front_middle"] -for i in 
range(len(label_aparc_names0)): - labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])] - for i2 in range(len(labels_aparc_idx)): - temp_labels = labels_aparc[labels_aparc_idx[i2]].copy() - temp_labels = temp_labels.split(2, subjects_dir=subjects_dir)[0] - temp_labels.name = "PMFG-{}".format(temp_labels.hemi) - labels.append(temp_labels) - -### Inferior parietal lobule -# From their parcellation figure seems to be rostral angular gyrus and posterior supramarginal gyrus -label_aparc_names0 = ["G_pariet_inf-Angular","G_pariet_inf-Supramar"] -temp_labels = [] -for i in range(len(label_aparc_names0)): - labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])] - for i2 in range(len(labels_aparc_idx)): - temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy()) -# Split angular in 2 and get rostral part -temp_labels[0] = temp_labels[0].split(2, subjects_dir=subjects_dir)[1] -temp_labels[1] = temp_labels[1].split(2, subjects_dir=subjects_dir)[1] -# Split supramarginal in 2 and get posterior part -temp_labels[2] = temp_labels[2].split(2, subjects_dir=subjects_dir)[0] -temp_labels[3] = temp_labels[3].split(2, subjects_dir=subjects_dir)[0] - -for hem in range(2): - PCC_p1 = temp_labels[hem] - for i in range(1,len(temp_labels)//2): - PCC_p2 = temp_labels[hem+2*i] - PCC_p1 = PCC_p1.__add__(PCC_p2) - PCC_p1.name = "IPL-{}".format(PCC_p1.hemi) - labels.append(PCC_p1) - -### Orbital gyrus -# From their figure it seems to correspond to orbital part of inferior frontal gyrus -label_aparc_names0 = ["G_front_inf-Orbital"] -for i in range(len(label_aparc_names0)): - labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])] - for i2 in range(len(labels_aparc_idx)): - temp_labels = labels_aparc[labels_aparc_idx[i2]].copy() - temp_labels.name = "ORB-{}".format(temp_labels.hemi) - labels.append(temp_labels) - -### Middle temporal gyrus -# From their figure it seems to only be 1/4 of MTG at the 2nd to last caudal part -label_aparc_names0 = ["G_temporal_middle"] -for i in range(len(label_aparc_names0)): - labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])] - for i2 in range(len(labels_aparc_idx)): - temp_labels = labels_aparc[labels_aparc_idx[i2]].copy() - temp_labels = temp_labels.split(4, subjects_dir=subjects_dir)[1] - temp_labels.name = "MTG-{}".format(temp_labels.hemi) - labels.append(temp_labels) - -### Anterior middle frontal gyrus -label_aparc_names0 = ["G_front_middle"] -for i in range(len(label_aparc_names0)): - labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])] - for i2 in range(len(labels_aparc_idx)): - temp_labels = labels_aparc[labels_aparc_idx[i2]].copy() - temp_labels = temp_labels.split(2, subjects_dir=subjects_dir)[1] - temp_labels.name = "AMFG-{}".format(temp_labels.hemi) - labels.append(temp_labels) - -### Insula -label_aparc_names0 = ["G_Ins_lg_and_S_cent_ins","G_insular_short"] -temp_labels = [] -for i in range(len(label_aparc_names0)): - labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])] - for i2 in range(len(labels_aparc_idx)): - temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy()) -for hem in range(2): - PCC_p1 = temp_labels[hem] - for i in range(1,len(temp_labels)//2): - PCC_p2 = temp_labels[hem+2*i] - PCC_p1 = 
PCC_p1.__add__(PCC_p2) - PCC_p1.name = "INS-{}".format(PCC_p1.hemi) - labels.append(PCC_p1) - -### (Dorsal) Anterior Cingulate Cortex -label_aparc_names0 = ["G_and_S_cingul-Ant"] -temp_labels = [] -for i in range(len(label_aparc_names0)): - labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])] - for i2 in range(len(labels_aparc_idx)): - temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy()) - temp_labels[-1].name = "ACC-{}".format(temp_labels[-1].hemi) -# Combine the 2 hemisphere in 1 label -labels.append(temp_labels[0].__add__(temp_labels[1])) - -### Supramarginal Gyrus -label_aparc_names0 = ["G_pariet_inf-Supramar"] -for i in range(len(label_aparc_names0)): - labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])] - for i2 in range(len(labels_aparc_idx)): - temp_labels = labels_aparc[labels_aparc_idx[i2]].copy() - temp_labels.name = "SUP-{}".format(temp_labels.hemi) - labels.append(temp_labels) - -print("{} ROIs have been defined".format(len(labels))) - -# # Visualize positions -# fig = plt.figure() -# ax = fig.add_subplot(111, projection="3d") -# for i in range(0,3): -# temp_pos = temp_labels[i].pos -# ax.scatter(temp_pos[:,0],temp_pos[:,1],temp_pos[:,2], marker="o", alpha=0.1) -# # Add to plot -# ax.scatter(labels[-1].pos[:,0],labels[-1].pos[:,1],labels[-1].pos[:,2], marker="o") - -# # Visualize the labels -# # temp_l = labels_aparc[labels_aparc_idx[0]] -# temp_l = labels[-2] -# l_stc = stc[100].in_label(temp_l) -# l_stc.vertices - -# l_stc.plot(**surfer_kwargs) - -# Save the annotation file -with open("custom_aparc2009_Li_et_al_2022.pkl", "wb") as file: - pickle.dump(labels, file) - -# %% Calculate orthogonalized power envelope connectivity in source space -# In non-interpolated channels -# Updated 22/1 - 2021 to use delta = 1/81 and assumption -# about non-correlated and equal variance noise covariance matrix for channels - -# Load -with open("custom_aparc2009_Li_et_al_2022.pkl", "rb") as file: - labels = pickle.load(file) -label_names = [label.name for label in labels] - -# Define function to estimate PEC -def PEC_estimation(x, freq_bands, sfreq=200): - """ - This function takes a source timeseries signal x and performs: - 1. Bandpass filtering - 2. Hilbert transform to yield analytical signal - 3. Compute all to all connectivity by iteratively computing for each pair - a. Orthogonalization - b. Computing power envelopes by squaring the signals |x|^2 - c. Log-transform to enhance normality - d. Pearson's correlation between each pair - e. Fisher's r-to-z transform to enhance normality - The code has been optimized by inspiration from MNE-Python's function: - mne.connectivity.enelope_correlation. +# # Load +# with open("custom_aparc2009_Li_et_al_2022.pkl", "rb") as file: +# labels = pickle.load(file) +# label_names = [label.name for label in labels] + +# # Define function to estimate PEC +# def PEC_estimation(x, freq_bands, sfreq=200): +# """ +# This function takes a source timeseries signal x and performs: +# 1. Bandpass filtering +# 2. Hilbert transform to yield analytical signal +# 3. Compute all to all connectivity by iteratively computing for each pair +# a. Orthogonalization +# b. Computing power envelopes by squaring the signals |x|^2 +# c. Log-transform to enhance normality +# d. Pearson's correlation between each pair +# e. 
Fisher's r-to-z transform to enhance normality +# The code has been optimized by inspiration from MNE-Python's function: +# mne.connectivity.enelope_correlation. - In MNE-python version < 0.22 there was a bug, but after the fix in 0.22 - the mne function is equivalent to my implementation, although they don't - use epsilon but gives same result with a RuntimeWarning about log(0) +# In MNE-python version < 0.22 there was a bug, but after the fix in 0.22 +# the mne function is equivalent to my implementation, although they don't +# use epsilon but gives same result with a RuntimeWarning about log(0) - IMPORTANT NOTE: - Filtering introduce artifacts for first and last timepoint - The values are very low, more than 1e-12 less than the others - If they are not removed, then they will heavily influence Pearson's - correlation as it is outlier sensitive +# IMPORTANT NOTE: +# Filtering introduce artifacts for first and last timepoint +# The values are very low, more than 1e-12 less than the others +# If they are not removed, then they will heavily influence Pearson's +# correlation as it is outlier sensitive - Inputs: - x - The signal in source space as np.array with shape (ROIs,Timepoints) - freq_bands - The frequency bands of interest as a dictionary e.g. - {"alpha": [8.0, 13.0], "beta": [13.0, 30.0]} - sfreq - The sampling frequency in Hertz +# Inputs: +# x - The signal in source space as np.array with shape (ROIs,Timepoints) +# freq_bands - The frequency bands of interest as a dictionary e.g. +# {"alpha": [8.0, 13.0], "beta": [13.0, 30.0]} +# sfreq - The sampling frequency in Hertz - Output: - The pairwise connectivity matrix - """ - n_roi, n_timepoints = x.shape - n_freq_bands = len(freq_bands) +# Output: +# The pairwise connectivity matrix +# """ +# n_roi, n_timepoints = x.shape +# n_freq_bands = len(freq_bands) - epsilon = 1e-100 # small value to prevent log(0) errors +# epsilon = 1e-100 # small value to prevent log(0) errors - # Filter the signal in the different freq bands - PEC_con0 = np.zeros((n_roi,n_roi,n_freq_bands)) - for fname, frange in freq_bands.items(): - fmin, fmax = [float(interval) for interval in frange] - signal_filtered = mne.filter.filter_data(x, sfreq, fmin, fmax, - fir_design="firwin", verbose=0) - # Filtering on finite signals will yield very low values for first - # and last timepoint, which can create outliers. E.g. 1e-29 compared to 1e-14 - # Outlier sensitive methods, like Pearson's correlation, is therefore - # heavily affected and this systematic error is removed by removing - # the first and last timepoint - signal_filtered = signal_filtered[:,1:-1] +# # Filter the signal in the different freq bands +# PEC_con0 = np.zeros((n_roi,n_roi,n_freq_bands)) +# for fname, frange in freq_bands.items(): +# fmin, fmax = [float(interval) for interval in frange] +# signal_filtered = mne.filter.filter_data(x, sfreq, fmin, fmax, +# fir_design="firwin", verbose=0) +# # Filtering on finite signals will yield very low values for first +# # and last timepoint, which can create outliers. E.g. 
1e-29 compared to 1e-14 +# # Outlier sensitive methods, like Pearson's correlation, is therefore +# # heavily affected and this systematic error is removed by removing +# # the first and last timepoint +# signal_filtered = signal_filtered[:,1:-1] - # Hilbert transform - analytic_signal = scipy.signal.hilbert(signal_filtered) - # I will use x and y to keep track of orthogonalization - x0 = analytic_signal - # Get power envelope - x0_mag = np.abs(x0) - # Get scaled conjugate used for orthogonalization estimation - x0_conj_scaled = x0.conj() - x0_conj_scaled /= x0_mag - # Take square power envelope - PEx = np.square(x0_mag) - # Take log transform - lnPEx = np.log(PEx+epsilon) - # Remove mean for Pearson correlation calculation - lnPEx_nomean = lnPEx - np.mean(lnPEx, axis=-1, keepdims=True) # normalize each roi timeseries - # Get std for Pearson correlation calculation - lnPEx_std = np.std(lnPEx, axis=-1) - lnPEx_std[lnPEx_std == 0] = 1 # Prevent std = 0 problems - # Prepare con matrix - con0 = np.zeros((n_roi,n_roi)) - for roi_r, y0 in enumerate(x0): # for each y0 - # Calculate orthogonalized signal y with respect to x for all x - # Using y_ort = imag(y*x_conj/|x|) - # I checked the formula in temp_v3 and it works as intended - # I want to orthogonalize element wise for each timepoint - y0_ort = (y0*x0_conj_scaled).imag - # Here y0_ort.shape = (n_roi, n_timepoints) - # So y is current roi and the first axis gives each x it is orthogonalized to - # Take the abs to get power envelope - y0_ort = np.abs(y0_ort) - # Prevent log(0) error when calculating y_ort on y - y0_ort[roi_r] = 1. # this will be 0 zero after mean subtraction - # Take square power envelope - PEy = np.square(y0_ort) # squared power envelope - # Take log transform - lnPEy = np.log(PEy+epsilon) - # Remove mean for pearson correlation calculation - lnPEy_nomean = lnPEy - np.mean(lnPEy, axis=-1, keepdims=True) - # Get std for Pearson correlation calculation - lnPEy_std = np.std(lnPEy, axis=-1) - lnPEy_std[lnPEy_std == 0] = 1. - # Pearson correlation is expectation of X_nomean * Y_nomean for each time-series divided with standard deviations - PEC = np.mean(lnPEx_nomean*lnPEy_nomean, axis=-1) - PEC /= lnPEx_std - PEC /= lnPEy_std - con0[roi_r] = PEC - # The con0 connectivity matrix should be read as correlation between - # orthogonalized y (row number) and x (column number) - # It is not symmetrical, as cor(roi2_ort, roi1) is not cor(roi1_ort, roi2) - # To make it symmetrical the average of the absolute correlation - # of the 2 possibilities for each pair are taken - con0 = np.abs(con0) - con0 = (con0.T+con0)/2. - # Fisher's z transform - which is equivalent to arctanh - con0 = np.arctanh(con0) - # The diagonal is not 0 as I wanted to avoid numerical errors with log(0) - # and used a small epsilon value. 
Thus the diagonal is explicitly set to 0 +# # Hilbert transform +# analytic_signal = scipy.signal.hilbert(signal_filtered) +# # I will use x and y to keep track of orthogonalization +# x0 = analytic_signal +# # Get power envelope +# x0_mag = np.abs(x0) +# # Get scaled conjugate used for orthogonalization estimation +# x0_conj_scaled = x0.conj() +# x0_conj_scaled /= x0_mag +# # Take square power envelope +# PEx = np.square(x0_mag) +# # Take log transform +# lnPEx = np.log(PEx+epsilon) +# # Remove mean for Pearson correlation calculation +# lnPEx_nomean = lnPEx - np.mean(lnPEx, axis=-1, keepdims=True) # normalize each roi timeseries +# # Get std for Pearson correlation calculation +# lnPEx_std = np.std(lnPEx, axis=-1) +# lnPEx_std[lnPEx_std == 0] = 1 # Prevent std = 0 problems +# # Prepare con matrix +# con0 = np.zeros((n_roi,n_roi)) +# for roi_r, y0 in enumerate(x0): # for each y0 +# # Calculate orthogonalized signal y with respect to x for all x +# # Using y_ort = imag(y*x_conj/|x|) +# # I checked the formula in temp_v3 and it works as intended +# # I want to orthogonalize element wise for each timepoint +# y0_ort = (y0*x0_conj_scaled).imag +# # Here y0_ort.shape = (n_roi, n_timepoints) +# # So y is current roi and the first axis gives each x it is orthogonalized to +# # Take the abs to get power envelope +# y0_ort = np.abs(y0_ort) +# # Prevent log(0) error when calculating y_ort on y +# y0_ort[roi_r] = 1. # this will be 0 zero after mean subtraction +# # Take square power envelope +# PEy = np.square(y0_ort) # squared power envelope +# # Take log transform +# lnPEy = np.log(PEy+epsilon) +# # Remove mean for pearson correlation calculation +# lnPEy_nomean = lnPEy - np.mean(lnPEy, axis=-1, keepdims=True) +# # Get std for Pearson correlation calculation +# lnPEy_std = np.std(lnPEy, axis=-1) +# lnPEy_std[lnPEy_std == 0] = 1. +# # Pearson correlation is expectation of X_nomean * Y_nomean for each time-series divided with standard deviations +# PEC = np.mean(lnPEx_nomean*lnPEy_nomean, axis=-1) +# PEC /= lnPEx_std +# PEC /= lnPEy_std +# con0[roi_r] = PEC +# # The con0 connectivity matrix should be read as correlation between +# # orthogonalized y (row number) and x (column number) +# # It is not symmetrical, as cor(roi2_ort, roi1) is not cor(roi1_ort, roi2) +# # To make it symmetrical the average of the absolute correlation +# # of the 2 possibilities for each pair are taken +# con0 = np.abs(con0) +# con0 = (con0.T+con0)/2. +# # Fisher's z transform - which is equivalent to arctanh +# con0 = np.arctanh(con0) +# # The diagonal is not 0 as I wanted to avoid numerical errors with log(0) +# # and used a small epsilon value. 
Thus the diagonal is explicitly set to 0 - # Save to array - PEC_con0[:,:,list(freq_bands.keys()).index(fname)] = con0 - return PEC_con0 - -# Prepare variables -Freq_Bands = {"delta": [1.25, 4.0], - "theta": [4.0, 8.0], - "alpha": [8.0, 13.0], - "beta": [13.0, 30.0], - "gamma": [30.0, 49.0]} -n_freq_bands = len(Freq_Bands) -n_roi = len(labels) - -# Get current time -c_time1 = time.localtime() -c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1) -print(c_time1) - -# PEC analysis -PEC_data_list = [0]*n_subjects -STCs_list = [0]*n_subjects - -# Using inverse operator as generator interferes with concurrent processes -# If I run it for multiple subjects I run out of ram -# Thus concurrent processes are used inside the for loop -def PEC_analysis(input_args): # iterable epoch number and corresponding ts - i2, ts = input_args - # Estimate PEC - PEC_con0 = PEC_estimation(ts, Freq_Bands, sfreq) - print("Finished {} out of {} epochs".format(i2+1,n_epochs)) - return i2, PEC_con0, ts - -for i in range(n_subjects): - n_epochs, n_ch, n_timepoints = source_epochs[i].get_data().shape - # Use different forward solutions depending on number of channels - cur_subject_id = Subject_id[i] - fwd = fwds[i] +# # Save to array +# PEC_con0[:,:,list(freq_bands.keys()).index(fname)] = con0 +# return PEC_con0 + +# # Prepare variables +# Freq_Bands = {"delta": [1.25, 4.0], +# "theta": [4.0, 8.0], +# "alpha": [8.0, 13.0], +# "beta": [13.0, 30.0], +# "gamma": [30.0, 49.0]} +# n_freq_bands = len(Freq_Bands) +# n_roi = len(labels) + +# # Get current time +# c_time1 = time.localtime() +# c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1) +# print(c_time1) + +# # PEC analysis +# PEC_data_list = [0]*n_subjects +# STCs_list = [0]*n_subjects + +# # Using inverse operator as generator interferes with concurrent processes +# # If I run it for multiple subjects I run out of ram +# # Thus concurrent processes are used inside the for loop +# def PEC_analysis(input_args): # iterable epoch number and corresponding ts +# i2, ts = input_args +# # Estimate PEC +# PEC_con0 = PEC_estimation(ts, Freq_Bands, sfreq) +# print("Finished {} out of {} epochs".format(i2+1,n_epochs)) +# return i2, PEC_con0, ts + +# for i in range(n_subjects): +# n_epochs, n_ch, n_timepoints = source_epochs[i].get_data().shape +# # Use different forward solutions depending on number of channels +# cur_subject_id = Subject_id[i] +# fwd = fwds[i] - # Using assumption about equal variance and no correlations I make a diagonal matrix - # Using the default option for 0.2µV std for EEG data - noise_cov = mne.make_ad_hoc_cov(source_epochs[i].info, None) +# # Using assumption about equal variance and no correlations I make a diagonal matrix +# # Using the default option for 0.2µV std for EEG data +# noise_cov = mne.make_ad_hoc_cov(source_epochs[i].info, None) - # Make inverse operator - # Using default depth parameter = 0.8 and free orientation (loose = 1) - inverse_operator = mne.minimum_norm.make_inverse_operator(source_epochs[i].info, - fwd, noise_cov, - loose = 1, depth = 0.8, - verbose = 0) - src_inv = inverse_operator["src"] - # Compute inverse solution and retrieve time series for each label - # Preallocate memory - label_ts = np.full((n_epochs,len(labels),n_timepoints),np.nan) - # Define regularization - snr = 9 # Zhang et al, 2020 used delta = 1/81, which is inverse SNR and correspond to lambda2 - # A for loop is used for each label due to memory issues when doing all labels at the same time - for l in range(len(labels)): - stc = 
mne.minimum_norm.apply_inverse_epochs(source_epochs[i],inverse_operator, - lambda2 = 1/(snr**2), - label = labels[l], - pick_ori = "vector", - return_generator=False, - method = "MNE", verbose = 0) - # Use PCA to reduce the 3 orthogonal directions to 1 principal direction with max power - # There can be ambiguity about the orientation, thus the one that - # is pointing most "normal", i.e. closest 90 degrees to the skull is used - stc_pca = [0]*len(stc) - for ep in range(n_epochs): - stc_pca[ep], pca_dir = stc[ep].project(directions="pca", src=src_inv) - # Get mean time series for the whole label - temp_label_ts = mne.extract_label_time_course(stc_pca, labels[l], src_inv, mode="mean_flip", - return_generator=False, verbose=0) - # Save to array - label_ts[:,l,:] = np.squeeze(np.array(temp_label_ts)) - print("Finished estimating STC for {} out of {} ROIs".format(l+1,len(labels))) +# # Make inverse operator +# # Using default depth parameter = 0.8 and free orientation (loose = 1) +# inverse_operator = mne.minimum_norm.make_inverse_operator(source_epochs[i].info, +# fwd, noise_cov, +# loose = 1, depth = 0.8, +# verbose = 0) +# src_inv = inverse_operator["src"] +# # Compute inverse solution and retrieve time series for each label +# # Preallocate memory +# label_ts = np.full((n_epochs,len(labels),n_timepoints),np.nan) +# # Define regularization +# snr = 9 # Zhang et al, 2020 used delta = 1/81, which is inverse SNR and correspond to lambda2 +# # A for loop is used for each label due to memory issues when doing all labels at the same time +# for l in range(len(labels)): +# stc = mne.minimum_norm.apply_inverse_epochs(source_epochs[i],inverse_operator, +# lambda2 = 1/(snr**2), +# label = labels[l], +# pick_ori = "vector", +# return_generator=False, +# method = "MNE", verbose = 0) +# # Use PCA to reduce the 3 orthogonal directions to 1 principal direction with max power +# # There can be ambiguity about the orientation, thus the one that +# # is pointing most "normal", i.e. 
closest 90 degrees to the skull is used +# stc_pca = [0]*len(stc) +# for ep in range(n_epochs): +# stc_pca[ep], pca_dir = stc[ep].project(directions="pca", src=src_inv) +# # Get mean time series for the whole label +# temp_label_ts = mne.extract_label_time_course(stc_pca, labels[l], src_inv, mode="mean_flip", +# return_generator=False, verbose=0) +# # Save to array +# label_ts[:,l,:] = np.squeeze(np.array(temp_label_ts)) +# print("Finished estimating STC for {} out of {} ROIs".format(l+1,len(labels))) - # Free up memory - del stc - - # Prepare variables - sfreq=source_epochs[i].info["sfreq"] - n_epochs = len(source_epochs[i]) - # Estimate the pairwise PEC for each epoch - PEC_con_subject = np.zeros((n_epochs,n_roi,n_roi,n_freq_bands)) - stcs0 = np.zeros((n_epochs,n_roi,int(sfreq)*4)) # 4s epochs - # Make list of arguments to pass into PEC_analysis using the helper func - args = [] - for i2 in range(n_epochs): - args.append((i2,label_ts[i2])) +# # Free up memory +# del stc + +# # Prepare variables +# sfreq=source_epochs[i].info["sfreq"] +# n_epochs = len(source_epochs[i]) +# # Estimate the pairwise PEC for each epoch +# PEC_con_subject = np.zeros((n_epochs,n_roi,n_roi,n_freq_bands)) +# stcs0 = np.zeros((n_epochs,n_roi,int(sfreq)*4)) # 4s epochs +# # Make list of arguments to pass into PEC_analysis using the helper func +# args = [] +# for i2 in range(n_epochs): +# args.append((i2,label_ts[i2])) - with concurrent.futures.ProcessPoolExecutor(max_workers=16) as executor: - for i2, PEC_result, stc_result in executor.map(PEC_analysis, args): # Function and arguments - PEC_con_subject[i2] = PEC_result - stcs0[i2] = stc_result +# with concurrent.futures.ProcessPoolExecutor(max_workers=16) as executor: +# for i2, PEC_result, stc_result in executor.map(PEC_analysis, args): # Function and arguments +# PEC_con_subject[i2] = PEC_result +# stcs0[i2] = stc_result - # Save to list - PEC_data_list[i] = PEC_con_subject # [subject](epoch,ch,ch,freq) - STCs_list[i] = stcs0 # [subject][epoch,roi,timepoint] +# # Save to list +# PEC_data_list[i] = PEC_con_subject # [subject](epoch,ch,ch,freq) +# STCs_list[i] = stcs0 # [subject][epoch,roi,timepoint] - # Print progress - print("Finished {} out of {} subjects".format(i+1,n_subjects)) +# # Print progress +# print("Finished {} out of {} subjects".format(i+1,n_subjects)) -# Get current time -c_time2 = time.localtime() -c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2) -print("Started", c_time1, "\nFinished",c_time2) +# # Get current time +# c_time2 = time.localtime() +# c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2) +# print("Started", c_time1, "\nFinished",c_time2) -with open(Feature_savepath+"PEC_each_epoch_drop_interpol_ch_fix_snr.pkl", "wb") as file: - pickle.dump(PEC_data_list, file) -with open(Feature_savepath+"STCs_each_epoch_drop_interpol_ch_fix_snr.pkl", "wb") as file: - pickle.dump(STCs_list, file) +# with open(Feature_savepath+"PEC_each_epoch_drop_interpol_ch_fix_snr.pkl", "wb") as file: +# pickle.dump(PEC_data_list, file) +# with open(Feature_savepath+"STCs_each_epoch_drop_interpol_ch_fix_snr.pkl", "wb") as file: +# pickle.dump(STCs_list, file) + +# # # # Load +# # with open(Feature_savepath+"PEC_each_epoch_drop_interpol_ch_fix_snr.pkl", "rb") as file: +# # PEC_data_list = pickle.load(file) # # # Load -# with open(Feature_savepath+"PEC_each_epoch_drop_interpol_ch_fix_snr.pkl", "rb") as file: -# PEC_data_list = pickle.load(file) +# # with open(Feature_savepath+"STCs_each_epoch_drop_interpol_ch_fix_snr.pkl", "rb") as file: +# # STCs_list = 
pickle.load(file) + +# # Average over eye status +# eye_status = list(source_epochs[0].event_id.keys()) +# n_eye_status = len(eye_status) +# pec_data = np.zeros((n_subjects,n_eye_status,n_roi,n_roi,n_freq_bands)) +# for i in range(n_subjects): +# # Get indices for eyes open and closed +# EC_index = source_epochs[i].events[:,2] == 1 +# EO_index = source_epochs[i].events[:,2] == 2 +# # Average over the indices and save to array +# pec_data[i,0] = np.mean(PEC_data_list[i][EC_index], axis=0) +# pec_data[i,1] = np.mean(PEC_data_list[i][EO_index], axis=0) +# # Only use the lower diagonal as the diagonal should be 0 (or very small due to numerical errors) +# # And it is symmetric +# for f in range(n_freq_bands): +# pec_data[i,0,:,:,f] = np.tril(pec_data[i,0,:,:,f],k=-1) +# pec_data[i,1,:,:,f] = np.tril(pec_data[i,1,:,:,f],k=-1) + +# # Also save as dataframe format for feature selection +# # Convert to Pandas dataframe +# # The dimensions will each be a column with numbers and the last column will be the actual values +# arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, pec_data.shape), indexing="ij"))) + [pec_data.ravel()]) +# pec_data_df = pd.DataFrame(arr, columns = ["Subject_ID", "Eye_status", "chx", "chy", "Freq_band", "Value"]) +# # Change from numerical coding to actual values +# eye_status = list(source_epochs[0].event_id.keys()) +# freq_bands_name = list(Freq_Bands.keys()) +# label_names = [label.name for label in labels] + +# index_values = [Subject_id,eye_status,label_names,label_names,freq_bands_name] +# for col in range(len(index_values)): +# col_name = pec_data_df.columns[col] +# for shape in range(pec_data.shape[col]): # notice not dataframe but the array +# pec_data_df.loc[pec_data_df.iloc[:,col] == shape,col_name]\ +# = index_values[col][shape] + +# # Add group status +# Group_status = np.array(["CTRL"]*len(pec_data_df["Subject_ID"])) +# Group_status[np.array([i in cases for i in pec_data_df["Subject_ID"]])] = "PTSD" +# # Add to dataframe +# pec_data_df.insert(3, "Group_status", Group_status) + +# # Remove all diagonal and upper-matrix entries by removing zeros +# pec_data_df = pec_data_df.iloc[pec_data_df["Value"].to_numpy().nonzero()] + +# # Save df +# pec_data_df.to_pickle(os.path.join(Feature_savepath,"pec_data_drop_interpol_ch_df.pkl")) + +# # %% Sparse clustering of PEC for subtyping PTSD group +# # They did it for both eye status together, so all data in one matrix +# # Load PEC df +# # pec_data_df = pd.read_pickle(os.path.join(Feature_savepath,"pec_data_df.pkl")) +# pec_data_df = pd.read_pickle(os.path.join(Feature_savepath,"pec_data_drop_interpol_ch_df.pkl")) + +# # Convert to wide format +# # Make function to add measurement column for indexing +# def add_measurement_column(df, measurement = "Text"): +# dummy_variable = [measurement]*df.shape[0] +# df.insert(1, "Measurement", dummy_variable) +# return df +# # Make function to convert column tuple to string +# def convertTupleHeader(header): +# header = list(header) +# str = "_".join(header) +# return str + +# # Prepare overall dataframe +# PEC_df = pd.DataFrame(Subject_id, columns = ["Subject_ID"]) +# # Add PEC +# pec_data_df = add_measurement_column(pec_data_df, "PEC") +# temp_df = pec_data_df.pivot_table(index="Subject_ID",columns=["Measurement", +# "Eye_status", "chx", "chy", +# "Freq_band"], dropna=True, +# values="Value").reset_index(drop=True) +# # check NaN is properly dropped and subject index is correct +# assert pec_data_df.shape[0] == np.prod(temp_df.shape) +# test1 = 
pec_data_df.iloc[np.random.randint(n_subjects),:] +# assert test1["Value"] ==\ +# temp_df[test1[1]][test1[2]][test1[3]][test1[5]][test1[6]][Subject_id.index(test1[0])] +# # Fix column names +# temp_df.columns = [convertTupleHeader(temp_df.columns[i]) for i in range(len(temp_df.columns))] + +# PEC_df = pd.concat([PEC_df,temp_df], axis=1) + +# # Add group status +# Groups = ["CTRL", "PTSD"] +# Group_status = np.array([0]*PEC_df.shape[0]) # CTRL = 0 +# Group_status[np.array([i in cases for i in PEC_df["Subject_ID"]])] = 1 # PTSD = 1 +# PEC_df.insert(1, "Group_status", Group_status) + +# # Only use PTSD patient group +# PEC_df2 = PEC_df.loc[PEC_df["Group_status"]==1,:] + +# Subject_info_cols = ["Subject_ID","Group_status"] + +# # Use gridsearch and permutations to estimate gap statistic and use it to +# # determine number of clusters and sparsity s +# # I will use 100 permutations and test 2 to 6 clusters as Zhang 2020 +# # Error when trying to determine Gap statistic for 1 cluster? Also in R package +# max_clusters = 6 +# n_sparsity_feat = 20 +# perm_res = [] +# for k in range(1,max_clusters): +# # Cannot permute with 1 cluster +# n_clusters = k+1 +# x = np.array(PEC_df2.copy().drop(Subject_info_cols, axis=1)) +# perm = pysparcl.cluster.permute_modified(x, k=n_clusters, verbose=True, +# nvals=n_sparsity_feat, nperms=100) +# perm_res.append(perm) + +# # Save the results +# with open(Feature_savepath+"PEC_drop_interpol_ch_kmeans_perm.pkl", "wb") as file: +# pickle.dump(perm_res, file) -# # Load +# # # Load +# # with open(Feature_savepath+"PEC_drop_interpol_ch_kmeans_perm.pkl", "rb") as file: +# # perm_res = pickle.load(file) + +# # Convert results to array +# perm_res_arr = np.zeros((len(perm_res)*n_sparsity_feat,4)) +# for i in range(len(perm_res)): +# _, gaps, sdgaps, wbounds, _ = perm_res[i].values() +# for i2 in range(n_sparsity_feat): +# perm_res_arr[20*i+i2,0] = i+2 # cluster size +# perm_res_arr[20*i+i2,1] = gaps[i2] # gap statistic +# perm_res_arr[20*i+i2,2] = sdgaps[i2] # gap statistic std +# perm_res_arr[20*i+i2,3] = wbounds[i2] # sparsity feature s + +# # For each sparsity s, determine best k using one-standard-error criterion +# # Meaning the cluster and sparsity is chosen for the smallest value of k for a fixed s +# # that fulfill Gap(k) >= Gap(k+1)-std(k+1) +# def one_standard_deviation_search(gaps, std): +# best_gaps = np.argmax(gaps) +# current_gaps = gaps[best_gaps] +# current_std = std[best_gaps] +# current_gaps_idx = best_gaps +# while (gaps[current_gaps_idx-1] >= current_gaps - current_std): +# if current_gaps_idx == 0: +# break +# else: +# current_gaps_idx -= 1 +# current_gaps = gaps[current_gaps_idx] +# current_std = std[current_gaps_idx] +# out = current_gaps, current_std, current_gaps_idx +# return out + +# best_ks = np.zeros((n_sparsity_feat, 2)) +# all_s = np.unique(perm_res_arr[:,3]) +# plt.figure(figsize=(12,12)) +# for i2 in range(n_sparsity_feat): +# current_s = all_s[i2] +# gaps = perm_res_arr[perm_res_arr[:,3] == current_s,1] +# std = perm_res_arr[perm_res_arr[:,3] == current_s,2] +# _, _, idx = one_standard_deviation_search(gaps, std) +# # Save to array +# best_ks[i2,0] = current_s +# best_ks[i2,1] = perm_res_arr[perm_res_arr[:,3] == current_s,0][idx] +# # Plot gap +# plt.errorbar(perm_res_arr[perm_res_arr[:,3] == current_s,0].astype("int"), +# gaps, yerr=std, capsize=5, label = np.round(current_s,3)) +# plt.title("Gap statistic for different fixed s") +# plt.legend(loc=1) +# plt.xlabel("Number of clusters") +# plt.ylabel("Gap statistic") + +# best_k = 
int(scipy.stats.mode(best_ks[:,1])[0]) + +# # Determine s using fixed k as lowest s within 1 std of max gap statistic +# # According to Witten & Tibshirani, 2010 +# best_gaps_idx = np.argmax(perm_res_arr[perm_res_arr[:,0] == best_k,1]) +# best_gaps = perm_res_arr[perm_res_arr[:,0] == best_k,1][best_gaps_idx] +# best_gaps_std = perm_res_arr[perm_res_arr[:,0] == best_k,2][best_gaps_idx] +# one_std_crit = perm_res_arr[perm_res_arr[:,0] == best_k,1]>=best_gaps-best_gaps_std + +# best_s = np.array([perm_res_arr[perm_res_arr[:,0] == best_k,3][one_std_crit][0]]) + +# # Perform clustering with k clusters +# x = np.array(PEC_df2.copy().drop(Subject_info_cols, axis=1)) +# sparcl = pysparcl.cluster.kmeans(x, k=best_k, wbounds=best_s)[0] + +# # Save the results +# with open(Feature_savepath+"PEC_drop_interpol_ch_sparse_kmeans.pkl", "wb") as file: +# pickle.dump(sparcl, file) + +# # Get overview of the features chosen and summarize feature type with countplot +# nonzero_idx = sparcl["ws"].nonzero() +# sparcl_features = PEC_df2.copy().drop(Subject_info_cols, axis=1).columns[nonzero_idx] + +# # Prepare variables +# Freq_Bands = {"delta": [1.25, 4.0], +# "theta": [4.0, 8.0], +# "alpha": [8.0, 13.0], +# "beta": [13.0, 30.0], +# "gamma": [30.0, 49.0]} +# n_freq_bands = len(Freq_Bands) +# eye_status = list(source_epochs[0].event_id.keys()) +# n_eye_status = len(eye_status) + +# sparcl_feat = [] +# sparcl_feat_counts = [] +# for e in range(n_eye_status): +# ee = eye_status[e] +# for f in range(n_freq_bands): +# ff = list(Freq_Bands.keys())[f] +# temp_feat = sparcl_features[sparcl_features.str.contains(("_"+ee))] +# temp_feat = temp_feat[temp_feat.str.contains(("_"+ff))] +# # Save to list +# sparcl_feat.append(temp_feat) +# sparcl_feat_counts.append(["{}_{}".format(ee,ff), len(temp_feat)]) + +# # Convert the list to dataframe to use in countplot +# sparcl_feat_counts_df = pd.DataFrame(columns=["Eye_status", "Freq_band"]) +# for i in range(len(sparcl_feat_counts)): +# # If this feature type does not exist, then skip it +# if sparcl_feat_counts[i][1] == 0: +# continue +# ee, ff = sparcl_feat_counts[i][0].split("_") +# counts = sparcl_feat_counts[i][1] +# temp_df = pd.DataFrame({"Eye_status":np.repeat(ee,counts), +# "Freq_band":np.repeat(ff,counts)}) +# sparcl_feat_counts_df = sparcl_feat_counts_df.append(temp_df, ignore_index=True) + +# # Fix Freq_band categorical order +# cat_type = pd.CategoricalDtype(categories=list(Freq_Bands.keys()), ordered=True) +# sparcl_feat_counts_df["Freq_band"] = sparcl_feat_counts_df["Freq_band"].astype(cat_type) + +# plt.figure(figsize=(8,8)) +# g = sns.countplot(y="Freq_band", hue="Eye_status", data=sparcl_feat_counts_df) +# plt.title("PEC Sparse K-means features") +# plt.xlabel("Number of non-zero weights") +# plt.ylabel("Frequency Band") + +# # %% Functional connectivity in source space +# # MNE implementation of PLV and wPLI is phase across trials(epochs), e.g. 
for ERPs +# # I'll use my own manually implemented PLV and wPLI across time and then average across epochs +# # Notice that the new MNE-connectivity library now also takes phase across time + +# sfreq = final_epochs[0].info["sfreq"] +# # error when using less than 5 cycles for spectrum estimation +# # 1Hz too low with epoch length of 4, thus I changed the fmin to 1.25 for delta +# Freq_Bands = {"delta": [1.25, 4.0], +# "theta": [4.0, 8.0], +# "alpha": [8.0, 13.0], +# "beta": [13.0, 30.0], +# "gamma": [30.0, 49.0]} +# n_freq_bands = len(Freq_Bands) +# freq_centers = np.array([2.5,6,10.5,21.5,40]) +# # Convert to tuples for the mne function +# fmin=tuple([list(Freq_Bands.values())[f][0] for f in range(len(Freq_Bands))]) +# fmax=tuple([list(Freq_Bands.values())[f][1] for f in range(len(Freq_Bands))]) + +# # Make linspace array for morlet waves +# freq_centers = np.arange(fmin[0],fmax[-1]+0.25,0.25) +# # Prepare Morlets +# morlets = mne.time_frequency.tfr.morlet(sfreq,freq_centers,n_cycles=3) + +# # Make freqs array for indexing +# freqs0 = [0]*n_freq_bands +# for f in range(n_freq_bands): +# freqs0[f] = freq_centers[(freq_centers>=fmin[f]) & (freq_centers<=fmax[f])] + +# # The in-built connectivity function gives an (n_channel, n_channel, freqs output +# # For loop over subject ID and eye status is implemented +# n_subjects = len(Subject_id) +# eye_status = list(final_epochs[0].event_id.keys()) +# n_eye_status = len(eye_status) +# ch_names = final_epochs[0].info["ch_names"] + +# # Load source labels +# with open("custom_aparc2009_Li_et_al_2022.pkl", "rb") as file: +# labels = pickle.load(file) +# label_names = [label.name for label in labels] +# n_sources = len(label_names) + +# # Connectivity methods +# connectivity_methods = ["coh","imcoh","plv","wpli"] +# n_con_methods=len(connectivity_methods) + +# # Number of pairwise ch connections +# n_ch_connections = scipy.special.comb(n_sources,2, exact=True, repetition=False) + +# # Load source time series # with open(Feature_savepath+"STCs_each_epoch_drop_interpol_ch_fix_snr.pkl", "rb") as file: # STCs_list = pickle.load(file) -# Average over eye status -eye_status = list(source_epochs[0].event_id.keys()) -n_eye_status = len(eye_status) -pec_data = np.zeros((n_subjects,n_eye_status,n_roi,n_roi,n_freq_bands)) -for i in range(n_subjects): - # Get indices for eyes open and closed - EC_index = source_epochs[i].events[:,2] == 1 - EO_index = source_epochs[i].events[:,2] == 2 - # Average over the indices and save to array - pec_data[i,0] = np.mean(PEC_data_list[i][EC_index], axis=0) - pec_data[i,1] = np.mean(PEC_data_list[i][EO_index], axis=0) - # Only use the lower diagonal as the diagonal should be 0 (or very small due to numerical errors) - # And it is symmetric - for f in range(n_freq_bands): - pec_data[i,0,:,:,f] = np.tril(pec_data[i,0,:,:,f],k=-1) - pec_data[i,1,:,:,f] = np.tril(pec_data[i,1,:,:,f],k=-1) - -# Also save as dataframe format for feature selection -# Convert to Pandas dataframe -# The dimensions will each be a column with numbers and the last column will be the actual values -arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, pec_data.shape), indexing="ij"))) + [pec_data.ravel()]) -pec_data_df = pd.DataFrame(arr, columns = ["Subject_ID", "Eye_status", "chx", "chy", "Freq_band", "Value"]) -# Change from numerical coding to actual values -eye_status = list(source_epochs[0].event_id.keys()) -freq_bands_name = list(Freq_Bands.keys()) -label_names = [label.name for label in labels] - -index_values = 
[Subject_id,eye_status,label_names,label_names,freq_bands_name] -for col in range(len(index_values)): - col_name = pec_data_df.columns[col] - for shape in range(pec_data.shape[col]): # notice not dataframe but the array - pec_data_df.loc[pec_data_df.iloc[:,col] == shape,col_name]\ - = index_values[col][shape] - -# Add group status -Group_status = np.array(["CTRL"]*len(pec_data_df["Subject_ID"])) -Group_status[np.array([i in cases for i in pec_data_df["Subject_ID"]])] = "PTSD" -# Add to dataframe -pec_data_df.insert(3, "Group_status", Group_status) - -# Remove all diagonal and upper-matrix entries by removing zeros -pec_data_df = pec_data_df.iloc[pec_data_df["Value"].to_numpy().nonzero()] - -# Save df -pec_data_df.to_pickle(os.path.join(Feature_savepath,"pec_data_drop_interpol_ch_df.pkl")) - -# %% Sparse clustering of PEC for subtyping PTSD group -# They did it for both eye status together, so all data in one matrix -# Load PEC df -# pec_data_df = pd.read_pickle(os.path.join(Feature_savepath,"pec_data_df.pkl")) -pec_data_df = pd.read_pickle(os.path.join(Feature_savepath,"pec_data_drop_interpol_ch_df.pkl")) - -# Convert to wide format -# Make function to add measurement column for indexing -def add_measurement_column(df, measurement = "Text"): - dummy_variable = [measurement]*df.shape[0] - df.insert(1, "Measurement", dummy_variable) - return df -# Make function to convert column tuple to string -def convertTupleHeader(header): - header = list(header) - str = "_".join(header) - return str - -# Prepare overall dataframe -PEC_df = pd.DataFrame(Subject_id, columns = ["Subject_ID"]) -# Add PEC -pec_data_df = add_measurement_column(pec_data_df, "PEC") -temp_df = pec_data_df.pivot_table(index="Subject_ID",columns=["Measurement", - "Eye_status", "chx", "chy", - "Freq_band"], dropna=True, - values="Value").reset_index(drop=True) -# check NaN is properly dropped and subject index is correct -assert pec_data_df.shape[0] == np.prod(temp_df.shape) -test1 = pec_data_df.iloc[np.random.randint(n_subjects),:] -assert test1["Value"] ==\ - temp_df[test1[1]][test1[2]][test1[3]][test1[5]][test1[6]][Subject_id.index(test1[0])] -# Fix column names -temp_df.columns = [convertTupleHeader(temp_df.columns[i]) for i in range(len(temp_df.columns))] - -PEC_df = pd.concat([PEC_df,temp_df], axis=1) - -# Add group status -Groups = ["CTRL", "PTSD"] -Group_status = np.array([0]*PEC_df.shape[0]) # CTRL = 0 -Group_status[np.array([i in cases for i in PEC_df["Subject_ID"]])] = 1 # PTSD = 1 -PEC_df.insert(1, "Group_status", Group_status) - -# Only use PTSD patient group -PEC_df2 = PEC_df.loc[PEC_df["Group_status"]==1,:] - -Subject_info_cols = ["Subject_ID","Group_status"] - -# Use gridsearch and permutations to estimate gap statistic and use it to -# determine number of clusters and sparsity s -# I will use 100 permutations and test 2 to 6 clusters as Zhang 2020 -# Error when trying to determine Gap statistic for 1 cluster? 
Also in R package -max_clusters = 6 -n_sparsity_feat = 20 -perm_res = [] -for k in range(1,max_clusters): - # Cannot permute with 1 cluster - n_clusters = k+1 - x = np.array(PEC_df2.copy().drop(Subject_info_cols, axis=1)) - perm = pysparcl.cluster.permute_modified(x, k=n_clusters, verbose=True, - nvals=n_sparsity_feat, nperms=100) - perm_res.append(perm) - -# Save the results -with open(Feature_savepath+"PEC_drop_interpol_ch_kmeans_perm.pkl", "wb") as file: - pickle.dump(perm_res, file) - -# # Load -# with open(Feature_savepath+"PEC_drop_interpol_ch_kmeans_perm.pkl", "rb") as file: -# perm_res = pickle.load(file) - -# Convert results to array -perm_res_arr = np.zeros((len(perm_res)*n_sparsity_feat,4)) -for i in range(len(perm_res)): - _, gaps, sdgaps, wbounds, _ = perm_res[i].values() - for i2 in range(n_sparsity_feat): - perm_res_arr[20*i+i2,0] = i+2 # cluster size - perm_res_arr[20*i+i2,1] = gaps[i2] # gap statistic - perm_res_arr[20*i+i2,2] = sdgaps[i2] # gap statistic std - perm_res_arr[20*i+i2,3] = wbounds[i2] # sparsity feature s - -# For each sparsity s, determine best k using one-standard-error criterion -# Meaning the cluster and sparsity is chosen for the smallest value of k for a fixed s -# that fulfill Gap(k) >= Gap(k+1)-std(k+1) -def one_standard_deviation_search(gaps, std): - best_gaps = np.argmax(gaps) - current_gaps = gaps[best_gaps] - current_std = std[best_gaps] - current_gaps_idx = best_gaps - while (gaps[current_gaps_idx-1] >= current_gaps - current_std): - if current_gaps_idx == 0: - break - else: - current_gaps_idx -= 1 - current_gaps = gaps[current_gaps_idx] - current_std = std[current_gaps_idx] - out = current_gaps, current_std, current_gaps_idx - return out - -best_ks = np.zeros((n_sparsity_feat, 2)) -all_s = np.unique(perm_res_arr[:,3]) -plt.figure(figsize=(12,12)) -for i2 in range(n_sparsity_feat): - current_s = all_s[i2] - gaps = perm_res_arr[perm_res_arr[:,3] == current_s,1] - std = perm_res_arr[perm_res_arr[:,3] == current_s,2] - _, _, idx = one_standard_deviation_search(gaps, std) - # Save to array - best_ks[i2,0] = current_s - best_ks[i2,1] = perm_res_arr[perm_res_arr[:,3] == current_s,0][idx] - # Plot gap - plt.errorbar(perm_res_arr[perm_res_arr[:,3] == current_s,0].astype("int"), - gaps, yerr=std, capsize=5, label = np.round(current_s,3)) -plt.title("Gap statistic for different fixed s") -plt.legend(loc=1) -plt.xlabel("Number of clusters") -plt.ylabel("Gap statistic") - -best_k = int(scipy.stats.mode(best_ks[:,1])[0]) - -# Determine s using fixed k as lowest s within 1 std of max gap statistic -# According to Witten & Tibshirani, 2010 -best_gaps_idx = np.argmax(perm_res_arr[perm_res_arr[:,0] == best_k,1]) -best_gaps = perm_res_arr[perm_res_arr[:,0] == best_k,1][best_gaps_idx] -best_gaps_std = perm_res_arr[perm_res_arr[:,0] == best_k,2][best_gaps_idx] -one_std_crit = perm_res_arr[perm_res_arr[:,0] == best_k,1]>=best_gaps-best_gaps_std - -best_s = np.array([perm_res_arr[perm_res_arr[:,0] == best_k,3][one_std_crit][0]]) - -# Perform clustering with k clusters -x = np.array(PEC_df2.copy().drop(Subject_info_cols, axis=1)) -sparcl = pysparcl.cluster.kmeans(x, k=best_k, wbounds=best_s)[0] - -# Save the results -with open(Feature_savepath+"PEC_drop_interpol_ch_sparse_kmeans.pkl", "wb") as file: - pickle.dump(sparcl, file) - -# Get overview of the features chosen and summarize feature type with countplot -nonzero_idx = sparcl["ws"].nonzero() -sparcl_features = PEC_df2.copy().drop(Subject_info_cols, axis=1).columns[nonzero_idx] - -# Prepare variables -Freq_Bands = 
{"delta": [1.25, 4.0], - "theta": [4.0, 8.0], - "alpha": [8.0, 13.0], - "beta": [13.0, 30.0], - "gamma": [30.0, 49.0]} -n_freq_bands = len(Freq_Bands) -eye_status = list(source_epochs[0].event_id.keys()) -n_eye_status = len(eye_status) - -sparcl_feat = [] -sparcl_feat_counts = [] -for e in range(n_eye_status): - ee = eye_status[e] - for f in range(n_freq_bands): - ff = list(Freq_Bands.keys())[f] - temp_feat = sparcl_features[sparcl_features.str.contains(("_"+ee))] - temp_feat = temp_feat[temp_feat.str.contains(("_"+ff))] - # Save to list - sparcl_feat.append(temp_feat) - sparcl_feat_counts.append(["{}_{}".format(ee,ff), len(temp_feat)]) - -# Convert the list to dataframe to use in countplot -sparcl_feat_counts_df = pd.DataFrame(columns=["Eye_status", "Freq_band"]) -for i in range(len(sparcl_feat_counts)): - # If this feature type does not exist, then skip it - if sparcl_feat_counts[i][1] == 0: - continue - ee, ff = sparcl_feat_counts[i][0].split("_") - counts = sparcl_feat_counts[i][1] - temp_df = pd.DataFrame({"Eye_status":np.repeat(ee,counts), - "Freq_band":np.repeat(ff,counts)}) - sparcl_feat_counts_df = sparcl_feat_counts_df.append(temp_df, ignore_index=True) - -# Fix Freq_band categorical order -cat_type = pd.CategoricalDtype(categories=list(Freq_Bands.keys()), ordered=True) -sparcl_feat_counts_df["Freq_band"] = sparcl_feat_counts_df["Freq_band"].astype(cat_type) - -plt.figure(figsize=(8,8)) -g = sns.countplot(y="Freq_band", hue="Eye_status", data=sparcl_feat_counts_df) -plt.title("PEC Sparse K-means features") -plt.xlabel("Number of non-zero weights") -plt.ylabel("Frequency Band") - -# %% Functional connectivity in source space -# MNE implementation of PLV and wPLI is phase across trials(epochs), e.g. for ERPs -# I'll use my own manually implemented PLV and wPLI across time and then average across epochs -# Notice that the new MNE-connectivity library now also takes phase across time - -sfreq = final_epochs[0].info["sfreq"] -# error when using less than 5 cycles for spectrum estimation -# 1Hz too low with epoch length of 4, thus I changed the fmin to 1.25 for delta -Freq_Bands = {"delta": [1.25, 4.0], - "theta": [4.0, 8.0], - "alpha": [8.0, 13.0], - "beta": [13.0, 30.0], - "gamma": [30.0, 49.0]} -n_freq_bands = len(Freq_Bands) -freq_centers = np.array([2.5,6,10.5,21.5,40]) -# Convert to tuples for the mne function -fmin=tuple([list(Freq_Bands.values())[f][0] for f in range(len(Freq_Bands))]) -fmax=tuple([list(Freq_Bands.values())[f][1] for f in range(len(Freq_Bands))]) - -# Make linspace array for morlet waves -freq_centers = np.arange(fmin[0],fmax[-1]+0.25,0.25) -# Prepare Morlets -morlets = mne.time_frequency.tfr.morlet(sfreq,freq_centers,n_cycles=3) - -# Make freqs array for indexing -freqs0 = [0]*n_freq_bands -for f in range(n_freq_bands): - freqs0[f] = freq_centers[(freq_centers>=fmin[f]) & (freq_centers<=fmax[f])] - -# The in-built connectivity function gives an (n_channel, n_channel, freqs output -# For loop over subject ID and eye status is implemented -n_subjects = len(Subject_id) -eye_status = list(final_epochs[0].event_id.keys()) -n_eye_status = len(eye_status) -ch_names = final_epochs[0].info["ch_names"] - -# Load source labels -with open("custom_aparc2009_Li_et_al_2022.pkl", "rb") as file: - labels = pickle.load(file) -label_names = [label.name for label in labels] -n_sources = len(label_names) - -# Connectivity methods -connectivity_methods = ["coh","imcoh","plv","wpli"] -n_con_methods=len(connectivity_methods) - -# Number of pairwise ch connections -n_ch_connections = 
scipy.special.comb(n_sources,2, exact=True, repetition=False) - -# Load source time series -with open(Feature_savepath+"STCs_each_epoch_drop_interpol_ch_fix_snr.pkl", "rb") as file: - STCs_list = pickle.load(file) - -# I made my own slightly-optimized PLV & WPLI function -# Version 2 based on Filter + Hilbert instead of Morlets -def calculate_PLV_WPLI_across_time(data): - n_ch, n_time0 = data.shape - x = data.copy() - # Filter the signal in the different freq bands - con_array0 = np.zeros((2,n_ch,n_ch,n_freq_bands)) - # con_array0[con_array0==0] = np.nan - for fname, frange in Freq_Bands.items(): - fmin, fmax = [float(interval) for interval in frange] - signal_filtered = mne.filter.filter_data(x, sfreq, fmin, fmax, - fir_design="firwin", verbose=0) - # Filtering on finite signals will yield very low values for first - # and last timepoint, which can create outliers. E.g. 1e-29 compared to 1e-14 - # This systematic error is removed by removing the first and last timepoint - signal_filtered = signal_filtered[:,1:-1] - # Hilbert transform to get complex signal - analytic_signal = scipy.signal.hilbert(signal_filtered) - # Calculate for the lower diagnonal only as it is symmetric - for ch_r in range(n_ch): - for ch_c in range(n_ch): - if ch_r>ch_c: - # ========================================================================= - # PLV over time correspond to mean across time of the absolute value of - # the circular length of the relative phases. So PLV will be 1 if - # the phases of 2 signals maintain a constant lag - # In equational form: PLV = 1/N * |sum(e^i(phase1-phase2))| - # In code: abs(mean(exp(1i*phase_diff))) - # ========================================================================= - # The real part correspond to the amplitude and the imaginary part can be used to calculate the phase - phase_diff = np.angle(analytic_signal[ch_r])-np.angle(analytic_signal[ch_c]) - # Convert phase difference to complex part i(phase1-phase2) - phase_diff_im = 0*phase_diff+1j*phase_diff - # Take the exponential, then the mean followed by absolute value - PLV = np.abs(np.mean(np.exp(phase_diff_im))) - # Save to array - con_array0[0,ch_r,ch_c,list(Freq_Bands.keys()).index(fname)] = PLV - # ========================================================================= - # PLI over time correspond to the sign of the sine of relative phase - # differences. So PLI will be 1 if one signal is always leading or - # lagging behind the other signal. But it is insensitive to changes in - # relative phase, as long as it is the same signal that leads. - # If 2 signals are almost in phase, they might shift between lead/lag - # due to small fluctuations from noise. This would lead to unstable - # estimation of "phase" synchronisation. 
- # The wPLI tries to correct for this by weighting the PLI with the - # magnitude of the lag, to attenuate noise sources giving rise to - # near zero phase lag "synchronization" - # In equational form: WPLI = |E{|phase_diff|*sign(phase_diff)}| / E{|phase_diff|} - # ========================================================================= - # Calculate the magnitude of phase differences - phase_diff_mag = np.abs(np.sin(phase_diff)) - # Calculate the signed phase difference (PLI) - sign_phase_diff = np.sign(np.sin(phase_diff)) - # Calculate the nominator (abs and average across time) - WPLI_nominator = np.abs(np.mean(phase_diff_mag*sign_phase_diff)) - # Calculate denominator for normalization - WPLI_denom = np.mean(phase_diff_mag) - # Calculate WPLI - WPLI = WPLI_nominator/WPLI_denom - # Save to array - con_array0[1,ch_r,ch_c,list(Freq_Bands.keys()).index(fname)] = WPLI - return con_array0 - -# Pre-allocatate memory -con_data = np.zeros((n_con_methods,n_subjects,n_eye_status,n_sources,n_sources,n_freq_bands)) -n_epochs_matrix = np.zeros((n_subjects,n_eye_status)) - -# Get current time -c_time = time.localtime() -c_time = time.strftime("%H:%M:%S", c_time) -print(c_time) - -def connectivity_estimation(i): - con_data0 = np.zeros((n_con_methods,n_eye_status,n_sources,n_sources,n_freq_bands)) - con_data0[con_data0==0] = np.nan - n_epochs_matrix0 = np.zeros((n_eye_status)) - for e in range(n_eye_status): - ee = eye_status[e] - eye_idx = final_epochs[i].events[:,2] == e+1 # EC = 1 and EO = 2 - # Get source time series - temp_STC = STCs_list[i][eye_idx] - # Calculate the coherence and ImgCoh for the given subject and eye status - con, freqs, times, n_epochs, n_tapers = spectral_connectivity( - temp_STC, method=connectivity_methods[0:2], - mode="multitaper", sfreq=sfreq, fmin=fmin, fmax=fmax, - faverage=True, verbose=0) - # Save the results in array - con_data0[0,e,:,:,:] = con[0] # coherence - con_data0[1,e,:,:,:] = np.abs(con[1]) # Absolute value of ImgCoh to reflect magnitude of ImgCoh +# # I made my own slightly-optimized PLV & WPLI function +# # Version 2 based on Filter + Hilbert instead of Morlets +# def calculate_PLV_WPLI_across_time(data): +# n_ch, n_time0 = data.shape +# x = data.copy() +# # Filter the signal in the different freq bands +# con_array0 = np.zeros((2,n_ch,n_ch,n_freq_bands)) +# # con_array0[con_array0==0] = np.nan +# for fname, frange in Freq_Bands.items(): +# fmin, fmax = [float(interval) for interval in frange] +# signal_filtered = mne.filter.filter_data(x, sfreq, fmin, fmax, +# fir_design="firwin", verbose=0) +# # Filtering on finite signals will yield very low values for first +# # and last timepoint, which can create outliers. E.g. 1e-29 compared to 1e-14 +# # This systematic error is removed by removing the first and last timepoint +# signal_filtered = signal_filtered[:,1:-1] +# # Hilbert transform to get complex signal +# analytic_signal = scipy.signal.hilbert(signal_filtered) +# # Calculate for the lower diagnonal only as it is symmetric +# for ch_r in range(n_ch): +# for ch_c in range(n_ch): +# if ch_r>ch_c: +# # ========================================================================= +# # PLV over time correspond to mean across time of the absolute value of +# # the circular length of the relative phases. 
So PLV will be 1 if +# # the phases of 2 signals maintain a constant lag +# # In equational form: PLV = 1/N * |sum(e^i(phase1-phase2))| +# # In code: abs(mean(exp(1i*phase_diff))) +# # ========================================================================= +# # The real part correspond to the amplitude and the imaginary part can be used to calculate the phase +# phase_diff = np.angle(analytic_signal[ch_r])-np.angle(analytic_signal[ch_c]) +# # Convert phase difference to complex part i(phase1-phase2) +# phase_diff_im = 0*phase_diff+1j*phase_diff +# # Take the exponential, then the mean followed by absolute value +# PLV = np.abs(np.mean(np.exp(phase_diff_im))) +# # Save to array +# con_array0[0,ch_r,ch_c,list(Freq_Bands.keys()).index(fname)] = PLV +# # ========================================================================= +# # PLI over time correspond to the sign of the sine of relative phase +# # differences. So PLI will be 1 if one signal is always leading or +# # lagging behind the other signal. But it is insensitive to changes in +# # relative phase, as long as it is the same signal that leads. +# # If 2 signals are almost in phase, they might shift between lead/lag +# # due to small fluctuations from noise. This would lead to unstable +# # estimation of "phase" synchronisation. +# # The wPLI tries to correct for this by weighting the PLI with the +# # magnitude of the lag, to attenuate noise sources giving rise to +# # near zero phase lag "synchronization" +# # In equational form: WPLI = |E{|phase_diff|*sign(phase_diff)}| / E{|phase_diff|} +# # ========================================================================= +# # Calculate the magnitude of phase differences +# phase_diff_mag = np.abs(np.sin(phase_diff)) +# # Calculate the signed phase difference (PLI) +# sign_phase_diff = np.sign(np.sin(phase_diff)) +# # Calculate the nominator (abs and average across time) +# WPLI_nominator = np.abs(np.mean(phase_diff_mag*sign_phase_diff)) +# # Calculate denominator for normalization +# WPLI_denom = np.mean(phase_diff_mag) +# # Calculate WPLI +# WPLI = WPLI_nominator/WPLI_denom +# # Save to array +# con_array0[1,ch_r,ch_c,list(Freq_Bands.keys()).index(fname)] = WPLI +# return con_array0 + +# # Pre-allocatate memory +# con_data = np.zeros((n_con_methods,n_subjects,n_eye_status,n_sources,n_sources,n_freq_bands)) +# n_epochs_matrix = np.zeros((n_subjects,n_eye_status)) + +# # Get current time +# c_time = time.localtime() +# c_time = time.strftime("%H:%M:%S", c_time) +# print(c_time) + +# def connectivity_estimation(i): +# con_data0 = np.zeros((n_con_methods,n_eye_status,n_sources,n_sources,n_freq_bands)) +# con_data0[con_data0==0] = np.nan +# n_epochs_matrix0 = np.zeros((n_eye_status)) +# for e in range(n_eye_status): +# ee = eye_status[e] +# eye_idx = final_epochs[i].events[:,2] == e+1 # EC = 1 and EO = 2 +# # Get source time series +# temp_STC = STCs_list[i][eye_idx] +# # Calculate the coherence and ImgCoh for the given subject and eye status +# con, freqs, times, n_epochs, n_tapers = spectral_connectivity( +# temp_STC, method=connectivity_methods[0:2], +# mode="multitaper", sfreq=sfreq, fmin=fmin, fmax=fmax, +# faverage=True, verbose=0) +# # Save the results in array +# con_data0[0,e,:,:,:] = con[0] # coherence +# con_data0[1,e,:,:,:] = np.abs(con[1]) # Absolute value of ImgCoh to reflect magnitude of ImgCoh - # Calculate PLV and wPLI for each epoch and then average - n_epochs0 = temp_STC.shape[0] - con_data1 = 
np.zeros((len(connectivity_methods[2:]),n_epochs0,n_sources,n_sources,n_freq_bands)) - for epoch in range(n_epochs0): - # First the data is retrieved and epoch axis dropped - temp_data = temp_STC[epoch,:,:] - # PLV and WPLI value is calculated across timepoints in each freq band - PLV_WPLI_con = calculate_PLV_WPLI_across_time(temp_data) - # Save results - con_data1[0,epoch,:,:,:] = PLV_WPLI_con[0] # phase locking value - con_data1[1,epoch,:,:,:] = PLV_WPLI_con[1] # weighted phase lag index - # Take average across epochs for PLV and wPLI - con_data2 = np.mean(con_data1,axis=1) - # Save to final array - con_data0[2,e,:,:,:] = con_data2[0] # phase locking value - con_data0[3,e,:,:,:] = con_data2[1] # weighted phase lag index - n_epochs_matrix0[e] = n_epochs +# # Calculate PLV and wPLI for each epoch and then average +# n_epochs0 = temp_STC.shape[0] +# con_data1 = np.zeros((len(connectivity_methods[2:]),n_epochs0,n_sources,n_sources,n_freq_bands)) +# for epoch in range(n_epochs0): +# # First the data is retrieved and epoch axis dropped +# temp_data = temp_STC[epoch,:,:] +# # PLV and WPLI value is calculated across timepoints in each freq band +# PLV_WPLI_con = calculate_PLV_WPLI_across_time(temp_data) +# # Save results +# con_data1[0,epoch,:,:,:] = PLV_WPLI_con[0] # phase locking value +# con_data1[1,epoch,:,:,:] = PLV_WPLI_con[1] # weighted phase lag index +# # Take average across epochs for PLV and wPLI +# con_data2 = np.mean(con_data1,axis=1) +# # Save to final array +# con_data0[2,e,:,:,:] = con_data2[0] # phase locking value +# con_data0[3,e,:,:,:] = con_data2[1] # weighted phase lag index +# n_epochs_matrix0[e] = n_epochs - print("{} out of {} finished".format(i+1,n_subjects)) - return i, con_data0, n_epochs_matrix0 - -with concurrent.futures.ProcessPoolExecutor(max_workers=16) as executor: - for i, con_result, n_epochs_mat in executor.map(connectivity_estimation, range(n_subjects)): # Function and arguments - con_data[:,i,:,:,:,:] = con_result - n_epochs_matrix[i] = n_epochs_mat - -# Get current time -c_time = time.localtime() -c_time = time.strftime("%H:%M:%S", c_time) -print(c_time) - -# Save the results -np.save(Feature_savepath+"Source_drop_interpol_ch_connectivity_measures_data.npy", con_data) # (con_measure,subject,eye,ch,ch,freq) - -# Also save as dataframe format for feature selection -# Convert to Pandas dataframe -# The dimensions will each be a column with numbers and the last column will be the actual values -arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, con_data.shape), indexing="ij"))) + [con_data.ravel()]) -con_data_df = pd.DataFrame(arr, columns = ["Con_measurement", "Subject_ID", "Eye_status", "chx", "chy", "Freq_band", "Value"]) -# Change from numerical coding to actual values -eye_status = list(final_epochs[0].event_id.keys()) -freq_bands_name = list(Freq_Bands.keys()) - -index_values = [connectivity_methods,Subject_id,eye_status,label_names,label_names,freq_bands_name] -for col in range(len(index_values)): - col_name = con_data_df.columns[col] - for shape in range(con_data.shape[col]): # notice not dataframe but the array - con_data_df.loc[con_data_df.iloc[:,col] == shape,col_name]\ - = index_values[col][shape] - -# Add group status -Group_status = np.array(["CTRL"]*len(con_data_df["Subject_ID"])) -Group_status[np.array([i in cases for i in con_data_df["Subject_ID"]])] = "PTSD" -# Add to dataframe -con_data_df.insert(3, "Group_status", Group_status) - -# Remove all diagonal and upper-matrix entries -con_data_df = 
con_data_df.iloc[con_data_df["Value"].to_numpy().nonzero()] - -# Save df -con_data_df.to_pickle(os.path.join(Feature_savepath,"con_data_source_drop_interpol_df.pkl")) - -# %% Estimate Granger's Causality in source space -# Load source labels -with open("custom_aparc2009_Li_et_al_2022.pkl", "rb") as file: - labels = pickle.load(file) -label_names = [label.name for label in labels] -n_sources = len(label_names) - -# Load source time series -with open(Feature_savepath+"STCs_each_epoch_drop_interpol_ch_fix_snr.pkl", "rb") as file: - STCs_list = pickle.load(file) - -# Granger's causality might be influenced by volume conduction, thus working with CSD might be beneficial -# But since I already used source modelling to alleviate this problem I will not apply CSD -# Barrett et al, 2012 also do not apply CSD on source GC - -# GC assumes stationarity, thus I will test for stationarity using ADF test -# The null hypothesis of ADF is that it has unit root, i.e. is non-stationary -# I will test how many can reject the null hypothesis, i.e. are stationary - -# Due to the low numerical values in STC the ADF test is unstable, thus I multiply it to be around 1e0 - -stationary_test_arr = [0]*n_subjects -n_tests = [0]*n_subjects -for i in range(n_subjects): - # Get data - data_arr = STCs_list[i] - # Get shape - n_epochs, n_channels, n_timepoints = data_arr.shape - # Create array for indices to print out progress - ep_progress_idx = np.arange(n_epochs//5,n_epochs,n_epochs//5) - # Calculate number of tests performed for each subject - n_tests[i] = n_epochs*n_channels - # Prepare empty array (with 2's as 0 and 1 will be used) - stationary_test_arr0 = np.zeros((n_epochs,n_channels))+2 # make array of 2's - for ep in range(n_epochs): - for c in range(n_channels): - ADF = adfuller(data_arr[ep,c,:]*1e14) # multilying with a constant does not change ADF, but helps against numerical instability - p_value = ADF[1] - if p_value < 0.05: - stationary_test_arr0[ep,c] = True # Stationary set to 1 - else: - stationary_test_arr0[ep,c] = False # Non-stationary set to 0 - # Print partial progress - if len(np.where(ep_progress_idx==ep)[0]) > 0: - print("Finished epoch number: {} out of {}".format(ep,n_epochs)) - # Indices that were not tested - no_test_idx = np.where(stationary_test_arr0==2)[0] - if len(no_test_idx) > 0: - print("An unexpected error occurred and {} was not tested".format(no_test_idx)) - # Save to list - stationary_test_arr[i] = stationary_test_arr0 - # Print progress - print("Finished subject {} out of {}".format(i+1,n_subjects)) - -with open(Stat_savepath+"Source_drop_interpol_GC_stationarity_tests.pkl", "wb") as filehandle: - # The data is stored as binary data stream - pickle.dump(stationary_test_arr, filehandle) - -# I used a threshold of 0.05 -# This means that on average I would expect 5% false positives among the tests that showed significance for stationarity -ratio_stationary = [0]*n_subjects -for i in range(n_subjects): - # Ratio of tests that showed stationarity - ratio_stationary[i] = np.sum(stationary_test_arr[i])/n_tests[i] - -print("Ratio of stationary time series: {0:.3f}".format(np.mean(ratio_stationary))) # 88% - -# The pre-processing have already ensured that most of the data fulfills the stationarity assumption. 
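Editor's aside: the ADF screening in the block above is dense, so here is a minimal, self-contained sketch of the same idea on synthetic data. It is not part of the original pipeline; the array shape and the name epoch_data are invented for illustration, and only statsmodels.tsa.stattools.adfuller (the function already used above) is assumed. The rescaling by a constant mirrors the numerical-stability trick noted in the comments.

import numpy as np
from statsmodels.tsa.stattools import adfuller

rng = np.random.default_rng(42)
n_epochs, n_channels, n_timepoints = 3, 4, 500
# Synthetic "source time courses" at a deliberately tiny amplitude (~1e-14)
epoch_data = rng.standard_normal((n_epochs, n_channels, n_timepoints)) * 1e-14

alpha = 0.05
is_stationary = np.zeros((n_epochs, n_channels), dtype=bool)
for ep in range(n_epochs):
    for ch in range(n_channels):
        # Rescale towards ~1e0 before testing; a constant factor does not change
        # the ADF statistic, but it avoids numerical instability
        adf_stat, p_value = adfuller(epoch_data[ep, ch, :] * 1e14)[:2]
        # Rejecting the unit-root null at alpha counts the series as stationary
        is_stationary[ep, ch] = p_value < alpha

print("Ratio of series flagged stationary: {:.3f}".format(is_stationary.mean()))

With pure white noise essentially every series should be flagged stationary; the ~88% reported in the comments for the real source estimates is interpreted there as the pre-processing having ensured stationarity for most of the data.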
- -# Divide the data into eyes closed and open -ch_names = label_names -n_channels = len(ch_names) - -STC_eye_data = [] -for i in range(n_subjects): - # Get index for eyes open and eyes closed - EC_index = final_epochs[i].events[:,2] == 1 - EO_index = final_epochs[i].events[:,2] == 2 - # Get the data - EC_epoch_data = STCs_list[i][EC_index,:,:] # eye index - EO_epoch_data = STCs_list[i][EO_index,:,:] - # Save to list - STC_eye_data.append([EC_epoch_data, EO_epoch_data]) - -# Make each epoch a TimeSeries object -# Input for TimeSeries is: (ch, time) -eye_status = list(final_epochs[0].event_id.keys()) -n_eye_status = len(eye_status) -sfreq = final_epochs[0].info["sfreq"] - -Timeseries_data = [] -for i in range(n_subjects): - temp_list1 = [] - for e in range(n_eye_status): - temp_list2 = [] - n_epochs = STC_eye_data[i][e].shape[0] - for ep in range(n_epochs): - # Convert to TimeSeries - time_series = nts.TimeSeries(STC_eye_data[i][e][ep,:,:], sampling_rate=sfreq) - # Save the object - temp_list2.append(time_series) - # Save the timeseries across eye status - temp_list1.append(temp_list2) - # Save the timeseries across subjects - Timeseries_data.append(temp_list1) # output [subject][eye][epoch](ch,time) - -# Test multiple specified model orders of AR models, each combination has its own model -m_orders = np.linspace(1,25,25) # the model orders tested -m_orders = np.round(m_orders) -n_timepoints = len(Timeseries_data[0][0][0]) -n_ch_combinations = scipy.special.comb(n_channels,2, exact=True, repetition=False) - -# To reduce computation time I only test representative epochs (1 from each 1 min session) -# There will be 5 epochs from eyes closed and 5 from eyes open -n_rep_epoch = 5 -# The subjects have different number of epochs due to dropped epochs -gaps_trials_idx = np.load("Gaps_trials_idx.npy") # time_points between sessions -# I convert the gap time points to epoch number used as representative epoch -epoch_idx = np.zeros((n_subjects,n_eye_status,n_rep_epoch), dtype=int) # prepare array -epoch_idx[:,:,0:4] = np.round(gaps_trials_idx/n_timepoints,0)-8 # take random epoch from sessions 1 to 4 -epoch_idx[:,:,4] = np.round(gaps_trials_idx[:,:,3]/n_timepoints,0)+5 # take random epoch from session 5 - -# Checking if all epoch idx exists -for i in range(n_subjects): - EC_index = final_epochs[i].events[:,2] == 1 - EO_index = final_epochs[i].events[:,2] == 2 - assert np.sum(EC_index) >= epoch_idx[i,0,4] - assert np.sum(EO_index) >= epoch_idx[i,1,4] - -# Prepare model order estimation -AIC_arr = np.zeros((len(m_orders),n_subjects,n_eye_status,n_rep_epoch,n_ch_combinations)) -BIC_arr = np.zeros((len(m_orders),n_subjects,n_eye_status,n_rep_epoch,n_ch_combinations)) - -def GC_model_order_est(i): - AIC_arr0 = np.zeros((len(m_orders),n_eye_status,n_rep_epoch,n_ch_combinations)) - BIC_arr0 = np.zeros((len(m_orders),n_eye_status,n_rep_epoch,n_ch_combinations)) - for e in range(n_eye_status): - n_epochs = STC_eye_data[i][e].shape[0] - N_total = n_timepoints*n_epochs # total number of datapoints for specific eye condition - for ep in range(n_rep_epoch): - epp = epoch_idx[i,e,ep] - for o in range(len(m_orders)): - order = int(m_orders[o]) - # Make the Granger Causality object - GCA1 = nta.GrangerAnalyzer(Timeseries_data[i][e][epp-1], order=order, - n_freqs=2000) - for c in range(n_ch_combinations): - # Retrieve error covariance matrix for all combinations - ecov = np.array(list(GCA1.error_cov.values())) - # Calculate AIC - AIC = ntsu.akaike_information_criterion(ecov[c,:,:], p = n_channels, - m=order, 
Ntotal=N_total) - # Calculate BIC - BIC = ntsu.bayesian_information_criterion(ecov[c,:,:], p = n_channels, - m=order, Ntotal=N_total) - # Save the information criterions - AIC_arr0[o,e,ep,c] = AIC - BIC_arr0[o,e,ep,c] = BIC - - print("{} out of {} finished testing".format(i+1,n_subjects)) - return i, AIC_arr0, BIC_arr0 - -# Get current time -c_time1 = time.localtime() -c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1) -print(c_time1) - -with concurrent.futures.ProcessPoolExecutor() as executor: - for i, AIC_result, BIC_result in executor.map(GC_model_order_est, range(n_subjects)): # Function and arguments - AIC_arr[:,i] = AIC_result - BIC_arr[:,i] = BIC_result - -# Get current time -c_time2 = time.localtime() -c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2) -print("Started", c_time1, "\nCurrent Time",c_time2) - -# Save the AIC and BIC results -np.save(Feature_savepath+"AIC_Source_drop_interpol_GC_model_order.npy", AIC_arr) # (m. order, subject, eye, epoch, combination) -np.save(Feature_savepath+"BIC_Source_drop_interpol_GC_model_order.npy", BIC_arr) # (m. order, subject, eye, epoch, combination) - -# Load data -AIC_arr = np.load(Feature_savepath+"AIC_Source_drop_interpol_GC_model_order.npy") -BIC_arr = np.load(Feature_savepath+"BIC_Source_drop_interpol_GC_model_order.npy") - -# Average across all subjects, eye status, epochs and combinations -plt.figure(figsize=(8,6)) -plt.plot(m_orders, np.nanmean(AIC_arr, axis=(1,2,3,4)), label="AIC") -plt.plot(m_orders, np.nanmean(BIC_arr, axis=(1,2,3,4)), label="BIC") -plt.title("Average information criteria value") -plt.xlabel("Model order (Lag)") -plt.legend() - -np.sum(np.isnan(AIC_arr))/AIC_arr.size # around 0.07% NaN due to non-convergence -np.sum(np.isnan(BIC_arr))/BIC_arr.size # around 0.07% NaN due to non-convergence - -# If we look at each subject -mean_subject_AIC = np.nanmean(AIC_arr, axis=(2,3,4)) - -plt.figure(figsize=(8,6)) -for i in range(n_subjects): - plt.plot(m_orders, mean_subject_AIC[:,i]) -plt.title("Average AIC for each subject") -plt.xlabel("Model order (Lag)") - -mean_subject_BIC = np.nanmean(BIC_arr, axis=(2,3,4)) -plt.figure(figsize=(8,6)) -for i in range(n_subjects): - plt.plot(m_orders, mean_subject_BIC[:,i]) -plt.title("Average BIC for each subject") -plt.xlabel("Model order (Lag)") - -# We see that for many cases in BIC, it does not converge. Monotonic increasing! - -# We can look at the distribution of chosen order for each time series analyzed -# I.e. 
I will find the minima in model order for each model -AIC_min_arr = np.argmin(AIC_arr, axis=0) -BIC_min_arr = np.argmin(BIC_arr, axis=0) - -# Plot the distributions of the model order chosen -plt.figure(figsize=(8,6)) -sns.distplot(AIC_min_arr.reshape(-1)+1, kde=False, norm_hist=True, - bins=np.linspace(0.75,30.25,60), label="AIC") -plt.ylabel("Frequency density") -plt.xlabel("Model order") -plt.title("AIC Model Order Estimation") - -plt.figure(figsize=(8,6)) -sns.distplot(BIC_min_arr.reshape(-1)+1, kde=False, norm_hist=True, color="blue", - bins=np.linspace(0.75,30.25,60), label="BIC") -plt.ylabel("Frequency density") -plt.xlabel("Model order") -plt.title("BIC Model Order Estimation") -# It is clear from the BIC model that most have model order 1 -# which reflect their monotonic increasing nature without convergence -# Thus I will only use AIC - -# There is a bias variance trade-off with model order [Stokes & Purdon, 2017] -# Lower order is associated with higher bias and higher order with variance -# I will choose the model order that is chosen the most (i.e. majority voting) -AR_order = int(np.nanquantile(AIC_min_arr.reshape(-1), q=0.5)) -# Order = 5 - -# Calculate Granger Causality for each subject, eye and epoch -Freq_Bands = {"delta": [1.25, 4.0], - "theta": [4.0, 8.0], - "alpha": [8.0, 13.0], - "beta": [13.0, 30.0], - "gamma": [30.0, 49.0]} -n_freq_bands = len(Freq_Bands) - -# Pre-allocate memory -GC_data = np.zeros((2,n_subjects,n_eye_status,n_channels,n_channels,n_freq_bands)) +# print("{} out of {} finished".format(i+1,n_subjects)) +# return i, con_data0, n_epochs_matrix0 + +# with concurrent.futures.ProcessPoolExecutor(max_workers=16) as executor: +# for i, con_result, n_epochs_mat in executor.map(connectivity_estimation, range(n_subjects)): # Function and arguments +# con_data[:,i,:,:,:,:] = con_result +# n_epochs_matrix[i] = n_epochs_mat + +# # Get current time +# c_time = time.localtime() +# c_time = time.strftime("%H:%M:%S", c_time) +# print(c_time) + +# # Save the results +# np.save(Feature_savepath+"Source_drop_interpol_ch_connectivity_measures_data.npy", con_data) # (con_measure,subject,eye,ch,ch,freq) + +# # Also save as dataframe format for feature selection +# # Convert to Pandas dataframe +# # The dimensions will each be a column with numbers and the last column will be the actual values +# arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, con_data.shape), indexing="ij"))) + [con_data.ravel()]) +# con_data_df = pd.DataFrame(arr, columns = ["Con_measurement", "Subject_ID", "Eye_status", "chx", "chy", "Freq_band", "Value"]) +# # Change from numerical coding to actual values +# eye_status = list(final_epochs[0].event_id.keys()) +# freq_bands_name = list(Freq_Bands.keys()) + +# index_values = [connectivity_methods,Subject_id,eye_status,label_names,label_names,freq_bands_name] +# for col in range(len(index_values)): +# col_name = con_data_df.columns[col] +# for shape in range(con_data.shape[col]): # notice not dataframe but the array +# con_data_df.loc[con_data_df.iloc[:,col] == shape,col_name]\ +# = index_values[col][shape] + +# # Add group status +# Group_status = np.array(["CTRL"]*len(con_data_df["Subject_ID"])) +# Group_status[np.array([i in cases for i in con_data_df["Subject_ID"]])] = "PTSD" +# # Add to dataframe +# con_data_df.insert(3, "Group_status", Group_status) + +# # Remove all diagonal and upper-matrix entries +# con_data_df = con_data_df.iloc[con_data_df["Value"].to_numpy().nonzero()] + +# # Save df +# 
con_data_df.to_pickle(os.path.join(Feature_savepath,"con_data_source_drop_interpol_df.pkl")) + +# # %% Estimate Granger's Causality in source space +# # Load source labels +# with open("custom_aparc2009_Li_et_al_2022.pkl", "rb") as file: +# labels = pickle.load(file) +# label_names = [label.name for label in labels] +# n_sources = len(label_names) + +# # Load source time series +# with open(Feature_savepath+"STCs_each_epoch_drop_interpol_ch_fix_snr.pkl", "rb") as file: +# STCs_list = pickle.load(file) -def GC_analysis(i): - GC_data0 = np.zeros((2,n_eye_status,n_channels,n_channels,n_freq_bands)) - for e in range(n_eye_status): - n_epochs = STC_eye_data[i][e].shape[0] - # Make temporary array to save GC for each epoch - temp_GC_data = np.zeros((2,n_epochs,n_channels,n_channels,n_freq_bands)) - for ep in range(n_epochs): - # Fit the AR model - GCA = nta.GrangerAnalyzer(Timeseries_data[i][e][ep], order=AR_order, - n_freqs=int(800)) # n_Freq=800 correspond to step of 0.25Hz, the same as multitaper for power estimation - for f in range(n_freq_bands): - # Define lower and upper band - f_lb = list(Freq_Bands.values())[f][0] - f_ub = list(Freq_Bands.values())[f][1] - # Get index corresponding to the frequency bands of interest - freq_idx_G = np.where((GCA.frequencies >= f_lb) * (GCA.frequencies < f_ub))[0] - # Calculcate Granger causality quantities - g_xy = np.mean(GCA.causality_xy[:, :, freq_idx_G], -1) # avg on last dimension - g_yx = np.mean(GCA.causality_yx[:, :, freq_idx_G], -1) # avg on last dimension - # Transpose to use same format as con_measurement and save - temp_GC_data[0,ep,:,:,f] = np.transpose(g_xy) - temp_GC_data[1,ep,:,:,f] = np.transpose(g_yx) +# # Granger's causality might be influenced by volume conduction, thus working with CSD might be beneficial +# # But since I already used source modelling to alleviate this problem I will not apply CSD +# # Barrett et al, 2012 also do not apply CSD on source GC + +# # GC assumes stationarity, thus I will test for stationarity using ADF test +# # The null hypothesis of ADF is that it has unit root, i.e. is non-stationary +# # I will test how many can reject the null hypothesis, i.e. 
are stationary
+
+# # Due to the low numerical values in STC the ADF test is unstable, thus I multiply it to be around 1e0
+
+# stationary_test_arr = [0]*n_subjects
+# n_tests = [0]*n_subjects
+# for i in range(n_subjects):
+#     # Get data
+#     data_arr = STCs_list[i]
+#     # Get shape
+#     n_epochs, n_channels, n_timepoints = data_arr.shape
+#     # Create array for indices to print out progress
+#     ep_progress_idx = np.arange(n_epochs//5,n_epochs,n_epochs//5)
+#     # Calculate number of tests performed for each subject
+#     n_tests[i] = n_epochs*n_channels
+#     # Prepare empty array (filled with 2's, since 0 and 1 will hold the test results)
+#     stationary_test_arr0 = np.zeros((n_epochs,n_channels))+2 # make array of 2's
+#     for ep in range(n_epochs):
+#         for c in range(n_channels):
+#             ADF = adfuller(data_arr[ep,c,:]*1e14) # multiplying with a constant does not change ADF, but helps against numerical instability
+#             p_value = ADF[1]
+#             if p_value < 0.05:
+#                 stationary_test_arr0[ep,c] = True # Stationary set to 1
+#             else:
+#                 stationary_test_arr0[ep,c] = False # Non-stationary set to 0
+#         # Print partial progress
+#         if len(np.where(ep_progress_idx==ep)[0]) > 0:
+#             print("Finished epoch number: {} out of {}".format(ep,n_epochs))
+#     # Indices that were not tested
+#     no_test_idx = np.where(stationary_test_arr0==2)[0]
+#     if len(no_test_idx) > 0:
+#         print("An unexpected error occurred and {} was not tested".format(no_test_idx))
+#     # Save to list
+#     stationary_test_arr[i] = stationary_test_arr0
+#     # Print progress
+#     print("Finished subject {} out of {}".format(i+1,n_subjects))
+
+# with open(Stat_savepath+"Source_drop_interpol_GC_stationarity_tests.pkl", "wb") as filehandle:
+#     # The data is stored as binary data stream
+#     pickle.dump(stationary_test_arr, filehandle)
+
+# # I used a threshold of 0.05
+# # With this threshold, around 5% of truly non-stationary series would be expected to falsely test as stationary
+# ratio_stationary = [0]*n_subjects
+# for i in range(n_subjects):
+#     # Ratio of tests that showed stationarity
+#     ratio_stationary[i] = np.sum(stationary_test_arr[i])/n_tests[i]
+
+# print("Ratio of stationary time series: {0:.3f}".format(np.mean(ratio_stationary))) # 88%
+
+# # The pre-processing has already ensured that most of the data fulfills the stationarity assumption. 
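One more editor's sketch, illustrating the claim in the comment above that multiplying a series by a constant does not change the ADF result (the rescaling only guards against numerical problems at ~1e-14 amplitudes). This again uses synthetic data rather than project data, and since exact floating-point equality is not guaranteed, the comparison uses np.isclose instead of asserting equality.

import numpy as np
from statsmodels.tsa.stattools import adfuller

rng = np.random.default_rng(0)
x = rng.standard_normal(500)  # a stationary series at a convenient scale

adf_raw, p_raw = adfuller(x)[:2]
adf_scaled, p_scaled = adfuller(x * 1e14)[:2]

# The ADF t-statistic is scale-invariant in exact arithmetic, so the two runs
# should agree to floating-point precision on well-behaved input
print("ADF statistic: {:.6f} vs {:.6f} (close: {})".format(
    adf_raw, adf_scaled, np.isclose(adf_raw, adf_scaled)))
print("p-value: {:.6f} vs {:.6f} (close: {})".format(
    p_raw, p_scaled, np.isclose(p_raw, p_scaled)))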
+ +# # Divide the data into eyes closed and open +# ch_names = label_names +# n_channels = len(ch_names) + +# STC_eye_data = [] +# for i in range(n_subjects): +# # Get index for eyes open and eyes closed +# EC_index = final_epochs[i].events[:,2] == 1 +# EO_index = final_epochs[i].events[:,2] == 2 +# # Get the data +# EC_epoch_data = STCs_list[i][EC_index,:,:] # eye index +# EO_epoch_data = STCs_list[i][EO_index,:,:] +# # Save to list +# STC_eye_data.append([EC_epoch_data, EO_epoch_data]) + +# # Make each epoch a TimeSeries object +# # Input for TimeSeries is: (ch, time) +# eye_status = list(final_epochs[0].event_id.keys()) +# n_eye_status = len(eye_status) +# sfreq = final_epochs[0].info["sfreq"] + +# Timeseries_data = [] +# for i in range(n_subjects): +# temp_list1 = [] +# for e in range(n_eye_status): +# temp_list2 = [] +# n_epochs = STC_eye_data[i][e].shape[0] +# for ep in range(n_epochs): +# # Convert to TimeSeries +# time_series = nts.TimeSeries(STC_eye_data[i][e][ep,:,:], sampling_rate=sfreq) +# # Save the object +# temp_list2.append(time_series) +# # Save the timeseries across eye status +# temp_list1.append(temp_list2) +# # Save the timeseries across subjects +# Timeseries_data.append(temp_list1) # output [subject][eye][epoch](ch,time) + +# # Test multiple specified model orders of AR models, each combination has its own model +# m_orders = np.linspace(1,25,25) # the model orders tested +# m_orders = np.round(m_orders) +# n_timepoints = len(Timeseries_data[0][0][0]) +# n_ch_combinations = scipy.special.comb(n_channels,2, exact=True, repetition=False) + +# # To reduce computation time I only test representative epochs (1 from each 1 min session) +# # There will be 5 epochs from eyes closed and 5 from eyes open +# n_rep_epoch = 5 +# # The subjects have different number of epochs due to dropped epochs +# gaps_trials_idx = np.load("Gaps_trials_idx.npy") # time_points between sessions +# # I convert the gap time points to epoch number used as representative epoch +# epoch_idx = np.zeros((n_subjects,n_eye_status,n_rep_epoch), dtype=int) # prepare array +# epoch_idx[:,:,0:4] = np.round(gaps_trials_idx/n_timepoints,0)-8 # take random epoch from sessions 1 to 4 +# epoch_idx[:,:,4] = np.round(gaps_trials_idx[:,:,3]/n_timepoints,0)+5 # take random epoch from session 5 + +# # Checking if all epoch idx exists +# for i in range(n_subjects): +# EC_index = final_epochs[i].events[:,2] == 1 +# EO_index = final_epochs[i].events[:,2] == 2 +# assert np.sum(EC_index) >= epoch_idx[i,0,4] +# assert np.sum(EO_index) >= epoch_idx[i,1,4] + +# # Prepare model order estimation +# AIC_arr = np.zeros((len(m_orders),n_subjects,n_eye_status,n_rep_epoch,n_ch_combinations)) +# BIC_arr = np.zeros((len(m_orders),n_subjects,n_eye_status,n_rep_epoch,n_ch_combinations)) + +# def GC_model_order_est(i): +# AIC_arr0 = np.zeros((len(m_orders),n_eye_status,n_rep_epoch,n_ch_combinations)) +# BIC_arr0 = np.zeros((len(m_orders),n_eye_status,n_rep_epoch,n_ch_combinations)) +# for e in range(n_eye_status): +# n_epochs = STC_eye_data[i][e].shape[0] +# N_total = n_timepoints*n_epochs # total number of datapoints for specific eye condition +# for ep in range(n_rep_epoch): +# epp = epoch_idx[i,e,ep] +# for o in range(len(m_orders)): +# order = int(m_orders[o]) +# # Make the Granger Causality object +# GCA1 = nta.GrangerAnalyzer(Timeseries_data[i][e][epp-1], order=order, +# n_freqs=2000) +# for c in range(n_ch_combinations): +# # Retrieve error covariance matrix for all combinations +# ecov = np.array(list(GCA1.error_cov.values())) +# 
# Calculate AIC +# AIC = ntsu.akaike_information_criterion(ecov[c,:,:], p = n_channels, +# m=order, Ntotal=N_total) +# # Calculate BIC +# BIC = ntsu.bayesian_information_criterion(ecov[c,:,:], p = n_channels, +# m=order, Ntotal=N_total) +# # Save the information criterions +# AIC_arr0[o,e,ep,c] = AIC +# BIC_arr0[o,e,ep,c] = BIC + +# print("{} out of {} finished testing".format(i+1,n_subjects)) +# return i, AIC_arr0, BIC_arr0 + +# # Get current time +# c_time1 = time.localtime() +# c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1) +# print(c_time1) + +# with concurrent.futures.ProcessPoolExecutor() as executor: +# for i, AIC_result, BIC_result in executor.map(GC_model_order_est, range(n_subjects)): # Function and arguments +# AIC_arr[:,i] = AIC_result +# BIC_arr[:,i] = BIC_result + +# # Get current time +# c_time2 = time.localtime() +# c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2) +# print("Started", c_time1, "\nCurrent Time",c_time2) + +# # Save the AIC and BIC results +# np.save(Feature_savepath+"AIC_Source_drop_interpol_GC_model_order.npy", AIC_arr) # (m. order, subject, eye, epoch, combination) +# np.save(Feature_savepath+"BIC_Source_drop_interpol_GC_model_order.npy", BIC_arr) # (m. order, subject, eye, epoch, combination) + +# # Load data +# AIC_arr = np.load(Feature_savepath+"AIC_Source_drop_interpol_GC_model_order.npy") +# BIC_arr = np.load(Feature_savepath+"BIC_Source_drop_interpol_GC_model_order.npy") + +# # Average across all subjects, eye status, epochs and combinations +# plt.figure(figsize=(8,6)) +# plt.plot(m_orders, np.nanmean(AIC_arr, axis=(1,2,3,4)), label="AIC") +# plt.plot(m_orders, np.nanmean(BIC_arr, axis=(1,2,3,4)), label="BIC") +# plt.title("Average information criteria value") +# plt.xlabel("Model order (Lag)") +# plt.legend() + +# np.sum(np.isnan(AIC_arr))/AIC_arr.size # around 0.07% NaN due to non-convergence +# np.sum(np.isnan(BIC_arr))/BIC_arr.size # around 0.07% NaN due to non-convergence + +# # If we look at each subject +# mean_subject_AIC = np.nanmean(AIC_arr, axis=(2,3,4)) + +# plt.figure(figsize=(8,6)) +# for i in range(n_subjects): +# plt.plot(m_orders, mean_subject_AIC[:,i]) +# plt.title("Average AIC for each subject") +# plt.xlabel("Model order (Lag)") + +# mean_subject_BIC = np.nanmean(BIC_arr, axis=(2,3,4)) +# plt.figure(figsize=(8,6)) +# for i in range(n_subjects): +# plt.plot(m_orders, mean_subject_BIC[:,i]) +# plt.title("Average BIC for each subject") +# plt.xlabel("Model order (Lag)") + +# # We see that for many cases in BIC, it does not converge. Monotonic increasing! + +# # We can look at the distribution of chosen order for each time series analyzed +# # I.e. 
I will find the minima in model order for each model +# AIC_min_arr = np.argmin(AIC_arr, axis=0) +# BIC_min_arr = np.argmin(BIC_arr, axis=0) + +# # Plot the distributions of the model order chosen +# plt.figure(figsize=(8,6)) +# sns.distplot(AIC_min_arr.reshape(-1)+1, kde=False, norm_hist=True, +# bins=np.linspace(0.75,30.25,60), label="AIC") +# plt.ylabel("Frequency density") +# plt.xlabel("Model order") +# plt.title("AIC Model Order Estimation") + +# plt.figure(figsize=(8,6)) +# sns.distplot(BIC_min_arr.reshape(-1)+1, kde=False, norm_hist=True, color="blue", +# bins=np.linspace(0.75,30.25,60), label="BIC") +# plt.ylabel("Frequency density") +# plt.xlabel("Model order") +# plt.title("BIC Model Order Estimation") +# # It is clear from the BIC model that most have model order 1 +# # which reflect their monotonic increasing nature without convergence +# # Thus I will only use AIC + +# # There is a bias variance trade-off with model order [Stokes & Purdon, 2017] +# # Lower order is associated with higher bias and higher order with variance +# # I will choose the model order that is chosen the most (i.e. majority voting) +# AR_order = int(np.nanquantile(AIC_min_arr.reshape(-1), q=0.5)) +# # Order = 5 + +# # Calculate Granger Causality for each subject, eye and epoch +# Freq_Bands = {"delta": [1.25, 4.0], +# "theta": [4.0, 8.0], +# "alpha": [8.0, 13.0], +# "beta": [13.0, 30.0], +# "gamma": [30.0, 49.0]} +# n_freq_bands = len(Freq_Bands) + +# # Pre-allocate memory +# GC_data = np.zeros((2,n_subjects,n_eye_status,n_channels,n_channels,n_freq_bands)) + +# def GC_analysis(i): +# GC_data0 = np.zeros((2,n_eye_status,n_channels,n_channels,n_freq_bands)) +# for e in range(n_eye_status): +# n_epochs = STC_eye_data[i][e].shape[0] +# # Make temporary array to save GC for each epoch +# temp_GC_data = np.zeros((2,n_epochs,n_channels,n_channels,n_freq_bands)) +# for ep in range(n_epochs): +# # Fit the AR model +# GCA = nta.GrangerAnalyzer(Timeseries_data[i][e][ep], order=AR_order, +# n_freqs=int(800)) # n_Freq=800 correspond to step of 0.25Hz, the same as multitaper for power estimation +# for f in range(n_freq_bands): +# # Define lower and upper band +# f_lb = list(Freq_Bands.values())[f][0] +# f_ub = list(Freq_Bands.values())[f][1] +# # Get index corresponding to the frequency bands of interest +# freq_idx_G = np.where((GCA.frequencies >= f_lb) * (GCA.frequencies < f_ub))[0] +# # Calculcate Granger causality quantities +# g_xy = np.mean(GCA.causality_xy[:, :, freq_idx_G], -1) # avg on last dimension +# g_yx = np.mean(GCA.causality_yx[:, :, freq_idx_G], -1) # avg on last dimension +# # Transpose to use same format as con_measurement and save +# temp_GC_data[0,ep,:,:,f] = np.transpose(g_xy) +# temp_GC_data[1,ep,:,:,f] = np.transpose(g_yx) - # Average over epochs for each person, eye condition, direction and frequency band - temp_GC_epoch_mean = np.nanmean(temp_GC_data, axis=1) # sometimes Log(Sxx/xx_auto_component) is nan - # Save to array - GC_data0[:,e,:,:,:] = temp_GC_epoch_mean +# # Average over epochs for each person, eye condition, direction and frequency band +# temp_GC_epoch_mean = np.nanmean(temp_GC_data, axis=1) # sometimes Log(Sxx/xx_auto_component) is nan +# # Save to array +# GC_data0[:,e,:,:,:] = temp_GC_epoch_mean - print("{} out of {} finished analyzing".format(i+1,n_subjects)) - return i, GC_data0 +# print("{} out of {} finished analyzing".format(i+1,n_subjects)) +# return i, GC_data0 -# Get current time -c_time1 = time.localtime() -c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1) 
-print(c_time1) +# # Get current time +# c_time1 = time.localtime() +# c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1) +# print(c_time1) -with concurrent.futures.ProcessPoolExecutor() as executor: - for i, GC_result in executor.map(GC_analysis, range(n_subjects)): # Function and arguments - GC_data[:,i] = GC_result +# with concurrent.futures.ProcessPoolExecutor() as executor: +# for i, GC_result in executor.map(GC_analysis, range(n_subjects)): # Function and arguments +# GC_data[:,i] = GC_result -# Get current time -c_time2 = time.localtime() -c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2) -print("Started", c_time1, "\nCurrent Time",c_time2) - -# Output: GC_data (g_xy/g_yx, subject, eye, chx, chy, freq) -# Notice that for g_xy ([0,...]) it means "chy" Granger causes "chx" -# and for g_yx ([1,...]) it means "chx" Granger causes "chy" -# This is due to the transposing which flipped the results on to the lower-part of the diagonal - -# Save the Granger_Causality data -np.save(Feature_savepath+"Source_drop_interpol_GrangerCausality_data.npy", GC_data) - -# Theoretically negative GC values should be impossible, but in practice -# they can still occur due to problems with model fitting (see Stokes & Purdon, 2017) -print("{:.3f}% negative GC values".\ - format(np.sum(GC_data[~np.isnan(GC_data)]<0)/np.sum(~np.isnan(GC_data))*100)) # 0.08% negative values -# These values cannot be interpreted, but seems to occur mostly for true non-causal connections -# Hence I set them to 0 -with np.errstate(invalid="ignore"): # invalid number refers to np.nan, which will be set to False for comparisons - GC_data[(GC_data<0)] = 0 - -# Save as dataframe for further processing with other features -# Convert to Pandas dataframe -# The dimensions will each be a column with numbers and the last column will be the actual values -arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, GC_data.shape), indexing="ij"))) + [GC_data.ravel()]) -GC_data_df = pd.DataFrame(arr, columns = ["GC_direction", "Subject_ID", "Eye_status", "chx", "chy", "Freq_band", "Value"]) -# Change from numerical coding to actual values -eye_status = list(final_epochs[0].event_id.keys()) -freq_bands_name = list(Freq_Bands.keys()) -GC_directions_info = ["chy -> chx", "chx -> chy"] - -index_values = [GC_directions_info,Subject_id,eye_status,ch_names,ch_names,freq_bands_name] -for col in range(len(index_values)): - col_name = GC_data_df.columns[col] - for shape in range(GC_data.shape[col]): # notice not dataframe but the array - GC_data_df.loc[GC_data_df.iloc[:,col] == shape,col_name]\ - = index_values[col][shape] - -# Add group status -Group_status = np.array(["CTRL"]*len(GC_data_df["Subject_ID"])) -Group_status[np.array([i in cases for i in GC_data_df["Subject_ID"]])] = "PTSD" -# Add to dataframe -GC_data_df.insert(3, "Group_status", Group_status) - -# Remove all nan (including diagonal and upper-matrix entries) -GC_data_df = GC_data_df.iloc[np.invert(np.isnan(GC_data_df["Value"].to_numpy()))] - -# Swap ch values for GC_direction chy -> chx (so it is always chx -> chy) -tempchy = GC_data_df[GC_data_df["GC_direction"] == "chy -> chx"]["chy"] # save chy -GC_data_df.loc[GC_data_df["GC_direction"] == "chy -> chx","chy"] =\ - GC_data_df.loc[GC_data_df["GC_direction"] == "chy -> chx","chx"] # overwrite old chy -GC_data_df.loc[GC_data_df["GC_direction"] == "chy -> chx","chx"] = tempchy # overwrite chx - -# Drop the GC_direction column -GC_data_df = GC_data_df.drop("GC_direction", axis=1) - -# Save df 
-GC_data_df.to_pickle(os.path.join(Feature_savepath,"GC_data_source_drop_interpol_df.pkl")) - -# Testing if df was formatted correctly -expected_GC_values = n_subjects*n_eye_status*n_ch_combinations*n_freq_bands*2 # 2 because it is bidirectional -assert GC_data_df.shape[0] == expected_GC_values -# Testing a random GC value -random_connection = np.random.randint(0,GC_data_df.shape[0]) -test_connection = GC_data_df.iloc[random_connection,:] -i = np.where(Subject_id==test_connection["Subject_ID"])[0] -e = np.where(np.array(eye_status)==test_connection["Eye_status"])[0] -chx = np.where(np.array(ch_names)==test_connection["chx"])[0] -chy = np.where(np.array(ch_names)==test_connection["chy"])[0] -f = np.where(np.array(freq_bands_name)==test_connection["Freq_band"])[0] -value = test_connection["Value"] -if chx < chy: # the GC array is only lower diagonal to save memory - assert GC_data[0,i,e,chy,chx,f] == value -if chx > chy: - assert GC_data[1,i,e,chx,chy,f] == value +# # Get current time +# c_time2 = time.localtime() +# c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2) +# print("Started", c_time1, "\nCurrent Time",c_time2) + +# # Output: GC_data (g_xy/g_yx, subject, eye, chx, chy, freq) +# # Notice that for g_xy ([0,...]) it means "chy" Granger causes "chx" +# # and for g_yx ([1,...]) it means "chx" Granger causes "chy" +# # This is due to the transposing which flipped the results on to the lower-part of the diagonal + +# # Save the Granger_Causality data +# np.save(Feature_savepath+"Source_drop_interpol_GrangerCausality_data.npy", GC_data) + +# # Theoretically negative GC values should be impossible, but in practice +# # they can still occur due to problems with model fitting (see Stokes & Purdon, 2017) +# print("{:.3f}% negative GC values".\ +# format(np.sum(GC_data[~np.isnan(GC_data)]<0)/np.sum(~np.isnan(GC_data))*100)) # 0.08% negative values +# # These values cannot be interpreted, but seems to occur mostly for true non-causal connections +# # Hence I set them to 0 +# with np.errstate(invalid="ignore"): # invalid number refers to np.nan, which will be set to False for comparisons +# GC_data[(GC_data<0)] = 0 + +# # Save as dataframe for further processing with other features +# # Convert to Pandas dataframe +# # The dimensions will each be a column with numbers and the last column will be the actual values +# arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, GC_data.shape), indexing="ij"))) + [GC_data.ravel()]) +# GC_data_df = pd.DataFrame(arr, columns = ["GC_direction", "Subject_ID", "Eye_status", "chx", "chy", "Freq_band", "Value"]) +# # Change from numerical coding to actual values +# eye_status = list(final_epochs[0].event_id.keys()) +# freq_bands_name = list(Freq_Bands.keys()) +# GC_directions_info = ["chy -> chx", "chx -> chy"] + +# index_values = [GC_directions_info,Subject_id,eye_status,ch_names,ch_names,freq_bands_name] +# for col in range(len(index_values)): +# col_name = GC_data_df.columns[col] +# for shape in range(GC_data.shape[col]): # notice not dataframe but the array +# GC_data_df.loc[GC_data_df.iloc[:,col] == shape,col_name]\ +# = index_values[col][shape] + +# # Add group status +# Group_status = np.array(["CTRL"]*len(GC_data_df["Subject_ID"])) +# Group_status[np.array([i in cases for i in GC_data_df["Subject_ID"]])] = "PTSD" +# # Add to dataframe +# GC_data_df.insert(3, "Group_status", Group_status) + +# # Remove all nan (including diagonal and upper-matrix entries) +# GC_data_df = 
GC_data_df.iloc[np.invert(np.isnan(GC_data_df["Value"].to_numpy()))] + +# # Swap ch values for GC_direction chy -> chx (so it is always chx -> chy) +# tempchy = GC_data_df[GC_data_df["GC_direction"] == "chy -> chx"]["chy"] # save chy +# GC_data_df.loc[GC_data_df["GC_direction"] == "chy -> chx","chy"] =\ +# GC_data_df.loc[GC_data_df["GC_direction"] == "chy -> chx","chx"] # overwrite old chy +# GC_data_df.loc[GC_data_df["GC_direction"] == "chy -> chx","chx"] = tempchy # overwrite chx + +# # Drop the GC_direction column +# GC_data_df = GC_data_df.drop("GC_direction", axis=1) + +# # Save df +# GC_data_df.to_pickle(os.path.join(Feature_savepath,"GC_data_source_drop_interpol_df.pkl")) + +# # Testing if df was formatted correctly +# expected_GC_values = n_subjects*n_eye_status*n_ch_combinations*n_freq_bands*2 # 2 because it is bidirectional +# assert GC_data_df.shape[0] == expected_GC_values +# # Testing a random GC value +# random_connection = np.random.randint(0,GC_data_df.shape[0]) +# test_connection = GC_data_df.iloc[random_connection,:] +# i = np.where(Subject_id==test_connection["Subject_ID"])[0] +# e = np.where(np.array(eye_status)==test_connection["Eye_status"])[0] +# chx = np.where(np.array(ch_names)==test_connection["chx"])[0] +# chy = np.where(np.array(ch_names)==test_connection["chy"])[0] +# f = np.where(np.array(freq_bands_name)==test_connection["Freq_band"])[0] +# value = test_connection["Value"] +# if chx < chy: # the GC array is only lower diagonal to save memory +# assert GC_data[0,i,e,chy,chx,f] == value +# if chx > chy: +# assert GC_data[1,i,e,chx,chy,f] == value diff --git a/Preprocessing.py b/Preprocessing.py index b3381ae6252478b97de7ee18d179f4d4cd768dbd..4c14f0e1803be275fa8ca659b3083b5039841e6c 100644 --- a/Preprocessing.py +++ b/Preprocessing.py @@ -40,6 +40,7 @@ from mne.datasets import eegbci n_subjects = 78 Subject_id = list(range(1,n_subjects+1)) # Download and get filenames +<<<<<<< HEAD data_path = "/data/may2020/QEEG" files = [] for r, d, f in os.walk(data_path): @@ -54,18 +55,37 @@ n_subjects = 51 Subject_id = list(range(1,n_subjects+1)) # Original code to get filenames in a folder data_path = "/data/raw/FOR_DTU/rawEEGforDTU" +======= + +from mne.datasets import eegbci +files = [] +for i in range(n_subjects): + raw_fnames = eegbci.load_data(Subject_id[i], [1,2]) # The first 2 runs + files.append(raw_fnames) +""" +# Original code to get filenames in a folder +data_path = "/Users/benj3542/Desktop/Uni/Noter/Semester_6/Bachelor/Test_bdf" +>>>>>>> 592a824fc18e7b7d354f8c8638fd1d4cda16ad34 # Get filenames holder = [] files = [] for r, d, f in os.walk(data_path): for file in f: if ".bdf" in file: +<<<<<<< HEAD holder.append(os.path.join(r, file)) files.append(holder) holder = [] files.pop(0) """ +======= + holder.append((os.path.join(r, file))) + files.append(holder) + holder = [] +files.pop(0) +""" +>>>>>>> 592a824fc18e7b7d354f8c8638fd1d4cda16ad34 # Eye status anno_to_event = {'Eyes Closed': 1, 'Eyes Open': 2} # manually defined event id eye_status = list(anno_to_event.keys())