diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..bc975d8d2b78991fad68a23c0ac2ab94d094180f Binary files /dev/null and b/.DS_Store differ diff --git a/Autoreject_overview/AR_0_Eyes Closed.png b/Autoreject_overview/AR_0_Eyes Closed.png new file mode 100644 index 0000000000000000000000000000000000000000..d4b63a2a2e1fbb8b43330edf3e922800b5e781c1 Binary files /dev/null and b/Autoreject_overview/AR_0_Eyes Closed.png differ diff --git a/Autoreject_overview/AR_1_Eyes Open.png b/Autoreject_overview/AR_1_Eyes Open.png new file mode 100644 index 0000000000000000000000000000000000000000..06082f94c112ec11696ba51d2bf1af0d6c9ca9a5 Binary files /dev/null and b/Autoreject_overview/AR_1_Eyes Open.png differ diff --git a/FeatureEstimation.py b/FeatureEstimation.py index 10e0e55f36a396d6b22aaba3ea9877c519bd4bde..f1d63aa2e170732ea202fcef53ecfab6ca5603a1 100644 --- a/FeatureEstimation.py +++ b/FeatureEstimation.py @@ -174,12 +174,14 @@ Group_status[np.array([i in cases for i in Subject_id])] = "PTSD" Frontal_chs = ["Fp1", "Fpz", "Fp2", "AFz", "Fz", "F3", "F4", "F7", "F8"] Central_chs = ["Cz", "C3", "C4", "T7", "T8", "FT7", "FC3", "FCz", "FC4", "FT8", "TP7", "CP3", "CPz", "CP4", "TP8"] Posterior_chs = ["Pz", "P3", "P4", "P7", "P8", "POz", "O1", "O2", "Oz"] +Parietal_chs = ["TP7", "CP3", "CPz", "CP4", "TP8", "P7", "P3", "Pz", "P4", "P8", "POz"] -Brain_region_labels = ["Frontal","Central","Posterior"] +Brain_region_labels = ["Frontal","Central","Posterior","Parietal"] Brain_region = np.array(ch_names, dtype = "<U9") Brain_region[np.array([i in Frontal_chs for i in ch_names])] = Brain_region_labels[0] Brain_region[np.array([i in Central_chs for i in ch_names])] = Brain_region_labels[1] Brain_region[np.array([i in Posterior_chs for i in ch_names])] = Brain_region_labels[2] +Brain_region[np.array([i in Parietal_chs for i in ch_names])] = Brain_region_labels[3] # A variable that codes the channels based on M/L localization Left_chs = 
["Fp1", "F3", "F7", "FC3", "FT7", "C3", "T7", "CP3", "TP7", "P3", "P7", "O1"] @@ -246,6 +248,9 @@ power_df_sub1 = power_df[(power_df["Quant_status"] == "Absolute")& power_df_sub2 = power_df[(power_df["Quant_status"] == "Absolute")& (power_df["Brain_region"] == "Frontal")& (power_df["Brain_side"] == "Mid")] +# Subset posterior absolute power +power_df_sub3 = power_df[(power_df["Quant_status"] == "Absolute")& + (power_df["Brain_region"] == "Posterior")] # Calculate average frontal power theta @@ -264,6 +269,12 @@ frontal_midline_theta_mean_subject = power_df_sub2[power_df_sub2["Freq_band"] == # Extract all values frontal_midline_theta_subject_values = power_df_sub2[power_df_sub2["Freq_band"] == "theta"] +# Calculate average parietal alpha power +parietal_alpha_mean_subject = power_df_sub3[power_df_sub3["Freq_band"] == "alpha"].\ + groupby(["Subject_ID","Group_status","Eye_status"]).mean().reset_index() +# Extract all values +parietal_alpha_subject_values = power_df_sub3[power_df_sub3["Freq_band"] == "alpha"] + # Convert from dB to raw power frontal_theta_mean_subject["PSD"] = 10**(frontal_theta_mean_subject["PSD"]/10) @@ -271,11 +282,16 @@ frontal_beta_mean_subject["PSD"] = 10**(frontal_beta_mean_subject["PSD"]/10) frontal_midline_theta_mean_subject["PSD"] = 10**(frontal_midline_theta_mean_subject["PSD"]/10) frontal_beta_subject_values["PSD"] = 10**(frontal_beta_subject_values["PSD"]/10) frontal_midline_theta_subject_values["PSD"] = 10**(frontal_midline_theta_subject_values["PSD"]/10) +parietal_alpha_mean_subject["PSD"] = 10**(parietal_alpha_mean_subject["PSD"]/10) +parietal_alpha_subject_values["PSD"] = 10**(parietal_alpha_subject_values["PSD"]/10) +# Safe values frontal_beta_mean_subject.to_pickle(os.path.join(Feature_savepath,"fBMS_df.pkl")) frontal_midline_theta_mean_subject.to_pickle(os.path.join(Feature_savepath,"fMTMS_df.pkl")) frontal_beta_subject_values.to_pickle(os.path.join(Feature_savepath,"fBSV_df.pkl")) 
frontal_midline_theta_subject_values.to_pickle(os.path.join(Feature_savepath,"fMTSV_df.pkl")) +parietal_alpha_mean_subject.to_pickle(os.path.join(Feature_savepath,"pAMS_df.pkl")) +parietal_alpha_subject_values.to_pickle(os.path.join(Feature_savepath,"pASV_df.pkl")) # Calculate mean for each group and take ratio for whole group # To confirm trend observed in PSD plots @@ -397,6 +413,8 @@ assert asymmetry[i,e,f,r] == asymmetry_df.iloc[random_point]["Asymmetry_score"] # Save the dataframe asymmetry_df.to_pickle(os.path.join(Feature_savepath,"asymmetry_df.pkl")) +""" + # %% Using FOOOF # Peak alpha frequency (PAF) and 1/f exponent (OOF) # Using the FOOOF algorithm (Fitting oscillations and one over f) @@ -416,6 +434,8 @@ n_freq_bands = len(Freq_Bands) # To overcome this problem, we try multiple start freq OOF_r2_thres = 0.95 # a high threshold as we allow for overfitting PAF_r2_thres = 0.90 # a more lenient threshold for PAF, as it is usually still captured even if fit for 1/f is not perfect +PTF_r2_thres = 0.90 # a more lenient threshold for PTF, as it is usually still captured even if fit for 1/f is not perfect +PBF_r2_thres = 0.90 # a more lenient threshold for PBF, as it is usually still captured even if fit for 1/f is not perfect freq_start_it_range = [2,3,4,5,6] freq_end = 40 # Stop freq at 40Hz to not be influenced by the Notch Filter @@ -423,10 +443,14 @@ eye_status = list(final_epochs[0].event_id.keys()) n_eye_status = len(eye_status) PAF_data = np.zeros((n_subjects,n_eye_status,n_channels,3)) # CF, power, band_width +PTF_data = np.zeros((n_subjects,n_eye_status,n_channels,3)) # CF, power, band_width +PBF_data = np.zeros((n_subjects,n_eye_status,n_channels,3)) # CF, power, band_width OOF_data = np.zeros((n_subjects,n_eye_status,n_channels,2)) # offset and exponent def FOOOF_estimation(i): PAF_data0 = np.zeros((n_eye_status,n_channels,3)) # CF, power, band_width + PTF_data0 = np.zeros((n_eye_status,n_channels,3)) # CF, power, band_width + PBF_data0 = 
np.zeros((n_eye_status,n_channels,3)) # CF, power, band_width OOF_data0 = np.zeros((n_eye_status,n_channels,2)) # offset and exponent # Get Eye status eye_idx = [final_epochs[i].events[:,2] == 1, final_epochs[i].events[:,2] == 2] # EC and EO @@ -443,6 +467,8 @@ def FOOOF_estimation(i): psds.append(temp_psd) # Try multiple start freq PAF_data00 = np.zeros((n_eye_status,n_channels,len(freq_start_it_range),3)) # CF, power, band_width + PTF_data00 = np.zeros((n_eye_status,n_channels,len(freq_start_it_range),3)) # CF, power, band_width + PBF_data00 = np.zeros((n_eye_status,n_channels,len(freq_start_it_range),3)) # CF, power, band_width OOF_data00 = np.zeros((n_eye_status,n_channels,len(freq_start_it_range),2)) # offset and exponent r2s00 = np.zeros((n_eye_status,n_channels,len(freq_start_it_range))) for e in range(n_eye_status): @@ -476,9 +502,37 @@ def FOOOF_estimation(i): else: # No alpha peaks PAF_data00[e,c,f] = [np.nan]*3 + # Find the theta peak with greatest power + for c in range(n_channels): + peaks0 = peaks[peaks[:,3] == c] + # Subset the peaks within the theta band + in_theta_band = (peaks0[:,0] >= Freq_Bands["theta"][0]) & (peaks0[:,0] <= Freq_Bands["theta"][1]) + if sum(in_theta_band) > 0: + # Choose the peak with the highest power + max_theta_idx = np.argmax(peaks0[in_theta_band,1]) + # Save results + PTF_data00[e,c,f] = peaks0[in_theta_band][max_theta_idx,:-1] + else: + # No theta peaks + PTF_data00[e,c,f] = [np.nan]*3 + # Find the beta peak with greatest power + for c in range(n_channels): + peaks0 = peaks[peaks[:,3] == c] + # Subset the peaks within the beta band + in_beta_band = (peaks0[:,0] >= Freq_Bands["beta"][0]) & (peaks0[:,0] <= Freq_Bands["beta"][1]) + if sum(in_beta_band) > 0: + # Choose the peak with the highest power + max_beta_idx = np.argmax(peaks0[in_beta_band,1]) + # Save results + PBF_data00[e,c,f] = peaks0[in_beta_band][max_beta_idx,:-1] + else: + # No beta peaks + PBF_data00[e,c,f] = [np.nan]*3 # Check criterias good_fits_OOF = (r2s00 > 
OOF_r2_thres) & (OOF_data00[:,:,:,1] > 0) # r^2 > 0.95 and exponent > 0 good_fits_PAF = (r2s00 > PAF_r2_thres) & (np.isfinite(PAF_data00[:,:,:,0])) # r^2 > 0.90 and detected peak in alpha band + good_fits_PTF = (r2s00 > PTF_r2_thres) & (np.isfinite(PTF_data00[:,:,:,0])) # r^2 > 0.90 and detected peak in theta band + good_fits_PBF = (r2s00 > PBF_r2_thres) & (np.isfinite(PBF_data00[:,:,:,0])) # r^2 > 0.90 and detected peak in beta band # Save the data or NaN if criterias were not fulfilled for e in range(n_eye_status): for c in range(n_channels): @@ -490,24 +544,43 @@ def FOOOF_estimation(i): PAF_data0[e,c] = [np.nan]*3 else: # Save PAF associated with greatest r^2 that fulfilled criterias PAF_data0[e,c] = PAF_data00[e,c,np.argmax(r2s00[e,c,good_fits_PAF[e,c]])] + if sum(good_fits_PTF[e,c]) == 0: # no good PTF estimation + PTF_data0[e,c] = [np.nan]*3 + else: # Save PTF associated with greatest r^2 that fulfilled criterias + PTF_data0[e,c] = PTF_data00[e,c,np.argmax(r2s00[e,c,good_fits_PTF[e,c]])] + if sum(good_fits_PBF[e,c]) == 0: # no good PBF estimation + PBF_data0[e,c] = [np.nan]*3 + else: # Save PBF associated with greatest r^2 that fulfilled criterias + PBF_data0[e,c] = PBF_data00[e,c,np.argmax(r2s00[e,c,good_fits_PBF[e,c]])] print("Finished {} out of {} subjects".format(i+1,n_subjects)) - return i, PAF_data0, OOF_data0 + return i, PAF_data0, OOF_data0, PTF_data0, PBF_data0 # Get current time c_time1 = time.localtime() c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1) print(c_time1) -with concurrent.futures.ProcessPoolExecutor() as executor: - for i, PAF_result, OOF_result in executor.map(FOOOF_estimation, range(n_subjects)): # Function and arguments - PAF_data[i] = PAF_result - OOF_data[i] = OOF_result +# with concurrent.futures.ProcessPoolExecutor() as executor: +# for i, PAF_result, OOF_result in executor.map(FOOOF_estimation, range(n_subjects)): # Function and arguments +# PAF_data[i] = PAF_result +# OOF_data[i] = OOF_result + +for i in 
range(n_subjects): + j, PAF_result, OOF_result, PTF_data0, PBF_data0 = FOOOF_estimation(i) # Function and arguments + PAF_data[i] = PAF_result + OOF_data[i] = OOF_result + PTF_data[i] = PTF_data0 + PBF_data[i] = PBF_data0 # Save data with open(Feature_savepath+"PAF_data_arr.pkl", "wb") as file: pickle.dump(PAF_data, file) -with open(Feature_savepath+"OOF_data_arr.pkl", "wb") as file: - pickle.dump(OOF_data, file) +with open(Feature_savepath+"PTF_data_arr.pkl", "wb") as file: + pickle.dump(PTF_data, file) +with open(Feature_savepath+"PBF_data_arr.pkl", "wb") as file: + pickle.dump(PBF_data, file) +# with open(Feature_savepath+"OOF_data_arr.pkl", "wb") as file: +# pickle.dump(OOF_data, file) # Get current time c_time2 = time.localtime() @@ -517,75 +590,134 @@ print("Started", c_time1, "\nFinished",c_time2) # Convert to Pandas dataframe (only keep mean parameter for PAF) # The dimensions will each be a column with numbers and the last column will be the actual values ori = PAF_data[:,:,:,0] +ori_2 = PTF_data[:,:,:,0] +ori_3 = PBF_data[:,:,:,0] arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, ori.shape), indexing="ij"))) + [ori.ravel()]) +arr_2 = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, ori_2.shape), indexing="ij"))) + [ori_2.ravel()]) +arr_3 = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, ori_3.shape), indexing="ij"))) + [ori_3.ravel()]) PAF_data_df = pd.DataFrame(arr, columns = ["Subject_ID", "Eye_status", "Channel", "Value"]) +PTF_data_df = pd.DataFrame(arr_2, columns = ["Subject_ID", "Eye_status", "Channel", "Value"]) +PBF_data_df = pd.DataFrame(arr_3, columns = ["Subject_ID", "Eye_status", "Channel", "Value"]) # Change from numerical coding to actual values index_values = [Subject_id,eye_status,ch_names] temp_df = PAF_data_df.copy() # make temp df to not sequential overwrite what is changed +temp_df_2 = PTF_data_df.copy() # make temp df to not sequential overwrite what is changed +temp_df_3 = 
PBF_data_df.copy() # make temp df to not sequential overwrite what is changed for col in range(len(index_values)): col_name = PAF_data_df.columns[col] + col_name_2 = PTF_data_df.columns[col] + col_name_3 = PBF_data_df.columns[col] for shape in range(ori.shape[col]): temp_df.loc[PAF_data_df.iloc[:,col] == shape,col_name]\ = index_values[col][shape] + temp_df_2.loc[PTF_data_df.iloc[:,col] == shape,col_name_2]\ + = index_values[col][shape] + temp_df_3.loc[PBF_data_df.iloc[:,col] == shape,col_name_3]\ + = index_values[col][shape] PAF_data_df = temp_df # replace original df +PTF_data_df = temp_df_2 # replace original df +PBF_data_df = temp_df_3 # replace original df # Add group status Group_status = np.array(["CTRL"]*len(PAF_data_df["Subject_ID"])) Group_status[np.array([i in cases for i in PAF_data_df["Subject_ID"]])] = "PTSD" # Add to dataframe PAF_data_df.insert(3, "Group_status", Group_status) +PTF_data_df.insert(3, "Group_status", Group_status) +PBF_data_df.insert(3, "Group_status", Group_status) # Global peak alpha PAF_data_df_global = PAF_data_df.groupby(["Subject_ID", "Group_status", "Eye_status"]).mean().reset_index() # by default pandas mean skip nan +PTF_data_df_global = PTF_data_df.groupby(["Subject_ID", "Group_status", "Eye_status"]).mean().reset_index() # by default pandas mean skip nan +PBF_data_df_global = PBF_data_df.groupby(["Subject_ID", "Group_status", "Eye_status"]).mean().reset_index() # by default pandas mean skip nan + # Add dummy variable for re-using plot code dummy_variable = ["Global Peak Alpha Frequency"]*PAF_data_df_global.shape[0] +dummy_variable_2 = ["Global Peak Theta Frequency"]*PTF_data_df_global.shape[0] +dummy_variable_3 = ["Global Peak Beta Frequency"]*PBF_data_df_global.shape[0] PAF_data_df_global.insert(3, "Measurement", dummy_variable ) +PTF_data_df_global.insert(3, "Measurement", dummy_variable_2 ) +PBF_data_df_global.insert(3, "Measurement", dummy_variable_3 ) # Regional peak alpha # A variable that codes the channels based on 
A/P localization is also made Frontal_chs = ["Fp1", "Fpz", "Fp2", "AFz", "Fz", "F3", "F4", "F7", "F8"] Central_chs = ["Cz", "C3", "C4", "T7", "T8", "FT7", "FC3", "FCz", "FC4", "FT8", "TP7", "CP3", "CPz", "CP4", "TP8"] Posterior_chs = ["Pz", "P3", "P4", "P7", "P8", "POz", "O1", "O2", "Oz"] +Parietal_chs = ["TP7", "CP3", "CPz", "CP4", "TP8", "P7", "P3", "Pz", "P4", "P8", "POz"] +Brain_region_labels = ["Frontal","Central","Posterior","Parietal"] Brain_region = np.array(ch_names, dtype = "<U9") -Brain_region[np.array([i in Frontal_chs for i in ch_names])] = "Frontal" -Brain_region[np.array([i in Central_chs for i in ch_names])] = "Central" -Brain_region[np.array([i in Posterior_chs for i in ch_names])] = "Posterior" +Brain_region[np.array([i in Frontal_chs for i in ch_names])] = Brain_region_labels[0] +Brain_region[np.array([i in Central_chs for i in ch_names])] = Brain_region_labels[1] +Brain_region[np.array([i in Posterior_chs for i in ch_names])] = Brain_region_labels[2] +Brain_region[np.array([i in Parietal_chs for i in ch_names])] = Brain_region_labels[3] +# Insert region type into dataframe PAF_data_df.insert(4, "Brain_region", list(Brain_region)*int(PAF_data_df.shape[0]/len(Brain_region))) +PTF_data_df.insert(4, "Brain_region", list(Brain_region)*int(PTF_data_df.shape[0]/len(Brain_region))) +PBF_data_df.insert(4, "Brain_region", list(Brain_region)*int(PBF_data_df.shape[0]/len(Brain_region))) + +# A variable that codes the channels based on M/L localization +Left_chs = ["Fp1", "F3", "F7", "FC3", "FT7", "C3", "T7", "CP3", "TP7", "P3", "P7", "O1"] +Right_chs = ["Fp2", "F4", "F8", "FC4", "FT8", "C4", "T8", "CP4", "TP8", "P4", "P8", "O2"] +Mid_chs = ["Fpz", "AFz", "Fz", "FCz", "Cz", "CPz", "Pz", "POz", "Oz"] + +Brain_side = np.array(ch_names, dtype = "<U5") +Brain_side[np.array([i in Left_chs for i in ch_names])] = "Left" +Brain_side[np.array([i in Right_chs for i in ch_names])] = "Right" +Brain_side[np.array([i in Mid_chs for i in ch_names])] = "Mid" + +# Insert 
side type into dataframe: +PAF_data_df.insert(5, "Brain_side", list(Brain_side)*int(PAF_data_df.shape[0]/len(Brain_side))) +PTF_data_df.insert(5, "Brain_side", list(Brain_side)*int(PTF_data_df.shape[0]/len(Brain_side))) +PBF_data_df.insert(5, "Brain_side", list(Brain_side)*int(PBF_data_df.shape[0]/len(Brain_side))) + +# Define region of interest before saving +PAF_data_df = PAF_data_df[(PAF_data_df["Brain_region"] == "Parietal")] # Parietal region in peak alpha frequencys +PTF_data_df = PTF_data_df[(PTF_data_df["Brain_region"] == "Frontal") & + ((PTF_data_df["Brain_side"] == "Mid"))] # Frontal midline theta peak frequencys +PBF_data_df = PBF_data_df[(PBF_data_df["Brain_region"] == "Frontal")] # Frontal beta peak frequencys + + # Save the dataframes PAF_data_df.to_pickle(os.path.join(Feature_savepath,"PAF_data_FOOOF_df.pkl")) PAF_data_df_global.to_pickle(os.path.join(Feature_savepath,"PAF_data_FOOOF_global_df.pkl")) +PTF_data_df.to_pickle(os.path.join(Feature_savepath,"PTF_data_FOOOF_df.pkl")) +PTF_data_df_global.to_pickle(os.path.join(Feature_savepath,"PTF_data_FOOOF_global_df.pkl")) +PBF_data_df.to_pickle(os.path.join(Feature_savepath,"PBF_data_FOOOF_df.pkl")) +PBF_data_df_global.to_pickle(os.path.join(Feature_savepath,"PBF_data_FOOOF_global_df.pkl")) -# Convert to Pandas dataframe (only keep exponent parameter for OOF) -# The dimensions will each be a column with numbers and the last column will be the actual values -ori = OOF_data[:,:,:,1] -arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, ori.shape), indexing="ij"))) + [ori.ravel()]) -PAF_data_df = pd.DataFrame(arr, columns = ["Subject_ID", "Eye_status", "Channel", "Value"]) -# Change from numerical coding to actual values +# # Convert to Pandas dataframe (only keep exponent parameter for OOF) +# # The dimensions will each be a column with numbers and the last column will be the actual values +# ori = OOF_data[:,:,:,1] +# arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, 
ori.shape), indexing="ij"))) + [ori.ravel()]) +# PAF_data_df = pd.DataFrame(arr, columns = ["Subject_ID", "Eye_status", "Channel", "Value"]) +# # Change from numerical coding to actual values -index_values = [Subject_id,eye_status,ch_names] -temp_df = PAF_data_df.copy() # make temp df to not sequential overwrite what is changed -for col in range(len(index_values)): - col_name = PAF_data_df.columns[col] - for shape in range(ori.shape[col]): - temp_df.loc[PAF_data_df.iloc[:,col] == shape,col_name]\ - = index_values[col][shape] -OOF_data_df = temp_df # replace original df +# index_values = [Subject_id,eye_status,ch_names] +# temp_df = PAF_data_df.copy() # make temp df to not sequential overwrite what is changed +# for col in range(len(index_values)): +# col_name = PAF_data_df.columns[col] +# for shape in range(ori.shape[col]): +# temp_df.loc[PAF_data_df.iloc[:,col] == shape,col_name]\ +# = index_values[col][shape] +# OOF_data_df = temp_df # replace original df -# Add group status -Group_status = np.array(["CTRL"]*len(OOF_data_df["Subject_ID"])) -Group_status[np.array([i in cases for i in OOF_data_df["Subject_ID"]])] = "PTSD" -# Add to dataframe -OOF_data_df.insert(3, "Group_status", Group_status) +# # Add group status +# Group_status = np.array(["CTRL"]*len(OOF_data_df["Subject_ID"])) +# Group_status[np.array([i in cases for i in OOF_data_df["Subject_ID"]])] = "PTSD" +# # Add to dataframe +# OOF_data_df.insert(3, "Group_status", Group_status) -# Regional OOF -OOF_data_df.insert(4, "Brain_region", list(Brain_region)*int(PAF_data_df.shape[0]/len(Brain_region))) +# # Regional OOF +# OOF_data_df.insert(4, "Brain_region", list(Brain_region)*int(PAF_data_df.shape[0]/len(Brain_region))) -# Save the dataframes -OOF_data_df.to_pickle(os.path.join(Feature_savepath,"OOF_data_FOOOF_df.pkl")) +# # Save the dataframes +# OOF_data_df.to_pickle(os.path.join(Feature_savepath,"OOF_data_FOOOF_df.pkl")) +""" """ # %% Microstate analysis # The function takes the data as a numpy array 
(n_t, n_ch) @@ -928,10 +1060,26 @@ EO_p_hat = p_empirical(m_labels[1], n_maps) microstate_time_data = np.zeros((n_subjects,n_eye_status,n_maps)) microstate_transition_data = np.zeros((n_subjects,n_eye_status,n_maps,n_maps)) microstate_entropy_data = np.zeros((n_subjects,n_eye_status)) +microstate_orrurence_data = np.zeros((n_subjects,n_eye_status,n_maps)) +microstate_mean_duration_data = np.zeros((n_subjects,n_eye_status,n_maps)) for i in range(n_subjects): # Calculate ratio of time covered temp_EC_p_hat = p_empirical(micro_labels[i][0], n_maps) temp_EO_p_hat = p_empirical(micro_labels[i][1], n_maps) + + # Calcuate number of occurences for each microstate + for j in range(len(micro_labels[i][0])-1): + if micro_labels[i][0][j] != micro_labels[i][0][j+1]: + microstate_orrurence_data[i][0][micro_labels[i][0][j]] += 1 + for j in range(len(micro_labels[i][1])-1): + if micro_labels[i][1][j] != micro_labels[i][1][j+1]: + microstate_orrurence_data[i][1][micro_labels[i][1][j]] += 1 + + # Calculate mean duration of each microstate + for j in range(n_maps): + microstate_mean_duration_data[i][0][j] = sum(micro_labels[i][0] == j)/microstate_orrurence_data[i][0][j] + microstate_mean_duration_data[i][1][j] = sum(micro_labels[i][1] == j)/microstate_orrurence_data[i][1][j] + # Calculate transition matrix """ temp_EC_T_hat = T_empirical(micro_labels[i][0], n_maps, gaps_idx[i][0]) @@ -956,7 +1104,7 @@ np.save(Feature_savepath+"microstate_transition_data.npy", microstate_transition # Convert transition data to dataframe for further processing with other features # Transition matrix should be read as probability of row to column microstate_transition_data_arr =\ - microstate_transition_data.reshape((n_subjects,n_eye_status,n_maps*n_maps)) # flatten 4 x 4 matrix to 1D + microstate_transition_data.reshape((n_subjects,n_eye_status,n_maps*n_maps)) # flatten 5 x 5 matrix to 1D transition_info = ["M1->M1", "M1->M2", "M1->M3", "M1->M4", "M1->M5", "M2->M1", "M2->M2", "M2->M3", "M2->M4", 
"M2-M5", "M3->M1", "M3->M2", "M3->M3", "M3->M4", "M3->M5", @@ -985,9 +1133,16 @@ microstate_transition_data_df.insert(2, "Group_status", Group_status) microstate_transition_data_df.to_pickle(os.path.join(Feature_savepath,"microstate_transition_data_df.pkl")) # Convert time covered data to Pandas dataframe +# Convert orrurence data to Pandas dataframe +# Convert mean duration data to Pandas dataframe # The dimensions will each be a column with numbers and the last column will be the actual values arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, microstate_time_data.shape), indexing="ij"))) + [microstate_time_data.ravel()]) +arr_2 = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, microstate_orrurence_data.shape), indexing="ij"))) + [microstate_orrurence_data.ravel()]) +arr_3 = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, microstate_mean_duration_data.shape), indexing="ij"))) + [microstate_mean_duration_data.ravel()]) microstate_time_df = pd.DataFrame(arr, columns = ["Subject_ID", "Eye_status", "Microstate", "Value"]) +microstate_orrurence_df = pd.DataFrame(arr_2, columns = ["Subject_ID", "Eye_status", "Microstate", "Value"]) +microstate_mean_duration_df = pd.DataFrame(arr_3, columns = ["Subject_ID", "Eye_status", "Microstate", "Value"]) + # Change from numerical coding to actual values eye_status = list(final_epochs[0].event_id.keys()) microstates = [1,2,3,4,5] @@ -995,20 +1150,35 @@ microstates = [1,2,3,4,5] index_values = [Subject_id,eye_status,microstates] for col in range(len(index_values)): col_name = microstate_time_df.columns[col] + col_name_2 = microstate_orrurence_df.columns[col] + col_name_3 = microstate_mean_duration_df.columns[col] for shape in reversed(range(microstate_time_data.shape[col])): # notice this is the shape of original numpy array. 
Not shape of DF microstate_time_df.loc[microstate_time_df.iloc[:,col] == shape,col_name]\ = index_values[col][shape] + microstate_orrurence_df.loc[microstate_orrurence_df.iloc[:,col] == shape,col_name_2]\ + = index_values[col][shape] + microstate_mean_duration_df.loc[microstate_mean_duration_df.iloc[:,col] == shape,col_name_3]\ + = index_values[col][shape] # Reversed in inner loop is used to avoid sequencial data being overwritten. # E.g. if 0 is renamed to 1, then the next loop all 1's will be renamed to 2 # Add group status Group_status = np.array(["CTRL"]*len(microstate_time_df["Subject_ID"])) Group_status[np.array([i in cases for i in microstate_time_df["Subject_ID"]])] = "PTSD" +Group_status_2 = np.array(["CTRL"]*len(microstate_orrurence_df["Subject_ID"])) +Group_status_2[np.array([i in cases for i in microstate_orrurence_df["Subject_ID"]])] = "PTSD" +Group_status_3 = np.array(["CTRL"]*len(microstate_mean_duration_df["Subject_ID"])) +Group_status_3[np.array([i in cases for i in microstate_mean_duration_df["Subject_ID"]])] = "PTSD" + # Add to dataframe microstate_time_df.insert(2, "Group_status", Group_status) +microstate_orrurence_df.insert(2, "Group_status", Group_status_2) +microstate_mean_duration_df.insert(2, "Group_status", Group_status_3) # Save df microstate_time_df.to_pickle(os.path.join(Feature_savepath,"microstate_time_df.pkl")) +microstate_orrurence_df.to_pickle(os.path.join(Feature_savepath,"microstate_orrurence_df.pkl")) +microstate_mean_duration_df.to_pickle(os.path.join(Feature_savepath,"microstate_mean_duration_df.pkl")) # Transition data - mean # Get index for groups diff --git a/Features/Microstate_5_maps_10x5_k_means_results.pkl b/Features/Microstate_5_maps_10x5_k_means_results.pkl new file mode 100644 index 0000000000000000000000000000000000000000..0311fc0bd26ca5bae0c6470d50f5d7b5c037ed20 Binary files /dev/null and b/Features/Microstate_5_maps_10x5_k_means_results.pkl differ diff --git a/Features/PAF_data_FOOOF_df.pkl 
b/Features/PAF_data_FOOOF_df.pkl new file mode 100644 index 0000000000000000000000000000000000000000..f98da3beb36eea26b388805f113b7debbeacc331 Binary files /dev/null and b/Features/PAF_data_FOOOF_df.pkl differ diff --git a/Features/PAF_data_FOOOF_global_df.pkl b/Features/PAF_data_FOOOF_global_df.pkl new file mode 100644 index 0000000000000000000000000000000000000000..07402fb9d2a7a2db3b294ff0cf93fdc0e18b6946 Binary files /dev/null and b/Features/PAF_data_FOOOF_global_df.pkl differ diff --git a/Features/PAF_data_arr.pkl b/Features/PAF_data_arr.pkl new file mode 100644 index 0000000000000000000000000000000000000000..de27c9fb85c3338b5190667277def3cc783974af Binary files /dev/null and b/Features/PAF_data_arr.pkl differ diff --git a/Features/PBF_data_FOOOF_df.pkl b/Features/PBF_data_FOOOF_df.pkl new file mode 100644 index 0000000000000000000000000000000000000000..1b2a0d77acf7bf66254864f61ace91d9175af09f Binary files /dev/null and b/Features/PBF_data_FOOOF_df.pkl differ diff --git a/Features/PBF_data_FOOOF_global_df.pkl b/Features/PBF_data_FOOOF_global_df.pkl new file mode 100644 index 0000000000000000000000000000000000000000..2bb56dfdb8dad05ca66ab43003e525d3fbbd7d40 Binary files /dev/null and b/Features/PBF_data_FOOOF_global_df.pkl differ diff --git a/Features/PBF_data_arr.pkl b/Features/PBF_data_arr.pkl new file mode 100644 index 0000000000000000000000000000000000000000..1423f6d2591a5a47feb32422a2d1943b66148509 Binary files /dev/null and b/Features/PBF_data_arr.pkl differ diff --git a/Features/PTF_data_FOOOF_df.pkl b/Features/PTF_data_FOOOF_df.pkl new file mode 100644 index 0000000000000000000000000000000000000000..34dd4f8be8d267af5a866f201a3b110c81546657 Binary files /dev/null and b/Features/PTF_data_FOOOF_df.pkl differ diff --git a/Features/PTF_data_FOOOF_global_df.pkl b/Features/PTF_data_FOOOF_global_df.pkl new file mode 100644 index 0000000000000000000000000000000000000000..4e1b184fa3337f33d328950a513345f599af90eb Binary files /dev/null and 
b/Features/PTF_data_FOOOF_global_df.pkl differ diff --git a/Features/PTF_data_arr.pkl b/Features/PTF_data_arr.pkl new file mode 100644 index 0000000000000000000000000000000000000000..9d27e328b577834146a3c0b7a3d6f7082328ca5d Binary files /dev/null and b/Features/PTF_data_arr.pkl differ diff --git a/Features/Power_df.pkl b/Features/Power_df.pkl new file mode 100644 index 0000000000000000000000000000000000000000..805dba3cbb6d90d514d1a68df9fb21d0987618dd Binary files /dev/null and b/Features/Power_df.pkl differ diff --git a/Features/fBMS_df.pkl b/Features/fBMS_df.pkl new file mode 100644 index 0000000000000000000000000000000000000000..4b2a94e038c46123d1cb12742a1f77b6442f3e7f Binary files /dev/null and b/Features/fBMS_df.pkl differ diff --git a/Features/fBSV_df.pkl b/Features/fBSV_df.pkl new file mode 100644 index 0000000000000000000000000000000000000000..1103c2e0de2a331c3ca0c06419486a35f67d7768 Binary files /dev/null and b/Features/fBSV_df.pkl differ diff --git a/Features/fMTMS_df.pkl b/Features/fMTMS_df.pkl new file mode 100644 index 0000000000000000000000000000000000000000..5b438ada72ba60f67b3353c8b848d7c0ad173ba1 Binary files /dev/null and b/Features/fMTMS_df.pkl differ diff --git a/Features/fMTSV_df.pkl b/Features/fMTSV_df.pkl new file mode 100644 index 0000000000000000000000000000000000000000..69d71119692d45f6d2247cb4089e2ce072d0fd6b Binary files /dev/null and b/Features/fMTSV_df.pkl differ diff --git a/Features/fTBR_df.pkl b/Features/fTBR_df.pkl new file mode 100644 index 0000000000000000000000000000000000000000..32e3cb66f1e0dbcd850ed7222eb96a9074d6a050 Binary files /dev/null and b/Features/fTBR_df.pkl differ diff --git a/Features/microstate_entropy_df.pkl b/Features/microstate_entropy_df.pkl new file mode 100644 index 0000000000000000000000000000000000000000..88035aaf7a9df2eb541ac6eccd5913c6d045a7b2 Binary files /dev/null and b/Features/microstate_entropy_df.pkl differ diff --git a/Features/microstate_mean_duration_df.pkl 
b/Features/microstate_mean_duration_df.pkl new file mode 100644 index 0000000000000000000000000000000000000000..95bf138f7022ad7f3ce748629a60b932e4f87689 Binary files /dev/null and b/Features/microstate_mean_duration_df.pkl differ diff --git a/Features/microstate_orrurence_df.pkl b/Features/microstate_orrurence_df.pkl new file mode 100644 index 0000000000000000000000000000000000000000..5a26da0a41c7cd8bc3a3300ad64696cdcb172753 Binary files /dev/null and b/Features/microstate_orrurence_df.pkl differ diff --git a/Features/microstate_time_df.pkl b/Features/microstate_time_df.pkl new file mode 100644 index 0000000000000000000000000000000000000000..417fa4f25537e93e27497666966770c8397e94cf Binary files /dev/null and b/Features/microstate_time_df.pkl differ diff --git a/Features/microstate_transition_data.npy b/Features/microstate_transition_data.npy new file mode 100644 index 0000000000000000000000000000000000000000..aa25438826e51748fa1388a445c581a739257613 Binary files /dev/null and b/Features/microstate_transition_data.npy differ diff --git a/Features/microstate_transition_data_df.pkl b/Features/microstate_transition_data_df.pkl new file mode 100644 index 0000000000000000000000000000000000000000..84a3b516bb3ce2ab1edc2142ff17176fb946e958 Binary files /dev/null and b/Features/microstate_transition_data_df.pkl differ diff --git a/Features/pAMS_df.pkl b/Features/pAMS_df.pkl new file mode 100644 index 0000000000000000000000000000000000000000..ba4c626eb6e3d31fd11ab249322a33190b0d9d86 Binary files /dev/null and b/Features/pAMS_df.pkl differ diff --git a/Features/pASV_df.pkl b/Features/pASV_df.pkl new file mode 100644 index 0000000000000000000000000000000000000000..755775624ac1b08167fb0eed3d225ed095a77260 Binary files /dev/null and b/Features/pASV_df.pkl differ diff --git a/Figures/Microstates/Microstates_EC.png b/Figures/Microstates/Microstates_EC.png new file mode 100644 index 0000000000000000000000000000000000000000..e70cc0ecf65c19f1b4fa046c2faac80ae57a1b4b Binary files 
# Set working directory
import os
wkdir = "/Users/benj3542/Desktop/Uni/Noter/Semester_6/Bachelor/resting-state-eeg-analysis"
os.chdir(wkdir)

# Load all libraries from the Preamble
from Preamble import *

### Load questionnaire data
# For the purposes of this demonstration I will make a dummy dataframe.
# The original code imported csv files with questionnaire data and group status.
final_qdf = pd.DataFrame({"Subject_ID": [1, 2],
                          "Age": [23, 26],
                          "Gender": [0, 0],
                          "Group_status": [0, 1],
                          "PCL_total": [33, 56],
                          "Q1": [1.2, 2.3],
                          "Q2": [1.7, 1.5],
                          "Qn": [2.1, 1.0]})

# Define cases as >= 44 total PCL
# Type: numpy array with subject id
cases = np.array(final_qdf["Subject_ID"][final_qdf["PCL_total"] >= 44])
n_groups = 2
Groups = ["CTRL", "PTSD"]
n_subjects = 2

# Define paths to data
Feature_savepath = "./Features/"


def load_features_df():
    """Load the pickled EEG feature dataframes, assemble one wide feature
    table, and return six transposed matrices:

        (eyes-closed all, eyes-open all,
         eyes-closed PTSD, eyes-closed CTRL,
         eyes-open PTSD, eyes-open CTRL)

    Each matrix is oriented features x subjects after the final transpose.
    """
    # Load all features.  Several of these are loaded but not (yet) merged
    # into the feature table below; they are kept for parity with the
    # original analysis pipeline.
    power_df = pd.read_pickle(Feature_savepath + "Power_df.pkl")
    fBMS_data_df = pd.read_pickle(Feature_savepath + "fBMS_df.pkl")
    fBSV_data_df = pd.read_pickle(Feature_savepath + "fBSV_df.pkl")
    fMTMS_data_df = pd.read_pickle(Feature_savepath + "fMTMS_df.pkl")
    fMTSV_data_df = pd.read_pickle(Feature_savepath + "fMTSV_df.pkl")
    pAMS_data_df = pd.read_pickle(Feature_savepath + "pAMS_df.pkl")
    pASV_data_df = pd.read_pickle(Feature_savepath + "pASV_df.pkl")

    PBF_data_df = pd.read_pickle(Feature_savepath + "PBF_data_FOOOF_df.pkl")
    PBF_data_df_global = pd.read_pickle(Feature_savepath + "PBF_data_FOOOF_global_df.pkl")
    PTF_data_df = pd.read_pickle(Feature_savepath + "PTF_data_FOOOF_df.pkl")
    PTF_data_df_global = pd.read_pickle(Feature_savepath + "PTF_data_FOOOF_global_df.pkl")
    PAF_data_df = pd.read_pickle(Feature_savepath + "PAF_data_FOOOF_df.pkl")
    PAF_data_df_global = pd.read_pickle(Feature_savepath + "PAF_data_FOOOF_global_df.pkl")

    microstate_transition_data_df = pd.read_pickle(Feature_savepath + "microstate_transition_data_df.pkl")
    microstate_time_df = pd.read_pickle(Feature_savepath + "microstate_time_df.pkl")
    microstate_entropy_df = pd.read_pickle(Feature_savepath + "microstate_entropy_df.pkl")
    # NOTE(review): "orrurence" is a typo, but it is the actual on-disk
    # filename written by the feature-estimation step — do not "fix" it here.
    microstate_occurrence_df = pd.read_pickle(Feature_savepath + "microstate_orrurence_df.pkl")
    microstate_mean_duration_df = pd.read_pickle(Feature_savepath + "microstate_mean_duration_df.pkl")

    # Human-readable inventory of the feature families (documentation only,
    # not read anywhere below).
    EEG_features_name_list = [['Power'],
                              ['Frontal Beta Values'],
                              ['Mean Frontal Beta Values'],
                              ['Frontal Midline Theta Values'],
                              ['Mean Frontal Midline Theta Values'],
                              ['Parietal Alpha Values'],
                              ['Mean Parietal Alpha Values'],
                              ['Peak Beta Frequency (FOOOF | Frontal)',
                               'Global Beta Alpha Frequency'],
                              ['Peak Theta Frequency (FOOOF | Frontal, Midline)',
                               'Global Thet Alpha Frequency'],
                              ['Peak Alpha Frequency (FOOOF | Parietal)',
                               'Global Peak Alpha Frequency'],
                              ['Microstate Transition',
                               'Microstate Ratio Time',
                               'Microstate Entropy']]

    # Prepare overall dataframe: keep the id/grouping columns of the frontal
    # beta table and drop its value column.
    EEG_features_df = fBMS_data_df.drop(["PSD"], axis=1)

    def _append_value_column(features_df, values, column_name):
        # Merge a single "Value" series on the positional index and give it
        # a descriptive column name.
        merged = features_df.merge(values.reset_index(drop=True),
                                   left_index=True, right_index=True)
        return merged.rename(columns={"Value": column_name})

    # Add the microstate features: for each of the three microstate
    # dataframes, one column per microstate 1..5.  (This replaces 15
    # copy-pasted merge/rename pairs; column names — including the odd
    # " |" spacing — are kept exactly as before.)
    microstate_sources = [(microstate_time_df, "Time"),
                          (microstate_occurrence_df, "Occurrence"),
                          (microstate_mean_duration_df, "Mean Duration")]
    for source_df, label in microstate_sources:
        for ms in range(1, 6):
            values = source_df[source_df["Microstate"] == float(ms)]["Value"]
            EEG_features_df = _append_value_column(
                EEG_features_df, values, "Microstate %d |%s" % (ms, label))

    def _subset(eye_status, group_status=None):
        # Filter on the string labels first, then recode group to 0/1.
        mask = EEG_features_df["Eye_status"] == eye_status
        if group_status is not None:
            mask = mask & (EEG_features_df["Group_status"] == group_status)
        return EEG_features_df[mask].replace({"CTRL": 0, "PTSD": 1})

    def _as_matrix(df):
        # Drop the non-feature columns and return a plain numpy matrix.
        return df.drop(["Eye_status", "Subject_ID"], axis=1).to_numpy()

    # Per-group matrices for later use
    Eyes_closed_PTSD = _as_matrix(_subset("Eyes Closed", "PTSD"))
    Eyes_closed_CTRL = _as_matrix(_subset("Eyes Closed", "CTRL"))
    Eyes_open_PTSD = _as_matrix(_subset("Eyes Open", "PTSD"))
    Eyes_open_CTRL = _as_matrix(_subset("Eyes Open", "CTRL"))

    # Combined matrices per eye condition, ready for classification
    Eyes_closed_data = _as_matrix(_subset("Eyes Closed"))
    Eyes_open_data = _as_matrix(_subset("Eyes Open"))

    return (np.transpose(Eyes_closed_data), np.transpose(Eyes_open_data),
            np.transpose(Eyes_closed_PTSD), np.transpose(Eyes_closed_CTRL),
            np.transpose(Eyes_open_PTSD), np.transpose(Eyes_open_CTRL))


# NOTE(review): "Eyes_Cloased" and "Eyes_open_PSTD" are misspelled, but the
# names are kept as-is because later code in this file reads them.
Eyes_Cloased, Eyes_Open, Eyes_closed_PTSD, Eyes_closed_CTRL, Eyes_open_PSTD, Eyes_open_CTRL = load_features_df()

# Get the data ready for classification (extract target and features).
# Row 0 of each transposed matrix is taken as the target.
# NOTE(review): this assumes Group_status is the first remaining column
# after dropping Eye_status/Subject_ID — confirm against fBMS_df.pkl.
Eyes_Cloased_y = Eyes_Cloased[0]
Eyes_Cloased_X = np.transpose(Eyes_Cloased[1:])

Eyes_Open_y = Eyes_Open[0]
Eyes_Open_X = np.transpose(Eyes_Open[1:])


# Do PCA
import numpy as np
import matplotlib.pyplot as plt


from sklearn import decomposition
from sklearn import datasets

# unused but required import for doing 3d projections with matplotlib < 3.2
import mpl_toolkits.mplot3d  # noqa: F401

np.random.seed(5)


def PCA(X, y, n_components):
    """Project X onto its first `n_components` principal components,
    scatter the first three in 3D coloured by group, save the figure to
    ./Figures/PCA_3D.png, and show it.

    Parameters
    ----------
    X : array, shape (subjects, features)
    y : array of group labels (0 = CTRL, 1 = PTSD)
    n_components : int, number of components to keep (>= 3 for the plot)

    Returns a status string.
    """
    fig = plt.figure(1, figsize=(4, 3))
    plt.clf()

    ax = fig.add_subplot(111, projection="3d", elev=48, azim=134)
    ax.set_position([0, 0, 0.95, 1])

    plt.cla()
    pca = decomposition.PCA(n_components=n_components)
    pca.fit(X)
    X = pca.transform(X)

    # Label each group at its centroid in the projected space.
    # NOTE(review): Axes3D.text expects (x, y, z, s); only two coordinates
    # plus the label are passed here — likely a missing z coordinate carried
    # over from the sklearn iris example. Confirm before relying on it.
    for name, label in [("CTRL", 0), ("PTSD", 1)]:
        ax.text3D(
            X[y == label, 0].mean(),
            X[y == label, 1].mean(),
            name,
            horizontalalignment="center",
            bbox=dict(alpha=0.5, edgecolor="w", facecolor="w"),
        )
    # Reorder the labels to have colors matching the cluster results
    y = np.choose(y, [1, 2, 0]).astype(float)
    ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y, cmap=plt.cm.nipy_spectral, edgecolor="k")

    ax.xaxis.set_ticklabels([])
    ax.yaxis.set_ticklabels([])
    ax.zaxis.set_ticklabels([])

    # BUGFIX: save BEFORE plt.show().  show() hands the figure to the GUI
    # event loop and in non-interactive runs leaves it empty/closed, so the
    # original savefig-after-show wrote a blank image.
    # (Local variable renamed so it no longer shadows the module-level
    # Feature_savepath, which points at ./Features/.)
    figure_savepath = "./Figures/"
    plt.savefig(figure_savepath + "PCA_3D.png", dpi=300)

    plt.show()

    return "PCA was complete without errors - check the plot in your chosen path"

# PCA(Eyes_Cloased_X, Eyes_Cloased_y, 3)
# PCA(Eyes_Open_X, Eyes_Open_y, 3)
# Cross validation of PTSD vs CTRL classification
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from sklearn.metrics import auc

from sklearn.model_selection import train_test_split

from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold
from numpy import mean
from numpy import std

# SVM classifier
from sklearn.svm import SVC


# Input of kernel, C and gamma should be a list
def Nested_cross_validation(X, y, kernel, C, gamma, n_splits):
    """Nested cross-validation of an SVM classifier: 10 outer folds, with an
    inner n_splits-fold GridSearchCV over `kernel`, `C` and `gamma` (each a
    list of candidate values).

    For each outer fold and each of four criteria (accuracy, precision,
    recall, F1), the grid search picks the best model on the training part
    using that criterion, and the refit model is scored on the held-out
    fold with the matching metric.  Per-fold scores and a final mean (std)
    summary are printed.

    Returns a status string; all results are reported via print().
    """
    # Base estimator; the kernel set here is overridden by the grid search.
    # (Renamed from `svm` so it no longer shadows the imported sklearn.svm
    # module.)
    estimator = SVC(kernel="rbf")

    # Hyper-parameter search space for the inner loop
    tuned_parameters = {"kernel": kernel, "C": C, "gamma": gamma}

    # (summary label, GridSearchCV scoring name, hold-out metric function) —
    # this loop replaces four verbatim copies of the search/fit/score code.
    scorers = [
        ("Accuracy", "accuracy", accuracy_score),
        ("Precision", "precision", precision_score),
        ("Recall", "recall", recall_score),
        ("F1", "f1", f1_score),
    ]

    # configure the outer cross-validation procedure
    cv_outer = KFold(n_splits=10, shuffle=True, random_state=1)

    # one list of outer-fold scores per criterion
    outer_scores = {label: [] for label, _, _ in scorers}
    for train_ix, test_ix in cv_outer.split(X):
        # split data
        X_train, X_test = X[train_ix, :], X[test_ix, :]
        y_train, y_test = y[train_ix], y[test_ix]
        # configure the inner cross-validation procedure
        cv_inner = KFold(n_splits=n_splits, shuffle=True, random_state=1)
        for label, scoring, metric in scorers:
            search = GridSearchCV(estimator, tuned_parameters,
                                  scoring=scoring, cv=cv_inner, refit=True)
            # best model for this criterion, refit on the whole training part
            best_model = search.fit(X_train, y_train).best_estimator_
            # evaluate on the hold-out fold with the matching metric
            score = metric(y_test, best_model.predict(X_test))
            outer_scores[label].append(score)
            # report progress (same format as before: score, running mean)
            print(">%s %.3f" % (score, mean(outer_scores[label])))

    # summarize the estimated performance of the model
    for label, _, _ in scorers:
        print("%s: %.3f (%.3f)" % (label, mean(outer_scores[label]), std(outer_scores[label])))

    return "Nested cross validation was complete without errors - check the printed results"


from scipy.stats import permutation_test

# Permutation test: drop the group-label row (row 0) and orient each matrix
# as (subjects, features) for the per-feature tests below.
# NOTE(review): "Eyes_open_PSTD" is a misspelled name bound at load time;
# it is deliberately read here and rebound to the correctly spelled name.
Eyes_closed_PTSD = np.transpose(Eyes_closed_PTSD[1:])
Eyes_closed_CTRL = np.transpose(Eyes_closed_CTRL[1:])
Eyes_open_PTSD = np.transpose(Eyes_open_PSTD[1:])
Eyes_open_CTRL = np.transpose(Eyes_open_CTRL[1:])
+def statistic(x, y, axis): + return np.mean(x, axis=axis) - np.mean(y, axis=axis) + +def Permutation_test(X, Y, Z, W): + # Variable for results + results_Eyes_Closed = [] + p_values_Eyes_Closed = [] + for i in range(len(X[0])): + res = permutation_test((X[:,i], Y[:,i]), statistic, permutation_type = 'independent', vectorized=True, n_resamples=np.inf, alternative='less') + results_Eyes_Closed.append(res.statistic) + p_values_Eyes_Closed(res.pvalue) + results_Eyes_Open = [] + p_values_Eyes_Open = [] + for i in range(len(Z[0])): + res = permutation_test((Z[:,i], W[:,i]), statistic, permutation_type = 'independent', vectorized=True, n_resamples=np.inf, alternative='less') + results_Eyes_Open.append(res.statistic) + p_values_Eyes_Open.append(res.pvalue) + return results_Eyes_Closed, p_values_Eyes_Closed, results_Eyes_Open, p_values_Eyes_Open diff --git a/Microstate_n_cluster_test_results.npy b/Microstate_n_cluster_test_results.npy new file mode 100644 index 0000000000000000000000000000000000000000..93124599f1d906816b04c49b307cbe74100beeb6 Binary files /dev/null and b/Microstate_n_cluster_test_results.npy differ diff --git a/PreprocessedData/1_preprocessed-epo.fif b/PreprocessedData/1_preprocessed-epo.fif new file mode 100644 index 0000000000000000000000000000000000000000..e39bba7a8b6b4eb30afa611ca69b9e2e84bf6cb2 Binary files /dev/null and b/PreprocessedData/1_preprocessed-epo.fif differ diff --git a/PreprocessedData/2_preprocessed-epo.fif b/PreprocessedData/2_preprocessed-epo.fif new file mode 100644 index 0000000000000000000000000000000000000000..b27ff089b52deb4c01e91b55169094a31beba77e Binary files /dev/null and b/PreprocessedData/2_preprocessed-epo.fif differ diff --git a/Preprocessing/bad_ch.pkl b/Preprocessing/bad_ch.pkl new file mode 100644 index 0000000000000000000000000000000000000000..cb11972e6939316c1528ad5749e5ac0af2028c79 Binary files /dev/null and b/Preprocessing/bad_ch.pkl differ diff --git a/Preprocessing/dropped_epochs.pkl 
b/Preprocessing/dropped_epochs.pkl new file mode 100644 index 0000000000000000000000000000000000000000..72a7009be8ab31a9c1439b75aa0c825c40c3769d Binary files /dev/null and b/Preprocessing/dropped_epochs.pkl differ diff --git a/__pycache__/Preamble.cpython-310.pyc b/__pycache__/Preamble.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..696c247a4cf74f8aa174bc61fa5eb7cb3fcad907 Binary files /dev/null and b/__pycache__/Preamble.cpython-310.pyc differ diff --git a/__pycache__/eeg_microstates3.cpython-310.pyc b/__pycache__/eeg_microstates3.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e6ab8e955a07d094f80dfcb7e608c83ede141781 Binary files /dev/null and b/__pycache__/eeg_microstates3.cpython-310.pyc differ diff --git a/__pycache__/feature_select.cpython-310.pyc b/__pycache__/feature_select.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..855de3ce368628fbaa1525ebb9c50a56b96b9d3b Binary files /dev/null and b/__pycache__/feature_select.cpython-310.pyc differ diff --git a/eeg_microstates b/eeg_microstates new file mode 160000 index 0000000000000000000000000000000000000000..0cca43fcc02772cb5171b48b5878f66303e0c903 --- /dev/null +++ b/eeg_microstates @@ -0,0 +1 @@ +Subproject commit 0cca43fcc02772cb5171b48b5878f66303e0c903