local changes to files for complete usage

343784a6 · s200431 · dda79002 · 343784a6 · 343784a6 · 343784a6
Commit 343784a6 authored Mar 20, 2023 by s200431
--- a/FeatureEstimation.py
+++ b/FeatureEstimation.py
@@ -43,7 +43,7 @@ os.chdir(wkdir)
 from Preamble import *

 # %% Load preprocessed epochs and questionnaire data
-load_path = "home/s200431/PreprocessedData"
+load_path = "/home/s200431/PreprocessedData"

 # Get filenames
 files = []
@@ -297,7 +297,7 @@ Drop_epochs_df = Drop_epochs_df.sort_values(by=["Subject_ID"]).reset_index(drop=
 ### Load questionnaire data
 # ISAF
 qdf_ISAF7 = pd.read_csv("/data/raw/FOR_DTU/Questionnaires_for_DTU.csv",
-                  sep=";", na_values=' ')
+                   na_values=' ')
 # Rename Subject_ID column
 qdf_ISAF7.rename({"ID_number": "Subject_ID"}, axis=1, inplace=True)
 # Sort Subject_id to match Subject_id for files
@@ -363,8 +363,15 @@ final_qdf2 = final_qdf2.fillna(final_qdf2.mean())
 final_qdf0.columns = final_qdf1.columns # fix colnames with t7
 final_qdf = pd.concat([final_qdf0,final_qdf1,final_qdf2], ignore_index=True)

+# Define folder for saving features
+Feature_savepath = "./Features/"
+Stat_savepath = "./Statistics/"
+Model_savepath = "./Model/"
+
 # Ensure all columns are integers
 final_qdf = final_qdf.astype("int")
+final_qdf.to_pickle(os.path.join(Feature_savepath,"final_qdf.pkl"))
+

 # Define cases as >= 44 total PCL
 # Type: numpy array with subject id
@@ -409,10 +416,6 @@ test_d3 = qdf_base["Subject_ID"][qdf_base["Subject_ID"].isin(Subject_id)]
 test_d4 = np.concatenate([test_d1.to_numpy(),test_d2.to_numpy(),test_d3.to_numpy()])
 assert all(np.equal(Subject_id,test_d4))

-# Define folder for saving features
-Feature_savepath = "./Features/"
-Stat_savepath = "./Statistics/"
-Model_savepath = "./Model/"

 # %% Power spectrum features
 Freq_Bands = {"delta": [1.25, 4.0],
@@ -472,6 +475,7 @@ with concurrent.futures.ProcessPoolExecutor() as executor:
    for n, result in executor.map(power_band_estimation, range(len(final_epochs))): # Function and arguments
        power_bands[n] = result
 """
+"""

 for i in range(len(power_bands)):
    n, results = power_band_estimation(i)
@@ -540,15 +544,15 @@ power_df = pd.DataFrame(data = {"Subject_ID": [ele for ele in Subject_id for i i
 # Fix Freq_band categorical order
 power_df["Freq_band"] = power_df["Freq_band"].astype("category").\
            cat.reorder_categories(list(Freq_Bands.keys()), ordered=True)
-"""
+
 # Fix Brain_region categorical order
 power_df["Brain_region"] = power_df["Brain_region"].astype("category").\
            cat.reorder_categories(Brain_region_labels, ordered=True)
 """
 # Save the dataframe
-power_df.to_pickle(os.path.join(Feature_savepath,"Power_df.pkl"))
-
+# power_df.to_pickle(os.path.join(Feature_savepath,"Power_df.pkl"))

+"""
 # %% Theta-beta ratio
 # Frontal theta/beta ratio has been implicated in cognitive control of attention
 power_df = pd.read_pickle(os.path.join(Feature_savepath,"Power_df.pkl"))
@@ -629,7 +633,7 @@ frontal_theta_beta_ratio.insert(3, "Measurement", dummy_variable )

 # frontal_theta_beta_ratio.to_pickle(os.path.join(Feature_savepath,"fTBR_df.pkl"))

-"""
+
 # %% Frequency bands asymmetry
 # Defined as ln(right) - ln(left)
 # Thus we should only work with the absolute values and undo the dB transformation
@@ -729,7 +733,7 @@ assert asymmetry[i,e,f,r] == asymmetry_df.iloc[random_point]["Asymmetry_score"]
 asymmetry_df.to_pickle(os.path.join(Feature_savepath,"asymmetry_df.pkl"))

 """
-
+"""
 # %% Using FOOOF
 # Peak alpha frequency (PAF) and 1/f exponent (OOF)
 # Using the FOOOF algorithm (Fitting oscillations and one over f)
@@ -1004,7 +1008,7 @@ PTF_data_df.to_pickle(os.path.join(Feature_savepath,"PTF_data_FOOOF_df.pkl"))
 PTF_data_df_global.to_pickle(os.path.join(Feature_savepath,"PTF_data_FOOOF_global_df.pkl"))
 PBF_data_df.to_pickle(os.path.join(Feature_savepath,"PBF_data_FOOOF_df.pkl"))
 PBF_data_df_global.to_pickle(os.path.join(Feature_savepath,"PBF_data_FOOOF_global_df.pkl"))
-
+"""
 # # Convert to Pandas dataframe (only keep exponent parameter for OOF)
 # # The dimensions will each be a column with numbers and the last column will be the actual values
 # ori = OOF_data[:,:,:,1]
@@ -1073,6 +1077,7 @@ micro_data_all = np.vstack(micro_data)

 # Determine the number of clusters
 # I use a slightly modified kmeans function which returns the cv_min
+"""
 global_gev = []
 cv_criterion = []
 for n_maps in range(2,7):
@@ -1093,6 +1098,7 @@ plt.plot(np.linspace(2,6,len(cv_criterion)),(cv_criterion/np.sum(cv_criterion)),
 plt.plot(np.linspace(2,6,len(cv_criterion)),(global_gev/np.sum(global_gev)), label="GEV")
 plt.legend()
 plt.ylabel("Normalized to total")
+"""
 # The lower CV the better.
 # But the higher GEV the better.
 # Based on the plots and the recommendation by vong Wegner & Laufs 2018
@@ -1117,6 +1123,11 @@ microstate_cluster_results = []
 c_time1 = time.localtime()
 c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1)
 print(c_time1)
+# Change datatype due to error with computational power in clustering 
+EC_down = np.array(EC_micro_data, dtype = object)
+#EC_down = EC_down.astype('float32')
+EO_down = np.array(EO_micro_data, dtype = object)
+#EO_down = EO_down.astype('float32')

 for r in range(n_run):
    maps = [0]*2
@@ -1126,7 +1137,7 @@ for r in range(n_run):
    # Eyes closed
    counter = 0
    maps_, x_, gfp_peaks_, gev_ = clustering(
-        np.vstack(np.array(EC_micro_data)), sfreq, ch_names, locs, mode, n_maps, doplot=False) # doplot=True is bugged
+        np.vstack(EC_down), sfreq, ch_names, locs, mode, n_maps, doplot=False) # doplot=True is bugged
    maps[counter] = maps_
    m_labels[counter] = x_
    gfp_peaks[counter] = gfp_peaks_
@@ -1134,7 +1145,7 @@ for r in range(n_run):
    counter += 1
    # Eyes open
    maps_, x_, gfp_peaks_, gev_ = clustering(
-        np.vstack(np.array(EO_micro_data)), sfreq, ch_names, locs, mode, n_maps, doplot=False) # doplot=True is bugged
+        np.vstack(EO_down), sfreq, ch_names, locs, mode, n_maps, doplot=False) # doplot=True is bugged
    maps[counter] = maps_
    m_labels[counter] = x_
    gfp_peaks[counter] = gfp_peaks_

--- a/MachineLearning_2.py
+++ b/MachineLearning_2.py
 # Set working directory
 import os
-wkdir = "/Users/benj3542/Desktop/Uni/Noter/Semester_6/Bachelor/resting-state-eeg-analysis"
+wkdir = "/home/s200431"
 os.chdir(wkdir)

 # Load all libraries from the Preamble
 from Preamble import *

+# Define paths to data
+Feature_savepath = "./Features/"
+
 ### Load questionnaire data
 # For the purposes of this demonstration I will make a dummy dataframe
 # The original code imported csv files with questionnaire data and group status
-final_qdf = pd.DataFrame({"Subject_ID":[1,2],
-                          "Age":[23,26],
-                          "Gender":[0,0],
-                          "Group_status":[0,1],
-                          "PCL_total":[33,56],
-                          "Q1":[1.2, 2.3],
-                          "Q2":[1.7, 1.5],
-                          "Qn":[2.1,1.0]})
+final_qdf = pd.read_pickle(Feature_savepath+"final_qdf.pkl")

 # Define cases as >= 44 total PCL
 # Type: numpy array with subject id
 cases = np.array(final_qdf["Subject_ID"][final_qdf["PCL_total"]>=44])
 n_groups = 2
 Groups = ["CTRL", "PTSD"]
-n_subjects = 2
-
-# Define paths to data
-Feature_savepath = "./Features/"
+n_subjects = 203

 # Make function to load EEG features
 def load_features_df():
@@ -171,12 +164,13 @@ def load_features_df():
 Eyes_Cloased, Eyes_Open, Eyes_closed_PTSD, Eyes_closed_CTRL, Eyes_open_PSTD, Eyes_open_CTRL = load_features_df()

 # Get the data ready for classification and other analysis (extract target and features)
-Eyes_Cloased_y = Eyes_Cloased[0]
-Eyes_Cloased_X = np.transpose(Eyes_Cloased[1:])
+Eyes_Closed_y = Eyes_Cloased[0]
+Eyes_Closed_X = np.transpose(Eyes_Cloased[1:])

 Eyes_Open_y = Eyes_Open[0]
 Eyes_Open_X = np.transpose(Eyes_Open[1:])

+print(Eyes_Closed_X, Eyes_Closed_y, len(Eyes_Closed_X), len(Eyes_Closed_y))

 # Do PCA 
 import numpy as np
@@ -188,39 +182,41 @@ from sklearn import datasets

 # unused but required import for doing 3d projections with matplotlib < 3.2
 import mpl_toolkits.mplot3d  # noqa: F401
+from mpl_toolkits.mplot3d import Axes3D
+from sklearn.preprocessing import StandardScaler

 # import some data to play with
 np.random.seed(5)

 def PCA(X, y, n_components):
-    fig = plt.figure(1, figsize=(4, 3))
+    fig = plt.figure(1, figsize=(6, 6))
+    ax = fig.add_subplot(111, projection = "3d")
    plt.clf()

-    ax = fig.add_subplot(111, projection="3d", elev=48, azim=134)
-    ax.set_position([0, 0, 0.95, 1])
-

    plt.cla()
+    scaler = StandardScaler()
+    X = scaler.fit_transform(X)
    pca = decomposition.PCA(n_components=n_components)
    pca.fit(X)
    X = pca.transform(X)

    for name, label in [("CTRL", 0), ("PTSD", 1)]:
-        ax.text3D(
-            X[y == label, 0].mean(),
-            X[y == label, 1].mean(),
-            name,
-            horizontalalignment="center",
-            bbox=dict(alpha=0.5, edgecolor="w", facecolor="w"),
+        plt.scatter(
+            X[y == label, 0],
+            X[y == label, 1],
+            X[y == label, 2],
+            label = name,
+            alpha = 0.5,
        )
    # Reorder the labels to have colors matching the cluster results
-    y = np.choose(y, [1, 2, 0]).astype(float)
-    ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y, cmap=plt.cm.nipy_spectral, edgecolor="k")
-
-    ax.xaxis.set_ticklabels([])
-    ax.yaxis.set_ticklabels([])
-    ax.zaxis.set_ticklabels([])
+    #plt.legend(loc = "best", shadow = False, scatterpoints = 1)
+    ax.set_xlabel("PCA Component 1")
+    ax.set_ylabel("PCA Component 2")
+    ax.set_zlabel("PCA Component 3")
+    ax.set_title("PCA 3D")
    
+    ax.legend()
    plt.show()

    # Safe plot 
@@ -229,6 +225,8 @@ def PCA(X, y, n_components):

    return "PCA was complete without errors - check the plot in your chosen path"

+PCA(Eyes_Closed_X, Eyes_Closed_y, None)
+PCA(Eyes_Open_X, Eyes_Open_y, None)
 # PCA(Eyes_Cloased_X, Eyes_Cloased_y, 3)
 # PCA(Eyes_Open_X, Eyes_Open_y, 3)

@@ -258,18 +256,20 @@ from numpy import std
 from sklearn.svm import SVC

 # Input of kernel, c and gamma should be a list
+C = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
+gamma = ["scale", "auto"]

-def Nested_cross_validation(X, y, kernel, C, gamma, n_splits):
+def Nested_cross_validation(X, y, C, gamma, n_splits):
    # Create a classifier: a support vector classifier with "rbf" kernel
    svm = SVC(kernel="rbf")

    # Perform nested cross validation
    # Set the parameters by cross-validation
-    tuned_parameters = {"kernel": kernel, "C": C, "gamma": gamma}
+    tuned_parameters = {"C": C, "gamma": gamma}
    n_splits = n_splits

    # configure the cross-validation procedure
-    cv_outer = KFold(n_splits=10, shuffle=True, random_state=1)
+    cv_outer = KFold(n_splits=n_splits, shuffle=True, random_state=1)

    # enumerate splits
    outer_results = list()
@@ -326,6 +326,8 @@ def Nested_cross_validation(X, y, kernel, C, gamma, n_splits):

    return "Nested cross validation was complete without errors - check the printed results"

+#Nested_cross_validation(Eyes_Closed_X, Eyes_Closed_y, C, gamma, 10)
+#Nested_cross_validation(Eyes_Open_X, Eyes_Open_y, C, gamma, 10)

 from scipy.stats import permutation_test


--- a/Main.py
+++ b/Main.py
@@ -63,7 +63,7 @@ from mlxtend.plotting import plot_sequential_feature_selection as plot_sfs

 from nitime.viz import drawmatrix_channels

-from eeg_microstates3 import * # import downloaded package for microstates [von Wegner & Lauf, 2018]
+from eeg_microstates import * # import downloaded package for microstates [von Wegner & Lauf, 2018]

 #from EEG_functions_toolbox import * # compilation of useful/modified functions

@@ -1160,7 +1160,6 @@ for n in range(len(final_epochs)):
    final_epochs[n].save(fname = os.path.join(save_path,str("{}_HELSE_corrected".format(Subject_id[n]) + "-epo.fif")),
                    overwrite=True, verbose=0)

-"""
 # %% Load Baseline data
 data_path = "/data/sep2020/"

@@ -1835,7 +1834,7 @@ len(list(All_drop_epoch_df2.loc[All_drop_epoch_df2["Number_drop_epoch"]>30,"Subj

 # Manually check the dropped subjects
 # final_epochs[83].plot(scalings=100e-6, n_channels=19, n_epochs=15)
-
+"""
 # %% Load the pre-processed epochs
 load_path = "/home/s200431/PreprocessedData"


--- a/Preamble.py
+++ b/Preamble.py
@@ -77,7 +77,7 @@ from mpl_toolkits.mplot3d import Axes3D # registers 3D projections

 # Non-library scripts
 # EEG microstate package by von Wegner & Lauf, 2018
-from eeg_microstates3 import * # downloadeded from https://github.com/Frederic-vW/eeg_microstates
+from eeg_microstates import * # downloadeded from https://github.com/Frederic-vW/eeg_microstates
 # minimum Redundancy Maximum Relevance script by Kiran Karra
 from feature_select import * # downloaded from https://github.com/stochasticresearch/featureselect/blob/master/python/feature_select.py


--- a/__pycache__/Preamble.cpython-310.pyc
+++ b/__pycache__/Preamble.cpython-310.pyc
--- a/__pycache__/eeg_microstates.cpython-310.pyc
+++ b/__pycache__/eeg_microstates.cpython-310.pyc
--- a/__pycache__/eeg_microstates3.cpython-310.pyc
+++ b/__pycache__/eeg_microstates3.cpython-310.pyc
--- a/__pycache__/feature_select.cpython-310.pyc
+++ b/__pycache__/feature_select.cpython-310.pyc
--- a/eeg_microstates.py
+++ b/eeg_microstates.py