Skip to content
Snippets Groups Projects
Commit 343784a6 authored by s200431's avatar s200431
Browse files

local changes to files for complete usage

parent dda79002
Branches
No related tags found
No related merge requests found
......@@ -43,7 +43,7 @@ os.chdir(wkdir)
from Preamble import *
# %% Load preprocessed epochs and questionnaire data
load_path = "home/s200431/PreprocessedData"
load_path = "/home/s200431/PreprocessedData"
# Get filenames
files = []
......@@ -297,7 +297,7 @@ Drop_epochs_df = Drop_epochs_df.sort_values(by=["Subject_ID"]).reset_index(drop=
### Load questionnaire data
# ISAF
qdf_ISAF7 = pd.read_csv("/data/raw/FOR_DTU/Questionnaires_for_DTU.csv",
sep=";", na_values=' ')
na_values=' ')
# Rename Subject_ID column
qdf_ISAF7.rename({"ID_number": "Subject_ID"}, axis=1, inplace=True)
# Sort Subject_id to match Subject_id for files
......@@ -363,8 +363,15 @@ final_qdf2 = final_qdf2.fillna(final_qdf2.mean())
final_qdf0.columns = final_qdf1.columns # fix colnames with t7
final_qdf = pd.concat([final_qdf0,final_qdf1,final_qdf2], ignore_index=True)
# Define folder for saving features
Feature_savepath = "./Features/"
Stat_savepath = "./Statistics/"
Model_savepath = "./Model/"
# Ensure all columns are integers
final_qdf = final_qdf.astype("int")
final_qdf.to_pickle(os.path.join(Feature_savepath,"final_qdf.pkl"))
# Define cases as >= 44 total PCL
# Type: numpy array with subject id
......@@ -409,10 +416,6 @@ test_d3 = qdf_base["Subject_ID"][qdf_base["Subject_ID"].isin(Subject_id)]
test_d4 = np.concatenate([test_d1.to_numpy(),test_d2.to_numpy(),test_d3.to_numpy()])
assert all(np.equal(Subject_id,test_d4))
# Define folder for saving features
Feature_savepath = "./Features/"
Stat_savepath = "./Statistics/"
Model_savepath = "./Model/"
# %% Power spectrum features
Freq_Bands = {"delta": [1.25, 4.0],
......@@ -472,6 +475,7 @@ with concurrent.futures.ProcessPoolExecutor() as executor:
for n, result in executor.map(power_band_estimation, range(len(final_epochs))): # Function and arguments
power_bands[n] = result
"""
"""
for i in range(len(power_bands)):
n, results = power_band_estimation(i)
......@@ -540,15 +544,15 @@ power_df = pd.DataFrame(data = {"Subject_ID": [ele for ele in Subject_id for i i
# Fix Freq_band categorical order
power_df["Freq_band"] = power_df["Freq_band"].astype("category").\
cat.reorder_categories(list(Freq_Bands.keys()), ordered=True)
"""
# Fix Brain_region categorical order
power_df["Brain_region"] = power_df["Brain_region"].astype("category").\
cat.reorder_categories(Brain_region_labels, ordered=True)
"""
# Save the dataframe
power_df.to_pickle(os.path.join(Feature_savepath,"Power_df.pkl"))
# power_df.to_pickle(os.path.join(Feature_savepath,"Power_df.pkl"))
"""
# %% Theta-beta ratio
# Frontal theta/beta ratio has been implicated in cognitive control of attention
power_df = pd.read_pickle(os.path.join(Feature_savepath,"Power_df.pkl"))
......@@ -629,7 +633,7 @@ frontal_theta_beta_ratio.insert(3, "Measurement", dummy_variable )
# frontal_theta_beta_ratio.to_pickle(os.path.join(Feature_savepath,"fTBR_df.pkl"))
"""
# %% Frequency bands asymmetry
# Defined as ln(right) - ln(left)
# Thus we should only work with the absolute values and undo the dB transformation
......@@ -729,7 +733,7 @@ assert asymmetry[i,e,f,r] == asymmetry_df.iloc[random_point]["Asymmetry_score"]
asymmetry_df.to_pickle(os.path.join(Feature_savepath,"asymmetry_df.pkl"))
"""
"""
# %% Using FOOOF
# Peak alpha frequency (PAF) and 1/f exponent (OOF)
# Using the FOOOF algorithm (Fitting oscillations and one over f)
......@@ -1004,7 +1008,7 @@ PTF_data_df.to_pickle(os.path.join(Feature_savepath,"PTF_data_FOOOF_df.pkl"))
PTF_data_df_global.to_pickle(os.path.join(Feature_savepath,"PTF_data_FOOOF_global_df.pkl"))
PBF_data_df.to_pickle(os.path.join(Feature_savepath,"PBF_data_FOOOF_df.pkl"))
PBF_data_df_global.to_pickle(os.path.join(Feature_savepath,"PBF_data_FOOOF_global_df.pkl"))
"""
# # Convert to Pandas dataframe (only keep exponent parameter for OOF)
# # The dimensions will each be a column with numbers and the last column will be the actual values
# ori = OOF_data[:,:,:,1]
......@@ -1073,6 +1077,7 @@ micro_data_all = np.vstack(micro_data)
# Determine the number of clusters
# I use a slightly modified kmeans function which returns the cv_min
"""
global_gev = []
cv_criterion = []
for n_maps in range(2,7):
......@@ -1093,6 +1098,7 @@ plt.plot(np.linspace(2,6,len(cv_criterion)),(cv_criterion/np.sum(cv_criterion)),
plt.plot(np.linspace(2,6,len(cv_criterion)),(global_gev/np.sum(global_gev)), label="GEV")
plt.legend()
plt.ylabel("Normalized to total")
"""
# The lower CV the better.
# But the higher GEV the better.
# Based on the plots and the recommendation by vong Wegner & Laufs 2018
......@@ -1117,6 +1123,11 @@ microstate_cluster_results = []
c_time1 = time.localtime()
c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1)
print(c_time1)
# Change datatype due to error with computational power in clustering
EC_down = np.array(EC_micro_data, dtype = object)
#EC_down = EC_down.astype('float32')
EO_down = np.array(EO_micro_data, dtype = object)
#EO_down = EO_down.astype('float32')
for r in range(n_run):
maps = [0]*2
......@@ -1126,7 +1137,7 @@ for r in range(n_run):
# Eyes closed
counter = 0
maps_, x_, gfp_peaks_, gev_ = clustering(
np.vstack(np.array(EC_micro_data)), sfreq, ch_names, locs, mode, n_maps, doplot=False) # doplot=True is bugged
np.vstack(EC_down), sfreq, ch_names, locs, mode, n_maps, doplot=False) # doplot=True is bugged
maps[counter] = maps_
m_labels[counter] = x_
gfp_peaks[counter] = gfp_peaks_
......@@ -1134,7 +1145,7 @@ for r in range(n_run):
counter += 1
# Eyes open
maps_, x_, gfp_peaks_, gev_ = clustering(
np.vstack(np.array(EO_micro_data)), sfreq, ch_names, locs, mode, n_maps, doplot=False) # doplot=True is bugged
np.vstack(EO_down), sfreq, ch_names, locs, mode, n_maps, doplot=False) # doplot=True is bugged
maps[counter] = maps_
m_labels[counter] = x_
gfp_peaks[counter] = gfp_peaks_
......
# Set working directory
import os
wkdir = "/Users/benj3542/Desktop/Uni/Noter/Semester_6/Bachelor/resting-state-eeg-analysis"
wkdir = "/home/s200431"
os.chdir(wkdir)
# Load all libraries from the Preamble
from Preamble import *
# Define paths to data
Feature_savepath = "./Features/"
### Load questionnaire data
# For the purposes of this demonstration I will make a dummy dataframe
# The original code imported csv files with questionnaire data and group status
final_qdf = pd.DataFrame({"Subject_ID":[1,2],
"Age":[23,26],
"Gender":[0,0],
"Group_status":[0,1],
"PCL_total":[33,56],
"Q1":[1.2, 2.3],
"Q2":[1.7, 1.5],
"Qn":[2.1,1.0]})
final_qdf = pd.read_pickle(Feature_savepath+"final_qdf.pkl")
# Define cases as >= 44 total PCL
# Type: numpy array with subject id
cases = np.array(final_qdf["Subject_ID"][final_qdf["PCL_total"]>=44])
n_groups = 2
Groups = ["CTRL", "PTSD"]
n_subjects = 2
# Define paths to data
Feature_savepath = "./Features/"
n_subjects = 203
# Make function to load EEG features
def load_features_df():
......@@ -171,12 +164,13 @@ def load_features_df():
Eyes_Cloased, Eyes_Open, Eyes_closed_PTSD, Eyes_closed_CTRL, Eyes_open_PSTD, Eyes_open_CTRL = load_features_df()
# Get the data ready for classification and other analysis (extract target and features)
Eyes_Cloased_y = Eyes_Cloased[0]
Eyes_Cloased_X = np.transpose(Eyes_Cloased[1:])
Eyes_Closed_y = Eyes_Cloased[0]
Eyes_Closed_X = np.transpose(Eyes_Cloased[1:])
Eyes_Open_y = Eyes_Open[0]
Eyes_Open_X = np.transpose(Eyes_Open[1:])
print(Eyes_Closed_X, Eyes_Closed_y, len(Eyes_Closed_X), len(Eyes_Closed_y))
# Do PCA
import numpy as np
......@@ -188,39 +182,41 @@ from sklearn import datasets
# unused but required import for doing 3d projections with matplotlib < 3.2
import mpl_toolkits.mplot3d # noqa: F401
from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import StandardScaler
# import some data to play with
np.random.seed(5)
def PCA(X, y, n_components):
fig = plt.figure(1, figsize=(4, 3))
fig = plt.figure(1, figsize=(6, 6))
ax = fig.add_subplot(111, projection = "3d")
plt.clf()
ax = fig.add_subplot(111, projection="3d", elev=48, azim=134)
ax.set_position([0, 0, 0.95, 1])
plt.cla()
scaler = StandardScaler()
X = scaler.fit_transform(X)
pca = decomposition.PCA(n_components=n_components)
pca.fit(X)
X = pca.transform(X)
for name, label in [("CTRL", 0), ("PTSD", 1)]:
ax.text3D(
X[y == label, 0].mean(),
X[y == label, 1].mean(),
name,
horizontalalignment="center",
bbox=dict(alpha=0.5, edgecolor="w", facecolor="w"),
plt.scatter(
X[y == label, 0],
X[y == label, 1],
X[y == label, 2],
label = name,
alpha = 0.5,
)
# Reorder the labels to have colors matching the cluster results
y = np.choose(y, [1, 2, 0]).astype(float)
ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y, cmap=plt.cm.nipy_spectral, edgecolor="k")
ax.xaxis.set_ticklabels([])
ax.yaxis.set_ticklabels([])
ax.zaxis.set_ticklabels([])
#plt.legend(loc = "best", shadow = False, scatterpoints = 1)
ax.set_xlabel("PCA Component 1")
ax.set_ylabel("PCA Component 2")
ax.set_zlabel("PCA Component 3")
ax.set_title("PCA 3D")
ax.legend()
plt.show()
# Safe plot
......@@ -229,6 +225,8 @@ def PCA(X, y, n_components):
return "PCA was complete without errors - check the plot in your chosen path"
PCA(Eyes_Closed_X, Eyes_Closed_y, None)
PCA(Eyes_Open_X, Eyes_Open_y, None)
# PCA(Eyes_Cloased_X, Eyes_Cloased_y, 3)
# PCA(Eyes_Open_X, Eyes_Open_y, 3)
......@@ -258,18 +256,20 @@ from numpy import std
from sklearn.svm import SVC
# Input of kernel, c and gamma should be a list
C = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
gamma = ["scale", "auto"]
def Nested_cross_validation(X, y, kernel, C, gamma, n_splits):
def Nested_cross_validation(X, y, C, gamma, n_splits):
# Create a classifier: a support vector classifier with "rbf" kernel
svm = SVC(kernel="rbf")
# Perform nested cross validation
# Set the parameters by cross-validation
tuned_parameters = {"kernel": kernel, "C": C, "gamma": gamma}
tuned_parameters = {"C": C, "gamma": gamma}
n_splits = n_splits
# configure the cross-validation procedure
cv_outer = KFold(n_splits=10, shuffle=True, random_state=1)
cv_outer = KFold(n_splits=n_splits, shuffle=True, random_state=1)
# enumerate splits
outer_results = list()
......@@ -326,6 +326,8 @@ def Nested_cross_validation(X, y, kernel, C, gamma, n_splits):
return "Nested cross validation was complete without errors - check the printed results"
#Nested_cross_validation(Eyes_Closed_X, Eyes_Closed_y, C, gamma, 10)
#Nested_cross_validation(Eyes_Open_X, Eyes_Open_y, C, gamma, 10)
from scipy.stats import permutation_test
......
......@@ -63,7 +63,7 @@ from mlxtend.plotting import plot_sequential_feature_selection as plot_sfs
from nitime.viz import drawmatrix_channels
from eeg_microstates3 import * # import downloaded package for microstates [von Wegner & Lauf, 2018]
from eeg_microstates import * # import downloaded package for microstates [von Wegner & Lauf, 2018]
#from EEG_functions_toolbox import * # compilation of useful/modified functions
......@@ -1160,7 +1160,6 @@ for n in range(len(final_epochs)):
final_epochs[n].save(fname = os.path.join(save_path,str("{}_HELSE_corrected".format(Subject_id[n]) + "-epo.fif")),
overwrite=True, verbose=0)
"""
# %% Load Baseline data
data_path = "/data/sep2020/"
......@@ -1835,7 +1834,7 @@ len(list(All_drop_epoch_df2.loc[All_drop_epoch_df2["Number_drop_epoch"]>30,"Subj
# Manually check the dropped subjects
# final_epochs[83].plot(scalings=100e-6, n_channels=19, n_epochs=15)
"""
# %% Load the pre-processed epochs
load_path = "/home/s200431/PreprocessedData"
......
......@@ -77,7 +77,7 @@ from mpl_toolkits.mplot3d import Axes3D # registers 3D projections
# Non-library scripts
# EEG microstate package by von Wegner & Lauf, 2018
from eeg_microstates3 import * # downloadeded from https://github.com/Frederic-vW/eeg_microstates
from eeg_microstates import * # downloadeded from https://github.com/Frederic-vW/eeg_microstates
# minimum Redundancy Maximum Relevance script by Kiran Karra
from feature_select import * # downloaded from https://github.com/stochasticresearch/featureselect/blob/master/python/feature_select.py
......
No preview for this file type
File added
No preview for this file type
No preview for this file type
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment