Commit 1ad2894b authored by s200431

added local changes for preamble and preprocessing

parent 7a92dabc
@@ -33,9 +33,10 @@ The script was written in Spyder. The outline panel can be used to navigate
the different parts easier (Default shortcut: Ctrl + Shift + O)
"""
# Set working directory
import os
wkdir = "/home/glia/EEG"
wkdir = "/Users/benj3542/Desktop/Uni/Noter/Semester_6/Bachelor/resting-state-eeg-analysis/"
os.chdir(wkdir)
# Load all libraries from the Preamble
@@ -57,7 +58,7 @@ Subject_id = [0] * len(files)
for i in range(len(files)):
temp = files[i].split("\\")
temp = temp[-1].split("_")
Subject_id[i] = int(temp[0])
Subject_id[i] = int(temp[0][-1]) # Subject_id[i] = int(temp[0])
n_subjects = len(Subject_id)
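# Worked example of that parsing, assuming a filename such as "sub1_rest.set"
# (the exact naming scheme is an assumption):
#   "path\\sub1_rest.set".split("\\")[-1].split("_")  -> ["sub1", "rest.set"]
#   int("sub1"[-1])                                   -> 1
# Note that taking only the last character limits subject IDs to a single digit.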
@@ -151,9 +152,15 @@ def power_band_estimation(n):
res = np.concatenate([abs_power_values,rel_power_values],axis=0)
return n, res
"""
with concurrent.futures.ProcessPoolExecutor() as executor:
for n, result in executor.map(power_band_estimation, range(len(final_epochs))): # Function and arguments
power_bands[n] = result
"""
for i in range(len(power_bands)):
n, results = power_band_estimation(i)
power_bands[i] = results
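# Illustrative sketch of the per-band computation behind power_band_estimation, assuming
# a (n_channels, n_times) array `data`, a sampling rate `sfreq`, and the Freq_Bands dict
# defined earlier; the Welch estimate stands in for the multitaper PSD used in the script.
def band_powers_sketch(data, sfreq, freq_bands):
    import numpy as np
    from scipy import signal
    freqs, psd = signal.welch(data, fs=sfreq, nperseg=int(4 * sfreq))  # PSD per channel
    total_power = np.trapz(psd, freqs, axis=-1)                        # broadband power per channel
    abs_power, rel_power = [], []
    for fmin, fmax in freq_bands.values():
        idx = (freqs >= fmin) & (freqs < fmax)
        band = np.trapz(psd[:, idx], freqs[idx], axis=-1)              # absolute band power
        abs_power.append(10 * np.log10(band))                          # to dB, as in the script
        rel_power.append(band / total_power)                           # relative band power
    return np.concatenate([np.ravel(abs_power), np.ravel(rel_power)])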
# Combine all data into one dataframe
# First the columns are prepared
@@ -216,10 +223,11 @@ power_df = pd.DataFrame(data = {"Subject_ID": [ele for ele in Subject_id for i i
# Fix Freq_band categorical order
power_df["Freq_band"] = power_df["Freq_band"].astype("category").\
cat.reorder_categories(list(Freq_Bands.keys()), ordered=True)
"""
# Fix Brain_region categorical order
power_df["Brain_region"] = power_df["Brain_region"].astype("category").\
cat.reorder_categories(Brain_region_labels, ordered=True)
"""
# Save the dataframe
power_df.to_pickle(os.path.join(Feature_savepath,"Power_df.pkl"))
@@ -234,16 +242,29 @@ n_eye_status = len(eye_status)
# Subset frontal absolute power
power_df_sub1 = power_df[(power_df["Quant_status"] == "Absolute")&
(power_df["Brain_region"] == "Frontal")]
# Subset frontal, midline absolute power
power_df_sub2 = power_df[(power_df["Quant_status"] == "Absolute")&
(power_df["Brain_region"] == "Frontal")&
(power_df["Brain_side"] == "Mid")]
# Calculate average frontal power
# Calculate average frontal power theta
frontal_theta_mean_subject = power_df_sub1[power_df_sub1["Freq_band"] == "theta"].\
groupby(["Subject_ID","Group_status","Eye_status"]).mean().reset_index()
# Calculate average frontal power beta
frontal_beta_mean_subject = power_df_sub1[power_df_sub1["Freq_band"] == "beta"].\
groupby(["Subject_ID","Group_status","Eye_status"]).mean().reset_index()
# Calculate average frontal, midline power theta
frontal_midline_theta_mean_subject = power_df_sub2[power_df_sub2["Freq_band"] == "theta"].\
groupby(["Subject_ID","Group_status","Eye_status"]).mean().reset_index()
# Convert from dB to raw power
frontal_theta_mean_subject["PSD"] = 10**(frontal_theta_mean_subject["PSD"]/10)
frontal_beta_mean_subject["PSD"] = 10**(frontal_beta_mean_subject["PSD"]/10)
frontal_midline_theta_mean_subject["PSD"] = 10**(frontal_midline_theta_mean_subject["PSD"]/10)
# Calculate mean for each group and take ratio for whole group
# To confirm trend observed in PSD plots
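# Illustrative sketch of that ratio, assuming the two mean dataframes above share the
# Subject_ID/Group_status/Eye_status keys (the merge is an assumption, not the exact code);
# the dB transformation is undone first because ratios are only meaningful on linear power.
_tbr_sketch = frontal_theta_mean_subject.merge(
    frontal_beta_mean_subject,
    on=["Subject_ID", "Group_status", "Eye_status"], suffixes=("_theta", "_beta"))
_tbr_sketch["TBR"] = _tbr_sketch["PSD_theta"] / _tbr_sketch["PSD_beta"]   # theta/beta ratio per subject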
@@ -266,6 +287,7 @@ frontal_theta_beta_ratio.insert(3, "Measurement", dummy_variable )
frontal_theta_beta_ratio.to_pickle(os.path.join(Feature_savepath,"fTBR_df.pkl"))
"""
# %% Frequency bands asymmetry
# Defined as ln(right) - ln(left)
# Thus we should only work with the absolute values and undo the dB transformation
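# Illustrative sketch of that formula, assuming `right_db` and `left_db` are matched arrays of
# absolute band power in dB for homologous right/left electrodes (variable names are placeholders):
import numpy as np
asymmetry = np.log(10 ** (right_db / 10)) - np.log(10 ** (left_db / 10))  # ln(right) - ln(left) on raw power
# equivalently: asymmetry = (np.log(10) / 10) * (right_db - left_db)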
@@ -553,7 +575,7 @@ OOF_data_df.insert(4, "Brain_region", list(Brain_region)*int(PAF_data_df.shape[0
# Save the dataframes
OOF_data_df.to_pickle(os.path.join(Feature_savepath,"OOF_data_FOOOF_df.pkl"))
"""
# %% Microstate analysis
# The function takes the data as a numpy array (n_t, n_ch)
# The data is already re-referenced to common average
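# Illustrative sketch of the expected layout, assuming an MNE Epochs object `epochs`
# with the common average reference already applied (names are placeholders):
import numpy as np
arr = epochs.get_data()              # (n_epochs, n_channels, n_times)
data_microstate = np.hstack(arr).T   # concatenate epochs in time -> (n_t, n_ch)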
@@ -616,11 +638,11 @@ plt.ylabel("Normalized to total")
# The lower CV the better.
# But the higher GEV the better.
# Based on the plots and the recommendation by von Wegner & Laufs 2018
# we used 4 microstates
# we used 5 microstates
# In order to compare between groups, I fix the microstates by clustering on data from both groups
# Due to instability of maps when running multiple times, I increased n_maps from 4 to 6
n_maps = 4
n_maps = 5
mode = ["aahc", "kmeans", "kmedoids", "pca", "ica"][1]
# K-means is stochastic, thus I run it multiple times in order to find the maps with highest GEV
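# Illustrative sketch of that idea, assuming a (n_t, n_ch) array `data_microstate` and the
# kmeans_return_all helper defined in the preamble; the actual script also stores results
# per eye condition.
import numpy as np
best_total_gev, best_run = -np.inf, None
for _ in range(10):                                               # repeat the stochastic clustering
    maps, L_, gfp_peaks, gev, cv = kmeans_return_all(data_microstate, n_maps)
    if np.sum(gev) > best_total_gev:                              # keep the run with highest total GEV
        best_total_gev, best_run = np.sum(gev), (maps, L_, gfp_peaks, gev, cv)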
@@ -670,7 +692,7 @@ c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2)
print("Started", c_time1, "\nFinished",c_time2)
# Save the results
with open(Feature_savepath+"Microstate_4_maps_10x5_k_means_results.pkl", "wb") as file:
with open(Feature_savepath+"Microstate_5_maps_10x5_k_means_results.pkl", "wb") as file:
pickle.dump(microstate_cluster_results, file)
# # Load
@@ -690,7 +712,7 @@ gev = [microstate_cluster_results[Best_EC_idx][3][0],microstate_cluster_results[
# Plot the maps
plt.style.use('default')
labels = ["EC", "EO"]
labels = ["EC", "EO"] #Eyes-closed, Eyes-open
for i in range(len(labels)):
fig, axarr = plt.subplots(1, n_maps, figsize=(20,5))
fig.patch.set_facecolor('white')
@@ -703,8 +725,8 @@ for i in range(len(labels)):
# Due to the random initialization of K-means, this has to be modified every time clusters are made!
# Assign map labels (e.g. 0, 2, 1, 3)
order = [0]*2
order[0] = [3,0,1,2] # EC
order[1] = [3,1,0,2] # EO
order[0] = [3,0,1,2,4] # EC
order[1] = [3,1,0,2,4] # EO
for i in range(len(order)):
maps[i] = maps[i][order[i],:] # re-order maps
gev[i] = gev[i][order[i]] # re-order GEV
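# One way the manual `order` lists above could be derived automatically: match each map to a
# reference topography by absolute spatial correlation (polarity-invariant). `template_maps`
# is a hypothetical (n_maps, n_ch) array of canonical microstate templates, not part of this script.
def match_order_sketch(candidate_maps, template_maps):
    import numpy as np
    order = []
    for t in template_maps:
        corr = [abs(np.corrcoef(t, m)[0, 1]) for m in candidate_maps]
        order.append(int(np.argmax(corr)))
    return order   # note: does not guard against two templates picking the same map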
@@ -716,13 +738,13 @@ for i in range(len(order)):
# Thus the sign of the map does not really reflect which areas are positive or negative (absolute)
# but rather which areas differ during each state (relative)
# I can therefore change the sign of the map for the visualization
sign_swap = [[1,-1,1,1],[1,1,1,-1]]
sign_swap = [[1,-1,1,1,1],[1,1,1,-1,1]]
for i in range(len(order)):
for m in range(n_maps):
maps[i][m] *= sign_swap[i][m]
# Plot the maps and save
save_path = "/home/glia/Analysis/Figures/Microstates/"
save_path = "/Users/benj3542/Desktop/Uni/Noter/Semester_6/Bachelor/resting-state-eeg-analysis/Figures/Microstates"
labels = ["EC", "EO"]
for i in range(len(labels)):
fig, axarr = plt.subplots(1, n_maps, figsize=(20,5))
@@ -25,14 +25,14 @@ import sklearn # Machine learning
import nitime # Time series analysis
import nolds # DFA exponent
import statsmodels # multipletest
import pysparcl # Sparse Kmeans
#import pysparcl # Sparse Kmeans
import fooof # Peak Alpha Freq and 1/f exponents
import pandas as pd # Dataframes
import seaborn as sns # Plotting library
import autoreject # Automatic EEG artifact detection
# import autoreject # Automatic EEG artifact detection
import mlxtend # Sequential Forward Selection
from mne.time_frequency import psd_multitaper
from mne.time_frequency import *
from mne.preprocessing import (ICA, create_eog_epochs, create_ecg_epochs, corrmap)
from mne.stats import spatio_temporal_cluster_test, permutation_cluster_test
from mne.channels import find_ch_adjacency
@@ -41,7 +41,7 @@ from mne.connectivity import spectral_connectivity
import nitime.analysis as nta
import nitime.timeseries as nts
import nitime.utils as ntsu
from nitime.viz import drawmatrix_channels, drawmatrix_channels_modified
from nitime.viz import * # (drawmatrix_channels, drawmatrix_channels_modified)
from sklearn import preprocessing
from sklearn import manifold
@@ -74,7 +74,7 @@ from mpl_toolkits.mplot3d import Axes3D # registers 3D projections
# Non-library scripts
# EEG microstate package by von Wegner & Laufs, 2018
from eeg_microstates import * # downloaded from https://github.com/Frederic-vW/eeg_microstates
from eeg_microstates3 import * # downloaded from https://github.com/Frederic-vW/eeg_microstates
# minimum Redundancy Maximum Relevance script by Kiran Karra
from feature_select import * # downloaded from https://github.com/stochasticresearch/featureselect/blob/master/python/feature_select.py
@@ -88,115 +88,116 @@ plt.style.use('ggplot') # plotting style
# Modified sparcl cluster_permute
# # For eeg_microstates.py
# def kmeans_return_all(data, n_maps, n_runs=10, maxerr=1e-6, maxiter=500):
# """Modified K-means clustering as detailed in:
# [1] Pascual-Marqui et al., IEEE TBME (1995) 42(7):658--665
# [2] Murray et al., Brain Topography(2008) 20:249--264.
# Variables named as in [1], step numbering as in Table I.
# Args:
# data: numpy.array, size = number of EEG channels
# n_maps: number of microstate maps
# n_runs: number of K-means runs (optional)
# maxerr: maximum error for convergence (optional)
# maxiter: maximum number of iterations (optional)
# doplot: plot the results, default=False (optional)
# Returns:
# maps: microstate maps (number of maps x number of channels)
# L: sequence of microstate labels
# gfp_peaks: indices of local GFP maxima
# gev: global explained variance (0..1)
# cv: value of the cross-validation criterion
# """
# n_t = data.shape[0]
# n_ch = data.shape[1]
# data = data - data.mean(axis=1, keepdims=True)
# # GFP peaks
# gfp = np.std(data, axis=1)
# gfp_peaks = locmax(gfp)
# gfp_values = gfp[gfp_peaks]
# gfp2 = np.sum(gfp_values**2) # normalizing constant in GEV
# n_gfp = gfp_peaks.shape[0]
# # clustering of GFP peak maps only
# V = data[gfp_peaks, :]
# sumV2 = np.sum(V**2)
# # store results for each k-means run
# cv_list = [] # cross-validation criterion for each k-means run
# gev_list = [] # GEV of each map for each k-means run
# gevT_list = [] # total GEV values for each k-means run
# maps_list = [] # microstate maps for each k-means run
# L_list = [] # microstate label sequence for each k-means run
# for run in range(n_runs):
# # initialize random cluster centroids (indices w.r.t. n_gfp)
# rndi = np.random.permutation(n_gfp)[:n_maps]
# maps = V[rndi, :]
# # normalize row-wise (across EEG channels)
# maps /= np.sqrt(np.sum(maps**2, axis=1, keepdims=True))
# # initialize
# n_iter = 0
# var0 = 1.0
# var1 = 0.0
# # convergence criterion: variance estimate (step 6)
# while ( (np.abs((var0-var1)/var0) > maxerr) & (n_iter < maxiter) ):
# # (step 3) microstate sequence (= current cluster assignment)
# C = np.dot(V, maps.T)
# C /= (n_ch*np.outer(gfp[gfp_peaks], np.std(maps, axis=1)))
# L = np.argmax(C**2, axis=1)
# # (step 4)
# for k in range(n_maps):
# Vt = V[L==k, :]
# # (step 4a)
# Sk = np.dot(Vt.T, Vt)
# # (step 4b)
# evals, evecs = np.linalg.eig(Sk)
# v = evecs[:, np.argmax(np.abs(evals))]
# maps[k, :] = v/np.sqrt(np.sum(v**2))
# # (step 5)
# var1 = var0
# var0 = sumV2 - np.sum(np.sum(maps[L, :]*V, axis=1)**2)
# var0 /= (n_gfp*(n_ch-1))
# n_iter += 1
# if (n_iter < maxiter):
# print("\t\tK-means run {:d}/{:d} converged after {:d} iterations.".format(run+1, n_runs, n_iter))
# else:
# print("\t\tK-means run {:d}/{:d} did NOT converge after {:d} iterations.".format(run+1, n_runs, maxiter))
# # CROSS-VALIDATION criterion for this run (step 8)
# C_ = np.dot(data, maps.T)
# C_ /= (n_ch*np.outer(gfp, np.std(maps, axis=1)))
# L_ = np.argmax(C_**2, axis=1)
# var = np.sum(data**2) - np.sum(np.sum(maps[L_, :]*data, axis=1)**2)
# var /= (n_t*(n_ch-1))
# cv = var * (n_ch-1)**2/(n_ch-n_maps-1.)**2
# # GEV (global explained variance) of cluster k
# gev = np.zeros(n_maps)
# for k in range(n_maps):
# r = L==k
# gev[k] = np.sum(gfp_values[r]**2 * C[r,k]**2)/gfp2
# gev_total = np.sum(gev)
# # store
# cv_list.append(cv)
# gev_list.append(gev)
# gevT_list.append(gev_total)
# maps_list.append(maps)
# L_list.append(L_)
# # select best run
# k_opt = np.argmin(cv_list)
# #k_opt = np.argmax(gevT_list)
# maps = maps_list[k_opt]
# # ms_gfp = ms_list[k_opt] # microstate sequence at GFP peaks
# gev = gev_list[k_opt]
# L_ = L_list[k_opt]
# # lowest cv criterion
# cv_min = np.min(cv_list)
# return maps, L_, gfp_peaks, gev, cv_min
def kmeans_return_all(data, n_maps, n_runs=10, maxerr=1e-6, maxiter=500):
"""
Modified K-means clustering as detailed in:
[1] Pascual-Marqui et al., IEEE TBME (1995) 42(7):658--665
[2] Murray et al., Brain Topography(2008) 20:249--264.
Variables named as in [1], step numbering as in Table I.
Args:
data: numpy.array, size = number of EEG channels
n_maps: number of microstate maps
n_runs: number of K-means runs (optional)
maxerr: maximum error for convergence (optional)
maxiter: maximum number of iterations (optional)
doplot: plot the results, default=False (optional)
Returns:
maps: microstate maps (number of maps x number of channels)
L: sequence of microstate labels
gfp_peaks: indices of local GFP maxima
gev: global explained variance (0..1)
cv: value of the cross-validation criterion
"""
n_t = data.shape[0]
n_ch = data.shape[1]
data = data - data.mean(axis=1, keepdims=True)
# GFP peaks
gfp = np.std(data, axis=1)
gfp_peaks = locmax(gfp)
gfp_values = gfp[gfp_peaks]
gfp2 = np.sum(gfp_values**2) # normalizing constant in GEV
n_gfp = gfp_peaks.shape[0]
# clustering of GFP peak maps only
V = data[gfp_peaks, :]
sumV2 = np.sum(V**2)
# store results for each k-means run
cv_list = [] # cross-validation criterion for each k-means run
gev_list = [] # GEV of each map for each k-means run
gevT_list = [] # total GEV values for each k-means run
maps_list = [] # microstate maps for each k-means run
L_list = [] # microstate label sequence for each k-means run
for run in range(n_runs):
# initialize random cluster centroids (indices w.r.t. n_gfp)
rndi = np.random.permutation(n_gfp)[:n_maps]
maps = V[rndi, :]
# normalize row-wise (across EEG channels)
maps /= np.sqrt(np.sum(maps**2, axis=1, keepdims=True))
# initialize
n_iter = 0
var0 = 1.0
var1 = 0.0
# convergence criterion: variance estimate (step 6)
while ( (np.abs((var0-var1)/var0) > maxerr) & (n_iter < maxiter) ):
# (step 3) microstate sequence (= current cluster assignment)
C = np.dot(V, maps.T)
C /= (n_ch*np.outer(gfp[gfp_peaks], np.std(maps, axis=1)))
L = np.argmax(C**2, axis=1)
# (step 4)
for k in range(n_maps):
Vt = V[L==k, :]
# (step 4a)
Sk = np.dot(Vt.T, Vt)
# (step 4b)
evals, evecs = np.linalg.eig(Sk)
v = evecs[:, np.argmax(np.abs(evals))]
maps[k, :] = v/np.sqrt(np.sum(v**2))
# (step 5)
var1 = var0
var0 = sumV2 - np.sum(np.sum(maps[L, :]*V, axis=1)**2)
var0 /= (n_gfp*(n_ch-1))
n_iter += 1
if (n_iter < maxiter):
print("\t\tK-means run {:d}/{:d} converged after {:d} iterations.".format(run+1, n_runs, n_iter))
else:
print("\t\tK-means run {:d}/{:d} did NOT converge after {:d} iterations.".format(run+1, n_runs, maxiter))
# CROSS-VALIDATION criterion for this run (step 8)
C_ = np.dot(data, maps.T)
C_ /= (n_ch*np.outer(gfp, np.std(maps, axis=1)))
L_ = np.argmax(C_**2, axis=1)
var = np.sum(data**2) - np.sum(np.sum(maps[L_, :]*data, axis=1)**2)
var /= (n_t*(n_ch-1))
cv = var * (n_ch-1)**2/(n_ch-n_maps-1.)**2
# GEV (global explained variance) of cluster k
gev = np.zeros(n_maps)
for k in range(n_maps):
r = L==k
gev[k] = np.sum(gfp_values[r]**2 * C[r,k]**2)/gfp2
gev_total = np.sum(gev)
# store
cv_list.append(cv)
gev_list.append(gev)
gevT_list.append(gev_total)
maps_list.append(maps)
L_list.append(L_)
# select best run
k_opt = np.argmin(cv_list)
#k_opt = np.argmax(gevT_list)
maps = maps_list[k_opt]
# ms_gfp = ms_list[k_opt] # microstate sequence at GFP peaks
gev = gev_list[k_opt]
L_ = L_list[k_opt]
# lowest cv criterion
cv_min = np.min(cv_list)
return maps, L_, gfp_peaks, gev, cv_min
# # For eeg_microstates.py
# def T_empirical(data, n_clusters, gap_idx = []):
@@ -25,7 +25,7 @@ Link to the demonstration data: www.bci2000.org
# Set working directory
import os
wkdir = "/home/glia/EEG"
wkdir = "/Users/benj3542/Desktop/Uni/Noter/Semester_6/Bachelor/resting-state-eeg-analysis/"
os.chdir(wkdir)
# Load all libraries from the Preamble
@@ -300,7 +300,8 @@ for i in range(len(corrected_epochs)):
reject_log0[e] = ar0[e].get_reject_log(corrected_epochs[i][ee])
# Plot and save Autorejected epochs
fig = reject_log0[e].plot(orientation="horizontal", show=False)
fig.savefig(os.path.join(save_path,"AR_" + str(Subject_id_concat[i]) + "_" + str(ee) + ".png"))
#fig.savefig(os.path.join(save_path,"AR_" + str(Subject_id_concat[i]) + "_" + str(ee) + ".png"))
fig.savefig(os.path.join(save_path,"AR_" + str(e) + "_" + str(ee) + ".png"))
# Close figure window
plt.close(fig)
# Save mean peak-to-peak voltage threshold used
#!/usr/bin/env python
'''
This file contains functions that enable MRMR based Feature Selection.
See the paper: Feature Selection Based on Mutual Information: Criteria of Max-Dependency, Max-Relevance,
and Min-Redundancy by H. Peng, F. Long, and C. Ding
Author: Kiran Karra [Virginia Tech]
<kiran.karra@gmail.com, kiran.karra@vt.edu>
Distribution Statement A: Approved for Public Release, Distribution Unlimited
'''
import os
import shutil
import json
from tempfile import mkdtemp
from tqdm import tqdm
from scipy.stats import randint as sp_randint
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_curve
# from sklearn.externals import * #joblib
# from sklearn.externals.joblib import Memory
import joblib
from joblib import Memory
from sklearn.metrics import matthews_corrcoef
import gplearn
import gplearn.genetic
import gplearn.fitness
import numpy as np
from joblib import Parallel, delayed
from joblib import load, dump
# from identity_transformer import IdentityTransformer # wasn't used
# import depmeas # couldn't find depmeas
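# generic_combined_scorer: evaluates the estimator h on (x1, y) and (x2, y) and writes the two
# scores into the shared output arrays o1/o2 at indices ii_1/ii_2 (used below with joblib memmaps
# so parallel workers can fill the relevance and redundancy vectors in place).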
def generic_combined_scorer(x1,o1,ii_1,x2,o2,ii_2,y,h):
s1 = h(x1,y)
s2 = h(x2,y)
o1[ii_1] = s1
o2[ii_2] = s2
"""
def feature_select(X,y,num_features_to_select=None,K_MAX=1000,estimator=depmeas.mi_tau,n_jobs=-1,verbose=True):
'''
Implements the MRMR algorithm for feature-selection: http://ieeexplore.ieee.org/document/1453511/
Inputs:
X - A feature-matrix, of shape (N,D) where N is the number of samples and D is the number
of features
y - A vector of shape (N,1) which represents the output. Each index in y is assumed to
correspond to the row with the same index in X.
num_features_to_select - the number of features to select from the provided X matrix. If None
is provided, then all available features are ranked/ordered.
(default: None)
K_MAX - the maximum number of top-scoring features to consider.
estimator - a function handle to an estimator of association (that theoretically should
follow the DPI assumptions)
n_jobs - the number of processes to use with parallel processing in the background
verbose - if True, show progress
Output:
a vector of indices sorted in descending order, where each index represents the "importance"
of the feature, as computed by the MRMR algorithm.
'''
num_dim = X.shape[1]
if(num_features_to_select is not None):
num_selected_features = min(num_dim,num_features_to_select)
else:
num_selected_features = num_dim
K_MAX_internal = min(num_dim,K_MAX)
initial_scores = Parallel(n_jobs=n_jobs)(delayed(estimator)(X[:,ii],y) for ii in range(num_dim))
# rank the scores in descending order
sorted_scores_idxs = np.flipud(np.argsort(initial_scores))
# subset the data down so that joblib doesn't have to
# transport large matrices to its workers
X_subset = X[:,sorted_scores_idxs[0:K_MAX_internal]]
# memory map this for parallelization speed
tmp_folder = mkdtemp()
# TODO: why is X_subset crashing when we increase K_MAX_in? Investigate in detail, but
# for now, do not use memory mapping for X_subset for stability
# X_subset_fname = os.path.join(tmp_folder, 'X_subset')
# dump(X_subset, X_subset_fname)
# X_subset = load(X_subset_fname, mmap_mode='r')
selected_feature_idxs = np.zeros(num_selected_features,dtype=int)
remaining_candidate_idxs = list(range(1,K_MAX_internal))
# mi_matrix = np.empty((K_MAX_internal,num_selected_features-1))
# mi_matrix[:] = np.nan
relevance_vec_fname = os.path.join(tmp_folder, 'relevance_vec')
feature_redundance_vec_fname = os.path.join(tmp_folder, 'feature_redundance_vec')
mi_matrix_fname = os.path.join(tmp_folder, 'mi_matrix')
relevance_vec = np.memmap(relevance_vec_fname, dtype=float,
shape=(K_MAX_internal,), mode='w+')
feature_redundance_vec = np.memmap(feature_redundance_vec_fname, dtype=float,
shape=(K_MAX_internal,), mode='w+')
mi_matrix = np.memmap(mi_matrix_fname, dtype=float,
shape=(K_MAX_internal,num_selected_features-1), mode='w+')
mi_matrix[:] = np.nan
# TODO: investigate whether its worth it to parallelize the nested for-loop?
with tqdm(total=num_selected_features,desc='Selecting Features ...',disable=(not verbose)) as pbar:
pbar.update(1)
for k in range(1,num_selected_features):
ncand = len(remaining_candidate_idxs)
last_selected_feature = k-1
Parallel(n_jobs=n_jobs)(delayed(generic_combined_scorer)(y,relevance_vec,ii,
X_subset[:,selected_feature_idxs[last_selected_feature]],
feature_redundance_vec,ii,X_subset[:,ii],
estimator)
for ii in remaining_candidate_idxs)
# copy the redundance into the mi_matrix, which accumulates our redundance as we compute
mi_matrix[remaining_candidate_idxs,last_selected_feature] = feature_redundance_vec[remaining_candidate_idxs]
redundance_vec = np.nanmean(mi_matrix[remaining_candidate_idxs,:], axis=1)
tmp_idx = np.argmax(relevance_vec[remaining_candidate_idxs]-redundance_vec)
selected_feature_idxs[k] = remaining_candidate_idxs[tmp_idx]
del remaining_candidate_idxs[tmp_idx]
pbar.update(1)
# map the selected features back to the original dimensions
selected_feature_idxs = sorted_scores_idxs[selected_feature_idxs]
# clean up
try:
shutil.rmtree(tmp_folder)
except:
pass
return selected_feature_idxs
"""
\ No newline at end of file
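# Minimal sketch of the MRMR loop described in the docstring above, using sklearn's
# mutual_info_classif as the relevance estimator and absolute correlation as the
# feature-feature redundancy term (both are stand-ins for depmeas.mi_tau, which was unavailable):
import numpy as np
from sklearn.feature_selection import mutual_info_classif

def mrmr_sketch(X, y, n_select):
    relevance = mutual_info_classif(X, y)                 # relevance of each feature to the target
    selected = [int(np.argmax(relevance))]                # start from the most relevant feature
    candidates = [i for i in range(X.shape[1]) if i != selected[0]]
    while len(selected) < n_select:
        redundancy = np.array([
            np.mean([abs(np.corrcoef(X[:, c], X[:, s])[0, 1]) for s in selected])
            for c in candidates])                         # mean association with already-selected features
        scores = relevance[candidates] - redundancy       # max-relevance minus min-redundancy criterion
        best = candidates[int(np.argmax(scores))]
        selected.append(best)
        candidates.remove(best)
    return selected                                       # feature indices in selection order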
@@ -6,12 +6,12 @@ nitime==0.8.1
nolds==0.5.2
statsmodels==0.11.1
spm1d==0.4.2
pysparcl==1.4.1
# pysparcl==1.4.1
fooof==1.0.0
pandas==1.0.3
seaborn==0.10.1
pingouin==0.3.11
sklearn==0.24.2
# sklearn==0.24.2 # installed a newer version due to the Python 3.10 limitation
autoreject==0.2.1
mlxtend==0.17.0
mayavi==4.7.1
\ No newline at end of file
# mayavi==4.7.1 # installed 4.8.1
\ No newline at end of file