Preamble with libraries

ae4cc071 · glia · 24dba169 · ae4cc071
Commit ae4cc071 authored Oct 24, 2022 by glia
--- a/Preamble.py
+++ b/Preamble.py
+# -*- coding: utf-8 -*-
+"""
+Updated Oct 18 2022
+
+@author: Qianliang Li (glia@dtu.dk)
+
+This preamble contains the code to load all the relevant libraries
+Refer to the requirements.txt for the specific versions used
+"""
+
+# Libraries
+import os
+import sys
+import re
+import warnings
+import time
+import pickle
+import concurrent.futures # for multiprocessing
+
+import numpy as np # Arrays and mathematical computations
+import matplotlib.pyplot as plt # Plotting
+import mne # EEG library
+import scipy # Signal processing
+import sklearn # Machine learning
+import nitime # Time series analysis
+import nolds # DFA exponent
+import statsmodels # multipletest
+import pysparcl # Sparse Kmeans
+import fooof # Peak Alpha Freq and 1/f exponents
+import pandas as pd # Dataframes
+import seaborn as sns # Plotting library
+import autoreject # Automatic EEG artifact detection
+import mlxtend # Sequential Forward Selection
+
+from mne.time_frequency import psd_multitaper
+from mne.preprocessing import (ICA, create_eog_epochs, create_ecg_epochs, corrmap)
+from mne.stats import spatio_temporal_cluster_test, permutation_cluster_test
+from mne.channels import find_ch_adjacency
+from mne.connectivity import spectral_connectivity
+
+import nitime.analysis as nta
+import nitime.timeseries as nts
+import nitime.utils as ntsu
+from nitime.viz import drawmatrix_channels, drawmatrix_channels_modified
+
+from sklearn import preprocessing
+from sklearn import manifold
+from sklearn.svm import LinearSVC, SVC
+from sklearn.decomposition import PCA
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.tree import plot_tree
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.linear_model import LogisticRegression, Ridge, LassoCV, RidgeCV, LogisticRegressionCV
+from sklearn.model_selection import StratifiedKFold, GridSearchCV, StratifiedGroupKFold
+from sklearn.pipeline import Pipeline
+from sklearn.feature_selection import RFECV
+from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, make_scorer
+
+import matplotlib.gridspec as gridspec
+from matplotlib import cm
+
+from statsmodels.tsa.stattools import adfuller
+from statsmodels.formula.api import mixedlm
+
+from autoreject import AutoReject
+
+from mlxtend.evaluate import permutation_test
+from mlxtend.feature_selection import SequentialFeatureSelector as SFS
+from mlxtend.plotting import plot_sequential_feature_selection as plot_sfs
+
+from tqdm import tqdm # progress bars
+from mayavi import mlab # Plotting with MNE
+from mpl_toolkits.mplot3d import Axes3D # registers 3D projections
+
+# Non-library scripts
+# EEG microstate package by von Wegner & Lauf, 2018
+from eeg_microstates import * # downloadeded from https://github.com/Frederic-vW/eeg_microstates
+# minimum Redundancy Maximum Relevance script by Kiran Karra
+from feature_select import * # downloaded from https://github.com/stochasticresearch/featureselect/blob/master/python/feature_select.py
+
+plt.style.use('ggplot') # plotting style
+
+### Some of the functions in the libraries were modified by defining
+# modified functions in the respective .py files in the different libraries
+
+# Modified Kmeans in eeg_microstates
+# Modified T_empirical in eeg_microstates
+# Modified sparcl cluster_permute
+
+# # For eeg_microstates.py
+# def kmeans_return_all(data, n_maps, n_runs=10, maxerr=1e-6, maxiter=500):
+#     """Modified K-means clustering as detailed in:
+#     [1] Pascual-Marqui et al., IEEE TBME (1995) 42(7):658--665
+#     [2] Murray et al., Brain Topography(2008) 20:249--264.
+#     Variables named as in [1], step numbering as in Table I.
+
+#     Args:
+#         data: numpy.array, size = number of EEG channels
+#         n_maps: number of microstate maps
+#         n_runs: number of K-means runs (optional)
+#         maxerr: maximum error for convergence (optional)
+#         maxiter: maximum number of iterations (optional)
+#         doplot: plot the results, default=False (optional)
+#     Returns:
+#         maps: microstate maps (number of maps x number of channels)
+#         L: sequence of microstate labels
+#         gfp_peaks: indices of local GFP maxima
+#         gev: global explained variance (0..1)
+#         cv: value of the cross-validation criterion
+#     """
+#     n_t = data.shape[0]
+#     n_ch = data.shape[1]
+#     data = data - data.mean(axis=1, keepdims=True)
+
+#     # GFP peaks
+#     gfp = np.std(data, axis=1)
+#     gfp_peaks = locmax(gfp)
+#     gfp_values = gfp[gfp_peaks]
+#     gfp2 = np.sum(gfp_values**2) # normalizing constant in GEV
+#     n_gfp = gfp_peaks.shape[0]
+
+#     # clustering of GFP peak maps only
+#     V = data[gfp_peaks, :]
+#     sumV2 = np.sum(V**2)
+
+#     # store results for each k-means run
+#     cv_list =   []  # cross-validation criterion for each k-means run
+#     gev_list =  []  # GEV of each map for each k-means run
+#     gevT_list = []  # total GEV values for each k-means run
+#     maps_list = []  # microstate maps for each k-means run
+#     L_list =    []  # microstate label sequence for each k-means run
+#     for run in range(n_runs):
+#         # initialize random cluster centroids (indices w.r.t. n_gfp)
+#         rndi = np.random.permutation(n_gfp)[:n_maps]
+#         maps = V[rndi, :]
+#         # normalize row-wise (across EEG channels)
+#         maps /= np.sqrt(np.sum(maps**2, axis=1, keepdims=True))
+#         # initialize
+#         n_iter = 0
+#         var0 = 1.0
+#         var1 = 0.0
+#         # convergence criterion: variance estimate (step 6)
+#         while ( (np.abs((var0-var1)/var0) > maxerr) & (n_iter < maxiter) ):
+#             # (step 3) microstate sequence (= current cluster assignment)
+#             C = np.dot(V, maps.T)
+#             C /= (n_ch*np.outer(gfp[gfp_peaks], np.std(maps, axis=1)))
+#             L = np.argmax(C**2, axis=1)
+#             # (step 4)
+#             for k in range(n_maps):
+#                 Vt = V[L==k, :]
+#                 # (step 4a)
+#                 Sk = np.dot(Vt.T, Vt)
+#                 # (step 4b)
+#                 evals, evecs = np.linalg.eig(Sk)
+#                 v = evecs[:, np.argmax(np.abs(evals))]
+#                 maps[k, :] = v/np.sqrt(np.sum(v**2))
+#             # (step 5)
+#             var1 = var0
+#             var0 = sumV2 - np.sum(np.sum(maps[L, :]*V, axis=1)**2)
+#             var0 /= (n_gfp*(n_ch-1))
+#             n_iter += 1
+#         if (n_iter < maxiter):
+#             print("\t\tK-means run {:d}/{:d} converged after {:d} iterations.".format(run+1, n_runs, n_iter))
+#         else:
+#             print("\t\tK-means run {:d}/{:d} did NOT converge after {:d} iterations.".format(run+1, n_runs, maxiter))
+
+#         # CROSS-VALIDATION criterion for this run (step 8)
+#         C_ = np.dot(data, maps.T)
+#         C_ /= (n_ch*np.outer(gfp, np.std(maps, axis=1)))
+#         L_ = np.argmax(C_**2, axis=1)
+#         var = np.sum(data**2) - np.sum(np.sum(maps[L_, :]*data, axis=1)**2)
+#         var /= (n_t*(n_ch-1))
+#         cv = var * (n_ch-1)**2/(n_ch-n_maps-1.)**2
+
+#         # GEV (global explained variance) of cluster k
+#         gev = np.zeros(n_maps)
+#         for k in range(n_maps):
+#             r = L==k
+#             gev[k] = np.sum(gfp_values[r]**2 * C[r,k]**2)/gfp2
+#         gev_total = np.sum(gev)
+
+#         # store
+#         cv_list.append(cv)
+#         gev_list.append(gev)
+#         gevT_list.append(gev_total)
+#         maps_list.append(maps)
+#         L_list.append(L_)
+
+#     # select best run
+#     k_opt = np.argmin(cv_list)
+#     #k_opt = np.argmax(gevT_list)
+#     maps = maps_list[k_opt]
+#     # ms_gfp = ms_list[k_opt] # microstate sequence at GFP peaks
+#     gev = gev_list[k_opt]
+#     L_ = L_list[k_opt]
+#     # lowest cv criterion
+#     cv_min = np.min(cv_list)
+
+#     return maps, L_, gfp_peaks, gev, cv_min
+
+# # For eeg_microstates.py
+# def T_empirical(data, n_clusters, gap_idx = []):
+#     """Modified empirical transition matrix to take gap_idx argument
+
+#     Args:
+#         data: numpy.array, size = length of microstate sequence
+#         n_clusters: number of microstate clusters
+#         gap_idx: index for gaps in data which should be excluded in T
+#     Returns:
+#         T: empirical transition matrix
+#     """
+#     T = np.zeros((n_clusters, n_clusters))
+#     n = len(data)
+#     for i in range(n-1):
+#         # Do not count transitions between gaps in data
+#         if i in gap_idx:
+#             continue
+#         else:
+#             T[data[i], data[i+1]] += 1.0
+#     p_row = np.sum(T, axis=1)
+#     for i in range(n_clusters):
+#         if ( p_row[i] != 0.0 ):
+#             for j in range(n_clusters):
+#                 T[i,j] /= p_row[i]  # normalize row sums to 1.0
+#     return T    
+
+# # From sparcl.cluster.permute
+# def permute_modified(x, k=None, nperms=25, wbounds=None, nvals=10, centers=None,
+#             verbose=False):
+#     # I added sdgaps output
+#     n, p = x.shape
+#     if k is None and centers is None:
+#         raise ValueError('k and centers are None.')
+#     if k is not None and centers is not None:
+#         if centers.shape[0] != k or centers.shape[1] != p:
+#             raise ValueError('Invalid shape of centers.')
+#     if wbounds is None:
+#         wbounds = np.exp(
+#             np.linspace(np.log(1.2), np.log(np.sqrt(p) * 0.9), nvals))
+#     if wbounds.min() <= 1 or len(wbounds) < 2:
+#         raise ValueError('len(wbounds) and each wbound must be > 1.')
+
+#     permx = np.zeros((nperms, n, p))
+#     nnonzerows = None
+#     for i in range(nperms):
+#         for j in range(p):
+#             permx[i, :, j] = np.random.permutation(x[:, j])
+#     tots = None
+#     out = kmeans(x, k, wbounds, centers=centers, verbose=verbose)
+
+#     for i in range(len(out)):
+#         nnonzerows = utils._cbind(nnonzerows, np.sum(out[i]['ws'] != 0))
+#         bcss = subfunc._get_wcss(x, out[i]['cs'])[1]
+#         tots = utils._cbind(tots, np.sum(out[i]['ws'] * bcss))
+#     permtots = np.zeros((len(wbounds), nperms))
+#     for i in range(nperms):
+#         perm_out = kmeans(
+#             permx[i], k, wbounds, centers=centers, verbose=verbose)
+#         for j in range(len(perm_out)):
+#             perm_bcss = subfunc._get_wcss(permx[i], perm_out[j]['cs'])[1]
+#             permtots[j, i] = np.sum(perm_out[j]['ws'] * perm_bcss)
+    
+#     sdgaps = np.std(np.log(permtots),axis=1)
+#     gaps = np.log(tots) - np.log(permtots).mean(axis=1)
+#     bestw = wbounds[gaps.argmax()]
+#     out = {'bestw': bestw, 'gaps': gaps, 'sdgaps': sdgaps, 'wbounds': wbounds,
+#            'nnonzerows': nnonzerows}
+#     return out
\ No newline at end of file