Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# -*- coding: utf-8 -*-
"""
Updated Oct 18 2022
@author: Qianliang Li (glia@dtu.dk)
This preamble contains the code to load all the relevant libraries
Refer to the requirements.txt for the specific versions used
"""
# Libraries
import os
import sys
import re
import warnings
import time
import pickle
import concurrent.futures # for multiprocessing
import numpy as np # Arrays and mathematical computations
import matplotlib.pyplot as plt # Plotting
import mne # EEG library
import scipy # Signal processing
import sklearn # Machine learning
import nitime # Time series analysis
import nolds # DFA exponent
import statsmodels # multipletest
import pysparcl # Sparse Kmeans
import fooof # Peak Alpha Freq and 1/f exponents
import pandas as pd # Dataframes
import seaborn as sns # Plotting library
import autoreject # Automatic EEG artifact detection
import mlxtend # Sequential Forward Selection
from mne.time_frequency import psd_multitaper
from mne.preprocessing import (ICA, create_eog_epochs, create_ecg_epochs, corrmap)
from mne.stats import spatio_temporal_cluster_test, permutation_cluster_test
from mne.channels import find_ch_adjacency
from mne.connectivity import spectral_connectivity
import nitime.analysis as nta
import nitime.timeseries as nts
import nitime.utils as ntsu
from nitime.viz import drawmatrix_channels, drawmatrix_channels_modified
from sklearn import preprocessing
from sklearn import manifold
from sklearn.svm import LinearSVC, SVC
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import plot_tree
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression, Ridge, LassoCV, RidgeCV, LogisticRegressionCV
from sklearn.model_selection import StratifiedKFold, GridSearchCV, StratifiedGroupKFold
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import RFECV
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, make_scorer
import matplotlib.gridspec as gridspec
from matplotlib import cm
from statsmodels.tsa.stattools import adfuller
from statsmodels.formula.api import mixedlm
from autoreject import AutoReject
from mlxtend.evaluate import permutation_test
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from mlxtend.plotting import plot_sequential_feature_selection as plot_sfs
from tqdm import tqdm # progress bars
from mayavi import mlab # Plotting with MNE
from mpl_toolkits.mplot3d import Axes3D # registers 3D projections
# Non-library scripts
# EEG microstate package by von Wegner & Lauf, 2018
from eeg_microstates import * # downloaded from https://github.com/Frederic-vW/eeg_microstates
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
# minimum Redundancy Maximum Relevance script by Kiran Karra
from feature_select import * # downloaded from https://github.com/stochasticresearch/featureselect/blob/master/python/feature_select.py
plt.style.use('ggplot') # plotting style
### Some of the functions in the libraries were modified by defining
# modified functions in the respective .py files in the different libraries
# Modified Kmeans in eeg_microstates
# Modified T_empirical in eeg_microstates
# Modified sparcl cluster_permute
# # For eeg_microstates.py
# def kmeans_return_all(data, n_maps, n_runs=10, maxerr=1e-6, maxiter=500):
# """Modified K-means clustering as detailed in:
# [1] Pascual-Marqui et al., IEEE TBME (1995) 42(7):658--665
# [2] Murray et al., Brain Topography(2008) 20:249--264.
# Variables named as in [1], step numbering as in Table I.
# Args:
# data: numpy.array, size = number of EEG channels
# n_maps: number of microstate maps
# n_runs: number of K-means runs (optional)
# maxerr: maximum error for convergence (optional)
# maxiter: maximum number of iterations (optional)
# doplot: plot the results, default=False (optional)
# Returns:
# maps: microstate maps (number of maps x number of channels)
# L: sequence of microstate labels
# gfp_peaks: indices of local GFP maxima
# gev: global explained variance (0..1)
# cv: value of the cross-validation criterion
# """
# n_t = data.shape[0]
# n_ch = data.shape[1]
# data = data - data.mean(axis=1, keepdims=True)
# # GFP peaks
# gfp = np.std(data, axis=1)
# gfp_peaks = locmax(gfp)
# gfp_values = gfp[gfp_peaks]
# gfp2 = np.sum(gfp_values**2) # normalizing constant in GEV
# n_gfp = gfp_peaks.shape[0]
# # clustering of GFP peak maps only
# V = data[gfp_peaks, :]
# sumV2 = np.sum(V**2)
# # store results for each k-means run
# cv_list = [] # cross-validation criterion for each k-means run
# gev_list = [] # GEV of each map for each k-means run
# gevT_list = [] # total GEV values for each k-means run
# maps_list = [] # microstate maps for each k-means run
# L_list = [] # microstate label sequence for each k-means run
# for run in range(n_runs):
# # initialize random cluster centroids (indices w.r.t. n_gfp)
# rndi = np.random.permutation(n_gfp)[:n_maps]
# maps = V[rndi, :]
# # normalize row-wise (across EEG channels)
# maps /= np.sqrt(np.sum(maps**2, axis=1, keepdims=True))
# # initialize
# n_iter = 0
# var0 = 1.0
# var1 = 0.0
# # convergence criterion: variance estimate (step 6)
# while ( (np.abs((var0-var1)/var0) > maxerr) & (n_iter < maxiter) ):
# # (step 3) microstate sequence (= current cluster assignment)
# C = np.dot(V, maps.T)
# C /= (n_ch*np.outer(gfp[gfp_peaks], np.std(maps, axis=1)))
# L = np.argmax(C**2, axis=1)
# # (step 4)
# for k in range(n_maps):
# Vt = V[L==k, :]
# # (step 4a)
# Sk = np.dot(Vt.T, Vt)
# # (step 4b)
# evals, evecs = np.linalg.eig(Sk)
# v = evecs[:, np.argmax(np.abs(evals))]
# maps[k, :] = v/np.sqrt(np.sum(v**2))
# # (step 5)
# var1 = var0
# var0 = sumV2 - np.sum(np.sum(maps[L, :]*V, axis=1)**2)
# var0 /= (n_gfp*(n_ch-1))
# n_iter += 1
# if (n_iter < maxiter):
# print("\t\tK-means run {:d}/{:d} converged after {:d} iterations.".format(run+1, n_runs, n_iter))
# else:
# print("\t\tK-means run {:d}/{:d} did NOT converge after {:d} iterations.".format(run+1, n_runs, maxiter))
# # CROSS-VALIDATION criterion for this run (step 8)
# C_ = np.dot(data, maps.T)
# C_ /= (n_ch*np.outer(gfp, np.std(maps, axis=1)))
# L_ = np.argmax(C_**2, axis=1)
# var = np.sum(data**2) - np.sum(np.sum(maps[L_, :]*data, axis=1)**2)
# var /= (n_t*(n_ch-1))
# cv = var * (n_ch-1)**2/(n_ch-n_maps-1.)**2
# # GEV (global explained variance) of cluster k
# gev = np.zeros(n_maps)
# for k in range(n_maps):
# r = L==k
# gev[k] = np.sum(gfp_values[r]**2 * C[r,k]**2)/gfp2
# gev_total = np.sum(gev)
# # store
# cv_list.append(cv)
# gev_list.append(gev)
# gevT_list.append(gev_total)
# maps_list.append(maps)
# L_list.append(L_)
# # select best run
# k_opt = np.argmin(cv_list)
# #k_opt = np.argmax(gevT_list)
# maps = maps_list[k_opt]
# # ms_gfp = ms_list[k_opt] # microstate sequence at GFP peaks
# gev = gev_list[k_opt]
# L_ = L_list[k_opt]
# # lowest cv criterion
# cv_min = np.min(cv_list)
# return maps, L_, gfp_peaks, gev, cv_min
# # For eeg_microstates.py
# def T_empirical(data, n_clusters, gap_idx = []):
# """Modified empirical transition matrix to take gap_idx argument
# Args:
# data: numpy.array, size = length of microstate sequence
# n_clusters: number of microstate clusters
# gap_idx: index for gaps in data which should be excluded in T
# Returns:
# T: empirical transition matrix
# """
# T = np.zeros((n_clusters, n_clusters))
# n = len(data)
# for i in range(n-1):
# # Do not count transitions between gaps in data
# if i in gap_idx:
# continue
# else:
# T[data[i], data[i+1]] += 1.0
# p_row = np.sum(T, axis=1)
# for i in range(n_clusters):
# if ( p_row[i] != 0.0 ):
# for j in range(n_clusters):
# T[i,j] /= p_row[i] # normalize row sums to 1.0
# return T
# # From sparcl.cluster.permute
# def permute_modified(x, k=None, nperms=25, wbounds=None, nvals=10, centers=None,
# verbose=False):
# # I added sdgaps output
# n, p = x.shape
# if k is None and centers is None:
# raise ValueError('k and centers are None.')
# if k is not None and centers is not None:
# if centers.shape[0] != k or centers.shape[1] != p:
# raise ValueError('Invalid shape of centers.')
# if wbounds is None:
# wbounds = np.exp(
# np.linspace(np.log(1.2), np.log(np.sqrt(p) * 0.9), nvals))
# if wbounds.min() <= 1 or len(wbounds) < 2:
# raise ValueError('len(wbounds) and each wbound must be > 1.')
# permx = np.zeros((nperms, n, p))
# nnonzerows = None
# for i in range(nperms):
# for j in range(p):
# permx[i, :, j] = np.random.permutation(x[:, j])
# tots = None
# out = kmeans(x, k, wbounds, centers=centers, verbose=verbose)
# for i in range(len(out)):
# nnonzerows = utils._cbind(nnonzerows, np.sum(out[i]['ws'] != 0))
# bcss = subfunc._get_wcss(x, out[i]['cs'])[1]
# tots = utils._cbind(tots, np.sum(out[i]['ws'] * bcss))
# permtots = np.zeros((len(wbounds), nperms))
# for i in range(nperms):
# perm_out = kmeans(
# permx[i], k, wbounds, centers=centers, verbose=verbose)
# for j in range(len(perm_out)):
# perm_bcss = subfunc._get_wcss(permx[i], perm_out[j]['cs'])[1]
# permtots[j, i] = np.sum(perm_out[j]['ws'] * perm_bcss)
# sdgaps = np.std(np.log(permtots),axis=1)
# gaps = np.log(tots) - np.log(permtots).mean(axis=1)
# bestw = wbounds[gaps.argmax()]
# out = {'bestw': bestw, 'gaps': gaps, 'sdgaps': sdgaps, 'wbounds': wbounds,
# 'nnonzerows': nnonzerows}
# return out