    # -*- coding: utf-8 -*-
    """
    Updated Oct 18 2022
    
    @author: Qianliang Li (glia@dtu.dk)
    
    This preamble loads all the libraries used in the analysis scripts.
    Refer to requirements.txt for the specific versions used.
    """
    
    # Libraries
    import os
    import sys
    import re
    import warnings
    import time
    import pickle
    import concurrent.futures # for multiprocessing
    
    import numpy as np # Arrays and mathematical computations
    import matplotlib.pyplot as plt # Plotting
    import mne # EEG library
    import scipy # Signal processing
    import sklearn # Machine learning
    import nitime # Time series analysis
    import nolds # DFA exponent
    import statsmodels # multipletest
    import pysparcl # Sparse Kmeans
    import fooof # Peak Alpha Freq and 1/f exponents
    import pandas as pd # Dataframes
    import seaborn as sns # Plotting library
    import autoreject # Automatic EEG artifact detection
    import mlxtend # Sequential Forward Selection
    
    from mne.time_frequency import psd_multitaper
    from mne.preprocessing import (ICA, create_eog_epochs, create_ecg_epochs, corrmap)
    from mne.stats import spatio_temporal_cluster_test, permutation_cluster_test
    from mne.channels import find_ch_adjacency
    from mne.connectivity import spectral_connectivity
    
    import nitime.analysis as nta
    import nitime.timeseries as nts
    import nitime.utils as ntsu
    from nitime.viz import drawmatrix_channels, drawmatrix_channels_modified # drawmatrix_channels_modified is a locally modified plotting function (see note below)
    
    from sklearn import preprocessing
    from sklearn import manifold
    from sklearn.svm import LinearSVC, SVC
    from sklearn.decomposition import PCA
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.tree import plot_tree
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.linear_model import LogisticRegression, Ridge, LassoCV, RidgeCV, LogisticRegressionCV
    from sklearn.model_selection import StratifiedKFold, GridSearchCV, StratifiedGroupKFold
    from sklearn.pipeline import Pipeline
    from sklearn.feature_selection import RFECV
    from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, make_scorer
    
    import matplotlib.gridspec as gridspec
    from matplotlib import cm
    
    from statsmodels.tsa.stattools import adfuller
    from statsmodels.formula.api import mixedlm
    
    from autoreject import AutoReject
    
    from mlxtend.evaluate import permutation_test
    from mlxtend.feature_selection import SequentialFeatureSelector as SFS
    from mlxtend.plotting import plot_sequential_feature_selection as plot_sfs
    
    from tqdm import tqdm # progress bars
    from mayavi import mlab # Plotting with MNE
    from mpl_toolkits.mplot3d import Axes3D # registers 3D projections
    
    # Non-library scripts
    # EEG microstate package by von Wegner & Lauf, 2018
    
    from eeg_microstates import * # downloaded from https://github.com/Frederic-vW/eeg_microstates
    
    # minimum Redundancy Maximum Relevance script by Kiran Karra
    from feature_select import * # downloaded from https://github.com/stochasticresearch/featureselect/blob/master/python/feature_select.py
    
    plt.style.use('ggplot') # plotting style
    
    ### Some library functions were modified for this project. The modified
    # versions are defined directly in the respective .py files of the
    # libraries; they are documented below (commented out) for reference:
    
    # Modified kmeans in eeg_microstates (kmeans_return_all)
    # Modified T_empirical in eeg_microstates
    # Modified pysparcl cluster permute (permute_modified)
    
    # # For eeg_microstates.py
    # def kmeans_return_all(data, n_maps, n_runs=10, maxerr=1e-6, maxiter=500):
    #     """Modified K-means clustering as detailed in:
    #     [1] Pascual-Marqui et al., IEEE TBME (1995) 42(7):658--665
    #     [2] Murray et al., Brain Topography(2008) 20:249--264.
    #     Variables named as in [1], step numbering as in Table I.
    
    #     Args:
    #         data: numpy.array, size = (number of time samples) x (number of EEG channels)
    #         n_maps: number of microstate maps
    #         n_runs: number of K-means runs (optional)
    #         maxerr: maximum error for convergence (optional)
    #         maxiter: maximum number of iterations (optional)
    #     Returns:
    #         maps: microstate maps (number of maps x number of channels)
    #         L: sequence of microstate labels
    #         gfp_peaks: indices of local GFP maxima
    #         gev: global explained variance (0..1)
    #         cv: value of the cross-validation criterion
    #     """
    #     n_t = data.shape[0]
    #     n_ch = data.shape[1]
    #     data = data - data.mean(axis=1, keepdims=True)
    
    #     # GFP peaks
    #     gfp = np.std(data, axis=1)
    #     gfp_peaks = locmax(gfp)
    #     gfp_values = gfp[gfp_peaks]
    #     gfp2 = np.sum(gfp_values**2) # normalizing constant in GEV
    #     n_gfp = gfp_peaks.shape[0]
    
    #     # clustering of GFP peak maps only
    #     V = data[gfp_peaks, :]
    #     sumV2 = np.sum(V**2)
    
    #     # store results for each k-means run
    #     cv_list =   []  # cross-validation criterion for each k-means run
    #     gev_list =  []  # GEV of each map for each k-means run
    #     gevT_list = []  # total GEV values for each k-means run
    #     maps_list = []  # microstate maps for each k-means run
    #     L_list =    []  # microstate label sequence for each k-means run
    #     for run in range(n_runs):
    #         # initialize random cluster centroids (indices w.r.t. n_gfp)
    #         rndi = np.random.permutation(n_gfp)[:n_maps]
    #         maps = V[rndi, :]
    #         # normalize row-wise (across EEG channels)
    #         maps /= np.sqrt(np.sum(maps**2, axis=1, keepdims=True))
    #         # initialize
    #         n_iter = 0
    #         var0 = 1.0
    #         var1 = 0.0
    #         # convergence criterion: variance estimate (step 6)
    #         while ( (np.abs((var0-var1)/var0) > maxerr) & (n_iter < maxiter) ):
    #             # (step 3) microstate sequence (= current cluster assignment)
    #             C = np.dot(V, maps.T)
    #             C /= (n_ch*np.outer(gfp[gfp_peaks], np.std(maps, axis=1)))
    #             L = np.argmax(C**2, axis=1)
    #             # (step 4)
    #             for k in range(n_maps):
    #                 Vt = V[L==k, :]
    #                 # (step 4a)
    #                 Sk = np.dot(Vt.T, Vt)
    #                 # (step 4b)
    #                 evals, evecs = np.linalg.eig(Sk)
    #                 v = evecs[:, np.argmax(np.abs(evals))]
    #                 maps[k, :] = v/np.sqrt(np.sum(v**2))
    #             # (step 5)
    #             var1 = var0
    #             var0 = sumV2 - np.sum(np.sum(maps[L, :]*V, axis=1)**2)
    #             var0 /= (n_gfp*(n_ch-1))
    #             n_iter += 1
    #         if (n_iter < maxiter):
    #             print("\t\tK-means run {:d}/{:d} converged after {:d} iterations.".format(run+1, n_runs, n_iter))
    #         else:
    #             print("\t\tK-means run {:d}/{:d} did NOT converge after {:d} iterations.".format(run+1, n_runs, maxiter))
    
    #         # CROSS-VALIDATION criterion for this run (step 8)
    #         C_ = np.dot(data, maps.T)
    #         C_ /= (n_ch*np.outer(gfp, np.std(maps, axis=1)))
    #         L_ = np.argmax(C_**2, axis=1)
    #         var = np.sum(data**2) - np.sum(np.sum(maps[L_, :]*data, axis=1)**2)
    #         var /= (n_t*(n_ch-1))
    #         cv = var * (n_ch-1)**2/(n_ch-n_maps-1.)**2
    
    #         # GEV (global explained variance) of cluster k
    #         gev = np.zeros(n_maps)
    #         for k in range(n_maps):
    #             r = L==k
    #             gev[k] = np.sum(gfp_values[r]**2 * C[r,k]**2)/gfp2
    #         gev_total = np.sum(gev)
    
    #         # store
    #         cv_list.append(cv)
    #         gev_list.append(gev)
    #         gevT_list.append(gev_total)
    #         maps_list.append(maps)
    #         L_list.append(L_)
    
    #     # select best run
    #     k_opt = np.argmin(cv_list)
    #     #k_opt = np.argmax(gevT_list)
    #     maps = maps_list[k_opt]
    #     # ms_gfp = ms_list[k_opt] # microstate sequence at GFP peaks
    #     gev = gev_list[k_opt]
    #     L_ = L_list[k_opt]
    #     # lowest cv criterion
    #     cv_min = np.min(cv_list)
    
    #     return maps, L_, gfp_peaks, gev, cv_min
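
    # A minimal usage sketch for the modified K-means (kept commented out so
    # the preamble stays side-effect free; toy data, hypothetical shapes).
    # kmeans_return_all is assumed to be exposed by the star import from
    # eeg_microstates above.
    # eeg_data = np.random.randn(1000, 32)  # (time samples x EEG channels)
    # maps, L, gfp_peaks, gev, cv = kmeans_return_all(eeg_data, n_maps=4)
    # print("GEV per map:", gev, "- CV criterion:", cv)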
    
    # # For eeg_microstates.py
    # def T_empirical(data, n_clusters, gap_idx = []):
    #     """Modified empirical transition matrix to take gap_idx argument
    
    #     Args:
    #         data: numpy.array, microstate label sequence (length = number of samples)
    #         n_clusters: number of microstate clusters
    #         gap_idx: indices of gaps in data; transitions starting at these
    #                  indices are excluded from T
    #     Returns:
    #         T: empirical transition matrix
    #     """
    #     T = np.zeros((n_clusters, n_clusters))
    #     n = len(data)
    #     for i in range(n-1):
    #         # Do not count transitions between gaps in data
    #         if i in gap_idx:
    #             continue
    #         else:
    #             T[data[i], data[i+1]] += 1.0
    #     p_row = np.sum(T, axis=1)
    #     for i in range(n_clusters):
    #         if ( p_row[i] != 0.0 ):
    #             for j in range(n_clusters):
    #                 T[i,j] /= p_row[i]  # normalize row sums to 1.0
    #     return T
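
    # A minimal usage sketch (toy sequence; gap_idx=[3] marks a boundary
    # between concatenated recordings, so the 2->0 transition at index 3 is
    # not counted). Kept commented out, matching the reference code above.
    # seq = np.array([0, 1, 1, 2, 0, 2, 1, 0])
    # T = T_empirical(seq, n_clusters=3, gap_idx=[3])
    # print(T)  # each visited row sums to 1.0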
    
    # # From sparcl.cluster.permute
    # def permute_modified(x, k=None, nperms=25, wbounds=None, nvals=10, centers=None,
    #             verbose=False):
    #     # I added sdgaps output
    #     n, p = x.shape
    #     if k is None and centers is None:
    #         raise ValueError('k and centers are None.')
    #     if k is not None and centers is not None:
    #         if centers.shape[0] != k or centers.shape[1] != p:
    #             raise ValueError('Invalid shape of centers.')
    #     if wbounds is None:
    #         wbounds = np.exp(
    #             np.linspace(np.log(1.2), np.log(np.sqrt(p) * 0.9), nvals))
    #     if wbounds.min() <= 1 or len(wbounds) < 2:
    #         raise ValueError('len(wbounds) and each wbound must be > 1.')
    
    #     permx = np.zeros((nperms, n, p))
    #     nnonzerows = None
    #     for i in range(nperms):
    #         for j in range(p):
    #             permx[i, :, j] = np.random.permutation(x[:, j])
    #     tots = None
    #     out = kmeans(x, k, wbounds, centers=centers, verbose=verbose)
    
    #     for i in range(len(out)):
    #         nnonzerows = utils._cbind(nnonzerows, np.sum(out[i]['ws'] != 0))
    #         bcss = subfunc._get_wcss(x, out[i]['cs'])[1]
    #         tots = utils._cbind(tots, np.sum(out[i]['ws'] * bcss))
    #     permtots = np.zeros((len(wbounds), nperms))
    #     for i in range(nperms):
    #         perm_out = kmeans(
    #             permx[i], k, wbounds, centers=centers, verbose=verbose)
    #         for j in range(len(perm_out)):
    #             perm_bcss = subfunc._get_wcss(permx[i], perm_out[j]['cs'])[1]
    #             permtots[j, i] = np.sum(perm_out[j]['ws'] * perm_bcss)
        
    #     # gap statistic: observed vs permuted log objective (sdgaps output added)
    #     sdgaps = np.std(np.log(permtots), axis=1)
    #     gaps = np.log(tots) - np.log(permtots).mean(axis=1)
    #     bestw = wbounds[gaps.argmax()]
    #     out = {'bestw': bestw, 'gaps': gaps, 'sdgaps': sdgaps, 'wbounds': wbounds,
    #            'nnonzerows': nnonzerows}
    #     return out
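
    # A minimal usage sketch (toy data; assumes permute_modified replaces
    # pysparcl.cluster.permute, so 'sdgaps' is returned alongside 'gaps').
    # Kept commented out, matching the reference code above.
    # X = np.random.randn(60, 20)
    # res = permute_modified(X, k=3, nperms=10)
    # best_w = res['bestw']   # tuning parameter maximizing the gap statistic
    # gap_sd = res['sdgaps']  # std of the permuted log objective per wbound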