Skip to content
Snippets Groups Projects
Commit ae4cc071 authored by glia's avatar glia
Browse files

Preamble with libraries

parent 24dba169
No related branches found
No related tags found
No related merge requests found
# -*- coding: utf-8 -*-
"""
Updated Oct 18 2022
@author: Qianliang Li (glia@dtu.dk)
This preamble contains the code to load all the relevant libraries
Refer to the requirements.txt for the specific versions used
"""
# Libraries
import os
import sys
import re
import warnings
import time
import pickle
import concurrent.futures # for multiprocessing
import numpy as np # Arrays and mathematical computations
import matplotlib.pyplot as plt # Plotting
import mne # EEG library
import scipy # Signal processing
import sklearn # Machine learning
import nitime # Time series analysis
import nolds # DFA exponent
import statsmodels # multipletest
import pysparcl # Sparse Kmeans
import fooof # Peak Alpha Freq and 1/f exponents
import pandas as pd # Dataframes
import seaborn as sns # Plotting library
import autoreject # Automatic EEG artifact detection
import mlxtend # Sequential Forward Selection
from mne.time_frequency import psd_multitaper
from mne.preprocessing import (ICA, create_eog_epochs, create_ecg_epochs, corrmap)
from mne.stats import spatio_temporal_cluster_test, permutation_cluster_test
from mne.channels import find_ch_adjacency
from mne.connectivity import spectral_connectivity
import nitime.analysis as nta
import nitime.timeseries as nts
import nitime.utils as ntsu
from nitime.viz import drawmatrix_channels, drawmatrix_channels_modified
from sklearn import preprocessing
from sklearn import manifold
from sklearn.svm import LinearSVC, SVC
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import plot_tree
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression, Ridge, LassoCV, RidgeCV, LogisticRegressionCV
from sklearn.model_selection import StratifiedKFold, GridSearchCV, StratifiedGroupKFold
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import RFECV
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, make_scorer
import matplotlib.gridspec as gridspec
from matplotlib import cm
from statsmodels.tsa.stattools import adfuller
from statsmodels.formula.api import mixedlm
from autoreject import AutoReject
from mlxtend.evaluate import permutation_test
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from mlxtend.plotting import plot_sequential_feature_selection as plot_sfs
from tqdm import tqdm # progress bars
from mayavi import mlab # Plotting with MNE
from mpl_toolkits.mplot3d import Axes3D # registers 3D projections
# Non-library scripts
# EEG microstate package by von Wegner & Lauf, 2018
from eeg_microstates import * # downloadeded from https://github.com/Frederic-vW/eeg_microstates
# minimum Redundancy Maximum Relevance script by Kiran Karra
from feature_select import * # downloaded from https://github.com/stochasticresearch/featureselect/blob/master/python/feature_select.py
plt.style.use('ggplot') # plotting style
### Some of the functions in the libraries were modified by defining
# modified functions in the respective .py files in the different libraries
# Modified Kmeans in eeg_microstates
# Modified T_empirical in eeg_microstates
# Modified sparcl cluster_permute
# # For eeg_microstates.py
# def kmeans_return_all(data, n_maps, n_runs=10, maxerr=1e-6, maxiter=500):
# """Modified K-means clustering as detailed in:
# [1] Pascual-Marqui et al., IEEE TBME (1995) 42(7):658--665
# [2] Murray et al., Brain Topography(2008) 20:249--264.
# Variables named as in [1], step numbering as in Table I.
# Args:
# data: numpy.array, size = number of EEG channels
# n_maps: number of microstate maps
# n_runs: number of K-means runs (optional)
# maxerr: maximum error for convergence (optional)
# maxiter: maximum number of iterations (optional)
# doplot: plot the results, default=False (optional)
# Returns:
# maps: microstate maps (number of maps x number of channels)
# L: sequence of microstate labels
# gfp_peaks: indices of local GFP maxima
# gev: global explained variance (0..1)
# cv: value of the cross-validation criterion
# """
# n_t = data.shape[0]
# n_ch = data.shape[1]
# data = data - data.mean(axis=1, keepdims=True)
# # GFP peaks
# gfp = np.std(data, axis=1)
# gfp_peaks = locmax(gfp)
# gfp_values = gfp[gfp_peaks]
# gfp2 = np.sum(gfp_values**2) # normalizing constant in GEV
# n_gfp = gfp_peaks.shape[0]
# # clustering of GFP peak maps only
# V = data[gfp_peaks, :]
# sumV2 = np.sum(V**2)
# # store results for each k-means run
# cv_list = [] # cross-validation criterion for each k-means run
# gev_list = [] # GEV of each map for each k-means run
# gevT_list = [] # total GEV values for each k-means run
# maps_list = [] # microstate maps for each k-means run
# L_list = [] # microstate label sequence for each k-means run
# for run in range(n_runs):
# # initialize random cluster centroids (indices w.r.t. n_gfp)
# rndi = np.random.permutation(n_gfp)[:n_maps]
# maps = V[rndi, :]
# # normalize row-wise (across EEG channels)
# maps /= np.sqrt(np.sum(maps**2, axis=1, keepdims=True))
# # initialize
# n_iter = 0
# var0 = 1.0
# var1 = 0.0
# # convergence criterion: variance estimate (step 6)
# while ( (np.abs((var0-var1)/var0) > maxerr) & (n_iter < maxiter) ):
# # (step 3) microstate sequence (= current cluster assignment)
# C = np.dot(V, maps.T)
# C /= (n_ch*np.outer(gfp[gfp_peaks], np.std(maps, axis=1)))
# L = np.argmax(C**2, axis=1)
# # (step 4)
# for k in range(n_maps):
# Vt = V[L==k, :]
# # (step 4a)
# Sk = np.dot(Vt.T, Vt)
# # (step 4b)
# evals, evecs = np.linalg.eig(Sk)
# v = evecs[:, np.argmax(np.abs(evals))]
# maps[k, :] = v/np.sqrt(np.sum(v**2))
# # (step 5)
# var1 = var0
# var0 = sumV2 - np.sum(np.sum(maps[L, :]*V, axis=1)**2)
# var0 /= (n_gfp*(n_ch-1))
# n_iter += 1
# if (n_iter < maxiter):
# print("\t\tK-means run {:d}/{:d} converged after {:d} iterations.".format(run+1, n_runs, n_iter))
# else:
# print("\t\tK-means run {:d}/{:d} did NOT converge after {:d} iterations.".format(run+1, n_runs, maxiter))
# # CROSS-VALIDATION criterion for this run (step 8)
# C_ = np.dot(data, maps.T)
# C_ /= (n_ch*np.outer(gfp, np.std(maps, axis=1)))
# L_ = np.argmax(C_**2, axis=1)
# var = np.sum(data**2) - np.sum(np.sum(maps[L_, :]*data, axis=1)**2)
# var /= (n_t*(n_ch-1))
# cv = var * (n_ch-1)**2/(n_ch-n_maps-1.)**2
# # GEV (global explained variance) of cluster k
# gev = np.zeros(n_maps)
# for k in range(n_maps):
# r = L==k
# gev[k] = np.sum(gfp_values[r]**2 * C[r,k]**2)/gfp2
# gev_total = np.sum(gev)
# # store
# cv_list.append(cv)
# gev_list.append(gev)
# gevT_list.append(gev_total)
# maps_list.append(maps)
# L_list.append(L_)
# # select best run
# k_opt = np.argmin(cv_list)
# #k_opt = np.argmax(gevT_list)
# maps = maps_list[k_opt]
# # ms_gfp = ms_list[k_opt] # microstate sequence at GFP peaks
# gev = gev_list[k_opt]
# L_ = L_list[k_opt]
# # lowest cv criterion
# cv_min = np.min(cv_list)
# return maps, L_, gfp_peaks, gev, cv_min
# # For eeg_microstates.py
# def T_empirical(data, n_clusters, gap_idx = []):
# """Modified empirical transition matrix to take gap_idx argument
# Args:
# data: numpy.array, size = length of microstate sequence
# n_clusters: number of microstate clusters
# gap_idx: index for gaps in data which should be excluded in T
# Returns:
# T: empirical transition matrix
# """
# T = np.zeros((n_clusters, n_clusters))
# n = len(data)
# for i in range(n-1):
# # Do not count transitions between gaps in data
# if i in gap_idx:
# continue
# else:
# T[data[i], data[i+1]] += 1.0
# p_row = np.sum(T, axis=1)
# for i in range(n_clusters):
# if ( p_row[i] != 0.0 ):
# for j in range(n_clusters):
# T[i,j] /= p_row[i] # normalize row sums to 1.0
# return T
# # From sparcl.cluster.permute
# def permute_modified(x, k=None, nperms=25, wbounds=None, nvals=10, centers=None,
# verbose=False):
# # I added sdgaps output
# n, p = x.shape
# if k is None and centers is None:
# raise ValueError('k and centers are None.')
# if k is not None and centers is not None:
# if centers.shape[0] != k or centers.shape[1] != p:
# raise ValueError('Invalid shape of centers.')
# if wbounds is None:
# wbounds = np.exp(
# np.linspace(np.log(1.2), np.log(np.sqrt(p) * 0.9), nvals))
# if wbounds.min() <= 1 or len(wbounds) < 2:
# raise ValueError('len(wbounds) and each wbound must be > 1.')
# permx = np.zeros((nperms, n, p))
# nnonzerows = None
# for i in range(nperms):
# for j in range(p):
# permx[i, :, j] = np.random.permutation(x[:, j])
# tots = None
# out = kmeans(x, k, wbounds, centers=centers, verbose=verbose)
# for i in range(len(out)):
# nnonzerows = utils._cbind(nnonzerows, np.sum(out[i]['ws'] != 0))
# bcss = subfunc._get_wcss(x, out[i]['cs'])[1]
# tots = utils._cbind(tots, np.sum(out[i]['ws'] * bcss))
# permtots = np.zeros((len(wbounds), nperms))
# for i in range(nperms):
# perm_out = kmeans(
# permx[i], k, wbounds, centers=centers, verbose=verbose)
# for j in range(len(perm_out)):
# perm_bcss = subfunc._get_wcss(permx[i], perm_out[j]['cs'])[1]
# permtots[j, i] = np.sum(perm_out[j]['ws'] * perm_bcss)
# sdgaps = np.std(np.log(permtots),axis=1)
# gaps = np.log(tots) - np.log(permtots).mean(axis=1)
# bestw = wbounds[gaps.argmax()]
# out = {'bestw': bestw, 'gaps': gaps, 'sdgaps': sdgaps, 'wbounds': wbounds,
# 'nnonzerows': nnonzerows}
# return out
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment