From 1ad2894b788d1d9e8e0b548b988b887880ee704c Mon Sep 17 00:00:00 2001
From: benj3542 <s200431@student.dtu.dk>
Date: Wed, 22 Feb 2023 15:34:59 +0100
Subject: [PATCH] added local changes for preamble and preprocessing

---
 FeatureEstimation.py |   48 +-
 Preamble.py          |  211 ++---
 Preprocessing.py     |    5 +-
 eeg_microstates3.py  | 1849 ++++++++++++++++++++++++++++++++++++++++++
 feature_select.py    |  148 ++++
 requirements.txt     |    6 +-
 6 files changed, 2144 insertions(+), 123 deletions(-)
 create mode 100644 eeg_microstates3.py
 create mode 100644 feature_select.py

diff --git a/FeatureEstimation.py b/FeatureEstimation.py
index 7ea34d0..612ce0d 100644
--- a/FeatureEstimation.py
+++ b/FeatureEstimation.py
@@ -33,9 +33,10 @@ The script was written in Spyder. The outline panel can be used to navigate
 the different parts easier (Default shortcut: Ctrl + Shift + O)
 """
 
+
 # Set working directory
 import os
-wkdir = "/home/glia/EEG"
+wkdir = "/Users/benj3542/Desktop/Uni/Noter/Semester_6/Bachelor/resting-state-eeg-analysis/"
 os.chdir(wkdir)
 
 # Load all libraries from the Preamble
@@ -57,7 +58,7 @@ Subject_id = [0] * len(files)
 for i in range(len(files)):
     temp = files[i].split("\\")
     temp = temp[-1].split("_")
-    Subject_id[i] = int(temp[0])
+    Subject_id[i] = int(temp[0][-1]) # original: Subject_id[i] = int(temp[0])
 
 n_subjects = len(Subject_id)
 
@@ -151,9 +152,15 @@ def power_band_estimation(n):
     res = np.concatenate([abs_power_values,rel_power_values],axis=0)
     return n, res
 
+"""
 with concurrent.futures.ProcessPoolExecutor() as executor:
     for n, result in executor.map(power_band_estimation, range(len(final_epochs))): # Function and arguments
         power_bands[n] = result
+"""
+
+for i in range(len(power_bands)):
+    n, results = power_band_estimation(i)
+    power_bands[i] = results
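+# Note: the serial loop above replaces the ProcessPoolExecutor block that is
+# commented out just before it. A minimal sketch of how the parallel version
+# could be kept behind a __main__ guard (useful on platforms that spawn worker
+# processes, e.g. macOS) is shown below; the guard is an assumption and not
+# part of the original script:
+#
+# if __name__ == "__main__":
+#     with concurrent.futures.ProcessPoolExecutor() as executor:
+#         for n, result in executor.map(power_band_estimation, range(len(final_epochs))):
+#             power_bands[n] = result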
 
 # Combine all data into one dataframe
 # First the columns are prepared
@@ -216,10 +223,11 @@ power_df = pd.DataFrame(data = {"Subject_ID": [ele for ele in Subject_id for i i
 # Fix Freq_band categorical order
 power_df["Freq_band"] = power_df["Freq_band"].astype("category").\
             cat.reorder_categories(list(Freq_Bands.keys()), ordered=True)
+"""
 # Fix Brain_region categorical order
 power_df["Brain_region"] = power_df["Brain_region"].astype("category").\
             cat.reorder_categories(Brain_region_labels, ordered=True)
-
+"""
 # Save the dataframe
 power_df.to_pickle(os.path.join(Feature_savepath,"Power_df.pkl"))
 
@@ -234,16 +242,29 @@ n_eye_status = len(eye_status)
 # Subset frontal absolute power
 power_df_sub1 = power_df[(power_df["Quant_status"] == "Absolute")&
                          (power_df["Brain_region"] == "Frontal")]
+# Subset frontal, midline absolute power
+power_df_sub2 = power_df[(power_df["Quant_status"] == "Absolute")&
+                            (power_df["Brain_region"] == "Frontal")&
+                            (power_df["Brain_side"] == "Mid")]
+
 
-# Calculate average frontal power
+# Calculate average frontal theta power
 frontal_theta_mean_subject = power_df_sub1[power_df_sub1["Freq_band"] == "theta"].\
     groupby(["Subject_ID","Group_status","Eye_status"]).mean().reset_index()
+
+# Calculate average frontal beta power
 frontal_beta_mean_subject = power_df_sub1[power_df_sub1["Freq_band"] == "beta"].\
     groupby(["Subject_ID","Group_status","Eye_status"]).mean().reset_index()
 
+# Calculate average frontal midline theta power
+frontal_midline_theta_mean_subject = power_df_sub2[power_df_sub2["Freq_band"] == "theta"].\
+    groupby(["Subject_ID","Group_status","Eye_status"]).mean().reset_index()
+
+
 # Convert from dB to raw power
 frontal_theta_mean_subject["PSD"] = 10**(frontal_theta_mean_subject["PSD"]/10)
 frontal_beta_mean_subject["PSD"] = 10**(frontal_beta_mean_subject["PSD"]/10)
+frontal_midline_theta_mean_subject["PSD"] = 10**(frontal_midline_theta_mean_subject["PSD"]/10)
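+# (Worked example: the PSD values are stored in dB, i.e. 10*log10(power), so a
+# value of 20 dB corresponds to 10**(20/10) = 100 in raw power units.)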
 
 # Calculate mean for each group and take ratio for whole group
 # To confirm trend observed in PSD plots
@@ -266,6 +287,7 @@ frontal_theta_beta_ratio.insert(3, "Measurement", dummy_variable )
 
 frontal_theta_beta_ratio.to_pickle(os.path.join(Feature_savepath,"fTBR_df.pkl"))
 
+"""
 # %% Frequency bands asymmetry
 # Defined as ln(right) - ln(left)
 # Thus we should only work with the absolute values and undo the dB transformation
@@ -553,7 +575,7 @@ OOF_data_df.insert(4, "Brain_region", list(Brain_region)*int(PAF_data_df.shape[0
 
 # Save the dataframes
 OOF_data_df.to_pickle(os.path.join(Feature_savepath,"OOF_data_FOOOF_df.pkl"))
-
+"""
 # %% Microstate analysis
 # The function takes the data as a numpy array (n_t, n_ch)
 # The data is already re-referenced to common average
@@ -616,11 +638,11 @@ plt.ylabel("Normalized to total")
 # The lower CV the better.
 # But the higher GEV the better.
 # Based on the plots and the recommendation by vong Wegner & Laufs 2018
-# we used 4 microstates
+# we used 5 microstates
 
 # In order to compare between groups, I fix the microstates by clustering on data from both groups
 # Due to instability of maps when running multiple times, I increased n_maps from 4 to 6
-n_maps = 4
+n_maps = 5
 mode = ["aahc", "kmeans", "kmedoids", "pca", "ica"][1]
 
 # K-means is stochastic, thus I run it multiple times in order to find the maps with highest GEV
@@ -670,7 +692,7 @@ c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2)
 print("Started", c_time1, "\nFinished",c_time2)
 
 # Save the results
-with open(Feature_savepath+"Microstate_4_maps_10x5_k_means_results.pkl", "wb") as file:
+with open(Feature_savepath+"Microstate_5_maps_10x5_k_means_results.pkl", "wb") as file:
     pickle.dump(microstate_cluster_results, file)
 
 # # Load
@@ -690,7 +712,7 @@ gev = [microstate_cluster_results[Best_EC_idx][3][0],microstate_cluster_results[
 
 # Plot the maps
 plt.style.use('default')
-labels = ["EC", "EO"]
+labels = ["EC", "EO"] #Eyes-closed, Eyes-open
 for i in range(len(labels)):    
     fig, axarr = plt.subplots(1, n_maps, figsize=(20,5))
     fig.patch.set_facecolor('white')
@@ -703,8 +725,8 @@ for i in range(len(labels)):
 # Due the random initiation of K-means this have to be modified every time clusters are made!
 # Assign map labels (e.g. 0, 2, 1, 3)
 order = [0]*2
-order[0] = [3,0,1,2] # EC
-order[1] = [3,1,0,2] # EO
+order[0] = [3,0,1,2,4] # EC
+order[1] = [3,1,0,2,4] # EO
 for i in range(len(order)):
     maps[i] = maps[i][order[i],:] # re-order maps
     gev[i] = gev[i][order[i]] # re-order GEV
@@ -716,13 +738,13 @@ for i in range(len(order)):
 # Thus the sign of the map does not really reflect which areas are positive or negative (absolute)
 # But more which areas are different during each state (relatively)
 # I can therefore change the sign of the map for the visualizaiton
-sign_swap = [[1,-1,1,1],[1,1,1,-1]]
+sign_swap = [[1,-1,1,1,1],[1,1,1,-1,1]]
 for i in range(len(order)):
     for m in range(n_maps):
         maps[i][m] *= sign_swap[i][m]
 
 # Plot the maps and save
-save_path = "/home/glia/Analysis/Figures/Microstates/"
+save_path = "/Users/benj3542/Desktop/Uni/Noter/Semester_6/Bachelor/resting-state-eeg-analysis/Figures/Microstates"
 labels = ["EC", "EO"]
 for i in range(len(labels)):    
     fig, axarr = plt.subplots(1, n_maps, figsize=(20,5))
diff --git a/Preamble.py b/Preamble.py
index d6d17b7..e85613f 100644
--- a/Preamble.py
+++ b/Preamble.py
@@ -25,14 +25,14 @@ import sklearn # Machine learning
 import nitime # Time series analysis
 import nolds # DFA exponent
 import statsmodels # multipletest
-import pysparcl # Sparse Kmeans
+#import pysparcl # Sparse Kmeans
 import fooof # Peak Alpha Freq and 1/f exponents
 import pandas as pd # Dataframes
 import seaborn as sns # Plotting library
-import autoreject # Automatic EEG artifact detection
+# import autoreject # Automatic EEG artifact detection
 import mlxtend # Sequential Forward Selection
 
-from mne.time_frequency import psd_multitaper
+from mne.time_frequency import *
 from mne.preprocessing import (ICA, create_eog_epochs, create_ecg_epochs, corrmap)
 from mne.stats import spatio_temporal_cluster_test, permutation_cluster_test
 from mne.channels import find_ch_adjacency
@@ -41,7 +41,7 @@ from mne.connectivity import spectral_connectivity
 import nitime.analysis as nta
 import nitime.timeseries as nts
 import nitime.utils as ntsu
-from nitime.viz import drawmatrix_channels, drawmatrix_channels_modified
+from nitime.viz import * # (drawmatrix_channels, drawmatrix_channels_modified)
 
 from sklearn import preprocessing
 from sklearn import manifold
@@ -74,7 +74,7 @@ from mpl_toolkits.mplot3d import Axes3D # registers 3D projections
 
 # Non-library scripts
 # EEG microstate package by von Wegner & Lauf, 2018
-from eeg_microstates import * # downloadeded from https://github.com/Frederic-vW/eeg_microstates
+from eeg_microstates3 import * # downloaded from https://github.com/Frederic-vW/eeg_microstates
 # minimum Redundancy Maximum Relevance script by Kiran Karra
 from feature_select import * # downloaded from https://github.com/stochasticresearch/featureselect/blob/master/python/feature_select.py
 
@@ -88,115 +88,116 @@ plt.style.use('ggplot') # plotting style
 # Modified sparcl cluster_permute
 
 # # For eeg_microstates.py
-# def kmeans_return_all(data, n_maps, n_runs=10, maxerr=1e-6, maxiter=500):
-#     """Modified K-means clustering as detailed in:
-#     [1] Pascual-Marqui et al., IEEE TBME (1995) 42(7):658--665
-#     [2] Murray et al., Brain Topography(2008) 20:249--264.
-#     Variables named as in [1], step numbering as in Table I.
+def kmeans_return_all(data, n_maps, n_runs=10, maxerr=1e-6, maxiter=500):
+    """
+    Modified K-means clustering as detailed in:
+    [1] Pascual-Marqui et al., IEEE TBME (1995) 42(7):658--665
+    [2] Murray et al., Brain Topography(2008) 20:249--264.
+    Variables named as in [1], step numbering as in Table I.
 
-#     Args:
-#         data: numpy.array, size = number of EEG channels
-#         n_maps: number of microstate maps
-#         n_runs: number of K-means runs (optional)
-#         maxerr: maximum error for convergence (optional)
-#         maxiter: maximum number of iterations (optional)
-#         doplot: plot the results, default=False (optional)
-#     Returns:
-#         maps: microstate maps (number of maps x number of channels)
-#         L: sequence of microstate labels
-#         gfp_peaks: indices of local GFP maxima
-#         gev: global explained variance (0..1)
-#         cv: value of the cross-validation criterion
-#     """
-#     n_t = data.shape[0]
-#     n_ch = data.shape[1]
-#     data = data - data.mean(axis=1, keepdims=True)
+    Args:
+        data: EEG data, numpy.array (n_samples, n_channels)
+        n_maps: number of microstate maps
+        n_runs: number of K-means runs (optional)
+        maxerr: maximum error for convergence (optional)
+        maxiter: maximum number of iterations (optional)
+        doplot: plot the results, default=False (optional)
+    Returns:
+        maps: microstate maps (number of maps x number of channels)
+        L: sequence of microstate labels
+        gfp_peaks: indices of local GFP maxima
+        gev: global explained variance (0..1)
+        cv: value of the cross-validation criterion
+    """
+    n_t = data.shape[0]
+    n_ch = data.shape[1]
+    data = data - data.mean(axis=1, keepdims=True)
 
-#     # GFP peaks
-#     gfp = np.std(data, axis=1)
-#     gfp_peaks = locmax(gfp)
-#     gfp_values = gfp[gfp_peaks]
-#     gfp2 = np.sum(gfp_values**2) # normalizing constant in GEV
-#     n_gfp = gfp_peaks.shape[0]
+    # GFP peaks
+    gfp = np.std(data, axis=1)
+    gfp_peaks = locmax(gfp)
+    gfp_values = gfp[gfp_peaks]
+    gfp2 = np.sum(gfp_values**2) # normalizing constant in GEV
+    n_gfp = gfp_peaks.shape[0]
 
-#     # clustering of GFP peak maps only
-#     V = data[gfp_peaks, :]
-#     sumV2 = np.sum(V**2)
+    # clustering of GFP peak maps only
+    V = data[gfp_peaks, :]
+    sumV2 = np.sum(V**2)
 
-#     # store results for each k-means run
-#     cv_list =   []  # cross-validation criterion for each k-means run
-#     gev_list =  []  # GEV of each map for each k-means run
-#     gevT_list = []  # total GEV values for each k-means run
-#     maps_list = []  # microstate maps for each k-means run
-#     L_list =    []  # microstate label sequence for each k-means run
-#     for run in range(n_runs):
-#         # initialize random cluster centroids (indices w.r.t. n_gfp)
-#         rndi = np.random.permutation(n_gfp)[:n_maps]
-#         maps = V[rndi, :]
-#         # normalize row-wise (across EEG channels)
-#         maps /= np.sqrt(np.sum(maps**2, axis=1, keepdims=True))
-#         # initialize
-#         n_iter = 0
-#         var0 = 1.0
-#         var1 = 0.0
-#         # convergence criterion: variance estimate (step 6)
-#         while ( (np.abs((var0-var1)/var0) > maxerr) & (n_iter < maxiter) ):
-#             # (step 3) microstate sequence (= current cluster assignment)
-#             C = np.dot(V, maps.T)
-#             C /= (n_ch*np.outer(gfp[gfp_peaks], np.std(maps, axis=1)))
-#             L = np.argmax(C**2, axis=1)
-#             # (step 4)
-#             for k in range(n_maps):
-#                 Vt = V[L==k, :]
-#                 # (step 4a)
-#                 Sk = np.dot(Vt.T, Vt)
-#                 # (step 4b)
-#                 evals, evecs = np.linalg.eig(Sk)
-#                 v = evecs[:, np.argmax(np.abs(evals))]
-#                 maps[k, :] = v/np.sqrt(np.sum(v**2))
-#             # (step 5)
-#             var1 = var0
-#             var0 = sumV2 - np.sum(np.sum(maps[L, :]*V, axis=1)**2)
-#             var0 /= (n_gfp*(n_ch-1))
-#             n_iter += 1
-#         if (n_iter < maxiter):
-#             print("\t\tK-means run {:d}/{:d} converged after {:d} iterations.".format(run+1, n_runs, n_iter))
-#         else:
-#             print("\t\tK-means run {:d}/{:d} did NOT converge after {:d} iterations.".format(run+1, n_runs, maxiter))
+    # store results for each k-means run
+    cv_list =   []  # cross-validation criterion for each k-means run
+    gev_list =  []  # GEV of each map for each k-means run
+    gevT_list = []  # total GEV values for each k-means run
+    maps_list = []  # microstate maps for each k-means run
+    L_list =    []  # microstate label sequence for each k-means run
+    for run in range(n_runs):
+        # initialize random cluster centroids (indices w.r.t. n_gfp)
+        rndi = np.random.permutation(n_gfp)[:n_maps]
+        maps = V[rndi, :]
+        # normalize row-wise (across EEG channels)
+        maps /= np.sqrt(np.sum(maps**2, axis=1, keepdims=True))
+        # initialize
+        n_iter = 0
+        var0 = 1.0
+        var1 = 0.0
+        # convergence criterion: variance estimate (step 6)
+        while ( (np.abs((var0-var1)/var0) > maxerr) & (n_iter < maxiter) ):
+            # (step 3) microstate sequence (= current cluster assignment)
+            C = np.dot(V, maps.T)
+            C /= (n_ch*np.outer(gfp[gfp_peaks], np.std(maps, axis=1)))
+            L = np.argmax(C**2, axis=1)
+            # (step 4)
+            for k in range(n_maps):
+                Vt = V[L==k, :]
+                # (step 4a)
+                Sk = np.dot(Vt.T, Vt)
+                # (step 4b)
+                evals, evecs = np.linalg.eig(Sk)
+                v = evecs[:, np.argmax(np.abs(evals))]
+                maps[k, :] = v/np.sqrt(np.sum(v**2))
+            # (step 5)
+            var1 = var0
+            var0 = sumV2 - np.sum(np.sum(maps[L, :]*V, axis=1)**2)
+            var0 /= (n_gfp*(n_ch-1))
+            n_iter += 1
+        if (n_iter < maxiter):
+            print("\t\tK-means run {:d}/{:d} converged after {:d} iterations.".format(run+1, n_runs, n_iter))
+        else:
+            print("\t\tK-means run {:d}/{:d} did NOT converge after {:d} iterations.".format(run+1, n_runs, maxiter))
 
-#         # CROSS-VALIDATION criterion for this run (step 8)
-#         C_ = np.dot(data, maps.T)
-#         C_ /= (n_ch*np.outer(gfp, np.std(maps, axis=1)))
-#         L_ = np.argmax(C_**2, axis=1)
-#         var = np.sum(data**2) - np.sum(np.sum(maps[L_, :]*data, axis=1)**2)
-#         var /= (n_t*(n_ch-1))
-#         cv = var * (n_ch-1)**2/(n_ch-n_maps-1.)**2
+        # CROSS-VALIDATION criterion for this run (step 8)
+        C_ = np.dot(data, maps.T)
+        C_ /= (n_ch*np.outer(gfp, np.std(maps, axis=1)))
+        L_ = np.argmax(C_**2, axis=1)
+        var = np.sum(data**2) - np.sum(np.sum(maps[L_, :]*data, axis=1)**2)
+        var /= (n_t*(n_ch-1))
+        cv = var * (n_ch-1)**2/(n_ch-n_maps-1.)**2
 
-#         # GEV (global explained variance) of cluster k
-#         gev = np.zeros(n_maps)
-#         for k in range(n_maps):
-#             r = L==k
-#             gev[k] = np.sum(gfp_values[r]**2 * C[r,k]**2)/gfp2
-#         gev_total = np.sum(gev)
+        # GEV (global explained variance) of cluster k
+        gev = np.zeros(n_maps)
+        for k in range(n_maps):
+            r = L==k
+            gev[k] = np.sum(gfp_values[r]**2 * C[r,k]**2)/gfp2
+        gev_total = np.sum(gev)
 
-#         # store
-#         cv_list.append(cv)
-#         gev_list.append(gev)
-#         gevT_list.append(gev_total)
-#         maps_list.append(maps)
-#         L_list.append(L_)
+        # store
+        cv_list.append(cv)
+        gev_list.append(gev)
+        gevT_list.append(gev_total)
+        maps_list.append(maps)
+        L_list.append(L_)
 
-#     # select best run
-#     k_opt = np.argmin(cv_list)
-#     #k_opt = np.argmax(gevT_list)
-#     maps = maps_list[k_opt]
-#     # ms_gfp = ms_list[k_opt] # microstate sequence at GFP peaks
-#     gev = gev_list[k_opt]
-#     L_ = L_list[k_opt]
-#     # lowest cv criterion
-#     cv_min = np.min(cv_list)
+    # select best run
+    k_opt = np.argmin(cv_list)
+    #k_opt = np.argmax(gevT_list)
+    maps = maps_list[k_opt]
+    # ms_gfp = ms_list[k_opt] # microstate sequence at GFP peaks
+    gev = gev_list[k_opt]
+    L_ = L_list[k_opt]
+    # lowest cv criterion
+    cv_min = np.min(cv_list)
 
-#     return maps, L_, gfp_peaks, gev, cv_min
+    return maps, L_, gfp_peaks, gev, cv_min
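+
+# A minimal usage sketch (illustrative only; `eeg_data` is assumed to be an
+# (n_samples, n_channels) numpy array re-referenced to common average):
+#
+#     maps, L, gfp_peaks, gev, cv = kmeans_return_all(eeg_data, n_maps=5, n_runs=10)
+#
+# The run with the lowest cross-validation criterion is the one returned.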
 
 # # For eeg_microstates.py
 # def T_empirical(data, n_clusters, gap_idx = []):
diff --git a/Preprocessing.py b/Preprocessing.py
index 7e15f86..2779510 100644
--- a/Preprocessing.py
+++ b/Preprocessing.py
@@ -25,7 +25,7 @@ Link to the demonstration data: www.bci2000.org
 
 # Set working directory
 import os
-wkdir = "/home/glia/EEG"
+wkdir = "/Users/benj3542/Desktop/Uni/Noter/Semester_6/Bachelor/resting-state-eeg-analysis/"
 os.chdir(wkdir)
 
 # Load all libraries from the Preamble
@@ -300,7 +300,8 @@ for i in range(len(corrected_epochs)):
         reject_log0[e] = ar0[e].get_reject_log(corrected_epochs[i][ee])
         # Plot and save Autorejected epochs
         fig = reject_log0[e].plot(orientation="horizontal", show=False)
-        fig.savefig(os.path.join(save_path,"AR_" + str(Subject_id_concat[i]) + "_" + str(ee) + ".png"))
+        #fig.savefig(os.path.join(save_path,"AR_" + str(Subject_id_concat[i]) + "_" + str(ee) + ".png"))
+        fig.savefig(os.path.join(save_path,"AR_" + str(e) + "_" + str(ee) + ".png"))
         # Close figure window
         plt.close(fig)
         # Save mean peak-to-peak voltage threshold used
diff --git a/eeg_microstates3.py b/eeg_microstates3.py
new file mode 100644
index 0000000..290cbc3
--- /dev/null
+++ b/eeg_microstates3.py
@@ -0,0 +1,1849 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+# Information-theoretic analysis of EEG microstates
+# Ref.: von Wegner et al., NeuroImage 2017, 158:99-111
+# FvW (12/2014)
+
+"""
+- .format() => f""
+- function documentation
+- ms sequence temporal smoothing
+- all clustering algorithms
+- ms re-ordering: TextWidget due to plt.ion/plt.ioff + console input() problems
+- dictionaries
+"""
+
+import argparse, os, sys, time
+import matplotlib.pyplot as plt
+from matplotlib.widgets import TextBox
+import numpy as np
+from scipy.interpolate import griddata
+from scipy.signal import butter, filtfilt, welch
+from scipy.stats import chi2, chi2_contingency
+from sklearn.decomposition import PCA, FastICA, TruncatedSVD
+from statsmodels.stats.multitest import multipletests
+# in case you have a compiled Cython implementation:
+# from mutinf.mutinf_cy import mutinf_cy_c_wrapper, mutinf_cy
+
+
+def read_xyz(filename):
+    """Read EEG electrode locations in xyz format
+
+    Args:
+        filename: full path to the '.xyz' file
+    Returns:
+        locs: n_channels x 3 (numpy.array)
+    """
+    ch_names = []
+    locs = []
+    with open(filename, 'r') as f:
+        l = f.readline()  # header line
+        while l:
+            l = f.readline().strip().split("\t")
+            if (l != ['']):
+                ch_names.append(l[0])
+                locs.append([float(l[1]), float(l[2]), float(l[3])])
+            else:
+                l = None
+    return ch_names, np.array(locs)
+
+
+def read_edf(filename):
+    """Basic EDF file format reader
+
+    EDF specifications: http://www.edfplus.info/specs/edf.html
+
+    Args:
+        filename: full path to the '.edf' file
+    Returns:
+        chs: list of channel names
+        fs: sampling frequency in [Hz]
+        data: EEG data as numpy.array (samples x channels)
+    """
+
+    def readn(n):
+        """read n bytes."""
+        return np.fromfile(fp, sep='', dtype=np.int8, count=n)
+
+    def bytestr(bytes, i):
+        """convert byte array to string."""
+        return np.array([bytes[k] for k in range(i*8, (i+1)*8)]).tostring()
+
+    fp = open(filename, 'r')
+    x = np.fromfile(fp, sep='', dtype=np.uint8, count=256).tostring()
+    header = {}
+    header['version'] = x[0:8]
+    header['patientID'] = x[8:88]
+    header['recordingID'] = x[88:168]
+    header['startdate'] = x[168:176]
+    header['starttime'] = x[176:184]
+    header['length'] = int(x[184:192]) # header length (bytes)
+    header['reserved'] = x[192:236]
+    header['records'] = int(x[236:244]) # number of records
+    header['duration'] = float(x[244:252]) # duration of each record [sec]
+    header['channels'] = int(x[252:256]) # ns - number of signals
+    n_ch = header['channels']  # number of EEG channels
+    header['channelname'] = (readn(16*n_ch)).tostring()
+    header['transducer'] = (readn(80*n_ch)).tostring().split()
+    header['physdime'] = (readn(8*n_ch)).tostring().split()
+    header['physmin'] = []
+    b = readn(8*n_ch)
+    for i in range(n_ch):
+        header['physmin'].append(float(bytestr(b, i)))
+    header['physmax'] = []
+    b = readn(8*n_ch)
+    for i in range(n_ch):
+        header['physmax'].append(float(bytestr(b, i)))
+    header['digimin'] = []
+    b = readn(8*n_ch)
+    for i in range(n_ch):
+        header['digimin'].append(int(bytestr(b, i)))
+    header['digimax'] = []
+    b = readn(8*n_ch)
+    for i in range(n_ch):
+        header['digimax'].append(int(bytestr(b, i)))
+    header['prefilt'] = (readn(80*n_ch)).tostring().split()
+    header['samples_per_record'] = []
+    b = readn(8*n_ch)
+    for i in range(n_ch):
+        header['samples_per_record'].append(float(bytestr(b, i)))
+    nr = header['records']
+    n_per_rec = int(header['samples_per_record'][0])
+    n_total = int(nr*n_per_rec*n_ch)
+    fp.seek(header['length'],os.SEEK_SET)  # header end = data start
+    data = np.fromfile(fp, sep='', dtype=np.int16, count=n_total)  # count=-1
+    fp.close()
+
+    # re-order
+    #print("EDF reader:")
+    #print("[+] n_per_rec: {:d}".format(n_per_rec))
+    #print("[+] n_ch: {:d}".format(n_ch))
+    #print("[+] nr: {:d}".format(nr))
+    #print("[+] n_total: {:d}".format(n_total))
+    #print(data.shape)
+    data = np.reshape(data,(n_per_rec,n_ch,nr),order='F')
+    data = np.transpose(data,(0,2,1))
+    data = np.reshape(data,(n_per_rec*nr,n_ch),order='F')
+
+    # convert to physical dimensions
+    for k in range(data.shape[1]):
+        d_min = float(header['digimin'][k])
+        d_max = float(header['digimax'][k])
+        p_min = float(header['physmin'][k])
+        p_max = float(header['physmax'][k])
+        if ((d_max-d_min) > 0):
+            data[:,k] = p_min+(data[:,k]-d_min)/(d_max-d_min)*(p_max-p_min)
+
+    #print(header)
+    return header['channelname'].split(),\
+           header['samples_per_record'][0]/header['duration'],\
+           data
+
+
+def bp_filter(data, f_lo, f_hi, fs):
+    """Digital band pass filter (6-th order Butterworth)
+
+    Args:
+        data: numpy.array, time along axis 0
+        (f_lo, f_hi): frequency band to extract [Hz]
+        fs: sampling frequency [Hz]
+    Returns:
+        data_filt: band-pass filtered data, same shape as data
+    """
+    data_filt = np.zeros_like(data)
+    f_ny = fs/2.  # Nyquist frequency
+    b_lo = f_lo/f_ny  # normalized frequency [0..1]
+    b_hi = f_hi/f_ny  # normalized frequency [0..1]
+    # band-pass filter parameters
+    p_lp = {"N":6, "Wn":b_hi, "btype":"lowpass", "analog":False, "output":"ba"}
+    p_hp = {"N":6, "Wn":b_lo, "btype":"highpass", "analog":False, "output":"ba"}
+    bp_b1, bp_a1 = butter(**p_lp)
+    bp_b2, bp_a2 = butter(**p_hp)
+    data_filt = filtfilt(bp_b1, bp_a1, data, axis=0)
+    data_filt = filtfilt(bp_b2, bp_a2, data_filt, axis=0)
+    return data_filt
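+
+# Example usage (illustrative values, not taken from the analysis scripts):
+# extract the 8-12 Hz alpha band from `eeg`, an (n_samples, n_channels) array
+# sampled at 256 Hz:
+#
+#     eeg_alpha = bp_filter(eeg, f_lo=8.0, f_hi=12.0, fs=256.0)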
+
+
+def plot_data(data, fs):
+    """Plot the data
+
+    Args:
+        data: numpy.array
+        fs: sampling frequency [Hz]
+    """
+    t = np.arange(len(data))/fs # time axis in seconds
+    fig = plt.figure(1, figsize=(20,4))
+    plt.plot(t, data, '-k', linewidth=1)
+    plt.xlabel("time [s]", fontsize=24)
+    plt.ylabel("potential [$\mu$V]", fontsize=24)
+    plt.tight_layout()
+    plt.show()
+
+
+def plot_psd(data, fs, n_seg=1024):
+    """Plot the power spectral density (Welch's method)
+
+    Args:
+        data: numpy.array
+        fs: sampling frequency [Hz]
+        n_seg: samples per segment, default=1024
+    """
+    freqs, psd = welch(data, fs, nperseg=n_seg)
+    fig = plt.figure(1, figsize=(16,8))
+    plt.semilogy(freqs, psd, 'k', linewidth=3)
+    #plt.loglog(freqs, psd, 'k', linewidth=3)
+    #plt.xlim(freqs.min(), freqs.max())
+    #plt.ylim(psd.min(), psd.max())
+    plt.title("Power spectral density (Welch's method  n={:d})".format(n_seg))
+    plt.tight_layout()
+    plt.show()
+
+
+def topo(data, n_grid=64):
+    """Interpolate EEG topography onto a regularly spaced grid
+
+    Args:
+        data: numpy.array, size = number of EEG channels
+        n_grid: integer, interpolate to n_grid x n_grid array, default=64
+    Returns:
+        data_interpol: cubic interpolation of EEG topography, n_grid x n_grid
+                       contains nan values
+    """
+    channels, locs = read_xyz('cap.xyz')
+    n_channels = len(channels)
+    #locs /= np.sqrt(np.sum(locs**2,axis=1))[:,np.newaxis]
+    locs /= np.linalg.norm(locs, 2, axis=1, keepdims=True)
+    c = findstr('Cz', channels)[0]
+    # print 'center electrode for interpolation: ' + channels[c]
+    #w = np.sqrt(np.sum((locs-locs[c])**2, axis=1))
+    w = np.linalg.norm(locs-locs[c], 2, axis=1)
+    #arclen = 2*np.arcsin(w/2)
+    arclen = np.arcsin(w/2.*np.sqrt(4.-w*w))
+    phi_re = locs[:,0]-locs[c][0]
+    phi_im = locs[:,1]-locs[c][1]
+    #print(type(phi_re), phi_re)
+    #print(type(phi_im), phi_im)
+    tmp = phi_re + 1j*phi_im
+    #tmp = map(complex, locs[:,0]-locs[c][0], locs[:,1]-locs[c][1])
+    #print(type(tmp), tmp)
+    phi = np.angle(tmp)
+    #phi = np.angle(map(complex, locs[:,0]-locs[c][0], locs[:,1]-locs[c][1]))
+    X = arclen*np.real(np.exp(1j*phi))
+    Y = arclen*np.imag(np.exp(1j*phi))
+    r = max([max(X),max(Y)])
+    Xi = np.linspace(-r,r,n_grid)
+    Yi = np.linspace(-r,r,n_grid)
+    data_ip = griddata((X, Y), data, (Xi[None,:], Yi[:,None]), method='cubic')
+    return data_ip
+
+
+def eeg2map(data):
+    """Interpolate and normalize EEG topography, ignoring nan values
+
+    Args:
+        data: numpy.array, size = number of EEG channels
+        n_grid: integer, interpolate to n_grid x n_grid array (fixed at 64 here)
+    Returns:
+        top_norm: normalized topography, n_grid x n_grid
+    """
+    n_grid = 64
+    top = topo(data, n_grid)
+    mn = np.nanmin(top)
+    mx = np.nanmax(top)
+    top_norm = (top-mn)/(mx-mn)
+    return top_norm
+
+
+def clustering(data, fs, chs, locs, mode, n_clusters, n_win=3, \
+               interpol=True, doplot=False):
+    """EEG microstate clustering algorithms.
+
+    Args:
+        data: numpy.array (n_t, n_ch)
+        fs  : sampling frequency [Hz]
+        chs : list of channel name strings
+        locs: numpy.array (n_ch, 2) of electrode locations (x, y)
+        mode: clustering algorithm
+        n_clusters: number of clusters
+        n_win: smoothing window size 2*n_win+1 [t-n_win:t+n_win]
+        doplot: boolean
+    Returns:
+        maps: n_maps x n_channels NumPy array
+        L: microstate sequence (integers)
+        gfp_peaks: locations of local GFP maxima
+        gev: global explained variance
+    """
+    #print("[+] Clustering algorithm: {:s}".format(mode))
+
+    # --- number of EEG channels ---
+    n_ch = data.shape[1]
+    #print("[+] EEG channels: n_ch = {:d}".format(n_ch))
+
+    # --- normalized data ---
+    data_norm = data - data.mean(axis=1, keepdims=True)
+    data_norm /= data_norm.std(axis=1, keepdims=True)
+
+    # --- GFP peaks ---
+    gfp = np.nanstd(data, axis=1)
+    gfp2 = np.sum(gfp**2) # normalizing constant
+    gfp_peaks = locmax(gfp)
+    data_cluster = data[gfp_peaks,:]
+    #data_cluster = data_cluster[:100,:]
+    data_cluster_norm = data_cluster - data_cluster.mean(axis=1, keepdims=True)
+    data_cluster_norm /= data_cluster_norm.std(axis=1, keepdims=True)
+    print("[+] Data format for clustering [GFP peaks, channels]: {:d} x {:d}"\
+         .format(data_cluster.shape[0], data_cluster.shape[1]))
+
+    start = time.time()
+
+    if (mode == "aahc"):
+        print("\n[+] Clustering algorithm: AAHC.")
+        maps = aahc(data, n_clusters, doplot=False)
+
+    if (mode == "kmeans"):
+        print("\n[+] Clustering algorithm: mod. K-MEANS.")
+        maps = kmeans(data, n_maps=n_clusters, n_runs=5)
+
+    if (mode == "kmedoids"):
+        print("\n[+] Clustering algorithm: K-MEDOIDS.")
+        C = np.corrcoef(data_cluster_norm)
+        C = C**2 # ignore EEG polarity
+        kmed_maps = kmedoids(S=C, K=n_clusters, nruns=10, maxits=500)
+        maps = [data_cluster[int(kmed_maps[k]),:] for k in range(n_clusters)]
+        maps = np.array(maps)
+        del C, kmed_maps
+
+    if (mode == "pca"):
+        print("\n[+] Clustering algorithm: PCA.")
+        params = {
+            "n_components": n_clusters,
+            "copy": True,
+            "whiten": True,
+            "svd_solver": "auto",
+        }
+        pca = PCA(**params) # SKLEARN
+        pca.fit(data_cluster_norm)
+        maps = np.array([pca.components_[k,:] for k in range(n_clusters)])
+        '''
+        print("PCA explained variance: ", str(pca.explained_variance_ratio_))
+        print("PCA total explained variance: ", \
+              str(np.sum(pca.explained_variance_ratio_)))
+        '''
+        del pca, params
+
+        ''' SKLEARN:
+        params = {
+            "n_components": n_clusters,
+            "algorithm": "randomized",
+            "n_iter": 5,
+            "random_state": None,
+            "tol": 0.0,
+        }
+        svd = TruncatedSVD(**params)
+        svd.fit(data_cluster_norm)
+        maps = svd.components_
+        print("explained variance (%): ")
+        print(explained_variance_ratio_)
+        #print("explained variance: ")
+        print(explained_variance_)
+        del svd, params
+        '''
+
+    if (mode == "ica"):
+        print("\n[+] Clustering algorithm: Fast-ICA.")
+        #''' Fast-ICA: algorithm= parallel;deflation, fun=logcosh;exp;cube
+        params = {
+            "n_components": n_clusters,
+            "algorithm": "parallel",
+            "whiten": True,
+            "fun": "exp",
+            "max_iter": 200,
+        }
+        ica = FastICA(**params) # SKLEARN
+        S_ = ica.fit_transform(data_cluster_norm)  # reconstructed signals
+        A_ = ica.mixing_  # estimated mixing matrix
+        IC_ = ica.components_
+        print("data: " + str(data_cluster_norm.shape))
+        print("ICs: " + str(IC_.shape))
+        print("mixing matrix: " + str(A_.shape))
+        maps = np.array([ica.components_[k,:] for k in range(n_clusters)])
+        del ica, params
+
+    end = time.time()
+    delta_t = end - start
+    print(f"[+] Computation time: {delta_t:.2f} sec")
+
+    # --- microstate sequence ---
+    print("\n[+] Microstate back-fitting:")
+    print("data_norm: ", data_norm.shape)
+    print("data_cluster_norm: ", data_cluster_norm.shape)
+    print("maps: ", maps.shape)
+
+    if interpol:
+        C = np.dot(data_cluster_norm, maps.T)/n_ch
+        L_gfp = np.argmax(C**2, axis=1) # microstates at GFP peak
+        del C
+        n_t = data_norm.shape[0]
+        L = np.zeros(n_t)
+        for t in range(n_t):
+            if t in gfp_peaks:
+                i = gfp_peaks.tolist().index(t)
+                L[t] = L_gfp[i]
+            else:
+                i = np.argmin(np.abs(t-gfp_peaks))
+                L[t] = L_gfp[i]
+        L = L.astype('int')
+    else:
+        C = np.dot(data_norm, maps.T)/n_ch
+        L = np.argmax(C**2, axis=1)
+        del C
+
+    # visualize microstate sequence
+    if False:
+        t_ = np.arange(n_t)
+        fig, ax = plt.subplots(2, 1, figsize=(15,4), sharex=True)
+        ax[0].plot(t_, gfp, '-k', lw=2)
+        for p in gfp_peaks:
+            ax[0].axvline(t_[p], c='k', lw=0.5, alpha=0.3)
+        ax[0].plot(t_[gfp_peaks], gfp[gfp_peaks], 'or', ms=10)
+        ax[1].plot(L)
+        plt.show()
+
+    ''' --- temporal smoothing ---
+    L2 = np.copy(L)
+    for i in range(n_win, len(L)-n_win):
+        s = np.array([np.sum(L[i-n_win:i+n_win]==j) for j in range(n_clusters)])
+        L2[i] = np.argmax(s)
+    L = L2.copy()
+    del L2
+    '''
+
+    # --- GEV ---
+    maps_norm = maps - maps.mean(axis=1, keepdims=True)
+    maps_norm /= maps_norm.std(axis=1, keepdims=True)
+
+    # --- correlation data, maps ---
+    C = np.dot(data_norm, maps_norm.T)/n_ch
+    #print("C.shape: " + str(C.shape))
+    #print("C.min: {C.min():.2f}   Cmax: {C.max():.2f}")
+
+    # --- GEV_k & GEV ---
+    gev = np.zeros(n_clusters)
+    for k in range(n_clusters):
+        r = L==k
+        gev[k] = np.sum(gfp[r]**2 * C[r,k]**2)/gfp2
+    print(f"\n[+] Global explained variance GEV = {gev.sum():.3f}")
+    for k in range(n_clusters):
+        print(f"GEV_{k:d}: {gev[k]:.3f}")
+
+    if doplot:
+        #plt.ion()
+        # matplotlib's perceptually uniform sequential colormaps:
+        # magma, inferno, plasma, viridis
+        #cm = plt.cm.magma
+        cm = plt.cm.seismic
+        fig, axarr = plt.subplots(1, n_clusters, figsize=(20,5))
+        for imap in range(n_clusters):
+            axarr[imap].imshow(eeg2map(maps[imap, :]), cmap=cm, origin='lower')
+            axarr[imap].set_xticks([])
+            axarr[imap].set_xticklabels([])
+            axarr[imap].set_yticks([])
+            axarr[imap].set_yticklabels([])
+        title = f"Microstate maps ({mode.upper():s})"
+        axarr[0].set_title(title, fontsize=16, fontweight="bold")
+
+        # dummy function, callback from TextBox, does nothing
+        def f_dummy(text): pass
+
+        axbox = plt.axes([0.1, 0.05, 0.1, 0.1]) #  l, b, w, h
+        text_box = TextBox(axbox, 'Ordering: ', initial="[0, 1, 2, 3]")
+        text_box.on_submit(f_dummy)
+        plt.show()
+        order_str = text_box.text
+
+        #plt.ion()
+        #plt.show()
+        #plt.pause(0.001)
+
+        #txt = input("enter something: ")
+        #print("txt: ", txt)
+        #plt.draw()
+        #plt.ioff()
+
+        # --- assign map labels manually ---
+        #order_str = raw_input("\n\t\tAssign map labels (e.g. 0, 2, 1, 3): ")
+        #order_str = input("\n\t\tAssign map labels (e.g. 0, 2, 1, 3): ")
+        #plt.ioff()
+        order_str = order_str.replace("[", "")
+        order_str = order_str.replace("]", "")
+        order_str = order_str.replace(",", "")
+        order_str = order_str.replace(" ", "")
+        if (len(order_str) != n_clusters):
+            if (len(order_str)==0):
+                print("Empty input string.")
+            else:
+                input_str = ", ".join(order_str)
+                print(f"Parsed manual input: {input_str:s}")
+                print("Number of labels does not equal number of clusters.")
+            print("Continue using the original assignment...\n")
+        else:
+            order = np.zeros(n_clusters, dtype=int)
+            for i, s in enumerate(order_str):
+                order[i] = int(s)
+            print("Re-ordered labels: {:s}".format(", ".join(order_str)))
+            # re-order return variables
+            maps = maps[order,:]
+            for i in range(len(L)):
+                L[i] = order[L[i]]
+            gev = gev[order]
+            # Figure
+            fig, axarr = plt.subplots(1, n_clusters, figsize=(20,5))
+            for imap in range(n_clusters):
+                axarr[imap].imshow(eeg2map(maps[imap,:]),cmap=cm,origin='lower')
+                axarr[imap].set_xticks([])
+                axarr[imap].set_xticklabels([])
+                axarr[imap].set_yticks([])
+                axarr[imap].set_yticklabels([])
+            title = "re-ordered microstate maps"
+            axarr[0].set_title(title, fontsize=16, fontweight="bold")
+            plt.show()
+            #plt.ioff()
+
+    return maps, L, gfp_peaks, gev
+
+
+def aahc(data, N_clusters, doplot=False):
+    """Atomize and Agglomerative Hierarchical Clustering Algorithm
+    AAHC (Murray et al., Brain Topography, 2008)
+
+    Args:
+        data: EEG data to cluster, numpy.array (n_samples, n_channels)
+        N_clusters: desired number of clusters
+        doplot: boolean, plot maps
+    Returns:
+        maps: n_maps x n_channels (numpy.array)
+    """
+
+    def extract_row(A, k):
+        v = A[k,:]
+        A_ = np.vstack((A[:k,:],A[k+1:,:]))
+        return A_, v
+
+    def extract_item(A, k):
+        a = A[k]
+        A_ = A[:k] + A[k+1:]
+        return A_, a
+
+    #print("\n\t--- AAHC ---")
+    nt, nch = data.shape
+
+    # --- get GFP peaks ---
+    gfp = data.std(axis=1)
+    gfp_peaks = locmax(gfp)
+    #gfp_peaks = gfp_peaks[:100]
+    #n_gfp = gfp_peaks.shape[0]
+    gfp2 = np.sum(gfp**2) # normalizing constant in GEV
+
+    # --- initialize clusters ---
+    maps = data[gfp_peaks,:]
+    # --- store original gfp peaks and indices ---
+    cluster_data = data[gfp_peaks,:]
+    #n_maps = n_gfp
+    n_maps = maps.shape[0]
+    print(f"[+] Initial number of clusters: {n_maps:d}\n")
+
+    # --- cluster indices w.r.t. original size, normalized GFP peak data ---
+    Ci = [[k] for k in range(n_maps)]
+
+    # --- main loop: atomize + agglomerate ---
+    while (n_maps > N_clusters):
+        blank_ = 80*" "
+        print(f"\r{blank_:s}\r\t\tAAHC > n: {n_maps:d} => {n_maps-1:d}", end="")
+        #stdout.write(s); stdout.flush()
+        #print("\n\tAAHC > n: {:d} => {:d}".format(n_maps, n_maps-1))
+
+        # --- correlations of the data sequence with each cluster ---
+        m_x, s_x = data.mean(axis=1, keepdims=True), data.std(axis=1)
+        m_y, s_y = maps.mean(axis=1, keepdims=True), maps.std(axis=1)
+        s_xy = 1.*nch*np.outer(s_x, s_y)
+        C = np.dot(data-m_x, np.transpose(maps-m_y)) / s_xy
+
+        # --- microstate sequence, ignore polarity ---
+        L = np.argmax(C**2, axis=1)
+
+        # --- GEV (global explained variance) of cluster k ---
+        gev = np.zeros(n_maps)
+        for k in range(n_maps):
+            r = L==k
+            gev[k] = np.sum(gfp[r]**2 * C[r,k]**2)/gfp2
+
+        # --- merge cluster with the minimum GEV ---
+        imin = np.argmin(gev)
+        #print("\tre-cluster: {:d}".format(imin))
+
+        # --- N => N-1 ---
+        maps, _ = extract_row(maps, imin)
+        Ci, reC = extract_item(Ci, imin)
+        re_cluster = []  # indices of updated clusters
+        #C_sgn = np.zeros(nt)
+        for k in reC:  # map index to re-assign
+            c = cluster_data[k,:]
+            m_x, s_x = maps.mean(axis=1, keepdims=True), maps.std(axis=1)
+            m_y, s_y = c.mean(), c.std()
+            s_xy = 1.*nch*s_x*s_y
+            C = np.dot(maps-m_x, c-m_y)/s_xy
+            inew = np.argmax(C**2) # ignore polarity
+            #C_sgn[k] = C[inew]
+            re_cluster.append(inew)
+            Ci[inew].append(k)
+        n_maps = len(Ci)
+
+        # --- update clusters ---
+        re_cluster = list(set(re_cluster)) # unique list of updated clusters
+
+        ''' re-clustering by modified mean
+        for i in re_cluster:
+            idx = Ci[i]
+            c = np.zeros(nch) # new cluster average
+            # add to new cluster, polarity according to corr. sign
+            for k in idx:
+                if (C_sgn[k] >= 0):
+                    c += cluster_data[k,:]
+                else:
+                    c -= cluster_data[k,:]
+            c /= len(idx)
+            maps[i] = c
+            #maps[i] = (c-np.mean(c))/np.std(c) # normalize the new cluster
+        del C_sgn
+        '''
+
+        # re-clustering by eigenvector method
+        for i in re_cluster:
+            idx = Ci[i]
+            Vt = cluster_data[idx,:]
+            Sk = np.dot(Vt.T, Vt)
+            evals, evecs = np.linalg.eig(Sk)
+            c = evecs[:, np.argmax(np.abs(evals))]
+            c = np.real(c)
+            maps[i] = c/np.sqrt(np.sum(c**2))
+
+    print()
+    return maps
+
+
+def kmeans(data, n_maps, n_runs=10, maxerr=1e-6, maxiter=500):
+    """Modified K-means clustering as detailed in:
+    [1] Pascual-Marqui et al., IEEE TBME (1995) 42(7):658--665
+    [2] Murray et al., Brain Topography(2008) 20:249--264.
+    Variables named as in [1], step numbering as in Table I.
+
+    Args:
+        data: EEG data, numpy.array (n_samples, n_channels)
+        n_maps: number of microstate maps
+        n_runs: number of K-means runs (optional)
+        maxerr: maximum error for convergence (optional)
+        maxiter: maximum number of iterations (optional)
+    Returns:
+        maps: microstate maps of the run with the lowest cross-validation
+              criterion (number of maps x number of channels)
+        Note: the label sequence, GFP peak indices, GEV and CV values are
+        computed internally to select the best run, but only the maps are
+        returned (kmeans_return_all in Preamble.py also returns the rest).
+    """
+    n_t = data.shape[0]
+    n_ch = data.shape[1]
+    data = data - data.mean(axis=1, keepdims=True)
+
+    # GFP peaks
+    gfp = np.std(data, axis=1)
+    gfp_peaks = locmax(gfp)
+    gfp_values = gfp[gfp_peaks]
+    gfp2 = np.sum(gfp_values**2) # normalizing constant in GEV
+    n_gfp = gfp_peaks.shape[0]
+
+    # clustering of GFP peak maps only
+    V = data[gfp_peaks, :]
+    sumV2 = np.sum(V**2)
+
+    # store results for each k-means run
+    cv_list =   []  # cross-validation criterion for each k-means run
+    gev_list =  []  # GEV of each map for each k-means run
+    gevT_list = []  # total GEV values for each k-means run
+    maps_list = []  # microstate maps for each k-means run
+    L_list =    []  # microstate label sequence for each k-means run
+    for run in range(n_runs):
+        # initialize random cluster centroids (indices w.r.t. n_gfp)
+        rndi = np.random.permutation(n_gfp)[:n_maps]
+        maps = V[rndi, :]
+        # normalize row-wise (across EEG channels)
+        maps /= np.sqrt(np.sum(maps**2, axis=1, keepdims=True))
+        # initialize
+        n_iter = 0
+        var0 = 1.0
+        var1 = 0.0
+        # convergence criterion: variance estimate (step 6)
+        while ( (np.abs((var0-var1)/var0) > maxerr) & (n_iter < maxiter) ):
+            # (step 3) microstate sequence (= current cluster assignment)
+            C = np.dot(V, maps.T)
+            C /= (n_ch*np.outer(gfp[gfp_peaks], np.std(maps, axis=1)))
+            L = np.argmax(C**2, axis=1)
+            # (step 4)
+            for k in range(n_maps):
+                Vt = V[L==k, :]
+                # (step 4a)
+                Sk = np.dot(Vt.T, Vt)
+                # (step 4b)
+                evals, evecs = np.linalg.eig(Sk)
+                v = evecs[:, np.argmax(np.abs(evals))]
+                v = v.real
+                maps[k, :] = v/np.sqrt(np.sum(v**2))
+            # (step 5)
+            var1 = var0
+            var0 = sumV2 - np.sum(np.sum(maps[L, :]*V, axis=1)**2)
+            var0 /= (n_gfp*(n_ch-1))
+            n_iter += 1
+        if (n_iter < maxiter):
+            print((f"\tK-means run {run+1:d}/{n_runs:d} converged after "
+                   f"{n_iter:d} iterations."))
+        else:
+            print((f"\tK-means run {run+1:d}/{n_runs:d} did NOT converge "
+                   f"after {maxiter:d} iterations."))
+
+        # CROSS-VALIDATION criterion for this run (step 8)
+        C_ = np.dot(data, maps.T)
+        C_ /= (n_ch*np.outer(gfp, np.std(maps, axis=1)))
+        L_ = np.argmax(C_**2, axis=1)
+        var = np.sum(data**2) - np.sum(np.sum(maps[L_, :]*data, axis=1)**2)
+        var /= (n_t*(n_ch-1))
+        cv = var * (n_ch-1)**2/(n_ch-n_maps-1.)**2
+
+        # GEV (global explained variance) of cluster k
+        gev = np.zeros(n_maps)
+        for k in range(n_maps):
+            r = L==k
+            gev[k] = np.sum(gfp_values[r]**2 * C[r,k]**2)/gfp2
+        gev_total = np.sum(gev)
+
+        # store
+        cv_list.append(cv)
+        gev_list.append(gev)
+        gevT_list.append(gev_total)
+        maps_list.append(maps)
+        L_list.append(L_)
+
+    # select best run
+    k_opt = np.argmin(cv_list)
+    #k_opt = np.argmax(gevT_list)
+    maps = maps_list[k_opt]
+    # ms_gfp = ms_list[k_opt] # microstate sequence at GFP peaks
+    gev = gev_list[k_opt]
+    L_ = L_list[k_opt]
+
+    return maps
+
+
+def kmedoids(S, K, nruns, maxits):
+    """Octave/Matlab: Copyright Brendan J. Frey and Delbert Dueck, Aug 2006
+    http://www.psi.toronto.edu/~frey/apm/kcc.m
+    Simplified by Kevin Murphy
+    Python 2.7.x - FvW, 02/2014
+
+    Args:
+        S: pairwise similarity matrix, numpy.array (n x n)
+        K: number of medoids (clusters)
+        nruns: number of random restarts
+        maxits: maximum number of iterations per run
+    Returns:
+        unique medoid (exemplar) indices into the rows of S (numpy.array)
+    """
+    n = S.shape[0]
+    dpsim = np.zeros((maxits,nruns))
+    idx = np.zeros((n,nruns))
+    for rep in range(nruns):
+        tmp = np.random.permutation(range(n))
+        mu = tmp[:K]
+        t = 0
+        done = (t==maxits)
+        while ( not done ):
+            t += 1
+            muold = mu
+            dpsim[t,rep] = 0
+            # Find class assignments
+            cl = np.argmax(S[:,mu], axis=1) # max pos. of each row
+            # Set assignments of exemplars to themselves
+            cl[mu] = range(K)
+            for j in range(K): # For each class, find new exemplar
+                I = np.where(cl==j)[0]
+                S_I_rowsum = np.sum(S[I][:,I],axis=0)
+                Scl = max(S_I_rowsum)
+                ii = np.argmax(S_I_rowsum)
+                dpsim[t,rep] = dpsim[t,rep] + Scl
+                mu[j] = I[ii]
+            if all(muold==mu) | (t==maxits):
+                done = 1
+        idx[:,rep] = mu[cl]
+        dpsim[t+1:,rep] = dpsim[t,rep]
+    return np.unique(idx)
+
+
+def p_empirical(data, n_clusters):
+    """Empirical symbol distribution
+
+    Args:
+        data: numpy.array, size = length of microstate sequence
+        n_clusters: number of microstate clusters
+    Returns:
+        p: empirical distribution
+    """
+    p = np.zeros(n_clusters)
+    n = len(data)
+    for i in range(n):
+        p[data[i]] += 1.0
+    p /= n
+    return p
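+
+# (Toy example: for data = [0, 1, 1, 0, 2] and n_clusters = 3,
+#  p_empirical returns p = [0.4, 0.4, 0.2].)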
+
+
+def T_empirical(data, n_clusters):
+    """Empirical transition matrix
+
+    Args:
+        data: numpy.array, size = length of microstate sequence
+        n_clusters: number of microstate clusters
+    Returns:
+        T: empirical transition matrix
+    """
+    T = np.zeros((n_clusters, n_clusters))
+    n = len(data)
+    for i in range(n-1):
+        T[data[i], data[i+1]] += 1.0
+    p_row = np.sum(T, axis=1)
+    for i in range(n_clusters):
+        if ( p_row[i] != 0.0 ):
+            for j in range(n_clusters):
+                T[i,j] /= p_row[i]  # normalize row sums to 1.0
+    return T
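+
+# (Toy example: for data = [0, 1, 1, 0] and n_clusters = 2, the observed
+#  transitions are 0->1, 1->1 and 1->0, so the row-normalized transition
+#  matrix is T = [[0.0, 1.0], [0.5, 0.5]].)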
+
+
+def p_equilibrium(T):
+    '''
+    get equilibrium distribution from transition matrix:
+    lambda = 1 - (left) eigenvector
+    '''
+    evals, evecs = np.linalg.eig(T.transpose())
+    i = np.where(np.isclose(evals, 1.0, atol=1e-6))[0][0] # locate max eigenval.
+    p_eq = np.abs(evecs[:,i]) # make eigenvec. to max. eigenval. non-negative
+    p_eq /= p_eq.sum() # normalized eigenvec. to max. eigenval.
+    return p_eq # stationary distribution
+
+
+def max_entropy(n_clusters):
+    """Maximum Shannon entropy of a sequence with n_clusters
+
+    Args:
+        n_clusters: number of microstate clusters
+    Returns:
+        h_max: maximum Shannon entropy
+    """
+    h_max = np.log(float(n_clusters))
+    return h_max
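+
+# (e.g. for n_clusters = 4 microstates, h_max = log(4), i.e. about 1.386 nats)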
+
+
+def shuffle(data):
+    """Randomly shuffled copy of data (i.i.d. surrogate)
+
+    Args:
+        data: numpy array, 1D
+    Returns:
+        data_copy: shuffled data copy
+    """
+    data_c = data.copy()
+    np.random.shuffle(data_c)
+    return data_c
+
+
+def surrogate_mc(p, T, n_clusters, n):
+    """Surrogate Markov chain with symbol distribution p and
+    transition matrix T
+
+    Args:
+        p: empirical symbol distribution
+        T: empirical transition matrix
+        n_clusters: number of clusters/symbols
+        n: length of surrogate microstate sequence
+    Returns:
+        mc: surrogate Markov chain
+    """
+
+    # NumPy vectorized code
+    psum = np.cumsum(p)
+    Tsum = np.cumsum(T, axis=1)
+    # initial state according to p:
+    mc = [np.min(np.argwhere(np.random.rand() < psum))]
+    # next state according to T:
+    for i in range(1, n):
+        mc.append(np.min(np.argwhere(np.random.rand() < Tsum[mc[i-1]])))
+
+    ''' alternative implementation using loops
+    r = np.random.rand() # ~U[0,1], random threshold
+    s = 0.
+    y = p[s]
+    while (y < r):
+        s += 1.
+        y += p[s]
+    mc = [s] # initial state according to p
+
+    # iterate ...
+    for i in range(1,n):
+        r = np.random.rand() # ~U[0,1], random threshold
+        s = mc[i-1] # current state
+        t = 0. # trial state
+        y = T[s][t] # transition rate to trial state
+        while ( y < r ):
+            t += 1. # next trial state
+            y += T[s][t]
+        mc.append(t) # store current state
+    '''
+
+    return np.array(mc)
+
+
+def mutinf(x, ns, lmax):
+    """Time-lagged mutual information of symbolic sequence x with
+    ns different symbols, up to maximum lag lmax.
+    *** Symbols must be 0, 1, 2, ... to use as indices directly! ***
+
+    Args:
+        x: symbolic sequence, symbols = [0, 1, 2, ...]
+        ns: number of symbols
+        lmax: maximum time lag
+    Returns:
+        mi: time-lagged mutual information
+    """
+
+    n = len(x)
+    mi = np.zeros(lmax)
+    for l in range(lmax):
+        if ((l+1)%10 == 0):
+            print(f"mutual information lag: {l+1:d}\r", end="")
+            #sys.stdout.write(s)
+            #sys.stdout.flush()
+        nmax = n-l
+        h1 = H_1(x[:nmax], ns)
+        h2 = H_1(x[l:l+nmax], ns)
+        h12 = H_2(x[:nmax], x[l:l+nmax], ns)
+        mi[l] = h1 + h2 - h12
+    print("")
+    return mi
+
+
+def mutinf_i(x, ns, lmax):
+    """Time-lagged mutual information for each symbol of the symbolic
+    sequence x with  ns different symbols, up to maximum lag lmax.
+    *** Symbols must be 0, 1, 2, ... to use as indices directly! ***
+
+    Args:
+        x: symbolic sequence, symbols = [0, 1, 2, ...]
+        ns: number of symbols
+        lmax: maximum time lag
+    Returns:
+        mi: time-lagged mutual informations for each symbol, ns x lmax
+    """
+
+    n = len(x)
+    mi = np.zeros((ns,lmax))
+    for l in range(lmax):
+        if (l%10 == 0):
+            print(f"\r\tmutual information lag: {l:d}\r", end="")
+            #sys.stdout.write(s)
+            #sys.stdout.flush()
+        nmax = n - l
+
+        # compute distributions
+        p1 = np.zeros(ns)
+        p2 = np.zeros(ns)
+        p12 = np.zeros((ns,ns))
+        for i in range(nmax):
+            i1 = int(x[i])
+            i2 = int(x[i+l])
+            p1[i1] += 1.0 # p( X_t = i1 )
+            p2[i2] += 1.0 # p( X_t+l = i2 )
+            p12[i1,i2] += 1.0 # p( X_t+l=i2 , X_t=i1 )
+        p1 /= nmax
+        p2 /= nmax
+
+        # normalize the transition matrix p( X_t+l=i2 | X_t=i1 )
+        rowsum = np.sum(p12, axis=1)
+        for i, j in np.ndindex(p12.shape):
+            p12[i,j] /= rowsum[i]
+
+        # compute entropies
+        H2 = np.sum(p2[p2>0] * np.log(p2[p2>0]))
+        for i in range(ns):
+            H12 = 0.0
+            for j in range(ns):
+                if ( p12[i,j] > 0.0 ):
+                    H12 += ( p12[i,j] * np.log( p12[i,j] ) )
+            mi[i,l] = -H2 + p1[i] * H12
+    return mi
+
+
+def mutinf_CI(p, T, n, alpha, nrep, lmax):
+    """Return an array for the computation of confidence intervals (CI) for
+    the time-lagged mutual information of length lmax.
+    Null Hypothesis: Markov Chain of length n, equilibrium distribution p,
+    transition matrix T, using nrep repetitions."""
+
+    ns = len(p)
+    mi_array = np.zeros((nrep,lmax))
+    for r in range(nrep):
+        print(f"\nsurrogate MC # {r+1:d}/{nrep:d}")
+        x_mc = surrogate_mc(p, T, ns, n)
+        mi_mc = mutinf(x_mc, ns, lmax)
+        #mi_mc = mutinf_cy(X_mc, ns, lmax)
+        mi_array[r,:] = mi_mc
+    return mi_array
+
+
+def excess_entropy_rate(x, ns, kmax, doplot=False):
+    # y = ax+b: line fit to joint entropy for range of histories k
+    # a = entropy rate (slope)
+    # b = excess entropy (y-intercept)
+    h_ = np.zeros(kmax)
+    for k in range(kmax): h_[k] = H_k(x, ns, k+1)
+    ks = np.arange(1,kmax+1)
+    a, b = np.polyfit(ks, h_, 1)
+    # --- Figure ---
+    if doplot:
+        plt.figure(figsize=(6,6))
+        plt.plot(ks, h_, '-sk')
+        plt.plot(ks, a*ks+b, '-b')
+        plt.xlabel("k")
+        plt.ylabel("$H_k$")
+        plt.title("Entropy rate")
+        plt.tight_layout()
+        plt.show()
+    return (a, b)
+
+
+def mc_entropy_rate(p, T):
+    """Markov chain entropy rate.
+    - \sum_i \sum_j p_i T_ij log(T_ij)
+    """
+    h = 0.
+    for i, j in np.ndindex(T.shape):
+        if (T[i,j] > 0):
+            h -= ( p[i]*T[i,j]*np.log(T[i,j]) )
+    return h
+
+
+def aif_peak1(mi, fs, doplot=False):
+    '''compute time-lagged mut. inf. (AIF) and 1st peak.'''
+    dt = 1000./fs # sampling interval [ms]
+    mi_filt = np.convolve(mi, np.ones(3)/3., mode='same')
+    #mi_filt = np.convolve(mi, np.ones(5)/5., mode='same')
+    mx0 = 8 # 8
+    jmax = mx0 + locmax(mi_filt[mx0:])[0]
+    mx_mi = dt*jmax
+    if doplot:
+        offset = 5
+        tmax = 100
+        fig = plt.figure(1, figsize=(22,4))
+        t = dt*np.arange(tmax)
+        plt.plot(t[offset:tmax], mi[offset:tmax], '-ok', label='AIF')
+        plt.plot(t[offset:tmax], mi_filt[offset:tmax], '-b', label='smoothed AIF')
+        plt.plot(mx_mi, mi[jmax], 'or', markersize=15, label='peak-1')
+        plt.xlabel("time lag [ms]")
+        plt.ylabel("mut. inf. [bits]")
+        plt.legend(loc=0)
+        #plt.title("mutual information of map sequence")
+        #plt.title(s, fontsize=16, fontweight='bold')
+        plt.show()
+        input()
+
+    return jmax, mx_mi
+
+
+def H_1(x, ns):
+    """Shannon entropy of the symbolic sequence x with ns symbols.
+
+    Args:
+        x: symbolic sequence, symbols = [0, 1, 2, ...]
+        ns: number of symbols
+    Returns:
+        h: Shannon entropy of x
+    """
+
+    n = len(x)
+    p = np.zeros(ns) # symbol distribution
+    for t in range(n):
+        p[x[t]] += 1.0
+    p /= n
+    h = -np.sum(p[p>0]*np.log(p[p>0]))
+    return h
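+
+# (Toy example: H_1([0, 1, 0, 1], 2) = -2*0.5*log(0.5) = log(2), i.e. about
+#  0.693 nats, the maximum entropy of a two-symbol sequence.)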
+
+
+def H_2(x, y, ns):
+    """Joint Shannon entropy of the symbolic sequences X, Y with ns symbols.
+
+    Args:
+        x, y: symbolic sequences, symbols = [0, 1, 2, ...]
+        ns: number of symbols
+    Returns:
+        h: joint Shannon entropy of x and y
+    """
+
+    if (len(x) != len(y)):
+        print("H_2 warning: sequences of different lengths, using the shorter...")
+    n = min([len(x), len(y)])
+    p = np.zeros((ns, ns)) # joint distribution
+    for t in range(n):
+        p[x[t],y[t]] += 1.0
+    p /= n
+    h = -np.sum(p[p>0]*np.log(p[p>0]))
+    return h
+
+
+def H_k(x, ns, k):
+    """Shannon's joint entropy from x[n+p:n-m]
+    x: symbolic time series
+    ns: number of symbols
+    k: length of k-history
+    """
+
+    N = len(x)
+    f = np.zeros(tuple(k*[ns]))
+    for t in range(N-k): f[tuple(x[t:t+k])] += 1.0
+    f /= (N-k) # normalize distribution
+    hk = -np.sum(f[f>0]*np.log(f[f>0]))
+    #m = np.sum(f>0)
+    #hk = hk + (m-1)/(2*N) # Miller-Madow bias correction
+    return hk
+
+
+def testMarkov0(x, ns, alpha, verbose=True):
+    """Test zero-order Markovianity of symbolic sequence x with ns symbols.
+    Null hypothesis: zero-order MC (iid) <=>
+    p(X[t]), p(X[t+1]) independent
+    cf. Kullback, Technometrics (1962)
+
+    Args:
+        x: symbolic sequence, symbols = [0, 1, 2, ...]
+        ns: number of symbols
+        alpha: significance level
+    Returns:
+        p: p-value of the Chi2 test for independence
+    """
+
+    if verbose:
+        print("ZERO-ORDER MARKOVIANITY:")
+    n = len(x)
+    f_ij = np.zeros((ns,ns))
+    f_i = np.zeros(ns)
+    f_j = np.zeros(ns)
+    # calculate f_ij = p( x[t]=i, x[t+1]=j ) and the marginals f_i, f_j
+    for t in range(n-1):
+        i = x[t]
+        j = x[t+1]
+        f_ij[i,j] += 1.0
+        f_i[i] += 1.0
+        f_j[j] += 1.0
+    T = 0.0 # statistic
+    for i, j in np.ndindex(f_ij.shape):
+        f = f_ij[i,j]*f_i[i]*f_j[j]
+        if (f > 0):
+            num_ = n*f_ij[i,j]
+            den_ = f_i[i]*f_j[j]
+            T += (f_ij[i,j] * np.log(num_/den_))
+    T *= 2.0
+    df = (ns-1.0) * (ns-1.0)
+    #p = chi2test(T, df, alpha)
+    p = chi2.sf(T, df, loc=0, scale=1)
+    if verbose:
+        print(f"p: {p:.2e} | t: {T:.3f} | df: {df:.1f}")
+    return p
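+# Usage sketch (illustrative): p-value of the i.i.d. null for a microstate
+# sequence x with n_maps symbols; alpha is accepted for the (commented-out)
+# chi2test call but is not used by the current computation.
+#   p0 = testMarkov0(x, n_maps, alpha=0.01, verbose=False)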
+
+
+def testMarkov1(X, ns, alpha, verbose=True):
+    """Test first-order Markovianity of symbolic sequence X with ns symbols.
+    Null hypothesis:
+    first-order MC <=>
+    p(X[t+1] | X[t]) = p(X[t+1] | X[t], X[t-1])
+    cf. Kullback, Technometrics (1962), Tables 8.1, 8.2, 8.6.
+
+    Args:
+        x: symbolic sequence, symbols = [0, 1, 2, ...]
+        ns: number of symbols
+        alpha: significance level
+    Returns:
+        p: p-value of the Chi2 test for independence
+    """
+
+    if verbose:
+        print("\nFIRST-ORDER MARKOVIANITY:")
+    n = len(X)
+    f_ijk = np.zeros((ns,ns,ns))
+    f_ij = np.zeros((ns,ns))
+    f_jk = np.zeros((ns,ns))
+    f_j = np.zeros(ns)
+    for t in range(n-2):
+        i = X[t]
+        j = X[t+1]
+        k = X[t+2]
+        f_ijk[i,j,k] += 1.0
+        f_ij[i,j] += 1.0
+        f_jk[j,k] += 1.0
+        f_j[j] += 1.0
+    T = 0.0
+    for i, j, k in np.ndindex(f_ijk.shape):
+        f = f_ijk[i][j][k]*f_j[j]*f_ij[i][j]*f_jk[j][k]
+        if (f > 0):
+            num_ = f_ijk[i,j,k]*f_j[j]
+            den_ = f_ij[i,j]*f_jk[j,k]
+            T += (f_ijk[i,j,k]*np.log(num_/den_))
+    T *= 2.0
+    df = ns*(ns-1)*(ns-1)
+    #p = chi2test(T, df, alpha)
+    p = chi2.sf(T, df, loc=0, scale=1)
+    if verbose:
+        print(f"p: {p:.2e} | t: {T:.3f} | df: {df:.1f}")
+    return p
+
+
+def testMarkov2(X, ns, alpha, verbose=True):
+    """Test second-order Markovianity of symbolic sequence X with ns symbols.
+    Null hypothesis:
+    second-order MC <=>
+    p(X[t+1] | X[t], X[t-1]) = p(X[t+1] | X[t], X[t-1], X[t-2])
+    cf. Kullback, Technometrics (1962), Table 10.2.
+
+    Args:
+        x: symbolic sequence, symbols = [0, 1, 2, ...]
+        ns: number of symbols
+        alpha: significance level
+    Returns:
+        p: p-value of the Chi2 test for independence
+    """
+
+    if verbose:
+        print("\nSECOND-ORDER MARKOVIANITY:")
+    n = len(X)
+    f_ijkl = np.zeros((ns,ns,ns,ns))
+    f_ijk = np.zeros((ns,ns,ns))
+    f_jkl = np.zeros((ns,ns,ns))
+    f_jk = np.zeros((ns,ns))
+    for t in range(n-3):
+        i = X[t]
+        j = X[t+1]
+        k = X[t+2]
+        l = X[t+3]
+        f_ijkl[i,j,k,l] += 1.0
+        f_ijk[i,j,k] += 1.0
+        f_jkl[j,k,l] += 1.0
+        f_jk[j,k] += 1.0
+    T = 0.0
+    for i, j, k, l in np.ndindex(f_ijkl.shape):
+        f = f_ijkl[i,j,k,l]*f_ijk[i,j,k]*f_jkl[j,k,l]*f_jk[j,k]
+        if (f > 0):
+            num_ = f_ijkl[i,j,k,l]*f_jk[j,k]
+            den_ = f_ijk[i,j,k]*f_jkl[j,k,l]
+            T += (f_ijkl[i,j,k,l]*np.log(num_/den_))
+    T *= 2.0
+    df = ns*ns*(ns-1)*(ns-1)
+    #p = chi2test(T, df, alpha)
+    p = chi2.sf(T, df, loc=0, scale=1)
+    if verbose:
+        print(f"p: {p:.2e} | t: {T:.3f} | df: {df:.1f}")
+    return p
+
+
+def conditionalHomogeneityTest(X, ns, l, alpha, verbose=True):
+    """Test conditional homogeneity of non-overlapping blocks of
+    length l of symbolic sequence X with ns symbols
+    cf. Kullback, Technometrics (1962), Table 9.1.
+
+    Args:
+        x: symbolic sequence, symbols = [0, 1, 2, ...]
+        ns: number of symbols
+        l: split x into non-overlapping blocks of size l
+        alpha: significance level
+    Returns:
+        p: p-value of the Chi2 test for independence
+    """
+
+    if verbose:
+        print("\nCONDITIONAL HOMOGENEITY (three-way table):")
+    n = len(X)
+    r = int(np.floor(float(n)/float(l))) # number of blocks
+    nl = r*l
+    if verbose:
+        print("Split data in r = {:d} blocks of length {:d}.".format(r,l))
+    f_ijk = np.zeros((r,ns,ns))
+    f_ij = np.zeros((r,ns))
+    f_jk = np.zeros((ns,ns))
+    f_i = np.zeros(r)
+    f_j = np.zeros(ns)
+
+    # calculate f_ijk (time / block dep. transition matrix)
+    for i in  range(r): # block index
+        for ii in range(l-1): # pos. inside the current block
+            j = X[i*l + ii]
+            k = X[i*l + ii + 1]
+            f_ijk[i,j,k] += 1.0
+            f_ij[i,j] += 1.0
+            f_jk[j,k] += 1.0
+            f_i[i] += 1.0
+            f_j[j] += 1.0
+
+    # conditional homogeneity (stationarity of the Markov chain across blocks)
+    T = 0.0
+    for i, j, k in np.ndindex(f_ijk.shape):
+        # conditional homogeneity
+        f = f_ijk[i,j,k]*f_j[j]*f_ij[i,j]*f_jk[j,k]
+        if (f > 0):
+            num_ = f_ijk[i,j,k]*f_j[j]
+            den_ = f_ij[i,j]*f_jk[j,k]
+            T += (f_ijk[i,j,k]*np.log(num_/den_))
+    T *= 2.0
+    df = (r-1)*(ns-1)*ns
+    #p = chi2test(T, df, alpha)
+    p = chi2.sf(T, df, loc=0, scale=1)
+    if verbose:
+        print(f"p: {p:.2e} | t: {T:.3f} | df: {df:.1f}")
+    return p
+
+
+def symmetryTest(X, ns, alpha, verbose=True):
+    """Test symmetry of the transition matrix of symbolic sequence X with
+    ns symbols
+    cf. Kullback, Technometrics (1962)
+
+    Args:
+        x: symbolic sequence, symbols = [0, 1, 2, ...]
+        ns: number of symbols
+        alpha: significance level
+    Returns:
+        p: p-value of the Chi2 test for independence
+    """
+
+    if verbose:
+        print("\nSYMMETRY:")
+    n = len(X)
+    f_ij = np.zeros((ns,ns))
+    for t in range(n-1):
+        i = X[t]
+        j = X[t+1]
+        f_ij[i,j] += 1.0
+    T = 0.0
+    for i, j in np.ndindex(f_ij.shape):
+        if (i != j):
+            f = f_ij[i,j]*f_ij[j,i]
+            if (f > 0):
+                num_ = 2*f_ij[i,j]
+                den_ = f_ij[i,j]+f_ij[j,i]
+                T += (f_ij[i,j]*np.log(num_/den_))
+    T *= 2.0
+    df = ns*(ns-1)/2
+    #p = chi2test(T, df, alpha)
+    p = chi2.sf(T, df, loc=0, scale=1)
+    if verbose:
+        print(f"p: {p:.2e} | t: {T:.3f} | df: {df:.1f}")
+    return p
+
+
+def geoTest(X, ns, dt, alpha, verbose=True):
+    """Test the geometric distribution of lifetime distributions.
+
+    Args:
+        X: symbolic sequence, symbols = [0, 1, 2, ...]
+        ns: number of symbols
+        dt: time step in [ms]
+        alpha: significance level
+    Returns:
+        p_values: for each symbol
+    """
+
+    if verbose:
+        print("\nGEOMETRIC DISTRIBUTION of lifetimes:\n")
+    tauDist = lifetimes(X, ns)
+    T = T_empirical(X, ns)
+    p_values = np.zeros(ns)
+    for s in range(ns): # test for each symbol
+        if verbose:
+            print(f"\nTesting the distribution of symbol # {s:d}")
+        # m = max_tau:
+        m = len(tauDist[s])
+        # observed lifetime distribution:
+        q_obs = np.zeros(m)
+        # theoretical lifetime distribution:
+        q_exp = np.zeros(m)
+        for j in range(m):
+            # observed frequency of lifetime j+1 for state s
+            q_obs[j] = tauDist[s][j]
+            # expected frequency
+            q_exp[j] = (1.0-T[s][s]) * (T[s][s]**j)
+        q_exp *= sum(q_obs)
+
+        t = 0.0 # chi2 statistic
+        for j in range(m):
+            if ((q_obs[j] > 0) & (q_exp[j] > 0)):
+                t += (q_obs[j]*np.log(q_obs[j]/q_exp[j]))
+        t *= 2.0
+        df = m-1
+        #p0 = chi2test(t, df, alpha)
+        p0 = chi2.sf(t, df, loc=0, scale=1)
+        if verbose:
+            print(f"p: {p0:.2e} | t: {t:.3f} | df: {df:.1f}")
+        p_values[s] = p0
+
+        g1, p1, dof1, expctd1 = chi2_contingency(np.vstack((q_obs,q_exp)), \
+                                                 lambda_='log-likelihood')
+        if verbose:
+            print((f"G-test (log-likelihood) p: {p1:.2e}, g: {g1:.3f}, "
+                   f"df: {dof1:.1f}"))
+
+        # Pearson's Chi2 test
+        g2, p2, dof2, expctd2 = chi2_contingency( np.vstack((q_obs,q_exp)) )
+        if verbose:
+            print((f"G-test (Pearson Chi2) p: {p2:.2e}, g: {g2:.3f}, "
+                   f"df: {dof2:.1f}"))
+
+        p_ = tauDist[s]/np.sum(tauDist[s])
+        tau_mean = 0.0
+        tau_max = len(tauDist[s])*dt
+        for j in range(len(p_)): tau_mean += (p_[j] * j)
+        tau_mean *= dt
+        #if verbose:
+        #    print("\t\tmean dwell time: {:.2f} [ms]".format(tau_mean))
+        #    print("\t\tmax. dwell time: {:.2f} [ms]\n\n".format(tau_max))
+    return p_values
+
+
+def lifetimes(X, ns):
+    """Compute the lifetime distributions for each symbol
+    in a symbolic sequence X with ns symbols.
+
+    Args:
+        x: symbolic sequence, symbols = [0, 1, 2, ...]
+        ns: number of symbols
+    Returns:
+        tauDist: list of lists, lifetime distributions for each symbol
+    """
+
+    n = len(X)
+    tauList = [[] for i in range(ns)] # unsorted lifetimes, [[],[],[],[]]
+    i = 0 # current index
+    s = X[i] # current symbol
+    tau = 1.0 # current lifetime
+    while (i < n-1):
+        i += 1
+        if ( X[i] == s ):
+            tau += 1.0
+        else:
+            tauList[int(s)].append(tau)
+            s = X[i]
+            tau = 1.0
+    tauList[int(s)].append(tau) # last state
+    # find max lifetime for each symbol
+    tau_max = [max(L) for L in tauList]
+    #print( "max lifetime for each symbol : " + str(tau_max) )
+    tauDist = [] # empty histograms for each symbol
+    for i in range(ns): tauDist.append( [0.]*int(tau_max[i]) )
+    # lifetime distributions
+    for s in range(ns):
+        for j in range(len(tauList[s])):
+            tau = tauList[s][j]
+            tauDist[s][int(tau)-1] += 1.0
+    return tauDist
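+# Toy example: lifetimes([0, 0, 1, 1, 1, 0], 2) returns [[1.0, 1.0], [0.0, 0.0, 1.0]],
+# i.e. symbol 0 has one run of length 1 and one of length 2, symbol 1 one run of length 3.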
+
+
+def mixing_time(X, ns):
+    """
+    Relaxation time, inverse of spectral gap
+    Arguments:
+        X: microstate label sequence
+        ns: number of unique labels
+    """
+    T_hat = T_empirical(X,ns) # transition matrix
+    ev = np.linalg.eigvals(T_hat)
+    #ev = np.real_if_close(ev)
+    ev = np.real(ev)
+    ev.sort() # ascending
+    ev2 = np.flipud(ev) # descending
+    #print("ordered eigenvalues: {:s}".format(str(ev2)))
+    sg = ev2[0] - ev2[1] # spectral gap
+    T_mix = 1.0/sg # mixing time
+    return T_mix
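+# Illustration of the computation (toy transition matrix, not a direct call):
+# for T = [[0.9, 0.1], [0.1, 0.9]] the eigenvalues are 1.0 and 0.8, so the
+# spectral gap is 0.2 and the mixing (relaxation) time is 1/0.2 = 5 samples.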
+
+
+def multiple_comparisons(p_values, method):
+    """Apply multiple comparisons correction code using the statsmodels package.
+    Input: array p-values.
+    'b': 'Bonferroni'
+    's': 'Sidak'
+    'h': 'Holm'
+    'hs': 'Holm-Sidak'
+    'sh': 'Simes-Hochberg'
+    'ho': 'Hommel'
+    'fdr_bh': 'FDR Benjamini-Hochberg'
+    'fdr_by': 'FDR Benjamini-Yekutieli'
+    'fdr_tsbh': 'FDR 2-stage Benjamini-Hochberg'
+    'fdr_tsbky': 'FDR 2-stage Benjamini-Krieger-Yekutieli'
+    'fdr_gbs': 'FDR adaptive Gavrilov-Benjamini-Sarkar'
+
+    Args:
+        p_values: uncorrected p-values
+        method: string, one of the methods given above
+    Returns:
+        p_values_corrected: corrected p-values
+    """
+    reject, pvals_corr, alphacSidak, alphacBonf = multipletests(p_values, \
+                                                                method=method)
+    return pvals_corr
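+# Usage sketch (hypothetical p-values): Bonferroni correction, as used in main().
+#   p_corr = multiple_comparisons(np.array([0.01, 0.04, 0.30]), 'b')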
+
+
+def locmax(x):
+    """Get local maxima of 1D-array
+
+    Args:
+        x: numeric sequence
+    Returns:
+        m: list, 1D-indices of local maxima
+    """
+
+    dx = np.diff(x) # discrete 1st derivative
+    zc = np.diff(np.sign(dx)) # zero-crossings of dx
+    m = 1 + np.where(zc == -2)[0] # indices of local max.
+    return m
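+# Toy example: locmax(np.array([0, 1, 0, 2, 0])) returns array([1, 3]), the
+# indices of the two interior local maxima.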
+
+
+def findstr(s, L):
+    """Find string in list of strings, returns indices.
+
+    Args:
+        s: query string
+        L: list of strings to search
+    Returns:
+        x: list of indices where s is found in L
+    """
+
+    x = [i for i, l in enumerate(L) if (l==s)]
+    return x
+
+
+def print_matrix(T):
+    """Console-friendly output of the matrix T.
+
+    Args:
+        T: matrix to print
+    """
+
+    for i, j in np.ndindex(T.shape):
+        if (j == 0):
+            print("|{:.3f}".format(T[i,j]), end='')
+        elif (j == T.shape[1]-1):
+            print("{:.3f}|\n".format(T[i,j]), end='')
+        else:
+            #print "{:.3f}".format(T[i,j]), # Python-2
+            print("{:.3f}".format(T[i,j]), end='') # Python-3
+
+
+def main():
+    """Test module functions."""
+
+    os.system("clear")
+    print("\n\n--- EEG microstate analysis ---\n\n")
+    parser = argparse.ArgumentParser(description='EEG microstate analysis')
+    parser.add_argument('-i','--input', help='.edf file name', required=False)
+    parser.add_argument('-f','--filelist', help='.edf file list', required=False)
+    parser.add_argument('-d', '--directory', \
+                        help='process all .edf files in this directory', \
+                        required=False)
+    parser.add_argument('-m', '--markovsurrogates', \
+                        help='number of Markov surrogates', required=False)
+    args = parser.parse_args()
+    argsdict = vars(args)
+    print("\nTutorial: press <Enter> to proceed to the next step\n")
+
+    # (0) make file list
+    if argsdict['input']:
+        filenames = [argsdict['input']]
+    elif argsdict['filelist']:
+        with open(argsdict['filelist'], 'r') as f:
+            filenames = [l.strip() for l in f]
+    elif argsdict['directory']:
+        filenames = []
+        for f1 in os.listdir(argsdict['directory']):
+            f2 = os.path.join(argsdict['directory'], f1)
+            if os.path.isfile(f2):
+                if (f2[-4:] == ".edf"):
+                    filenames.append(f2)
+    else:
+        filenames = ["test.edf"]
+
+    print("List of file names generated:")
+    for i, f in enumerate(filenames):
+        print(f"File {i:d}: {f:s}")
+    #input("\n\t\tpress ENTER to continue...")
+    # define lists that hold p-values for multiple comparisons
+    p_Markov0 = []
+    p_Markov1 = []
+    p_Markov2 = []
+    p_geo = []
+    p_stationarity = []
+    p_symmetry = []
+
+    # process files
+    for filename in filenames:
+
+        # (1) load edf file
+        os.system("clear")
+        print("\n[1] Load EEG file:")
+        chs, fs, data_raw = read_edf(filename)
+        print(f"\nFile: {filename:s}")
+        print("Sampling rate: {fs:.2f} Hz")
+        print(("Data shape: {data_raw.shape[0]:d} samples x "
+               "{data_raw.shape[1]:d} channels"))
+        print("Channels:")
+        for ch in chs:
+            print(f"\t{ch.decode():s}", end='')
+        print("")
+        #input("\n\n\n\t\tpress ENTER to continue...")
+
+        # (2) band-pass filter 1-35 Hz (6-th order Butterworth)
+        os.system("clear")
+        print("\n[2] Apply band-pass filter...")
+        data = bp_filter(data_raw, f_lo=1, f_hi=35, fs=fs) # (1, 35)
+        #data = np.copy(data_raw)
+        #input("\n\n\t\tpress ENTER to continue...")
+
+        # (3) visualize data
+        os.system("clear")
+        print("\n[3] Visualize data (PCA-1):")
+        pca = PCA(copy=True, n_components=1, whiten=False)
+        pca1 = pca.fit_transform(data)[:,0]
+        del pca
+        plot_data(pca1, fs)
+        #input("\n\n\t\tpress ENTER to continue...")
+
+        # (4) microstate clustering
+        os.system("clear")
+        print("\n[4] Clustering EEG topographies at GFP peaks:")
+        n_maps = 4
+        #maps, x, gfp_peaks, gev, cv = kmeans(data, n_maps=n_maps, n_runs=5, \
+        #                                      doplot=True)
+        locs = []
+
+        # choose clustering method
+        mode = ["aahc", "kmeans", "kmedoids", "pca", "ica"][1]
+        n_win = 5 # smoothing window
+        maps, x, gfp_peaks, gev = clustering(data, fs, chs, locs, mode, n_maps,\
+                                             n_win, interpol=True, doplot=True)
+        #print("\t\tK-means clustering of: n = {:d} EEG maps".\
+        #format(len(gfp_peaks)))
+        #print("\t\tMicrostates: {:d} maps x {:d} channels".\
+        #print("maps: ", type(maps), maps.shape, maps.dtype)
+        #format(maps.shape[0],maps.shape[1]))
+        #print("\t\tGlobal explained variance GEV = {:.2f}".format(gev.sum()))
+        #input("\n\n\t\tpress ENTER to continue...")
+
+        # (5) basic microstate statistics
+        os.system("clear")
+        print("\n[5] Basic microstate statistics:")
+        p_hat = p_empirical(x, n_maps)
+        T_hat = T_empirical(x, n_maps)
+        print("\nEmpirical symbol distribution (=RTT):\n")
+        for i in range(n_maps):
+            print(f"p_{i:d} = {p_hat[i]:.3f}")
+        print("\nEmpirical transition matrix:\n")
+        print_matrix(T_hat)
+        T_mix = mixing_time(x, n_maps)
+        print(f"\nRelaxation time T = {T_mix:.3f}:\n")
+
+        #'''
+        # taus: list of lists, each list is the lifetimes histogram of
+        # a microstate class, in sample units
+        taus = lifetimes(x,n_maps)
+        #print(taus)
+        dt = 1000./fs # ms
+        fig, ax = plt.subplots(1, n_maps, figsize=(16,4))
+        for i in range(n_maps):
+            taus_i = taus[i] # lifetimes histogram for microstate class i
+            ts = dt*np.arange(len(taus_i)) # histogram x-axis in msec
+            # normalize histogram to get probability distribution p_
+            p_ = taus_i / np.sum(taus_i)
+            mu = np.sum(ts*p_) # mean lifetime
+            print(f"ms-{i:d} lifetimes: {mu:.2f}")
+            ax[i].plot(ts, taus[i], '-k', lw=2)
+        plt.show()
+        #'''
+
+        #''' PPS (peaks per second)
+        pps = len(gfp_peaks) / (len(x)/fs)
+        print("\nGFP peaks per sec.: {pps:.2f}")
+
+        # GEV (global explained variance A-D)
+        print("\nGlobal explained variance (GEV) per map:")
+        for i, g in enumerate(gev):
+            print(f"GEV(ms-{i:d}) = {gev[i]:.2f}")
+        print(f"\nTotal GEV: {gev.sum():.2f}")
+
+        # Cross-correlation between maps
+        print("\nCross-correlation between microstate maps:")
+        C = np.corrcoef(maps)
+        C_max = np.max(np.abs(C-np.array(np.diag(C)*np.eye(n_maps))))
+        print_matrix(C)
+        print(f"\nCmax: {C_max:.2f}")
+        input("\n\npress ENTER to continue...")
+
+        # (6) entropy of the microstate sequence
+        #os.system("clear")
+        print("\n[6a] Shannon entropy of the microstate sequence:")
+        h_hat = H_1(x, n_maps)
+        h_max = max_entropy(n_maps)
+        print(f"\nEmpirical entropy H = {h_hat:.2f} (max. entropy: {h_max:.2f})\n")
+
+        # (6b) entropy rate of the microstate sequence
+        #os.system("clear")
+        print("\n[6b] Entropy rate of the microstate sequence:")
+        kmax = 6
+        h_rate, _ = excess_entropy_rate(x, n_maps, kmax, doplot=True)
+        h_mc = mc_entropy_rate(p_hat, T_hat)
+        print(f"\nEmpirical entropy rate h = {h_rate:.2f}")
+        print(f"Theoretical MC entropy rate h = {h_mc:.2f}")
+        input("\n\n\t\tpress ENTER to continue...")
+
+        # (7) Markov tests
+        os.system("clear")
+        print("\n[7] Markovianity tests:\n")
+        alpha = 0.01
+        p0 = testMarkov0(x, n_maps, alpha)
+        p1 = testMarkov1(x, n_maps, alpha)
+        p2 = testMarkov2(x, n_maps, alpha)
+        p_geo_vals = geoTest(x, n_maps, 1000./fs, alpha)
+        p_Markov0.append(p0)
+        p_Markov1.append(p1)
+        p_Markov2.append(p2)
+        #p_geo.append(p_geo_vals)
+        input("\n\n\t\tpress ENTER to continue...")
+
+        # (8) stationarity test
+        os.system("clear")
+        print("\n[8] Stationarity test:")
+        try:
+            L = int(input("enter block size: "))
+        except (ValueError, EOFError):
+            L = 5000
+        p3 = conditionalHomogeneityTest(x, n_maps, L, alpha)
+        p_stationarity.append(p3)
+        input("\n\n\t\tpress ENTER to continue...")
+
+        # (9) symmetry test
+        os.system("clear")
+        print("\n[9] Symmetry test:")
+        p4 = symmetryTest(x, n_maps, alpha)
+        p_symmetry.append(p4)
+        input("\n\n\t\tpress ENTER to continue...")
+
+        # (10) Markov surrogate example
+        os.system("clear")
+        print("\n[10] Synthesize a surrogate Markov chain:")
+        x_mc = surrogate_mc(p_hat, T_hat, n_maps, len(x))
+        p_surr = p_empirical(x_mc, n_maps)
+        T_surr = T_empirical(x_mc, n_maps)
+        print("\nSurrogate symbol distribution:\n")
+        for i in range(n_maps):
+            print(f"p_{i:d} = {p_surr[i]:.3f}")
+        print( "\nSurrogate transition matrix:\n" )
+        print_matrix(T_surr)
+        input("\n\n\t\tpress ENTER to continue...")
+
+        # (11) Markov tests for surrogate (sanity check)
+        os.system("clear")
+        print("\n[11] Markovianity tests for surrogate MC:\n")
+        p0_ = testMarkov0(x_mc, n_maps, alpha)
+        p1_ = testMarkov1(x_mc, n_maps, alpha)
+        p2_ = testMarkov2(x_mc, n_maps, alpha)
+        p_geo_vals_ = geoTest(x_mc, n_maps, 1000./fs, alpha)
+        input("\n\n\t\tpress ENTER to continue...")
+
+        # (12) Stationarity test for surrogate MC
+        os.system("clear")
+        print("\n[12] Stationarity test for surrogate MC:")
+        try:
+            L = int(input("enter block size: "))
+        except (ValueError, EOFError):
+            L = 5000
+        p3_ = conditionalHomogeneityTest(x_mc, n_maps, L, alpha)
+        input("\n\n\t\tpress ENTER to continue...")
+
+        # (13) Symmetry test for surrogate (sanity check)
+        os.system("clear")
+        print("\n[13] Symmetry test for surrogate MC:")
+        p4_ = symmetryTest(x_mc, n_maps, alpha)
+        input("\n\n\t\tpress ENTER to continue...")
+
+        # (14) time-lagged mutual information (AIF) empirical vs. Markov model
+        os.system("clear")
+        print("\n[14] Mutual information of EEG microstates and surrogate MC:\n")
+        l_max = 100
+        #print(argsdict)
+        if argsdict['markovsurrogates']:
+            n_mc = int(argsdict['markovsurrogates'])
+        else:
+            n_mc = 10
+        aif = mutinf(x, n_maps, l_max)
+        #jmax, mx_mi = aif_peak1(aif, fs, doplot=True)
+        #print("\n\t\tAIF 1st peak: index {:d} = {:.2f} ms".format(jmax, mx_mi))
+
+        print("\n\nComputing n = {:d} Markov surrogates...".format(n_mc))
+        aif_array = mutinf_CI(p_hat, T_hat, len(x), alpha, n_mc, l_max)
+        pct = np.percentile(aif_array,[100.*alpha/2.,100.*(1-alpha/2.)],axis=0)
+        print("\n\n")
+        #'''
+
+        #'''
+        plt.ioff()
+        fig = plt.figure(1, figsize=(20,5))
+        t = np.arange(l_max)*1000./fs
+        plt.semilogy(t, aif, '-sk', linewidth=3, label='AIF (EEG)')
+        #plt.semilogy(t, pct[0,:], '-k', linewidth=1)
+        #plt.semilogy(t, pct[1,:], '-k', linewidth=1)
+        plt.fill_between(t, pct[0,:], pct[1,:], facecolor='gray', alpha=0.5, \
+        label='AIF (Markov)')
+        plt.xlabel("time lag [ms]", fontsize=24, fontweight="bold")
+        plt.ylabel("AIF [nats]", fontsize=24, fontweight="bold")
+        plt.legend(fontsize=22)
+        ax = plt.gca()
+        #ax.set_xticklabels(ax.get_xticks(), fontsize=24, fontweight='normal')
+        #ax.set_yticklabels(ax.get_yticks(), fontsize=24, fontweight='normal')
+        ax.tick_params(axis='both', labelsize=24)
+        plt.tight_layout()
+        plt.show()
+        #'''
+
+    if (len(filenames) > 1):
+        os.system("clear")
+        print("\n\t[15] Multiple comparisons correction")
+        mc_method = 'b'
+        if (len(p_Markov0) > 1):
+            p_Markov0_mc = multiple_comparisons(p_Markov0, mc_method)
+            print("\nZero-order Markov test: " + str(p_Markov0_mc))
+        if (len(p_Markov1) > 1):
+            p_Markov1_mc = multiple_comparisons(p_Markov1, mc_method)
+            print("\nFirst-order Markov test: " + str(p_Markov1_mc))
+        if (len(p_Markov2) > 1):
+            p_Markov2_mc = multiple_comparisons(p_Markov2, mc_method)
+            print("\nZero-order Markov test: " + str(p_Markov0_mc))
+        if (len(p_stationarity) > 1):
+            p_stationarity_mc = multiple_comparisons(p_stationarity, mc_method)
+            print("\nStationarity test: " + str(p_stationarity_mc))
+        if (len(p_symmetry) > 1):
+            p_symmetry_mc = multiple_comparisons(p_symmetry, mc_method)
+            print("\nSymmetry test: " + str(p_symmetry_mc))
+
+    print("\n\n\t\t--- Tutorial completed ---")
+
+
+if __name__ == '__main__':
+    np.set_printoptions(precision=3, suppress=True)
+    main()
diff --git a/feature_select.py b/feature_select.py
new file mode 100644
index 0000000..b05cdfa
--- /dev/null
+++ b/feature_select.py
@@ -0,0 +1,148 @@
+#!/usr/bin/env python
+
+'''
+This file contains functions that enable MRMR based Feature Selection.
+See the paper: Feature Selection Based on Mutual Information: Criteria of Max-Dependency, Max-Relevance,
+and Min-Redundancy by H. Peng, F. Long, and C. Ding
+Author: Kiran Karra [Virginia Tech]
+        <kiran.karra@gmail.com, kiran.karra@vt.edu>
+Distribution Statement A: Approved for Public Release, Distribution Unlimited
+'''
+
+import os
+import shutil
+
+import json
+
+from tempfile import mkdtemp
+
+from tqdm import tqdm
+
+from scipy.stats import randint as sp_randint
+
+from sklearn.model_selection import RandomizedSearchCV
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.model_selection import train_test_split
+from sklearn.pipeline import FeatureUnion, Pipeline
+from sklearn.preprocessing import StandardScaler
+from sklearn.metrics import accuracy_score, roc_curve
+# from sklearn.externals import * #joblib
+# from sklearn.externals.joblib import Memory
+import joblib
+from joblib import Memory
+from sklearn.metrics import matthews_corrcoef
+
+import gplearn
+import gplearn.genetic
+import gplearn.fitness
+
+import numpy as np
+
+from joblib import Parallel, delayed
+from joblib import load, dump
+
+# from identity_transformer import IdentityTransformer # wasn't used
+
+# import depmeas # couldn't find the depmeas module
+
+def generic_combined_scorer(x1,o1,ii_1,x2,o2,ii_2,y,h):
+    s1 = h(x1,y)
+    s2 = h(x2,y)
+    o1[ii_1] = s1
+    o2[ii_2] = s2
+"""
+def feature_select(X,y,num_features_to_select=None,K_MAX=1000,estimator=depmeas.mi_tau,n_jobs=-1,verbose=True):
+    '''
+    Implements the MRMR algorithm for feature-selection: http://ieeexplore.ieee.org/document/1453511/
+    Inputs:
+        X - A feature-matrix, of shape (N,D) where N is the number of samples and D is the number
+            of features
+        y - A vector of shape (N,1) which represents the output.  Each index in y is assumed to
+            correspond to the row with the same index in X.
+        num_features_to_select - the number of features to select from the provided X matrix.  If None
+                                 are provided, then all the features that are available are ranked/ordered.
+                                 (default: None)
+        K_MAX - the maximum number of top-scoring features to consider.
+        estimator - a function handle to an estimator of association (that theoretically should
+                    follow the DPI assumptions)
+        n_jobs - the number of processes to use for parallel processing in the background
+        verbose - if True, show progress
+    Output:
+        a vector of indices sorted in descending order, where each index represents the "importance"
+        of the feature, as computed by the MRMR algorithm.
+    '''
+    num_dim = X.shape[1]
+
+    if(num_features_to_select is not None):
+        num_selected_features = min(num_dim,num_features_to_select)
+    else:
+        num_selected_features = num_dim
+    K_MAX_internal = min(num_dim,K_MAX)
+
+    initial_scores = Parallel(n_jobs=n_jobs)(delayed(estimator)(X[:,ii],y) for ii in range(num_dim))
+    # rank the scores in descending order
+    sorted_scores_idxs = np.flipud(np.argsort(initial_scores))
+    
+    # subset the data down so that joblib doesn't have to 
+    # transport large matrices to its workers
+    X_subset = X[:,sorted_scores_idxs[0:K_MAX_internal]]
+    # memory map this for parallelization speed
+    tmp_folder = mkdtemp()
+    # TODO: why is X_subset crashing when we increase K_MAX_in?  Investigate in detail, but
+    # for now, do not use memory mapping for X_subset for stability
+    # X_subset_fname = os.path.join(tmp_folder, 'X_subset')
+    # dump(X_subset, X_subset_fname)
+    # X_subset = load(X_subset_fname, mmap_mode='r')
+
+    selected_feature_idxs    = np.zeros(num_selected_features,dtype=int)
+    remaining_candidate_idxs = list(range(1,K_MAX_internal))
+    
+    # mi_matrix = np.empty((K_MAX_internal,num_selected_features-1))
+    # mi_matrix[:] = np.nan
+
+    relevance_vec_fname = os.path.join(tmp_folder, 'relevance_vec')
+    feature_redundance_vec_fname = os.path.join(tmp_folder, 'feature_redundance_vec')
+    mi_matrix_fname = os.path.join(tmp_folder, 'mi_matrix')
+    relevance_vec = np.memmap(relevance_vec_fname, dtype=float, 
+                                shape=(K_MAX_internal,), mode='w+')
+    feature_redundance_vec = np.memmap(feature_redundance_vec_fname, dtype=float, 
+                                shape=(K_MAX_internal,), mode='w+')
+    mi_matrix = np.memmap(mi_matrix_fname, dtype=float, 
+                                shape=(K_MAX_internal,num_selected_features-1), mode='w+')
+    mi_matrix[:] = np.nan
+
+    # TODO: investigate whether its worth it to parallelize the nested for-loop?
+    with tqdm(total=num_selected_features,desc='Selecting Features ...',disable=(not verbose)) as pbar:
+        pbar.update(1)
+        for k in range(1,num_selected_features):
+            ncand = len(remaining_candidate_idxs)
+            last_selected_feature = k-1
+
+            Parallel(n_jobs=n_jobs)(delayed(generic_combined_scorer)(y,relevance_vec,ii,
+                                                X_subset[:,selected_feature_idxs[last_selected_feature]],
+                                                feature_redundance_vec,ii,X_subset[:,ii],
+                                                estimator) 
+                          for ii in remaining_candidate_idxs)
+
+            # copy the redundance into the mi_matrix, which accumulates our redundance as we compute
+            mi_matrix[remaining_candidate_idxs,last_selected_feature] = feature_redundance_vec[remaining_candidate_idxs]
+            redundance_vec = np.nanmean(mi_matrix[remaining_candidate_idxs,:], axis=1)
+
+            tmp_idx = np.argmax(relevance_vec[remaining_candidate_idxs]-redundance_vec)
+            selected_feature_idxs[k] = remaining_candidate_idxs[tmp_idx]
+            del remaining_candidate_idxs[tmp_idx]
+            
+            pbar.update(1)
+    
+    # map the selected features back to the original dimensions
+    selected_feature_idxs = sorted_scores_idxs[selected_feature_idxs]
+
+    # clean up
+    try:
+        shutil.rmtree(tmp_folder)
+    except:
+        pass
+
+    return selected_feature_idxs
+    """
+    
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 2bc2f51..55b5e14 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,12 +6,12 @@ nitime==0.8.1
 nolds==0.5.2
 statsmodels==0.11.1
 spm1d==0.4.2
-pysparcl==1.4.1
+# pysparcl==1.4.1
 fooof==1.0.0
 pandas==1.0.3
 seaborn==0.10.1
 pingouin==0.3.11
-sklearn==0.24.2
+# sklearn==0.24.2 # installed a newer version for Python 3.10 compatibility
 autoreject==0.2.1
 mlxtend==0.17.0
-mayavi==4.7.1
\ No newline at end of file
+# mayavi==4.7.1 # installed 4.8.1
\ No newline at end of file
-- 
GitLab