FeatureEstimation.py

# -*- coding: utf-8 -*-
"""
Updated Oct 18 2022

@author: Qianliang Li (glia@dtu.dk)

This script contains the code to estimate the following EEG features:
    1. Power Spectral Density
    2. Frontal Theta/Beta Ratio
    3. Asymmetry
    4. Peak Alpha Frequency
    5. 1/f Exponents
    6. Microstates
    7. Long-Range Temporal Correlation (DFA Exponent)
Source localization and functional connectivity
    8. Imaginary part of Coherence
    9. Weighted Phase Lag Index
    10. (Orthogonalized) Power Envelope Correlations
    11. Granger Causality

It should be run after Preprocessing.py

All features are saved in pandas DataFrame format for the machine learning
pipeline

Note that the code has not been changed to fit the demonstration dataset,
thus just running it might introduce some errors.
The code is provided to show how we performed the feature estimation
and if you are adapting the code, you should check if it fits your dataset
e.g. the questionnaire data, sensors and source parcellation etc

The script was written in Spyder. The outline panel can be used to navigate
the different parts easier (Default shortcut: Ctrl + Shift + O)
"""


# Set working directory
import os
wkdir = "/home/s200431"
os.chdir(wkdir)

# Load all libraries from the Preamble
from Preamble import *

# %% Load preprocessed epochs and questionnaire data
load_path = "home/s200431/PreprocessedData"

# Get filenames
files = []
for r, d, f in os.walk(load_path):
    for file in f:
        if ".fif" in file:
            files.append(os.path.join(r, file))
files.sort()


# Subject IDs
Subject_id = [0] * len(files)
for i in range(len(files)):
    temp = files[i].split("/")
    temp = temp[-1].split(".")
    temp = temp[0].split("_")
    Subject_id[i] = int(temp[0])

# Should exclude subject 100326 due to 7 bad channels
# Exclude 200013 and 200015 due to too many dropped epochs
# (200001, 200004, 200053 and 200072 were already excluded prior to preprocessing)
# Exclude 302215, 302224, 302227, 302233, 302264, 302268, 302275 due to too many dropped epochs
# 13 subjects excluded in total + 4 I did not receive because they were marked as bad from Helse
bad_subjects = [100326, 200013, 200015, 302224, 302227, 302233, 302264, 302268, 302275]
good_subject_idx = [not i in bad_subjects for i in Subject_id]

Subject_id = list(np.array(Subject_id)[good_subject_idx])
files = list(np.array(files)[good_subject_idx])

n_subjects = len(Subject_id)

# Load ISAF
n_ISAF = 51-1
ISAF7_final_epochs = [0]*n_ISAF
for n in range(len(ISAF7_final_epochs)):
    ISAF7_final_epochs[n] = mne.read_epochs(fname = os.path.join(files[n]),
                    verbose=0)

# Load HELSE
n_HELSE = 70-2
HELSE_final_epochs = [0]*n_HELSE
for n in range(len(HELSE_final_epochs)):
    HELSE_final_epochs[n] = mne.read_epochs(fname = os.path.join(files[n+n_ISAF]),
                    verbose=0)
    # Rename channels to match ISAF (Using 10-20 MCN)
    mne.rename_channels(HELSE_final_epochs[n].info, {"T3":"T7",
                                                     "T4":"T8",
                                                     "T5":"P7",
                                                     "T6":"P8"})
# Warning about chronological order is due to interleaved EO->EC->EO->EC being concatenated as 5xEC->5xEO

# Load Baseline
n_Base = 91-6
Base_final_epochs = [0]*n_Base
for n in range(len(Base_final_epochs)):
    Base_final_epochs [n] = mne.read_epochs(fname = os.path.join(files[n+n_ISAF+n_HELSE]),
                    verbose=0)

# I will use the union of the channels in both dataset (except mastoids)
# This means I add empty channels and interpolate when it is missing
# Helsefond montage has wrong calibration, head size are too big. 
# Thus I will need to re-calibrate by comparing with ISAF7
# Notice I already used the final dig montage for Baseline data

# Get channel names
ISAF7_chs = ISAF7_final_epochs[0].copy().info["ch_names"]
Helse_chs = HELSE_final_epochs[0].copy().info["ch_names"]
Base_chs = Base_final_epochs[0].copy().info["ch_names"]

# Get intersection of channels
intersect_ch = list(set(Helse_chs) & set(ISAF7_chs))
intersect_ch_ratio = [0]*len(intersect_ch)
for i in range(len(intersect_ch)):
    # Get channel name
    ch_name0 = intersect_ch[i]
    # Get index and electrode location
    ISAF7_ch_idx = np.where(np.array(ISAF7_chs) == ch_name0)[0]
    ISAF7_ch_loc = ISAF7_final_epochs[0].info["chs"][int(ISAF7_ch_idx)]["loc"]
    Helse_ch_idx = np.where(np.array(Helse_chs) == ch_name0)[0]
    Helse_ch_loc = HELSE_final_epochs[0].info["chs"][int(Helse_ch_idx)]["loc"]
    # Calculate ratio
    intersect_ch_ratio[i] = [ch_name0,ISAF7_ch_loc[0:3]/Helse_ch_loc[0:3]]
# Most of the ratios are either around 0.095 or 0/Inf when division with 0
# We also see that Cz for ISAF is defined as (0,0,0.095m). For Helse Cz is (0,0,1m)
Helse_to_ISAF7_cal = 0.095

# Now we are ready to make a combined montage (info["dig"])
# Get list of channels to add
ISAF7_add_ch_list = list(set(Helse_chs)-set(ISAF7_chs))
Helse_add_ch_list = list(set(ISAF7_chs)-set(Helse_chs))
combined_ch_list = Helse_chs + ISAF7_chs
# Remove duplicates while maintaining A-P order
duplicate = set()
final_ch_list = [x for x in combined_ch_list if not (x in duplicate or duplicate.add(x))]
# Move AFz and POz to correct position
final_ch_list.insert(3,final_ch_list.pop(-2)) # from second last to 3
final_ch_list.insert(-5,final_ch_list.pop(-1)) # from last to seventh from last

# The order of DigPoints in info are based on sorted ch names
Helse_dig = HELSE_final_epochs[0].copy().info["dig"]
Helse_chs_sorted = Helse_chs.copy()
Helse_chs_sorted.sort()
ISAF7_dig = ISAF7_final_epochs[0].copy().info["dig"]
ISAF7_chs_sorted = ISAF7_chs.copy()
ISAF7_chs_sorted.sort()

# Make one list with DigPoints from Helse + unique channels from ISAF7
ch_idx = [i for i, item in enumerate(ISAF7_chs_sorted) if item in set(Helse_add_ch_list)] # indices of unique channels
final_ch_list_sorted = final_ch_list.copy()
final_ch_list_sorted.sort()
dig_insert_idx = [i for i, item in enumerate(final_ch_list_sorted) if item in set(Helse_add_ch_list)] # find where ISAF7 channels should be inserted

# Prepare combined dig montage
final_dig = Helse_dig.copy()
# Calibrate Helse digpoints
for i in range(len(final_dig)):
    final_dig[i]["r"] = final_dig[i]["r"]*Helse_to_ISAF7_cal
# Insert ISAF7 digpoints
for i in range(len(ch_idx)):
    final_dig.insert(dig_insert_idx[i],ISAF7_dig[ch_idx[i]])

# Remove mastoids
Mastoid_ch = ["M1", "M2"]
M_idx = [i for i, item in enumerate(final_ch_list_sorted) if item in set(Mastoid_ch)] # find mastoids ch
M_idx2 = [i for i, item in enumerate(final_ch_list) if item in set(Mastoid_ch)] # find mastoids ch
M_idx3 = [i for i, item in enumerate(Helse_add_ch_list) if item in set(Mastoid_ch)] # find mastoids ch
for i in reversed(range(len(M_idx))):
    del final_ch_list_sorted[M_idx[i]]
    del final_dig[M_idx[i]]
    # Mastoids are placed in the back of final_ch_list and Helse_add_ch_list and are also removed
    del final_ch_list[M_idx2[i]]
    del Helse_add_ch_list[M_idx3[i]]

# T7, T8, Pz, P8 and P7 are placed wrongly (probably due to renaming)
# This is fixed manually
final_dig.insert(-7,final_dig.pop(-4)) # swap between P8 and T7
final_dig.insert(-6,final_dig.pop(-3)) # swap between T7 and T8
final_dig.insert(-4,final_dig.pop(-4)) # swap between Pz and T7
final_dig.insert(-5,final_dig.pop(-5)) # swap between Pz and POz

# Update EEG identity number
for i in range(len(final_dig)):
    final_dig[i]["ident"] = i+1

# Make final digital montage
final_digmon = mne.channels.DigMontage(dig=final_dig, ch_names=final_ch_list_sorted)
# final_digmon.plot() # visually inspect topographical positions
# final_digmon.plot(kind="3d") # visually inspect 3D positions
# final_digmon.save("final_digmon.fif") # Save digital montage
with open("final_digmon_ch_names.pkl", "wb") as filehandle:
    # The data is stored as binary data stream
    pickle.dump(final_digmon.ch_names, filehandle)

# Remove mastoids from ISAF, add channels from Helse and interpolate