#         temp_list1.append(temp_list2)
#     # Save the timeseries across subjects
#     Timeseries_data.append(temp_list1) # output [subject][eye][epoch](ch,time)
# # Test multiple specified model orders for the AR models; each channel combination has its own model
# m_orders = np.linspace(1,25,25) # the model orders tested
# m_orders = np.round(m_orders)
# n_timepoints = len(Timeseries_data[0][0][0])
# n_ch_combinations = scipy.special.comb(n_channels,2, exact=True, repetition=False)
# # To reduce computation time I only test representative epochs (1 from each 1 min session)
# # There will be 5 epochs from eyes closed and 5 from eyes open
# n_rep_epoch = 5
# # The subjects have different numbers of epochs due to dropped epochs
# gaps_trials_idx = np.load("Gaps_trials_idx.npy") # time points between sessions
# # I convert the gap time points to the epoch numbers used as representative epochs
# epoch_idx = np.zeros((n_subjects,n_eye_status,n_rep_epoch), dtype=int) # prepare array
# epoch_idx[:,:,0:4] = np.round(gaps_trials_idx/n_timepoints,0)-8 # take a representative epoch from sessions 1 to 4
# epoch_idx[:,:,4] = np.round(gaps_trials_idx[:,:,3]/n_timepoints,0)+5 # take a representative epoch from session 5
# # Check that all epoch indices exist
# for i in range(n_subjects):
#     EC_index = final_epochs[i].events[:,2] == 1
#     EO_index = final_epochs[i].events[:,2] == 2
#     assert np.sum(EC_index) >= epoch_idx[i,0,4]
#     assert np.sum(EO_index) >= epoch_idx[i,1,4]
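# # (A quick illustration of the index arithmetic, with purely hypothetical numbers:
# # if one epoch spans n_timepoints = 2000 samples and a session gap falls at sample
# # 120000, the gap lies at epoch round(120000/2000) = 60, so the representative
# # epoch for that session is 60 - 8 = 52; for the last session the epoch is taken
# # 5 epochs after the final gap, e.g. 60 + 5 = 65.)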
# # Prepare model order estimation
# AIC_arr = np.zeros((len(m_orders),n_subjects,n_eye_status,n_rep_epoch,n_ch_combinations))
# BIC_arr = np.zeros((len(m_orders),n_subjects,n_eye_status,n_rep_epoch,n_ch_combinations))
# def GC_model_order_est(i):
#     AIC_arr0 = np.zeros((len(m_orders),n_eye_status,n_rep_epoch,n_ch_combinations))
#     BIC_arr0 = np.zeros((len(m_orders),n_eye_status,n_rep_epoch,n_ch_combinations))
#     for e in range(n_eye_status):
#         n_epochs = STC_eye_data[i][e].shape[0]
#         N_total = n_timepoints*n_epochs # total number of datapoints for the specific eye condition
#         for ep in range(n_rep_epoch):
#             epp = epoch_idx[i,e,ep]
#             for o in range(len(m_orders)):
#                 order = int(m_orders[o])
#                 # Make the Granger causality object
#                 GCA1 = nta.GrangerAnalyzer(Timeseries_data[i][e][epp-1], order=order,
#                                            n_freqs=2000)
#                 for c in range(n_ch_combinations):
#                     # Retrieve the error covariance matrices for all combinations
#                     ecov = np.array(list(GCA1.error_cov.values()))
#                     # Calculate AIC
#                     AIC = ntsu.akaike_information_criterion(ecov[c,:,:], p=n_channels,
#                                                             m=order, Ntotal=N_total)
#                     # Calculate BIC
#                     BIC = ntsu.bayesian_information_criterion(ecov[c,:,:], p=n_channels,
#                                                               m=order, Ntotal=N_total)
#                     # Save the information criteria
#                     AIC_arr0[o,e,ep,c] = AIC
#                     BIC_arr0[o,e,ep,c] = BIC
#     print("{} out of {} finished testing".format(i+1,n_subjects))
#     return i, AIC_arr0, BIC_arr0
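# # For reference, the standard VAR model-order criteria (Lütkepohl, 2005) are
# # AIC(m) = ln|Sigma| + 2*m*p^2/N and BIC(m) = ln|Sigma| + m*p^2*ln(N)/N.
# # A minimal sketch of computing them by hand from an error covariance matrix;
# # the helper below is hypothetical, not part of the pipeline, and may differ
# # slightly from nitime's exact implementation:
# def manual_aic_bic(ecov, p, m, N):
#     # Sigma is the residual covariance of the order-m model; lower values are better
#     logdet = np.log(np.linalg.det(ecov))
#     return logdet + 2*m*p**2/N, logdet + m*p**2*np.log(N)/N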
# # Get current time
# c_time1 = time.localtime()
# c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1)
# print(c_time1)
# with concurrent.futures.ProcessPoolExecutor() as executor:
#     for i, AIC_result, BIC_result in executor.map(GC_model_order_est, range(n_subjects)): # Function and arguments
#         AIC_arr[:,i] = AIC_result
#         BIC_arr[:,i] = BIC_result
# # Get current time
# c_time2 = time.localtime()
# c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2)
# print("Started", c_time1, "\nCurrent Time",c_time2)
# # Save the AIC and BIC results
# np.save(Feature_savepath+"AIC_Source_drop_interpol_GC_model_order.npy", AIC_arr) # (m. order, subject, eye, epoch, combination)
# np.save(Feature_savepath+"BIC_Source_drop_interpol_GC_model_order.npy", BIC_arr) # (m. order, subject, eye, epoch, combination)
# # Load data
# AIC_arr = np.load(Feature_savepath+"AIC_Source_drop_interpol_GC_model_order.npy")
# BIC_arr = np.load(Feature_savepath+"BIC_Source_drop_interpol_GC_model_order.npy")
# # Average across all subjects, eye status, epochs and combinations
# plt.figure(figsize=(8,6))
# plt.plot(m_orders, np.nanmean(AIC_arr, axis=(1,2,3,4)), label="AIC")
# plt.plot(m_orders, np.nanmean(BIC_arr, axis=(1,2,3,4)), label="BIC")
# plt.title("Average information criteria value")
# plt.xlabel("Model order (Lag)")
# plt.legend()
# np.sum(np.isnan(AIC_arr))/AIC_arr.size # around 0.07% NaN due to non-convergence
# np.sum(np.isnan(BIC_arr))/BIC_arr.size # around 0.07% NaN due to non-convergence
# # If we look at each subject
# mean_subject_AIC = np.nanmean(AIC_arr, axis=(2,3,4))
# plt.figure(figsize=(8,6))
# for i in range(n_subjects):
#     plt.plot(m_orders, mean_subject_AIC[:,i])
# plt.title("Average AIC for each subject")
# plt.xlabel("Model order (Lag)")
# mean_subject_BIC = np.nanmean(BIC_arr, axis=(2,3,4))
# plt.figure(figsize=(8,6))
# for i in range(n_subjects):
#     plt.plot(m_orders, mean_subject_BIC[:,i])
# plt.title("Average BIC for each subject")
# plt.xlabel("Model order (Lag)")
# # We see that for many cases the BIC does not converge; it is monotonically increasing
# # We can look at the distribution of the chosen order for each time series analyzed
# # I.e. I will find the model order that minimizes the criterion for each model
# AIC_min_arr = np.argmin(AIC_arr, axis=0)
# BIC_min_arr = np.argmin(BIC_arr, axis=0)
# # Plot the distributions of the chosen model orders
# plt.figure(figsize=(8,6))
# sns.distplot(AIC_min_arr.reshape(-1)+1, kde=False, norm_hist=True,
#              bins=np.linspace(0.75,30.25,60), label="AIC")
# plt.ylabel("Frequency density")
# plt.xlabel("Model order")
# plt.title("AIC Model Order Estimation")
# plt.figure(figsize=(8,6))
# sns.distplot(BIC_min_arr.reshape(-1)+1, kde=False, norm_hist=True, color="blue",
#              bins=np.linspace(0.75,30.25,60), label="BIC")
# plt.ylabel("Frequency density")
# plt.xlabel("Model order")
# plt.title("BIC Model Order Estimation")
# # It is clear from the BIC distribution that most models have order 1,
# # which reflects the monotonically increasing BIC curves without convergence
# # Thus I will only use AIC
# # There is a bias-variance trade-off with model order [Stokes & Purdon, 2017]
# # Lower order is associated with higher bias and higher order with higher variance
# # I will choose a consensus model order: the median of the AIC minima across all models
# AR_order = int(np.nanquantile(AIC_min_arr.reshape(-1), q=0.5))
# # Order = 5
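# # (A strict majority vote over the chosen orders would use the mode instead of
# # the median. A minimal sketch of that alternative, not used here; the name
# # AR_order_mode is hypothetical:)
# counts = np.bincount(AIC_min_arr.reshape(-1) + 1)
# AR_order_mode = int(np.argmax(counts))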
# # Calculate Granger Causality for each subject, eye and epoch
# Freq_Bands = {"delta": [1.25, 4.0],
#               "theta": [4.0, 8.0],
#               "alpha": [8.0, 13.0],
#               "beta": [13.0, 30.0],
#               "gamma": [30.0, 49.0]}
# n_freq_bands = len(Freq_Bands)
# # Pre-allocate memory
# GC_data = np.zeros((2,n_subjects,n_eye_status,n_channels,n_channels,n_freq_bands))
# def GC_analysis(i):
#     GC_data0 = np.zeros((2,n_eye_status,n_channels,n_channels,n_freq_bands))
#     for e in range(n_eye_status):
#         n_epochs = STC_eye_data[i][e].shape[0]
#         # Make temporary array to save GC for each epoch
#         temp_GC_data = np.zeros((2,n_epochs,n_channels,n_channels,n_freq_bands))
#         for ep in range(n_epochs):
#             # Fit the AR model
#             GCA = nta.GrangerAnalyzer(Timeseries_data[i][e][ep], order=AR_order,
#                                       n_freqs=int(800)) # n_freqs=800 corresponds to a step of 0.25 Hz, the same as the multitaper used for power estimation
#             for f in range(n_freq_bands):
#                 # Define lower and upper band limits
#                 f_lb = list(Freq_Bands.values())[f][0]
#                 f_ub = list(Freq_Bands.values())[f][1]
#                 # Get the indices corresponding to the frequency band of interest
#                 freq_idx_G = np.where((GCA.frequencies >= f_lb) * (GCA.frequencies < f_ub))[0]
#                 # Calculate the Granger causality quantities
#                 g_xy = np.mean(GCA.causality_xy[:, :, freq_idx_G], -1) # average over the last dimension
#                 g_yx = np.mean(GCA.causality_yx[:, :, freq_idx_G], -1) # average over the last dimension
#                 # Transpose to use the same format as con_measurement and save
#                 temp_GC_data[0,ep,:,:,f] = np.transpose(g_xy)
#                 temp_GC_data[1,ep,:,:,f] = np.transpose(g_yx)
#         # Average over epochs for each person, eye condition, direction and frequency band
#         temp_GC_epoch_mean = np.nanmean(temp_GC_data, axis=1) # sometimes Log(Sxx/xx_auto_component) is NaN
#         # Save to array
#         GC_data0[:,e,:,:,:] = temp_GC_epoch_mean
#     print("{} out of {} finished analyzing".format(i+1,n_subjects))
#     return i, GC_data0
# # Get current time
# c_time1 = time.localtime()
# c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1)
# print(c_time1)
# with concurrent.futures.ProcessPoolExecutor() as executor:
#     for i, GC_result in executor.map(GC_analysis, range(n_subjects)): # Function and arguments
#         GC_data[:,i] = GC_result
# # Get current time
# c_time2 = time.localtime()
# c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2)
# print("Started", c_time1, "\nCurrent Time",c_time2)
# # Output: GC_data (g_xy/g_yx, subject, eye, chx, chy, freq)
# # Notice that for g_xy ([0,...]) it means "chy" Granger causes "chx",
# # and for g_yx ([1,...]) it means "chx" Granger causes "chy"
# # This is due to the transposing, which flipped the results onto the lower triangle of the matrix
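# # (E.g., with hypothetical channel indices: GC_data[0, s, e, 5, 2, f] is the
# # causality channel 2 -> channel 5, while GC_data[1, s, e, 5, 2, f] is the
# # causality channel 5 -> channel 2. Only the lower triangle (chx index > chy index)
# # is filled; the diagonal and upper triangle are NaN and are removed below.)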
# # Save the Granger_Causality data
# np.save(Feature_savepath+"Source_drop_interpol_GrangerCausality_data.npy", GC_data)
# # Theoretically negative GC values should be impossible, but in practice
# # they can still occur due to problems with model fitting (see Stokes & Purdon, 2017)
# print("{:.3f}% negative GC values".\
#       format(np.sum(GC_data[~np.isnan(GC_data)]<0)/np.sum(~np.isnan(GC_data))*100)) # 0.08% negative values
# # These values cannot be interpreted, but they seem to occur mostly for truly non-causal connections
# # Hence I set them to 0
# with np.errstate(invalid="ignore"): # "invalid" refers to np.nan, which evaluates to False in the comparison
#     GC_data[(GC_data<0)] = 0
# # Save as dataframe for further processing with other features
# # Convert to a Pandas dataframe in long format:
# # each dimension becomes a column of (numeric) indices and the last column holds the actual values
# arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, GC_data.shape), indexing="ij"))) + [GC_data.ravel()])
# GC_data_df = pd.DataFrame(arr, columns = ["GC_direction", "Subject_ID", "Eye_status", "chx", "chy", "Freq_band", "Value"])
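# # (To see what the expression does, a toy example: for A = np.array([[10., 11.], [12., 13.]])
# # the same construction yields the rows [0,0,10], [0,1,11], [1,0,12], [1,1,13],
# # i.e. one row per index combination with the array value in the last column.)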
# # Change from numerical coding to actual values
# eye_status = list(final_epochs[0].event_id.keys())
# freq_bands_name = list(Freq_Bands.keys())
# GC_directions_info = ["chy -> chx", "chx -> chy"]
# index_values = [GC_directions_info,Subject_id,eye_status,ch_names,ch_names,freq_bands_name]
# for col in range(len(index_values)):
#     col_name = GC_data_df.columns[col]
#     for shape in range(GC_data.shape[col]): # notice: the array's shape, not the dataframe's
#         GC_data_df.loc[GC_data_df.iloc[:,col] == shape,col_name]\
#             = index_values[col][shape]
# # Add group status
# Group_status = np.array(["CTRL"]*len(GC_data_df["Subject_ID"]))
# Group_status[np.array([i in cases for i in GC_data_df["Subject_ID"]])] = "PTSD"
# # Add to dataframe
# GC_data_df.insert(3, "Group_status", Group_status)
# # Remove all nan (including diagonal and upper-matrix entries)
# GC_data_df = GC_data_df.iloc[np.invert(np.isnan(GC_data_df["Value"].to_numpy()))]
# # Swap the channel values for GC_direction "chy -> chx" (so every row reads chx -> chy)
# tempchy = GC_data_df[GC_data_df["GC_direction"] == "chy -> chx"]["chy"] # save chy
# GC_data_df.loc[GC_data_df["GC_direction"] == "chy -> chx","chy"] =\
#     GC_data_df.loc[GC_data_df["GC_direction"] == "chy -> chx","chx"] # overwrite old chy
# GC_data_df.loc[GC_data_df["GC_direction"] == "chy -> chx","chx"] = tempchy # overwrite chx
# # Drop the GC_direction column, which is now redundant
# GC_data_df = GC_data_df.drop("GC_direction", axis=1)
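# # (E.g. a hypothetical "chy -> chx" row with chx="ROI_A", chy="ROI_B", i.e. the
# # causality ROI_B -> ROI_A, becomes chx="ROI_B", chy="ROI_A", so after the swap
# # every row can be read uniformly as chx -> chy. The ROI names are made up.)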
# # Save df
# GC_data_df.to_pickle(os.path.join(Feature_savepath,"GC_data_source_drop_interpol_df.pkl"))
# # Testing if the df was formatted correctly
# expected_GC_values = n_subjects*n_eye_status*n_ch_combinations*n_freq_bands*2 # 2 because it is bidirectional
# assert GC_data_df.shape[0] == expected_GC_values
# # Testing a random GC value
# random_connection = np.random.randint(0,GC_data_df.shape[0])
# test_connection = GC_data_df.iloc[random_connection,:]
# i = np.where(Subject_id==test_connection["Subject_ID"])[0]
# e = np.where(np.array(eye_status)==test_connection["Eye_status"])[0]
# chx = np.where(np.array(ch_names)==test_connection["chx"])[0]
# chy = np.where(np.array(ch_names)==test_connection["chy"])[0]
# f = np.where(np.array(freq_bands_name)==test_connection["Freq_band"])[0]
# value = test_connection["Value"]
# if chx < chy: # the GC array is only lower triangular to save memory
#     assert GC_data[0,i,e,chy,chx,f] == value
# if chx > chy:
#     assert GC_data[1,i,e,chx,chy,f] == value