    #         temp_list1.append(temp_list2)
    #     # Save the timeseries across subjects
    #     Timeseries_data.append(temp_list1) # output [subject][eye][epoch](ch,time)
    
    # # Test a range of AR model orders; each channel combination is fitted with its own model
    # m_orders = np.linspace(1,25,25) # the model orders tested
    # m_orders = np.round(m_orders)
    # n_timepoints = len(Timeseries_data[0][0][0])
    # n_ch_combinations = scipy.special.comb(n_channels,2, exact=True, repetition=False)
    
    # # To reduce computation time I only test representative epochs (one from each 1-min session)
    # # There will be 5 epochs from eyes closed and 5 from eyes open
    # n_rep_epoch = 5
    # # The subjects have different numbers of epochs due to dropped epochs
    # gaps_trials_idx = np.load("Gaps_trials_idx.npy") # time points between sessions
    # # I convert the gap time points to epoch numbers, which are used as representative epochs
    # epoch_idx = np.zeros((n_subjects,n_eye_status,n_rep_epoch), dtype=int) # prepare array
    # epoch_idx[:,:,0:4] = np.round(gaps_trials_idx/n_timepoints,0)-8 # take a representative epoch from each of sessions 1 to 4
    # epoch_idx[:,:,4] = np.round(gaps_trials_idx[:,:,3]/n_timepoints,0)+5 # take a representative epoch from session 5
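    # # Worked example (assumed numbers): with epochs of n_timepoints = 2049 samples, a session
    # # gap at sample 41000 maps to epoch round(41000/2049) - 8 = 12, i.e. the representative
    # # epoch is taken 8 epochs before the session boundary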
    
    # # Check that all representative epoch indices exist
    # for i in range(n_subjects):
    #     EC_index = final_epochs[i].events[:,2] == 1
    #     EO_index = final_epochs[i].events[:,2] == 2
    #     assert np.sum(EC_index) >= epoch_idx[i,0,4]
    #     assert np.sum(EO_index) >= epoch_idx[i,1,4]
    
    # # Prepare model order estimation
    # AIC_arr = np.zeros((len(m_orders),n_subjects,n_eye_status,n_rep_epoch,n_ch_combinations))
    # BIC_arr = np.zeros((len(m_orders),n_subjects,n_eye_status,n_rep_epoch,n_ch_combinations))
    
    # def GC_model_order_est(i):
    #     AIC_arr0 = np.zeros((len(m_orders),n_eye_status,n_rep_epoch,n_ch_combinations))
    #     BIC_arr0 = np.zeros((len(m_orders),n_eye_status,n_rep_epoch,n_ch_combinations))
    #     for e in range(n_eye_status):
    #         n_epochs = STC_eye_data[i][e].shape[0]
    #         N_total = n_timepoints*n_epochs # total number of datapoints for specific eye condition
    #         for ep in range(n_rep_epoch):
    #             epp = epoch_idx[i,e,ep]
    #             for o in range(len(m_orders)):
    #                 order = int(m_orders[o])
    #                 # Make the Granger Causality object
    #                 GCA1 = nta.GrangerAnalyzer(Timeseries_data[i][e][epp-1], order=order,
    #                                            n_freqs=2000)
    #                 # Retrieve error covariance matrices for all combinations (independent of c, so computed once)
    #                 ecov = np.array(list(GCA1.error_cov.values()))
    #                 for c in range(n_ch_combinations):
    #                     # Calculate AIC
    #                     AIC = ntsu.akaike_information_criterion(ecov[c,:,:], p = n_channels,
    #                                                             m=order, Ntotal=N_total)
    #                     # Calculate BIC
    #                     BIC = ntsu.bayesian_information_criterion(ecov[c,:,:], p = n_channels,
    #                                                               m=order, Ntotal=N_total)
    #                     # Save the information criterions
    #                     AIC_arr0[o,e,ep,c] = AIC
    #                     BIC_arr0[o,e,ep,c] = BIC
    
    #     print("{} out of {} finished testing".format(i+1,n_subjects))
    #     return i, AIC_arr0, BIC_arr0
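    
    # # A minimal standalone sketch of the nitime calls above on one synthetic channel pair
    # # (assumptions: the synthetic data, the 512 Hz sampling rate, and all "toy_" names;
    # # it reuses the np/nta/ntsu aliases imported by this script). GrangerAnalyzer fits a
    # # separate bivariate AR model per channel pair, so each error covariance matrix is
    # # 2x2 and the model dimensionality passed to the criteria is p=2 here.
    # from nitime.timeseries import TimeSeries
    # toy_rng = np.random.default_rng(0)
    # toy_ts = TimeSeries(toy_rng.standard_normal((2, 2048)), sampling_rate=512.0)
    # toy_gca = nta.GrangerAnalyzer(toy_ts, order=5, n_freqs=2000)
    # toy_ecov = np.array(list(toy_gca.error_cov.values())) # (n_pairs, 2, 2)
    # toy_aic = ntsu.akaike_information_criterion(toy_ecov[0], p=2, m=5, Ntotal=2048)
    # toy_bic = ntsu.bayesian_information_criterion(toy_ecov[0], p=2, m=5, Ntotal=2048)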
    
    # # Get current time
    # c_time1 = time.localtime()
    # c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1)
    # print(c_time1)
    
    # with concurrent.futures.ProcessPoolExecutor() as executor:
    #     for i, AIC_result, BIC_result in executor.map(GC_model_order_est, range(n_subjects)): # Function and arguments
    #         AIC_arr[:,i] = AIC_result
    #         BIC_arr[:,i] = BIC_result
    
    # # Get current time
    # c_time2 = time.localtime()
    # c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2)
    # print("Started", c_time1, "\nCurrent Time",c_time2)
    
    # # Save the AIC and BIC results
    # np.save(Feature_savepath+"AIC_Source_drop_interpol_GC_model_order.npy", AIC_arr) # (m. order, subject, eye, epoch, combination)
    # np.save(Feature_savepath+"BIC_Source_drop_interpol_GC_model_order.npy", BIC_arr) # (m. order, subject, eye, epoch, combination)
    
    # # Load data
    # AIC_arr = np.load(Feature_savepath+"AIC_Source_drop_interpol_GC_model_order.npy")
    # BIC_arr = np.load(Feature_savepath+"BIC_Source_drop_interpol_GC_model_order.npy")
    
    # # Average across all subjects, eye status, epochs and combinations
    # plt.figure(figsize=(8,6))
    # plt.plot(m_orders, np.nanmean(AIC_arr, axis=(1,2,3,4)), label="AIC")
    # plt.plot(m_orders, np.nanmean(BIC_arr, axis=(1,2,3,4)), label="BIC")
    # plt.title("Average information criteria value")
    # plt.xlabel("Model order (Lag)")
    # plt.legend()
    
    # np.sum(np.isnan(AIC_arr))/AIC_arr.size # around 0.07% NaN due to non-convergence
    # np.sum(np.isnan(BIC_arr))/BIC_arr.size # around 0.07% NaN due to non-convergence
    
    # # If we look at each subject
    # mean_subject_AIC = np.nanmean(AIC_arr, axis=(2,3,4))
    
    # plt.figure(figsize=(8,6))
    # for i in range(n_subjects):
    #     plt.plot(m_orders, mean_subject_AIC[:,i])
    # plt.title("Average AIC for each subject")
    # plt.xlabel("Model order (Lag)")
    
    # mean_subject_BIC = np.nanmean(BIC_arr, axis=(2,3,4))
    # plt.figure(figsize=(8,6))
    # for i in range(n_subjects):
    #     plt.plot(m_orders, mean_subject_BIC[:,i])
    # plt.title("Average BIC for each subject")
    # plt.xlabel("Model order (Lag)")
    
    # # We see that for many subjects the BIC does not reach an interior minimum; it increases monotonically with model order
    
    # # We can look at the distribution of the chosen model order across all fitted models,
    # # i.e. find the order that minimizes each information criterion per model
    # # Use nanargmin: np.argmin would return the position of a NaN when one is present
    # AIC_min_arr = np.nanargmin(AIC_arr, axis=0)
    # BIC_min_arr = np.nanargmin(BIC_arr, axis=0)
    
    # # Plot the distributions of the model order chosen
    # plt.figure(figsize=(8,6))
    # sns.distplot(AIC_min_arr.reshape(-1)+1, kde=False, norm_hist=True,
    #              bins=np.linspace(0.75,30.25,60), label="AIC")
    # plt.ylabel("Frequency density")
    # plt.xlabel("Model order")
    # plt.title("AIC Model Order Estimation")
    
    # plt.figure(figsize=(8,6))
    # sns.distplot(BIC_min_arr.reshape(-1)+1, kde=False, norm_hist=True, color="blue",
    #              bins=np.linspace(0.75,30.25,60), label="BIC")
    # plt.ylabel("Frequency density")
    # plt.xlabel("Model order")
    # plt.title("BIC Model Order Estimation")
    # # The BIC histogram shows that most models select order 1,
    # # which reflects the monotonically increasing BIC curves without convergence to an interior minimum
    # # Thus I will only use AIC
    
    # # There is a bias-variance trade-off with model order [Stokes & Purdon, 2017]
    # # Lower orders are associated with higher bias and higher orders with higher variance
    # # I will choose the median of the chosen model orders across all fitted models
    # # (argmin returns indices into m_orders, so the median index is mapped back to its order)
    # AR_order = int(m_orders[int(np.nanquantile(AIC_min_arr.reshape(-1), q=0.5))])
    # # Order = 5
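    # # E.g. (hypothetical numbers) if the chosen order indices were [2, 3, 4, 4, 8], the median
    # # index is 4 and m_orders[4] = 5.0, so AR_order = 5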
    
    # # Calculate Granger Causality for each subject, eye and epoch
    # Freq_Bands = {"delta": [1.25, 4.0],
    #               "theta": [4.0, 8.0],
    #               "alpha": [8.0, 13.0],
    #               "beta": [13.0, 30.0],
    #               "gamma": [30.0, 49.0]}
    # n_freq_bands = len(Freq_Bands)
    
    # # Pre-allocate memory
    # GC_data = np.zeros((2,n_subjects,n_eye_status,n_channels,n_channels,n_freq_bands))
    
    # def GC_analysis(i):
    #     GC_data0 = np.zeros((2,n_eye_status,n_channels,n_channels,n_freq_bands))
    #     for e in range(n_eye_status):
    #         n_epochs = STC_eye_data[i][e].shape[0]
    #         # Make temporary array to save GC for each epoch
    #         temp_GC_data = np.zeros((2,n_epochs,n_channels,n_channels,n_freq_bands))
    #         for ep in range(n_epochs):
    #             # Fit the AR model
    #             GCA = nta.GrangerAnalyzer(Timeseries_data[i][e][ep], order=AR_order,
    #                                        n_freqs=800) # n_freqs=800 corresponds to a step of 0.25 Hz, the same as the multitaper used for power estimation
    #             for f, (f_lb, f_ub) in enumerate(Freq_Bands.values()):
    #                 # Get indices corresponding to the frequency band of interest
    #                 freq_idx_G = np.where((GCA.frequencies >= f_lb) * (GCA.frequencies < f_ub))[0]
    #                 # Calculate Granger causality quantities
    #                 g_xy = np.mean(GCA.causality_xy[:, :, freq_idx_G], -1) # avg on last dimension
    #                 g_yx = np.mean(GCA.causality_yx[:, :, freq_idx_G], -1) # avg on last dimension
    #                 # Transpose to use same format as con_measurement and save
    #                 temp_GC_data[0,ep,:,:,f] = np.transpose(g_xy)
    #                 temp_GC_data[1,ep,:,:,f] = np.transpose(g_yx)
    
    #         # Average over epochs for each subject, eye condition, direction and frequency band
    #         temp_GC_epoch_mean = np.nanmean(temp_GC_data, axis=1) # log(Sxx/xx_auto_component) is sometimes NaN
    #         # Save to array
    #         GC_data0[:,e,:,:,:] = temp_GC_epoch_mean
    
    #     print("{} out of {} finished analyzing".format(i+1,n_subjects))
    #     return i, GC_data0
    
    # # Get current time
    # c_time1 = time.localtime()
    # c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1)
    # print(c_time1)
    
    # with concurrent.futures.ProcessPoolExecutor() as executor:
    #     for i, GC_result in executor.map(GC_analysis, range(n_subjects)): # Function and arguments
    #         GC_data[:,i] = GC_result
    
    # # Get current time
    # c_time2 = time.localtime()
    # c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2)
    # print("Started", c_time1, "\nCurrent Time",c_time2)
    
    # # Output: GC_data (g_xy/g_yx, subject, eye, chx, chy, freq)
    # # Notice that for g_xy ([0,...]) "chy" Granger-causes "chx",
    # # and for g_yx ([1,...]) "chx" Granger-causes "chy"
    # # This is due to the transposing, which flipped the results onto the lower triangle
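    # # For example (hypothetical indices): GC_data[0,i,e,4,2,f] is the band-averaged strength
    # # with which channel 2 Granger-causes channel 4, while GC_data[1,i,e,4,2,f] is the strength
    # # with which channel 4 Granger-causes channel 2; only lower-triangle entries (chx > chy) are filled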
    
    # # Save the Granger_Causality data
    # np.save(Feature_savepath+"Source_drop_interpol_GrangerCausality_data.npy", GC_data)
    
    # # Theoretically negative GC values should be impossible, but in practice
    # # they can still occur due to problems with model fitting (see Stokes & Purdon, 2017)
    # print("{:.3f}% negative GC values".\
    #       format(np.sum(GC_data[~np.isnan(GC_data)]<0)/np.sum(~np.isnan(GC_data))*100)) # 0.08% negative values
    # # These values cannot be interpreted, but they seem to occur mostly for truly non-causal connections
    # # Hence I set them to 0
    # with np.errstate(invalid="ignore"): # suppress invalid-value warnings from NaN comparisons (NaN < 0 evaluates to False, so NaNs are kept)
    #     GC_data[(GC_data<0)] = 0
    
    # # Save as dataframe for further processing with other features
    # # Convert to Pandas dataframe
    # # Each array dimension becomes an index column; the last column holds the actual values
    # arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, GC_data.shape), indexing="ij"))) + [GC_data.ravel()])
    # GC_data_df = pd.DataFrame(arr, columns = ["GC_direction", "Subject_ID", "Eye_status", "chx", "chy", "Freq_band", "Value"])
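    # # Toy illustration of the meshgrid/ravel long-format trick above (assumed values):
    # toy_arr = np.arange(6).reshape(2, 3)
    # toy_idx = list(map(np.ravel, np.meshgrid(*map(np.arange, toy_arr.shape), indexing="ij")))
    # toy_long = np.column_stack(toy_idx + [toy_arr.ravel()])
    # # each row is [dim0_idx, dim1_idx, value], e.g. toy_long[4] == [1, 1, 4]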
    # # Change from numerical coding to actual values
    # eye_status = list(final_epochs[0].event_id.keys())
    # freq_bands_name = list(Freq_Bands.keys())
    # GC_directions_info = ["chy -> chx", "chx -> chy"]
    
    # index_values = [GC_directions_info,Subject_id,eye_status,ch_names,ch_names,freq_bands_name]
    # for col in range(len(index_values)):
    #     col_name = GC_data_df.columns[col]
    #     for shape in range(GC_data.shape[col]): # note: iterate over the array's shape, not the dataframe's
    #         GC_data_df.loc[GC_data_df.iloc[:,col] == shape,col_name]\
    #         = index_values[col][shape]
    
    # # Add group status
    # Group_status = np.array(["CTRL"]*len(GC_data_df["Subject_ID"]))
    # Group_status[np.array([i in cases for i in GC_data_df["Subject_ID"]])] = "PTSD"
    # # Add to dataframe
    # GC_data_df.insert(3, "Group_status", Group_status)
    
    # # Remove all NaN entries (including the diagonal and upper-triangle entries)
    # GC_data_df = GC_data_df.iloc[np.invert(np.isnan(GC_data_df["Value"].to_numpy()))]
    
    # # Swap ch values for GC_direction chy -> chx (so it is always chx -> chy)
    # chyx_mask = GC_data_df["GC_direction"] == "chy -> chx"
    # tempchy = GC_data_df.loc[chyx_mask,"chy"].copy() # save chy (explicit copy to avoid a chained-indexing view)
    # GC_data_df.loc[chyx_mask,"chy"] = GC_data_df.loc[chyx_mask,"chx"] # overwrite old chy
    # GC_data_df.loc[chyx_mask,"chx"] = tempchy # overwrite chx
    
    # # Drop the GC_direction column
    # GC_data_df = GC_data_df.drop("GC_direction", axis=1)
    
    # # Save df
    # GC_data_df.to_pickle(os.path.join(Feature_savepath,"GC_data_source_drop_interpol_df.pkl"))
    
    # # Testing if df was formatted correctly
    # expected_GC_values = n_subjects*n_eye_status*n_ch_combinations*n_freq_bands*2 # 2 because it is bidirectional
    # assert GC_data_df.shape[0] == expected_GC_values
    # # Testing a random GC value
    # random_connection = np.random.randint(0,GC_data_df.shape[0])
    # test_connection = GC_data_df.iloc[random_connection,:]
    # i = np.where(Subject_id==test_connection["Subject_ID"])[0]
    # e = np.where(np.array(eye_status)==test_connection["Eye_status"])[0]
    # chx = np.where(np.array(ch_names)==test_connection["chx"])[0]
    # chy = np.where(np.array(ch_names)==test_connection["chy"])[0]
    # f = np.where(np.array(freq_bands_name)==test_connection["Freq_band"])[0]
    # value = test_connection["Value"]
    # if chx < chy: # the GC array is lower-triangular only, to save memory
    #     assert GC_data[0,i,e,chy,chx,f] == value
    # if chx > chy:
    #     assert GC_data[1,i,e,chx,chy,f] == value