    # labels.append(FEF_label)
    
    # ### Supplementary eye fields
    # # Located at caudal end of frontal gyrus and upper part of paracentral sulcus
    # label_aparc_names0 = ["G_and_S_paracentral","G_front_sup"]
    # temp_labels = []
    # for i in range(len(label_aparc_names0)):
    #     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
    #     for i2 in range(len(labels_aparc_idx)):
    #         temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())
    
    # pos1 = temp_labels[0].pos
    # pos2 = temp_labels[2].pos
    # distm = scipy.spatial.distance.cdist(pos1,pos2)
    # # Find the closest points between the 2 ROIs
    # l1_idx = np.unique(np.where(distm<np.quantile(distm, 0.0005))[0]) # q chosen to correspond to around 15% of ROI
    # l2_idx = np.unique(np.where(distm<np.quantile(distm, 0.005))[1]) # q chosen to correspond to around 10% of ROI
    # # Notice that superior frontal gyrus is around 4 times bigger than paracentral
    # len(l1_idx)/pos1.shape[0]
    # len(l2_idx)/pos2.shape[0]
    # # Only use upper part
    # z_threshold = 0.06 # visually inspected
    # l1_idx = l1_idx[pos1[l1_idx,2] > z_threshold]
    # l2_idx = l2_idx[pos2[l2_idx,2] > z_threshold]
    
    # SEF_label_p1 = label_subset(temp_labels[0], l1_idx, "SEF")
    # SEF_label_p2 = label_subset(temp_labels[2], l2_idx, "SEF")
    # # Combine the 2 parts
    # SEF_label = SEF_label_p1.__add__(SEF_label_p2)
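    # # (mne.Label supports "+", so this is equivalent to SEF_label_p1 + SEF_label_p2)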
    # SEF_label.name = SEF_label_p1.name
    # # Assign a color
    # SEF_label.color = matplotlib.colors.to_rgba("royalblue")
    # # Append to final list
    # labels.append(SEF_label)
    
    # # Do the same for the right hemisphere
    # pos1 = temp_labels[1].pos
    # pos2 = temp_labels[3].pos
    # distm = scipy.spatial.distance.cdist(pos1,pos2)
    # # Find the closest points between the 2 ROIs
    # l1_idx = np.unique(np.where(distm<np.quantile(distm, 0.0005))[0]) # q chosen to correspond to around 15% of ROI
    # l2_idx = np.unique(np.where(distm<np.quantile(distm, 0.005))[1]) # q chosen to correspond to around 10% of ROI
    # # Notice that superior frontal gyrus is around 4 times bigger than paracentral
    # len(l1_idx)/pos1.shape[0]
    # len(l2_idx)/pos2.shape[0]
    # # Only use upper part
    # z_threshold = 0.06 # visually inspected
    # l1_idx = l1_idx[pos1[l1_idx,2] > z_threshold]
    # l2_idx = l2_idx[pos2[l2_idx,2] > z_threshold]
    
    # SEF_label_p1 = label_subset(temp_labels[1], l1_idx, "SEF")
    # SEF_label_p2 = label_subset(temp_labels[3], l2_idx, "SEF")
    # # Combine the 2 parts
    # SEF_label = SEF_label_p1.__add__(SEF_label_p2)
    # SEF_label.name = SEF_label_p1.name
    # # Assign a color
    # SEF_label.color = matplotlib.colors.to_rgba("royalblue")
    # # Append to final list
    # labels.append(SEF_label)
    
    # ### Posterior cingulate cortex
    # label_aparc_names0 = ["G_cingul-Post-dorsal", "G_cingul-Post-ventral"]
    # temp_labels = []
    # for i in range(len(label_aparc_names0)):
    #     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
    #     for i2 in range(len(labels_aparc_idx)):
    #         temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())
    # labels0 = []
    # for hem in range(2):
    #     PCC_p1 = temp_labels[hem]
    #     for i in range(1,len(temp_labels)//2):
    #         PCC_p2 = temp_labels[hem+2*i]
    #         PCC_p1 = PCC_p1.__add__(PCC_p2)
    #     PCC_p1.name = "PCC-{}".format(PCC_p1.hemi)
    #     labels0.append(PCC_p1)
    # # Combine the 2 hemispheres into 1 label
    # labels.append(labels0[0].__add__(labels0[1]))
    
    # ### Medial prefrontal cortex
    # # From their schematic it looks like rostral 1/4 of superior frontal gyrus
    # label_aparc_names0 = ["G_front_sup"]
    # temp_labels = []
    # for i in range(len(label_aparc_names0)):
    #     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
    #     for i2 in range(len(labels_aparc_idx)):
    #         temp_labels0 = labels_aparc[labels_aparc_idx[i2]].copy()
    #         temp_labels0 = temp_labels0.split(4, subjects_dir=subjects_dir)[3]
    #         temp_labels0.name = "mPFC-{}".format(temp_labels0.hemi)
    #         temp_labels.append(temp_labels0)
    # # Combine the 2 hemispheres into 1 label
    # labels.append(temp_labels[0].__add__(temp_labels[1]))
    
    # ### Angular gyrus
    # label_aparc_names0 = ["G_pariet_inf-Angular"]
    # for i in range(len(label_aparc_names0)):
    #     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
    #     for i2 in range(len(labels_aparc_idx)):
    #         temp_labels = labels_aparc[labels_aparc_idx[i2]].copy()
    #         temp_labels.name = "ANG-{}".format(temp_labels.hemi)
    #         labels.append(temp_labels)
    
    # ### Posterior middle frontal gyrus
    # label_aparc_names0 = ["G_front_middle"]
    # for i in range(len(label_aparc_names0)):
    #     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
    #     for i2 in range(len(labels_aparc_idx)):
    #         temp_labels = labels_aparc[labels_aparc_idx[i2]].copy()
    #         temp_labels = temp_labels.split(2, subjects_dir=subjects_dir)[0]
    #         temp_labels.name = "PMFG-{}".format(temp_labels.hemi)
    #         labels.append(temp_labels)
    
    # ### Inferior parietal lobule
    # # From their parcellation figure it seems to be the rostral angular gyrus and the posterior supramarginal gyrus
    # label_aparc_names0 = ["G_pariet_inf-Angular","G_pariet_inf-Supramar"]
    # temp_labels = []
    # for i in range(len(label_aparc_names0)):
    #     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
    #     for i2 in range(len(labels_aparc_idx)):
    #         temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())
    # # Split angular in 2 and get rostral part
    # temp_labels[0] = temp_labels[0].split(2, subjects_dir=subjects_dir)[1]
    # temp_labels[1] = temp_labels[1].split(2, subjects_dir=subjects_dir)[1]
    # # Split supramarginal in 2 and get posterior part
    # temp_labels[2] = temp_labels[2].split(2, subjects_dir=subjects_dir)[0]
    # temp_labels[3] = temp_labels[3].split(2, subjects_dir=subjects_dir)[0]
    
    # for hem in range(2):
    #     PCC_p1 = temp_labels[hem]
    #     for i in range(1,len(temp_labels)//2):
    #         PCC_p2 = temp_labels[hem+2*i]
    #         PCC_p1 = PCC_p1.__add__(PCC_p2)
    #     PCC_p1.name = "IPL-{}".format(PCC_p1.hemi)
    #     labels.append(PCC_p1)
    
    # ### Orbital gyrus
    # # From their figure it seems to correspond to orbital part of inferior frontal gyrus
    # label_aparc_names0 = ["G_front_inf-Orbital"]
    # for i in range(len(label_aparc_names0)):
    #     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
    #     for i2 in range(len(labels_aparc_idx)):
    #         temp_labels = labels_aparc[labels_aparc_idx[i2]].copy()
    #         temp_labels.name = "ORB-{}".format(temp_labels.hemi)
    #         labels.append(temp_labels)
    
    # ### Middle temporal gyrus
    # # From their figure it seems to only be 1/4 of MTG at the 2nd to last caudal part
    # label_aparc_names0 = ["G_temporal_middle"]
    # for i in range(len(label_aparc_names0)):
    #     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
    #     for i2 in range(len(labels_aparc_idx)):
    #         temp_labels = labels_aparc[labels_aparc_idx[i2]].copy()
    #         temp_labels = temp_labels.split(4, subjects_dir=subjects_dir)[1]
    #         temp_labels.name = "MTG-{}".format(temp_labels.hemi)
    #         labels.append(temp_labels)
    
    # ### Anterior middle frontal gyrus
    # label_aparc_names0 = ["G_front_middle"]
    # for i in range(len(label_aparc_names0)):
    #     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
    #     for i2 in range(len(labels_aparc_idx)):
    #         temp_labels = labels_aparc[labels_aparc_idx[i2]].copy()
    #         temp_labels = temp_labels.split(2, subjects_dir=subjects_dir)[1]
    #         temp_labels.name = "AMFG-{}".format(temp_labels.hemi)
    #         labels.append(temp_labels)
    
    # ### Insula
    # label_aparc_names0 = ["G_Ins_lg_and_S_cent_ins","G_insular_short"]
    # temp_labels = []
    # for i in range(len(label_aparc_names0)):
    #     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
    #     for i2 in range(len(labels_aparc_idx)):
    #         temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())
    # for hem in range(2):
    #     PCC_p1 = temp_labels[hem]
    #     for i in range(1,len(temp_labels)//2):
    #         PCC_p2 = temp_labels[hem+2*i]
    #         PCC_p1 = PCC_p1.__add__(PCC_p2)
    #     PCC_p1.name = "INS-{}".format(PCC_p1.hemi)
    #     labels.append(PCC_p1)
    
    # ### (Dorsal) Anterior Cingulate Cortex
    # label_aparc_names0 = ["G_and_S_cingul-Ant"]
    # temp_labels = []
    # for i in range(len(label_aparc_names0)):
    #     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
    #     for i2 in range(len(labels_aparc_idx)):
    #         temp_labels.append(labels_aparc[labels_aparc_idx[i2]].copy())
    #         temp_labels[-1].name = "ACC-{}".format(temp_labels[-1].hemi)
    # # Combine the 2 hemispheres into 1 label
    # labels.append(temp_labels[0].__add__(temp_labels[1]))
    
    # ### Supramarginal Gyrus
    # label_aparc_names0 = ["G_pariet_inf-Supramar"]
    # for i in range(len(label_aparc_names0)):
    #     labels_aparc_idx = [labels_aparc_names.index(l) for l in labels_aparc_names if l.startswith(label_aparc_names0[i])]
    #     for i2 in range(len(labels_aparc_idx)):
    #         temp_labels = labels_aparc[labels_aparc_idx[i2]].copy()
    #         temp_labels.name = "SUP-{}".format(temp_labels.hemi)
    #         labels.append(temp_labels)
    
    # print("{} ROIs have been defined".format(len(labels)))
    
    # # # Visualize positions
    # # fig = plt.figure()
    # # ax = fig.add_subplot(111, projection="3d")
    # # for i in range(0,3):
    # #     temp_pos = temp_labels[i].pos
    # #     ax.scatter(temp_pos[:,0],temp_pos[:,1],temp_pos[:,2], marker="o", alpha=0.1)
    # # # Add to plot
    # # ax.scatter(labels[-1].pos[:,0],labels[-1].pos[:,1],labels[-1].pos[:,2], marker="o")
    
    # # # Visualize the labels
    # # # temp_l = labels_aparc[labels_aparc_idx[0]]
    # # temp_l = labels[-2]
    # # l_stc = stc[100].in_label(temp_l)
    # # l_stc.vertices
    
    # # l_stc.plot(**surfer_kwargs)
    
    # # Save the annotation file
    # with open("custom_aparc2009_Li_et_al_2022.pkl", "wb") as file:
    #     pickle.dump(labels, file)
    
    # # %% Calculate orthogonalized power envelope connectivity in source space
    # # Using non-interpolated channels
    # # Updated 22/1-2021 to use delta = 1/81 and the assumption of an
    # # uncorrelated, equal-variance noise covariance matrix for the channels
    
    # # Load
    # with open("custom_aparc2009_Li_et_al_2022.pkl", "rb") as file:
    #     labels = pickle.load(file)
    # label_names = [label.name for label in labels]
    
    # # Define function to estimate PEC
    # def PEC_estimation(x, freq_bands, sfreq=200):
    #     """
    #     This function takes a source timeseries signal x and performs:
    #         1. Bandpass filtering
    #         2. Hilbert transform to yield analytical signal
    #         3. Compute all to all connectivity by iteratively computing for each pair
    #             a. Orthogonalization
    #             b. Computing power envelopes by squaring the signals |x|^2
    #             c. Log-transform to enhance normality
    #             d. Pearson's correlation between each pair
    #             e. Fisher's r-to-z transform to enhance normality
    #     The code has been optimized with inspiration from MNE-Python's function:
    #     mne.connectivity.envelope_correlation.
    
    #     In MNE-Python versions < 0.22 there was a bug, but after the fix in 0.22
    #     the MNE function is equivalent to my implementation. They do not use an epsilon,
    #     which gives the same result but with a RuntimeWarning about log(0)
    #     (see the usage sketch after this function for a simple way to cross-check)
    
    #     IMPORTANT NOTE:
    #         Filtering introduces artifacts at the first and last timepoints
    #     The values are very low, more than a factor 1e-12 smaller than the others
    #     If they are not removed, they will heavily influence Pearson's
    #     correlation, as it is sensitive to outliers
    
    #     Inputs:
    #         x - The signal in source space as np.array with shape (ROIs,Timepoints)
    #         freq_bands - The frequency bands of interest as a dictionary e.g.
    #                      {"alpha": [8.0, 13.0], "beta": [13.0, 30.0]}
    #         sfreq - The sampling frequency in Hertz
    
    #     Output:
    #         The pairwise connectivity matrix
    #     """
    #     n_roi, n_timepoints = x.shape
    #     n_freq_bands = len(freq_bands)
    
    #     epsilon = 1e-100 # small value to prevent log(0) errors
    
    #     # Filter the signal in the different freq bands
    #     PEC_con0 = np.zeros((n_roi,n_roi,n_freq_bands))
    #     for fname, frange in freq_bands.items():
    #         fmin, fmax = [float(interval) for interval in frange]
    #         signal_filtered = mne.filter.filter_data(x, sfreq, fmin, fmax,
    #                                           fir_design="firwin", verbose=0)
    #         # Filtering on finite signals will yield very low values for the first
    #         # and last timepoints, which can create outliers. E.g. 1e-29 compared to 1e-14
    #         # Outlier-sensitive methods, like Pearson's correlation, are therefore
    #         # heavily affected, and this systematic error is removed by dropping
    #         # the first and last timepoints
    #         signal_filtered = signal_filtered[:,1:-1]
    
    #         # Hilbert transform
    #         analytic_signal = scipy.signal.hilbert(signal_filtered)
    #         # I will use x and y to keep track of orthogonalization
    #         x0 = analytic_signal
    #         # Get power envelope
    #         x0_mag = np.abs(x0)
    #         # Get scaled conjugate used for orthogonalization estimation
    #         x0_conj_scaled = x0.conj()
    #         x0_conj_scaled /= x0_mag
    #         # Take square power envelope
    #         PEx = np.square(x0_mag)
    #         # Take log transform
    #         lnPEx = np.log(PEx+epsilon)
    #         # Remove mean for Pearson correlation calculation
    #         lnPEx_nomean = lnPEx - np.mean(lnPEx, axis=-1, keepdims=True) # normalize each roi timeseries
    #         # Get std for Pearson correlation calculation
    #         lnPEx_std = np.std(lnPEx, axis=-1)
    #         lnPEx_std[lnPEx_std == 0] = 1 # Prevent std = 0 problems
    #         # Prepare con matrix
    #         con0 = np.zeros((n_roi,n_roi))
    #         for roi_r, y0 in enumerate(x0): # for each y0
    #             # Calculate orthogonalized signal y with respect to x for all x
    #             # Using y_ort = imag(y*x_conj/|x|)
    #             # I checked the formula in temp_v3 and it works as intended
    #             # I want to orthogonalize element wise for each timepoint
    #             y0_ort = (y0*x0_conj_scaled).imag
    #             # Here y0_ort.shape = (n_roi, n_timepoints)
    #             # So y is current roi and the first axis gives each x it is orthogonalized to
    #             # Take the abs to get power envelope
    #             y0_ort = np.abs(y0_ort)
    #             # Prevent log(0) error when calculating y_ort on y
    #             y0_ort[roi_r] = 1. # this will be 0 after mean subtraction
    #             # Take square power envelope
    #             PEy = np.square(y0_ort) # squared power envelope
    #             # Take log transform
    #             lnPEy = np.log(PEy+epsilon)
    #             # Remove mean for pearson correlation calculation
    #             lnPEy_nomean = lnPEy - np.mean(lnPEy, axis=-1, keepdims=True)
    #             # Get std for Pearson correlation calculation
    #             lnPEy_std = np.std(lnPEy, axis=-1)
    #             lnPEy_std[lnPEy_std == 0] = 1.
    #             # Pearson correlation is the expectation of X_nomean * Y_nomean for each time series divided by the standard deviations
    #             PEC = np.mean(lnPEx_nomean*lnPEy_nomean, axis=-1)
    #             PEC /= lnPEx_std
    #             PEC /= lnPEy_std
    #             con0[roi_r] = PEC
    #         # The con0 connectivity matrix should be read as correlation between
    #         # orthogonalized y (row number) and x (column number)
    #         # It is not symmetrical, as cor(roi2_ort, roi1) is not cor(roi1_ort, roi2)
    #         # To make it symmetrical the average of the absolute correlation
    #         # of the 2 possibilities for each pair is taken
    #         con0 = np.abs(con0)
    #         con0 = (con0.T+con0)/2.
    #         # Fisher's z transform - which is equivalent to arctanh
    #         con0 = np.arctanh(con0)
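    #         # (arctanh(r) is the Fisher transform: z = 0.5*log((1+r)/(1-r)))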
    #         # The diagonal is not 0, as a small epsilon value was used to avoid
    #         # numerical errors with log(0). Thus the diagonal is explicitly set to 0
    #         con0[np.diag_indices(n_roi)] = 0.
    
    #         # Save to array
    #         PEC_con0[:,:,list(freq_bands.keys()).index(fname)] = con0
    #     return PEC_con0
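
    # # Minimal usage sketch (hypothetical toy data, not part of the pipeline):
    # # PEC_estimation expects an array of shape (ROIs, timepoints) and returns (ROIs, ROIs, freq bands)
    # # toy_x = np.random.randn(3, 800)                                # e.g. 3 ROIs, 4 s at 200 Hz
    # # toy_con = PEC_estimation(toy_x, {"alpha": [8.0, 13.0]}, sfreq=200)
    # # assert toy_con.shape == (3, 3, 1)
    # # As a sanity check, the output can be compared against MNE-Python's
    # # mne.connectivity.envelope_correlation (>= 0.22), which implements the same measure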
    
    # # Prepare variables
    # Freq_Bands = {"delta": [1.25, 4.0],
    #               "theta": [4.0, 8.0],
    #               "alpha": [8.0, 13.0],
    #               "beta": [13.0, 30.0],
    #               "gamma": [30.0, 49.0]}
    # n_freq_bands = len(Freq_Bands)
    # n_roi = len(labels)
    
    # # Get current time
    # c_time1 = time.localtime()
    # c_time1 = time.strftime("%a %d %b %Y %H:%M:%S", c_time1)
    # print(c_time1)
    
    # # PEC analysis
    # PEC_data_list = [0]*n_subjects
    # STCs_list = [0]*n_subjects
    
    # # Using the inverse operator as a generator interferes with concurrent processes
    # # If I run it for multiple subjects I run out of RAM
    # # Thus concurrent processes are used inside the for loop
    # def PEC_analysis(input_args): # iterable epoch number and corresponding ts
    #     i2, ts = input_args
    #     # Estimate PEC
    #     PEC_con0 = PEC_estimation(ts, Freq_Bands, sfreq)
    #     print("Finished {} out of {} epochs".format(i2+1,n_epochs))
    #     return i2, PEC_con0, ts
    
    # for i in range(n_subjects):
    #     n_epochs, n_ch, n_timepoints = source_epochs[i].get_data().shape
    #     # Use different forward solutions depending on number of channels
    #     cur_subject_id = Subject_id[i]
    #     fwd = fwds[i]
    
    #     # Using the assumption of equal variance and no correlations, I make a diagonal noise covariance matrix
    #     # Using the default option of 0.2 µV std for EEG data
    #     noise_cov = mne.make_ad_hoc_cov(source_epochs[i].info, None)
    
    #     # Make inverse operator
    #     # Using default depth parameter = 0.8 and free orientation (loose = 1)
    #     inverse_operator = mne.minimum_norm.make_inverse_operator(source_epochs[i].info,
    #                                                               fwd, noise_cov,
    #                                                               loose = 1, depth = 0.8,
    #                                                               verbose = 0)
    #     src_inv = inverse_operator["src"]
    #     # Compute inverse solution and retrieve time series for each label
    #     # Preallocate memory
    #     label_ts = np.full((n_epochs,len(labels),n_timepoints),np.nan)
    #     # Define regularization
    #     snr = 9 # Zhang et al., 2020 used delta = 1/81, which corresponds to lambda2 = 1/snr**2 (inverse SNR squared)
    #     # A for loop is used for each label due to memory issues when doing all labels at the same time
    #     for l in range(len(labels)):
    #         stc = mne.minimum_norm.apply_inverse_epochs(source_epochs[i],inverse_operator,
    #                                                     lambda2 = 1/(snr**2),
    #                                                     label = labels[l],
    #                                                     pick_ori = "vector",
    #                                                     return_generator=False,
    #                                                     method = "MNE", verbose = 0)
    #         # Use PCA to reduce the 3 orthogonal directions to 1 principal direction with max power
    #         # There can be ambiguity about the orientation, thus the direction pointing most
    #         # "normal", i.e. closest to 90 degrees to the skull, is used
    #         stc_pca = [0]*len(stc)
    #         for ep in range(n_epochs):
    #             stc_pca[ep], pca_dir = stc[ep].project(directions="pca", src=src_inv)
    #         # Get mean time series for the whole label
    #         temp_label_ts = mne.extract_label_time_course(stc_pca, labels[l], src_inv, mode="mean_flip",
    #                                          return_generator=False, verbose=0)
    #         # Save to array
    #         label_ts[:,l,:] = np.squeeze(np.array(temp_label_ts))
    #         print("Finished estimating STC for {} out of {} ROIs".format(l+1,len(labels)))
    
    #     # Free up memory
    #     del stc
    
    #     # Prepare variables
    #     sfreq=source_epochs[i].info["sfreq"]
    #     n_epochs = len(source_epochs[i])
    #     # Estimate the pairwise PEC for each epoch
    #     PEC_con_subject = np.zeros((n_epochs,n_roi,n_roi,n_freq_bands))
    #     stcs0 = np.zeros((n_epochs,n_roi,int(sfreq)*4)) # 4s epochs
    #     # Make list of arguments to pass into PEC_analysis using the helper func
    #     args = []
    #     for i2 in range(n_epochs):
    #         args.append((i2,label_ts[i2]))
    
    #     with concurrent.futures.ProcessPoolExecutor(max_workers=16) as executor:
    #         for i2, PEC_result, stc_result in executor.map(PEC_analysis, args): # Function and arguments
    #             PEC_con_subject[i2] = PEC_result
    #             stcs0[i2] = stc_result
    
    #     # Save to list
    #     PEC_data_list[i] = PEC_con_subject # [subject](epoch,ch,ch,freq)
    #     STCs_list[i] = stcs0 # [subject][epoch,roi,timepoint]
    
    #     # Print progress
    #     print("Finished {} out of {} subjects".format(i+1,n_subjects))
    
    # # Get current time
    # c_time2 = time.localtime()
    # c_time2 = time.strftime("%a %d %b %Y %H:%M:%S", c_time2)
    # print("Started", c_time1, "\nFinished",c_time2)
    
    # with open(Feature_savepath+"PEC_each_epoch_drop_interpol_ch_fix_snr.pkl", "wb") as file:
    #     pickle.dump(PEC_data_list, file)
    # with open(Feature_savepath+"STCs_each_epoch_drop_interpol_ch_fix_snr.pkl", "wb") as file:
    #     pickle.dump(STCs_list, file)
    
    # # # # Load
    # # with open(Feature_savepath+"PEC_each_epoch_drop_interpol_ch_fix_snr.pkl", "rb") as file:
    # #     PEC_data_list = pickle.load(file)
    
    # # # Load
    
    # # with open(Feature_savepath+"STCs_each_epoch_drop_interpol_ch_fix_snr.pkl", "rb") as file:
    # #     STCs_list = pickle.load(file)
    
    # # Average over eye status
    # eye_status = list(source_epochs[0].event_id.keys())
    # n_eye_status = len(eye_status)
    # pec_data = np.zeros((n_subjects,n_eye_status,n_roi,n_roi,n_freq_bands))
    # for i in range(n_subjects):
    #     # Get indices for eyes open and closed
    #     EC_index = source_epochs[i].events[:,2] == 1
    #     EO_index = source_epochs[i].events[:,2] == 2
    #     # Average over the indices and save to array
    #     pec_data[i,0] = np.mean(PEC_data_list[i][EC_index], axis=0)
    #     pec_data[i,1] = np.mean(PEC_data_list[i][EO_index], axis=0)
    #     # Only use the lower triangle, as the diagonal should be 0 (or very small due to numerical errors)
    #     # and the matrix is symmetric
    #     for f in range(n_freq_bands):
    #         pec_data[i,0,:,:,f] = np.tril(pec_data[i,0,:,:,f],k=-1)
    #         pec_data[i,1,:,:,f] = np.tril(pec_data[i,1,:,:,f],k=-1)
    
    # # Also save in dataframe format for feature selection
    # # Convert to a Pandas dataframe
    # # Each dimension becomes a column of indices and the last column holds the actual values
    # arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, pec_data.shape), indexing="ij"))) + [pec_data.ravel()])
    # pec_data_df = pd.DataFrame(arr, columns = ["Subject_ID", "Eye_status", "chx", "chy", "Freq_band", "Value"])
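    # # E.g. for a hypothetical 2x2 array np.array([[10, 11], [12, 13]]) this construction yields
    # # one row per element, index columns followed by the value:
    # # [[0, 0, 10], [0, 1, 11], [1, 0, 12], [1, 1, 13]]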
    # # Change from numerical coding to actual values
    # eye_status = list(source_epochs[0].event_id.keys())
    # freq_bands_name = list(Freq_Bands.keys())
    # label_names = [label.name for label in labels]
    
    # index_values = [Subject_id,eye_status,label_names,label_names,freq_bands_name]
    # for col in range(len(index_values)):
    #     col_name = pec_data_df.columns[col]
    #     for shape in range(pec_data.shape[col]): # notice not dataframe but the array
    #         pec_data_df.loc[pec_data_df.iloc[:,col] == shape,col_name]\
    #         = index_values[col][shape]
    
    # # Add group status
    # Group_status = np.array(["CTRL"]*len(pec_data_df["Subject_ID"]))
    # Group_status[np.array([i in cases for i in pec_data_df["Subject_ID"]])] = "PTSD"
    # # Add to dataframe
    # pec_data_df.insert(3, "Group_status", Group_status)
    
    # # Remove all diagonal and upper-matrix entries by removing zeros
    # pec_data_df = pec_data_df.iloc[pec_data_df["Value"].to_numpy().nonzero()]
    
    # # Save df
    # pec_data_df.to_pickle(os.path.join(Feature_savepath,"pec_data_drop_interpol_ch_df.pkl"))
    
    # # %% Sparse clustering of PEC for subtyping PTSD group
    # # They did it for both eye status together, so all data in one matrix
    # # Load PEC df
    # # pec_data_df = pd.read_pickle(os.path.join(Feature_savepath,"pec_data_df.pkl"))
    # pec_data_df = pd.read_pickle(os.path.join(Feature_savepath,"pec_data_drop_interpol_ch_df.pkl"))
    
    # # Convert to wide format
    # # Make function to add measurement column for indexing
    # def add_measurement_column(df, measurement = "Text"):
    #     dummy_variable = [measurement]*df.shape[0]
    #     df.insert(1, "Measurement", dummy_variable)
    #     return df
    # # Make function to convert column tuple to string
    # def convertTupleHeader(header):
    #     return "_".join(list(header))
    
    # # Prepare overall dataframe
    # PEC_df = pd.DataFrame(Subject_id, columns = ["Subject_ID"])
    # # Add PEC
    # pec_data_df = add_measurement_column(pec_data_df, "PEC")
    # temp_df = pec_data_df.pivot_table(index="Subject_ID",columns=["Measurement",
    #                                     "Eye_status", "chx", "chy",
    #                                     "Freq_band"], dropna=True,
    #                                values="Value").reset_index(drop=True)
    # # check NaN is properly dropped and subject index is correct
    # assert pec_data_df.shape[0] == np.prod(temp_df.shape)
    # test1 = pec_data_df.iloc[np.random.randint(n_subjects),:]
    # assert test1["Value"] ==\
    #     temp_df[test1[1]][test1[2]][test1[3]][test1[5]][test1[6]][Subject_id.index(test1[0])]
    # # Fix column names
    # temp_df.columns = [convertTupleHeader(temp_df.columns[i]) for i in range(len(temp_df.columns))]
    
    # PEC_df = pd.concat([PEC_df,temp_df], axis=1)
    
    # # Add group status
    # Groups = ["CTRL", "PTSD"]
    # Group_status = np.array([0]*PEC_df.shape[0]) # CTRL = 0
    # Group_status[np.array([i in cases for i in PEC_df["Subject_ID"]])] = 1 # PTSD = 1
    # PEC_df.insert(1, "Group_status", Group_status)
    
    # # Only use PTSD patient group
    # PEC_df2 = PEC_df.loc[PEC_df["Group_status"]==1,:]
    
    # Subject_info_cols = ["Subject_ID","Group_status"]
    
    # # Use gridsearch and permutations to estimate the gap statistic and use it to
    # # determine the number of clusters and sparsity s
    # # I will use 100 permutations and test 2 to 6 clusters as in Zhang et al., 2020
    # # Determining the gap statistic for 1 cluster gives an error (also in the R package)
    # max_clusters = 6
    # n_sparsity_feat = 20
    # perm_res = []
    # for k in range(1,max_clusters):
    #     # Cannot permute with 1 cluster
    #     n_clusters = k+1
    #     x = np.array(PEC_df2.copy().drop(Subject_info_cols, axis=1))
    #     perm = pysparcl.cluster.permute_modified(x, k=n_clusters, verbose=True,
    #                                              nvals=n_sparsity_feat, nperms=100)
    #     perm_res.append(perm)
    
    # # Save the results
    # with open(Feature_savepath+"PEC_drop_interpol_ch_kmeans_perm.pkl", "wb") as file:
    #     pickle.dump(perm_res, file)
    
    # # # Load
    # # with open(Feature_savepath+"PEC_drop_interpol_ch_kmeans_perm.pkl", "rb") as file:
    # #     perm_res = pickle.load(file)
    
    # # Convert results to array
    # perm_res_arr = np.zeros((len(perm_res)*n_sparsity_feat,4))
    # for i in range(len(perm_res)):
    #     _, gaps, sdgaps, wbounds, _ = perm_res[i].values()
    #     for i2 in range(n_sparsity_feat):
    #         perm_res_arr[20*i+i2,0] = i+2 # cluster size
    #         perm_res_arr[20*i+i2,1] = gaps[i2] # gap statistic
    #         perm_res_arr[20*i+i2,2] = sdgaps[i2] # gap statistic std
    #         perm_res_arr[20*i+i2,3] = wbounds[i2] # sparsity feature s
    
    # # For each sparsity s, determine the best k using the one-standard-error criterion
    # # Meaning that for a fixed s, the smallest value of k is chosen
    # # that fulfills Gap(k) >= Gap(k+1) - std(k+1)
    # def one_standard_deviation_search(gaps, std):
    #     best_gaps = np.argmax(gaps)
    #     current_gaps = gaps[best_gaps]
    #     current_std = std[best_gaps]
    #     current_gaps_idx = best_gaps
    #     while (gaps[current_gaps_idx-1] >= current_gaps - current_std):
    #         if current_gaps_idx == 0:
    #             break
    #         else:
    #             current_gaps_idx -= 1
    #             current_gaps = gaps[current_gaps_idx]
    #             current_std = std[current_gaps_idx]
    #     out = current_gaps, current_std, current_gaps_idx
    #     return out
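
    # # Small worked example (hypothetical numbers): with gaps = [1.0, 1.4, 1.5, 1.45] and
    # # std = [0.1, 0.1, 0.1, 0.1] the maximum gap is at index 2, but gaps[1] = 1.4 >= 1.5 - 0.1,
    # # so the search walks down one step and returns index 1, the smaller k still within one std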
    
    # best_ks = np.zeros((n_sparsity_feat, 2))
    # all_s = np.unique(perm_res_arr[:,3])
    # plt.figure(figsize=(12,12))
    # for i2 in range(n_sparsity_feat):
    #     current_s = all_s[i2]
    #     gaps = perm_res_arr[perm_res_arr[:,3] == current_s,1]
    #     std = perm_res_arr[perm_res_arr[:,3] == current_s,2]
    #     _, _, idx = one_standard_deviation_search(gaps, std)
    #     # Save to array
    #     best_ks[i2,0] = current_s
    #     best_ks[i2,1] = perm_res_arr[perm_res_arr[:,3] == current_s,0][idx]
    #     # Plot gap
    #     plt.errorbar(perm_res_arr[perm_res_arr[:,3] == current_s,0].astype("int"),
    #              gaps, yerr=std, capsize=5, label = np.round(current_s,3))
    # plt.title("Gap statistic for different fixed s")
    # plt.legend(loc=1)
    # plt.xlabel("Number of clusters")
    # plt.ylabel("Gap statistic")
    
    # best_k = int(scipy.stats.mode(best_ks[:,1])[0])
    
    # # Determine s using fixed k as lowest s within 1 std of max gap statistic
    # # According to Witten & Tibshirani, 2010
    # best_gaps_idx = np.argmax(perm_res_arr[perm_res_arr[:,0] == best_k,1])
    # best_gaps = perm_res_arr[perm_res_arr[:,0] == best_k,1][best_gaps_idx]
    # best_gaps_std = perm_res_arr[perm_res_arr[:,0] == best_k,2][best_gaps_idx]
    # one_std_crit = perm_res_arr[perm_res_arr[:,0] == best_k,1]>=best_gaps-best_gaps_std
    
    # best_s = np.array([perm_res_arr[perm_res_arr[:,0] == best_k,3][one_std_crit][0]])
    
    # # Perform clustering with k clusters
    # x = np.array(PEC_df2.copy().drop(Subject_info_cols, axis=1))
    # sparcl = pysparcl.cluster.kmeans(x, k=best_k, wbounds=best_s)[0]
    
    # # Save the results
    # with open(Feature_savepath+"PEC_drop_interpol_ch_sparse_kmeans.pkl", "wb") as file:
    #     pickle.dump(sparcl, file)
    
    # # Get overview of the features chosen and summarize feature type with countplot
    # nonzero_idx = sparcl["ws"].nonzero()
    # sparcl_features = PEC_df2.copy().drop(Subject_info_cols, axis=1).columns[nonzero_idx]
    
    # # Prepare variables
    # Freq_Bands = {"delta": [1.25, 4.0],
    #               "theta": [4.0, 8.0],
    #               "alpha": [8.0, 13.0],
    #               "beta": [13.0, 30.0],
    #               "gamma": [30.0, 49.0]}
    # n_freq_bands = len(Freq_Bands)
    # eye_status = list(source_epochs[0].event_id.keys())
    # n_eye_status = len(eye_status)
    
    # sparcl_feat = []
    # sparcl_feat_counts = []
    # for e in range(n_eye_status):
    #     ee = eye_status[e]
    #     for f in range(n_freq_bands):
    #         ff = list(Freq_Bands.keys())[f]
    #         temp_feat = sparcl_features[sparcl_features.str.contains(("_"+ee))]
    #         temp_feat = temp_feat[temp_feat.str.contains(("_"+ff))]
    #         # Save to list
    #         sparcl_feat.append(temp_feat)
    #         sparcl_feat_counts.append(["{}_{}".format(ee,ff), len(temp_feat)])
    
    # # Convert the list to dataframe to use in countplot
    # sparcl_feat_counts_df = pd.DataFrame(columns=["Eye_status", "Freq_band"])
    # for i in range(len(sparcl_feat_counts)):
    #     # If this feature type does not exist, then skip it
    #     if sparcl_feat_counts[i][1] == 0:
    #         continue
    #     ee, ff = sparcl_feat_counts[i][0].split("_")
    #     counts = sparcl_feat_counts[i][1]
    #     temp_df = pd.DataFrame({"Eye_status":np.repeat(ee,counts),
    #                             "Freq_band":np.repeat(ff,counts)})
    #     sparcl_feat_counts_df = sparcl_feat_counts_df.append(temp_df, ignore_index=True)
    
    # # Fix Freq_band categorical order
    # cat_type = pd.CategoricalDtype(categories=list(Freq_Bands.keys()), ordered=True)
    # sparcl_feat_counts_df["Freq_band"] = sparcl_feat_counts_df["Freq_band"].astype(cat_type)
    
    # plt.figure(figsize=(8,8))
    # g = sns.countplot(y="Freq_band", hue="Eye_status", data=sparcl_feat_counts_df)
    # plt.title("PEC Sparse K-means features")
    # plt.xlabel("Number of non-zero weights")
    # plt.ylabel("Frequency Band")
    
    # # %% Functional connectivity in source space
    # # MNE's implementation of PLV and wPLI computes phase consistency across trials (epochs), e.g. for ERPs
    # # I'll use my own manually implemented PLV and wPLI across time and then average across epochs
    # # Notice that the new MNE-Connectivity library now also takes phase across time (a hedged sketch is shown below)
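    # # A hedged sketch of that newer API (an assumption - mne_connectivity is not used in this
    # # script and exact argument names may differ between versions), using the variables defined below:
    # # from mne_connectivity import spectral_connectivity_time
    # # con = spectral_connectivity_time(temp_STC, freqs=freq_centers, method="plv", sfreq=sfreq, fmin=fmin, fmax=fmax, faverage=True)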
    
    # sfreq = final_epochs[0].info["sfreq"]
    # # An error occurs when using fewer than 5 cycles for spectrum estimation
    # # 1 Hz is too low with an epoch length of 4 s, thus fmin was changed to 1.25 Hz for delta
    # Freq_Bands = {"delta": [1.25, 4.0],
    #               "theta": [4.0, 8.0],
    #               "alpha": [8.0, 13.0],
    #               "beta": [13.0, 30.0],
    #               "gamma": [30.0, 49.0]}
    # n_freq_bands = len(Freq_Bands)
    # freq_centers = np.array([2.5,6,10.5,21.5,40])
    # # Convert to tuples for the mne function
    # fmin=tuple([list(Freq_Bands.values())[f][0] for f in range(len(Freq_Bands))])
    # fmax=tuple([list(Freq_Bands.values())[f][1] for f in range(len(Freq_Bands))])
    
    # # Make linspace array for morlet waves
    # freq_centers = np.arange(fmin[0],fmax[-1]+0.25,0.25)
    # # Prepare Morlets
    # morlets = mne.time_frequency.tfr.morlet(sfreq,freq_centers,n_cycles=3)
    
    # # Make freqs array for indexing
    # freqs0 = [0]*n_freq_bands
    # for f in range(n_freq_bands):
    #     freqs0[f] = freq_centers[(freq_centers>=fmin[f]) & (freq_centers<=fmax[f])]
    
    # # The in-built connectivity function gives an (n_channel, n_channel, freqs) output
    # # For loop over subject ID and eye status is implemented
    # n_subjects = len(Subject_id)
    # eye_status = list(final_epochs[0].event_id.keys())
    # n_eye_status = len(eye_status)
    # ch_names = final_epochs[0].info["ch_names"]
    
    # # Load source labels
    # with open("custom_aparc2009_Li_et_al_2022.pkl", "rb") as file:
    #     labels = pickle.load(file)
    # label_names = [label.name for label in labels]
    # n_sources = len(label_names)
    
    # # Connectivity methods
    # connectivity_methods = ["coh","imcoh","plv","wpli"]
    # n_con_methods=len(connectivity_methods)
    
    # # Number of pairwise ch connections
    # n_ch_connections = scipy.special.comb(n_sources,2, exact=True, repetition=False)
    
    # # Load source time series
    
    # with open(Feature_savepath+"STCs_each_epoch_drop_interpol_ch_fix_snr.pkl", "rb") as file:
    #     STCs_list = pickle.load(file)
    
    
    # # I made my own slightly-optimized PLV & WPLI function
    # # Version 2 based on Filter + Hilbert instead of Morlets
    # def calculate_PLV_WPLI_across_time(data):
    #     n_ch, n_time0 = data.shape
    #     x = data.copy()
    #     # Filter the signal in the different freq bands
    #     con_array0 = np.zeros((2,n_ch,n_ch,n_freq_bands))
    #     # con_array0[con_array0==0] = np.nan
    #     for fname, frange in Freq_Bands.items():
    #         fmin, fmax = [float(interval) for interval in frange]
    #         signal_filtered = mne.filter.filter_data(x, sfreq, fmin, fmax,
    #                                           fir_design="firwin", verbose=0)
    #         # Filtering on finite signals will yield very low values for the first
    #         # and last timepoints, which can create outliers. E.g. 1e-29 compared to 1e-14
    #         # This systematic error is removed by dropping the first and last timepoints
    #         signal_filtered = signal_filtered[:,1:-1]
    #         # Hilbert transform to get complex signal
    #         analytic_signal = scipy.signal.hilbert(signal_filtered)
    #         # Calculate for the lower triangle only, as the matrix is symmetric
    #         for ch_r in range(n_ch):
    #             for ch_c in range(n_ch):
    #                 if ch_r>ch_c:
    #                     # =========================================================================
    #                     # PLV over time corresponds to the length of the time-averaged
    #                     # unit vectors of the relative phases (the mean resultant vector).
    #                     # So PLV will be 1 if the phases of the 2 signals maintain a constant lag
    #                     # In equational form: PLV = 1/N * |sum(e^i(phase1-phase2))|
    #                     # In code: abs(mean(exp(1i*phase_diff)))
    #                     # =========================================================================
    #                     # The magnitude of the analytic signal corresponds to the amplitude envelope and the angle gives the instantaneous phase
    #                     phase_diff = np.angle(analytic_signal[ch_r])-np.angle(analytic_signal[ch_c])
    #                     # Convert phase difference to complex part i(phase1-phase2)
    #                     phase_diff_im = 0*phase_diff+1j*phase_diff
    #                     # Take the exponential, then the mean followed by absolute value
    #                     PLV = np.abs(np.mean(np.exp(phase_diff_im)))
    #                     # Save to array
    #                     con_array0[0,ch_r,ch_c,list(Freq_Bands.keys()).index(fname)] = PLV
    #                     # =========================================================================
    #                     # PLI over time corresponds to the absolute mean of the sign of the sine of the
    #                     # relative phase differences. So PLI will be 1 if one signal is always leading or
    #                     # lagging behind the other signal. But it is insensitive to changes in
    #                     # relative phase, as long as it is the same signal that leads.
    #                     # If 2 signals are almost in phase, they might shift between lead/lag
    #                     # due to small fluctuations from noise. This would lead to unstable
    #                     # estimation of "phase" synchronisation.
    #                     # The wPLI tries to correct for this by weighting the PLI with the
    #                     # magnitude of the lag, to attenuate noise sources giving rise to
    #                     # near zero phase lag "synchronization"
    #                     # In equational form: WPLI = |E{|sin(phase_diff)|*sign(sin(phase_diff))}| / E{|sin(phase_diff)|}
    #                     # =========================================================================
    #                     # Calculate the magnitude of the sine of the phase differences
    #                     phase_diff_mag = np.abs(np.sin(phase_diff))
    #                     # Calculate the sign of the sine of the phase differences (PLI)
    #                     sign_phase_diff = np.sign(np.sin(phase_diff))
    #                     # Calculate the numerator (absolute value of the time average)
    #                     WPLI_nominator = np.abs(np.mean(phase_diff_mag*sign_phase_diff))
    #                     # Calculate denominator for normalization
    #                     WPLI_denom = np.mean(phase_diff_mag)
    #                     # Calculate WPLI
    #                     WPLI = WPLI_nominator/WPLI_denom
    #                     # Save to array
    #                     con_array0[1,ch_r,ch_c,list(Freq_Bands.keys()).index(fname)] = WPLI
    #     return con_array0
    
    # # Pre-allocatate memory
    # con_data = np.zeros((n_con_methods,n_subjects,n_eye_status,n_sources,n_sources,n_freq_bands))
    # n_epochs_matrix = np.zeros((n_subjects,n_eye_status))
    
    # # Get current time
    # c_time = time.localtime()
    # c_time = time.strftime("%H:%M:%S", c_time)
    # print(c_time)
    
    # def connectivity_estimation(i):
    #     con_data0 = np.zeros((n_con_methods,n_eye_status,n_sources,n_sources,n_freq_bands))
    #     con_data0[con_data0==0] = np.nan
    #     n_epochs_matrix0 = np.zeros((n_eye_status))
    #     for e in range(n_eye_status):
    #         ee = eye_status[e]
    #         eye_idx = final_epochs[i].events[:,2] == e+1 # EC = 1 and EO = 2
    #         # Get source time series
    #         temp_STC = STCs_list[i][eye_idx]
    #         # Calculate the coherence and ImgCoh for the given subject and eye status
    #         con, freqs, times, n_epochs, n_tapers = spectral_connectivity(
    #             temp_STC, method=connectivity_methods[0:2],
    #             mode="multitaper", sfreq=sfreq, fmin=fmin, fmax=fmax,
    #             faverage=True, verbose=0)
    #         # Save the results in array
    #         con_data0[0,e,:,:,:] = con[0] # coherence
    #         con_data0[1,e,:,:,:] = np.abs(con[1]) # Absolute value of ImgCoh to reflect magnitude of ImgCoh
    
    #         # Calculate PLV and wPLI for each epoch and then average
    #         n_epochs0 = temp_STC.shape[0]
    #         con_data1 = np.zeros((len(connectivity_methods[2:]),n_epochs0,n_sources,n_sources,n_freq_bands))
    #         for epoch in range(n_epochs0):
    #             # First the data is retrieved and epoch axis dropped
    #             temp_data = temp_STC[epoch,:,:]
    #             # PLV and WPLI value is calculated across timepoints in each freq band
    #             PLV_WPLI_con = calculate_PLV_WPLI_across_time(temp_data)
    #             # Save results
    #             con_data1[0,epoch,:,:,:] = PLV_WPLI_con[0] # phase locking value
    #             con_data1[1,epoch,:,:,:] = PLV_WPLI_con[1] # weighted phase lag index
    #         # Take average across epochs for PLV and wPLI
    #         con_data2 = np.mean(con_data1,axis=1)
    #         # Save to final array
    #         con_data0[2,e,:,:,:] = con_data2[0] # phase locking value
    #         con_data0[3,e,:,:,:] = con_data2[1] # weighted phase lag index
    #         n_epochs_matrix0[e] = n_epochs
    
    #     print("{} out of {} finished".format(i+1,n_subjects))
    #     return i, con_data0, n_epochs_matrix0
    
    # with concurrent.futures.ProcessPoolExecutor(max_workers=16) as executor:
    #     for i, con_result, n_epochs_mat in executor.map(connectivity_estimation, range(n_subjects)): # Function and arguments
    #         con_data[:,i,:,:,:,:] = con_result
    #         n_epochs_matrix[i] = n_epochs_mat
    
    # # Get current time
    # c_time = time.localtime()
    # c_time = time.strftime("%H:%M:%S", c_time)
    # print(c_time)
    
    # # Save the results
    # np.save(Feature_savepath+"Source_drop_interpol_ch_connectivity_measures_data.npy", con_data) # (con_measure,subject,eye,ch,ch,freq)
    
    # # Also save in dataframe format for feature selection
    # # Convert to a Pandas dataframe
    # # Each dimension becomes a column of indices and the last column holds the actual values
    # arr = np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, con_data.shape), indexing="ij"))) + [con_data.ravel()])
    # con_data_df = pd.DataFrame(arr, columns = ["Con_measurement", "Subject_ID", "Eye_status", "chx", "chy", "Freq_band", "Value"])
    # # Change from numerical coding to actual values
    # eye_status = list(final_epochs[0].event_id.keys())
    # freq_bands_name = list(Freq_Bands.keys())
    
    # index_values = [connectivity_methods,Subject_id,eye_status,label_names,label_names,freq_bands_name]
    # for col in range(len(index_values)):
    #     col_name = con_data_df.columns[col]
    #     for shape in range(con_data.shape[col]): # notice not dataframe but the array
    #         con_data_df.loc[con_data_df.iloc[:,col] == shape,col_name]\
    #         = index_values[col][shape]
    
    # # Add group status
    # Group_status = np.array(["CTRL"]*len(con_data_df["Subject_ID"]))
    # Group_status[np.array([i in cases for i in con_data_df["Subject_ID"]])] = "PTSD"
    # # Add to dataframe
    # con_data_df.insert(3, "Group_status", Group_status)
    
    # # Remove all diagonal and upper-matrix entries
    # con_data_df = con_data_df.iloc[con_data_df["Value"].to_numpy().nonzero()]
    
    # # Save df
    # con_data_df.to_pickle(os.path.join(Feature_savepath,"con_data_source_drop_interpol_df.pkl"))
    
    # # %% Estimate Granger's Causality in source space
    # # Load source labels
    # with open("custom_aparc2009_Li_et_al_2022.pkl", "rb") as file:
    #     labels = pickle.load(file)
    # label_names = [label.name for label in labels]
    # n_sources = len(label_names)
    
    # # Load source time series
    # with open(Feature_savepath+"STCs_each_epoch_drop_interpol_ch_fix_snr.pkl", "rb") as file:
    #     STCs_list = pickle.load(file)
    
    # # Granger causality might be influenced by volume conduction, thus working with CSD might be beneficial
    # # But since I already used source modelling to alleviate this problem, I will not apply CSD
    # # Barrett et al., 2012 also do not apply CSD to source GC
    
    # # GC assumes stationarity, thus I will test for stationarity using ADF test
    # # The null hypothesis of the ADF test is that the series has a unit root, i.e. is non-stationary
    # # I will test how many can reject the null hypothesis, i.e. are stationary
    
    # # Due to the low numerical values in the STCs the ADF test is numerically unstable, thus the data is scaled to be around 1e0
    
    # stationary_test_arr = [0]*n_subjects
    # n_tests = [0]*n_subjects
    # for i in range(n_subjects):
    #     # Get data
    #     data_arr = STCs_list[i]
    #     # Get shape
    #     n_epochs, n_channels, n_timepoints = data_arr.shape
    #     # Create array for indices to print out progress
    #     ep_progress_idx = np.arange(n_epochs//5,n_epochs,n_epochs//5)
    #     # Calculate number of tests performed for each subject
    #     n_tests[i] = n_epochs*n_channels
    #     # Prepare empty array (with 2's as 0 and 1 will be used)
    #     stationary_test_arr0 = np.zeros((n_epochs,n_channels))+2 # make array of 2's
    #     for ep in range(n_epochs):
    #         for c in range(n_channels):
    #             ADF = adfuller(data_arr[ep,c,:]*1e14) # multiplying by a constant does not change the ADF result, but helps against numerical instability
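    #             # adfuller returns (test statistic, p-value, used lags, nobs, critical values, icbest)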
    #             p_value = ADF[1]
    #             if p_value < 0.05:
    #                 stationary_test_arr0[ep,c] = True # Stationary set to 1
    #             else:
    #                 stationary_test_arr0[ep,c] = False # Non-stationary set to 0
    #         # Print partial progress
    #         if len(np.where(ep_progress_idx==ep)[0]) > 0:
    #             print("Finished epoch number: {} out of {}".format(ep,n_epochs))
    #     # Indices that were not tested
    #     no_test_idx = np.where(stationary_test_arr0==2)[0]
    #     if len(no_test_idx) > 0:
    #         print("An unexpected error occurred and {} was not tested".format(no_test_idx))
    #     # Save to list
    #     stationary_test_arr[i] = stationary_test_arr0
    #     # Print progress
    #     print("Finished subject {} out of {}".format(i+1,n_subjects))
    
    # with open(Stat_savepath+"Source_drop_interpol_GC_stationarity_tests.pkl", "wb") as filehandle:
    #     # The data is stored as binary data stream
    #     pickle.dump(stationary_test_arr, filehandle)
    
    # # I used a threshold of 0.05
    # # This means that on average I would expect 5% false positives among the tests that showed significance for stationarity
    # ratio_stationary = [0]*n_subjects
    # for i in range(n_subjects):
    #     # Ratio of tests that showed stationarity
    #     ratio_stationary[i] = np.sum(stationary_test_arr[i])/n_tests[i]
    
    # print("Ratio of stationary time series: {0:.3f}".format(np.mean(ratio_stationary))) # 88%
    
    # # The pre-processing has already ensured that most of the data fulfills the stationarity assumption.
    
    # # Divide the data into eyes closed and open
    # ch_names = label_names
    # n_channels = len(ch_names)
    
    # STC_eye_data = []
    # for i in range(n_subjects):
    #     # Get index for eyes open and eyes closed
    #     EC_index = final_epochs[i].events[:,2] == 1
    #     EO_index = final_epochs[i].events[:,2] == 2
    #     # Get the data
    #     EC_epoch_data = STCs_list[i][EC_index,:,:] # eye index
    #     EO_epoch_data = STCs_list[i][EO_index,:,:]
    #     # Save to list
    #     STC_eye_data.append([EC_epoch_data, EO_epoch_data])
    
    # # Make each epoch a TimeSeries object
    # # Input for TimeSeries is: (ch, time)
    # eye_status = list(final_epochs[0].event_id.keys())
    # n_eye_status = len(eye_status)
    # sfreq = final_epochs[0].info["sfreq"]
    
    # Timeseries_data = []
    # for i in range(n_subjects):
    #     temp_list1 = []
    #     for e in range(n_eye_status):
    #         temp_list2 = []
    #         n_epochs = STC_eye_data[i][e].shape[0]
    #         for ep in range(n_epochs):
    #             # Convert to TimeSeries
    #             time_series = nts.TimeSeries(STC_eye_data[i][e][ep,:,:], sampling_rate=sfreq)
    #             # Save the object
    #             temp_list2.append(time_series)
    #         # Save the timeseries across eye status