diff --git a/FeatureEstimation.py b/FeatureEstimation.py
index af84e9f1c540aae9cdb93c92c207d1ff2b2d3040..4c599d466a1c402e30f172a778f9e4f5e7c04a73 100644
--- a/FeatureEstimation.py
+++ b/FeatureEstimation.py
@@ -53,51 +53,362 @@ for r, d, f in os.walk(load_path):
         files.append(os.path.join(r, file))
 files.sort()
+
 # Subject IDs
 Subject_id = [0] * len(files)
 for i in range(len(files)):
-    temp = files[i].split("\\")
-    temp = temp[-1].split("_")
-    Subject_id[i] = int(temp[0][-1]) # Subject_id[i] = int(temp[0])
+    temp = files[i].split("/")
+    temp = temp[-1].split(".")
+    temp = temp[0].split("_")
+    Subject_id[i] = int(temp[0])
+
+# Should exclude subject 100326 due to 7 bad channels
+# Exclude 200013 and 200015 due to too many dropped epochs
+# (200001, 200004, 200053 and 200072 were already excluded prior to preprocessing)
+# Exclude 302215, 302224, 302227, 302233, 302264, 302268, 302275 due to too many dropped epochs
+# 13 subjects excluded in total, plus 4 I did not receive because they were marked as bad by Helse
+bad_subjects = [100326, 200013, 200015, 302224, 302227, 302233, 302264, 302268, 302275]
+good_subject_idx = [not i in bad_subjects for i in Subject_id]
+
+Subject_id = list(np.array(Subject_id)[good_subject_idx])
+files = list(np.array(files)[good_subject_idx])
 n_subjects = len(Subject_id)
 
-# Load Final epochs
-final_epochs = [0]*n_subjects
-for n in range(n_subjects):
-    final_epochs[n] = mne.read_epochs(fname = os.path.join(files[n]),
+# Load ISAF
+n_ISAF = 51-1
+ISAF7_final_epochs = [0]*n_ISAF
+for n in range(len(ISAF7_final_epochs)):
+    ISAF7_final_epochs[n] = mne.read_epochs(fname = os.path.join(files[n]),
+                                            verbose=0)
+
+# Load HELSE
+n_HELSE = 70-2
+HELSE_final_epochs = [0]*n_HELSE
+for n in range(len(HELSE_final_epochs)):
+    HELSE_final_epochs[n] = mne.read_epochs(fname = os.path.join(files[n+n_ISAF]),
                                             verbose=0)
+    # Rename channels to match ISAF (using the 10-20 MCN nomenclature)
+    mne.rename_channels(HELSE_final_epochs[n].info, {"T3":"T7",
+                                                     "T4":"T8",
+                                                     "T5":"P7",
+                                                     "T6":"P8"})
+# The warning about chronological order is due to interleaved EO->EC->EO->EC being concatenated as 5xEC->5xEO
+
+# Load Baseline
+n_Base = 91-6
+Base_final_epochs = [0]*n_Base
+for n in range(len(Base_final_epochs)):
+    Base_final_epochs[n] = mne.read_epochs(fname = os.path.join(files[n+n_ISAF+n_HELSE]),
+                                           verbose=0)
+
+# I will use the union of the channels in the datasets (except mastoids)
+# This means I add empty channels and interpolate where a channel is missing
+# The Helsefond montage has the wrong calibration; the head sizes are too big.
+# Thus I will need to re-calibrate by comparing with ISAF7
+# Notice I already used the final dig montage for the Baseline data
+
+# Get channel names
+ISAF7_chs = ISAF7_final_epochs[0].copy().info["ch_names"]
+Helse_chs = HELSE_final_epochs[0].copy().info["ch_names"]
+Base_chs = Base_final_epochs[0].copy().info["ch_names"]
+
+# Get intersection of channels
+intersect_ch = list(set(Helse_chs) & set(ISAF7_chs))
+intersect_ch_ratio = [0]*len(intersect_ch)
+for i in range(len(intersect_ch)):
+    # Get channel name
+    ch_name0 = intersect_ch[i]
+    # Get index and electrode location
+    ISAF7_ch_idx = np.where(np.array(ISAF7_chs) == ch_name0)[0]
+    ISAF7_ch_loc = ISAF7_final_epochs[0].info["chs"][int(ISAF7_ch_idx)]["loc"]
+    Helse_ch_idx = np.where(np.array(Helse_chs) == ch_name0)[0]
+    Helse_ch_loc = HELSE_final_epochs[0].info["chs"][int(Helse_ch_idx)]["loc"]
+    # Calculate ratio
+    intersect_ch_ratio[i] = [ch_name0,ISAF7_ch_loc[0:3]/Helse_ch_loc[0:3]]
+# Most of the ratios are either around 0.095, or 0/Inf when dividing by 0
+# We also see that Cz for ISAF is defined as (0,0,0.095m). For Helse, Cz is (0,0,1m)
+Helse_to_ISAF7_cal = 0.095
+
+# Now we are ready to make a combined montage (info["dig"])
+# Get list of channels to add
+ISAF7_add_ch_list = list(set(Helse_chs)-set(ISAF7_chs))
+Helse_add_ch_list = list(set(ISAF7_chs)-set(Helse_chs))
+combined_ch_list = Helse_chs + ISAF7_chs
+# Remove duplicates while maintaining A-P order
+duplicate = set()
+final_ch_list = [x for x in combined_ch_list if not (x in duplicate or duplicate.add(x))]
+# Move AFz and POz to their correct positions
+final_ch_list.insert(3,final_ch_list.pop(-2)) # from second last to 3
+final_ch_list.insert(-5,final_ch_list.pop(-1)) # from last to seventh from last
+
+# The order of DigPoints in info is based on sorted ch names
+Helse_dig = HELSE_final_epochs[0].copy().info["dig"]
+Helse_chs_sorted = Helse_chs.copy()
+Helse_chs_sorted.sort()
+ISAF7_dig = ISAF7_final_epochs[0].copy().info["dig"]
+ISAF7_chs_sorted = ISAF7_chs.copy()
+ISAF7_chs_sorted.sort()
+
+# Make one list with DigPoints from Helse + unique channels from ISAF7
+ch_idx = [i for i, item in enumerate(ISAF7_chs_sorted) if item in set(Helse_add_ch_list)] # indices of unique channels
+final_ch_list_sorted = final_ch_list.copy()
+final_ch_list_sorted.sort()
+dig_insert_idx = [i for i, item in enumerate(final_ch_list_sorted) if item in set(Helse_add_ch_list)] # find where ISAF7 channels should be inserted
+
+# Prepare combined dig montage
+final_dig = Helse_dig.copy()
+# Calibrate Helse digpoints
+for i in range(len(final_dig)):
+    final_dig[i]["r"] = final_dig[i]["r"]*Helse_to_ISAF7_cal
+# Insert ISAF7 digpoints
+for i in range(len(ch_idx)):
+    final_dig.insert(dig_insert_idx[i],ISAF7_dig[ch_idx[i]])
+
+# Remove mastoids
+Mastoid_ch = ["M1", "M2"]
+M_idx = [i for i, item in enumerate(final_ch_list_sorted) if item in set(Mastoid_ch)] # find mastoid chs
+M_idx2 = [i for i, item in enumerate(final_ch_list) if item in set(Mastoid_ch)] # find mastoid chs
+M_idx3 = [i for i, item in enumerate(Helse_add_ch_list) if item in set(Mastoid_ch)] # find mastoid chs
+for i in reversed(range(len(M_idx))):
+    del final_ch_list_sorted[M_idx[i]]
+    del final_dig[M_idx[i]]
+    # Mastoids are placed at the back of final_ch_list and Helse_add_ch_list and are also removed
+    del final_ch_list[M_idx2[i]]
+    del Helse_add_ch_list[M_idx3[i]]
+
+# T7, T8, Pz, P8 and P7 are placed wrongly (probably due to renaming)
+# This is fixed manually
+final_dig.insert(-7,final_dig.pop(-4)) # swap between P8 and T7
+final_dig.insert(-6,final_dig.pop(-3)) # swap between T7 and T8
+final_dig.insert(-4,final_dig.pop(-4)) # swap between Pz and T7
+final_dig.insert(-5,final_dig.pop(-5)) # swap between Pz and POz
+
+# Update EEG identity numbers
+for i in range(len(final_dig)):
+    final_dig[i]["ident"] = i+1
+
+# Make final digital montage
+final_digmon = mne.channels.DigMontage(dig=final_dig, ch_names=final_ch_list_sorted)
+# final_digmon.plot() # visually inspect topographical positions
+# final_digmon.plot(kind="3d") # visually inspect 3D positions
+# final_digmon.save("final_digmon.fif") # Save digital montage
+with open("final_digmon_ch_names.pkl", "wb") as filehandle:
+    # The data is stored as a binary data stream
+    pickle.dump(final_digmon.ch_names, filehandle)
+
+# Remove mastoids from ISAF, add channels from Helse and interpolate
+for n in range(len(ISAF7_final_epochs)):
+    # Remove mastoid channels
+    ISAF7_final_epochs[n].drop_channels(Mastoid_ch)
+    # Add empty channels to interpolate - notice that the locations are set to 0
+    mne.add_reference_channels(ISAF7_final_epochs[n],ISAF7_add_ch_list,copy=False)
+    # Fix channel info (both after removal of mastoids and for the newly added chs)
+    # The ch info locs are linked for all reference channels, and this link is removed
+    for c in range(ISAF7_final_epochs[n].info["nchan"]):
+        ISAF7_final_epochs[n].info["chs"][c]["loc"] = ISAF7_final_epochs[n].info["chs"][c]["loc"].copy()
+        ISAF7_final_epochs[n].info["chs"][c]["scanno"] = c+1
+        ISAF7_final_epochs[n].info["chs"][c]["logno"] = c+1
+    # Set new combined montage
+    ISAF7_final_epochs[n].set_montage(final_digmon)
+    # Set newly added channels as "bad" and interpolate
+    ISAF7_final_epochs[n].info["bads"] = ISAF7_add_ch_list
+    ISAF7_final_epochs[n].interpolate_bads(reset_bads=True)
+    # Fix "picks" in order to reorder channels
+    ISAF7_final_epochs[n].picks = np.array(range(ISAF7_final_epochs[n].info["nchan"]))
+    # Reorder channels
+    ISAF7_final_epochs[n].reorder_channels(final_ch_list)
+
+# Add channels from ISAF to Helse and interpolate
+for n in range(len(HELSE_final_epochs)):
+    # Add empty channels to interpolate
+    mne.add_reference_channels(HELSE_final_epochs[n],Helse_add_ch_list,copy=False)
+    # Fix channel info for the newly added chs
+    # The ch info locs are linked for all reference channels, and this link is removed
+    for c in range(HELSE_final_epochs[n].info["nchan"]):
+        HELSE_final_epochs[n].info["chs"][c]["loc"] = HELSE_final_epochs[n].info["chs"][c]["loc"].copy()
+        HELSE_final_epochs[n].info["chs"][c]["scanno"] = c+1
+        HELSE_final_epochs[n].info["chs"][c]["logno"] = c+1
+    # Set new combined montage
+    HELSE_final_epochs[n].set_montage(final_digmon)
+    # Set newly added channels as "bad" and interpolate
+    HELSE_final_epochs[n].info["bads"] = Helse_add_ch_list
+    HELSE_final_epochs[n].interpolate_bads(reset_bads=True)
+    # Fix "picks" in order to reorder channels
+    HELSE_final_epochs[n].picks = np.array(range(HELSE_final_epochs[n].info["nchan"]))
+    # Reorder channels
+    HELSE_final_epochs[n].reorder_channels(final_ch_list)
+
+# Add missing channels to Baseline and interpolate
+Base_add_ch_list = list(set(final_ch_list)-set(Base_chs))
+for n in range(len(Base_final_epochs)):
+    # Add empty channels to interpolate
+    mne.add_reference_channels(Base_final_epochs[n],Base_add_ch_list,copy=False)
+    # Fix channel info for the newly added chs
+    # The ch info locs are linked for all reference channels, and this link is removed
+    for c in range(Base_final_epochs[n].info["nchan"]):
+        Base_final_epochs[n].info["chs"][c]["loc"] = Base_final_epochs[n].info["chs"][c]["loc"].copy()
+        Base_final_epochs[n].info["chs"][c]["scanno"] = c+1
+        Base_final_epochs[n].info["chs"][c]["logno"] = c+1
+    # Set new combined montage
+    Base_final_epochs[n].set_montage(final_digmon)
+    # Set newly added channels as "bad" and interpolate
+    Base_final_epochs[n].info["bads"] = Base_add_ch_list
+    Base_final_epochs[n].interpolate_bads(reset_bads=True)
+    # Fix "picks" in order to reorder channels
+    Base_final_epochs[n].picks = np.array(range(Base_final_epochs[n].info["nchan"]))
+    # Reorder channels
+    Base_final_epochs[n].reorder_channels(final_ch_list)
+
+# Combine all datasets in one list
+final_epochs = ISAF7_final_epochs+HELSE_final_epochs+Base_final_epochs
+# Check number of epochs
+file_lengths = [0]*len(final_epochs)
+for i in range(len(final_epochs)):
+    file_lengths[i] = len(final_epochs[i])
+# sns.distplot(file_lengths) # visualize
+np.min(file_lengths)/150*100 # Max 20% of epochs dropped; subjects above that were excluded
+n_subjects = len(final_epochs)
+
+# Re-sample files to 200Hz (data was already lowpass filtered at 100Hz, so anything above 200Hz is oversampling)
+for i in range(len(final_epochs)):
+    final_epochs[i].resample(sfreq=200, verbose=2)
+
+# # Save final epochs data
+# save_path = "/home/glia/Analysis/Final_epochs_data"
+# for n in range(len(final_epochs)):
+#     # Make file writeable
+#     final_epochs[n]._times_readonly.flags["WRITEABLE"] = False
+#     # Save file
+#     final_epochs[n].save(fname = os.path.join(save_path,str("{}_final_epoch".format(Subject_id[n]) + "-epo.fif")),
+#                          overwrite=True, verbose=0)
 
 # Load dropped epochs - used for gap idx in microstates
-bad_subjects = [12345] # list with subjects that were excluded due to too many dropped epochs/chs
-Drop_epochs_df = pd.read_pickle("./Preprocessing/dropped_epochs.pkl")
+ISAF7_dropped_epochs_df = pd.read_pickle("ISAF7_dropped_epochs.pkl")
+Helse_dropped_epochs_df = pd.read_pickle("HELSE_dropped_epochs.pkl")
+Base_dropped_epochs_df = pd.read_pickle("Base_dropped_epochs.pkl")
+Drop_epochs_df = pd.concat([ISAF7_dropped_epochs_df,Helse_dropped_epochs_df,
+                            Base_dropped_epochs_df]).reset_index(drop=True)
 
 good_subject_df_idx = [not i in bad_subjects for i in Drop_epochs_df["Subject_ID"]]
 Drop_epochs_df = Drop_epochs_df.loc[good_subject_df_idx,:]
 Drop_epochs_df = Drop_epochs_df.sort_values(by=["Subject_ID"]).reset_index(drop=True)
 
 ### Load questionnaire data
-# For the purposes of this demonstration I will make a dummy dataframe
-# The original code imported csv files with questionnaire data and group status
-final_qdf = pd.read_csv(r'/data/raw/FOR_DTU/Questionnaires_for_DTU.csv')
-
-"""
-final_qdf = pd.DataFrame({"Subject_ID":Subject_id,
-                          "Age":[23,26],
-                          "Gender":[0,0],
-                          "Group_status":[0,1],
-                          "PCL_total":[33,56],
-                          "Q1":[1.2, 2.3],
-                          "Q2":[1.7, 1.5],
-                          "Qn":[2.1,1.0]})
-"""
+# ISAF
+qdf_ISAF7 = pd.read_csv("/data/raw/FOR_DTU/Questionnaires_for_DTU.csv",
+                        sep=";", na_values=' ')
+# Rename Subject_ID column
+qdf_ISAF7.rename({"ID_number": "Subject_ID"}, axis=1, inplace=True)
+# Sort Subject_id to match Subject_id for files
+qdf_ISAF7 = qdf_ISAF7.sort_values(by=["Subject_ID"], ignore_index=True)
+# Get column idx for PCL_t7 columns
+PCL_idx = qdf_ISAF7.columns.str.contains("PCL") & np.invert(qdf_ISAF7.columns.str.contains("PCL_"))
+# Keep subject id
+PCL_idx[qdf_ISAF7.columns=="Subject_ID"] = True
+
+# Make a final df and exclude dropped subjects
+final_qdf0 = qdf_ISAF7.loc[qdf_ISAF7["Subject_ID"].isin(Subject_id),PCL_idx].reset_index(drop=True)
+# Make a column with the sum of all PCL items
+final_qdf0.insert(len(final_qdf0.columns),"PCL_total",np.sum(final_qdf0.iloc[:,1:],axis=1))
+
+# Helse
+qdf_helse = pd.read_csv("/data/may2020/Questionnaires/HelsfondenQuestData_nytLbn.csv",
+                        sep=",", na_values=' ')
+# Rename subject ID column
+qdf_helse.rename(columns={"Nyt_lbn":"Subject_ID"}, inplace=True)
+Helse_ID_modifier = 200000
+# Add 200000 to the IDs
+qdf_helse["Subject_ID"] += Helse_ID_modifier
+# Sort Subject_id to match Subject_id for files
+qdf_helse = qdf_helse.sort_values(by=["Subject_ID"], ignore_index=True)
+# Get column idx for PCL columns (don't use summarized columns with _)
+PCL_idx = qdf_helse.columns.str.contains("PCL") & np.invert(qdf_helse.columns.str.contains("PCL_"))
+# Keep subject id
+PCL_idx[qdf_helse.columns=="Subject_ID"] = True
+
+# Make a final df and exclude dropped subjects
+final_qdf1 = qdf_helse.loc[qdf_helse["Subject_ID"].isin(Subject_id),PCL_idx].reset_index(drop=True)
+# Make a column with the sum of all PCL items
+final_qdf1.insert(len(final_qdf1.columns),"PCL_total",np.sum(final_qdf1.iloc[:,1:],axis=1))
+
+# Baseline
+# antal_børn renamed to antal_boern
+qdf_base = pd.read_csv("/data/sep2020/BaselineForLi.csv", sep=",", na_values=' ')
+
+# Rename subject ID column
+qdf_base.rename(columns={"LbnRand":"Subject_ID"}, inplace=True)
+Base_ID_modifier = 300000
+# Add 300000 to the IDs
+qdf_base["Subject_ID"] += Base_ID_modifier
+# Sort Subject_id to match Subject_id for files
+qdf_base = qdf_base.sort_values(by=["Subject_ID"], ignore_index=True)
+# Get column idx for PCL columns (don't use summarized columns with _)
+PCL_idx = qdf_base.columns.str.contains("PCL") & np.invert(qdf_base.columns.str.contains("PCL_"))
+# Keep subject id
+PCL_idx[qdf_base.columns=="Subject_ID"] = True
+
+# Make a final df and exclude dropped subjects
+final_qdf2 = qdf_base.loc[qdf_base["Subject_ID"].isin(Subject_id),PCL_idx].reset_index(drop=True)
+# Make a column with the sum of all PCL items
+final_qdf2.insert(len(final_qdf2.columns),"PCL_total",np.sum(final_qdf2.iloc[:,1:],axis=1))
+
+# Find NaN
+nan_idx = np.where(final_qdf2.isna()==True)
+final_qdf2.iloc[nan_idx[0],np.concatenate([np.array([0]),nan_idx[1]])] # 2252 has NaN for PCL3 and PCL12
+# Impute with the mean of the column
+final_qdf2 = final_qdf2.fillna(final_qdf2.mean())
+
+# Combine the 3 datasets
+final_qdf0.columns = final_qdf1.columns # fix colnames with t7
+final_qdf = pd.concat([final_qdf0,final_qdf1,final_qdf2], ignore_index=True)
+
+# Ensure all columns are integers
+final_qdf = final_qdf.astype("int")
 
 # Define cases as >= 44 total PCL
 # Type: numpy array with subject id
-cases = np.array(final_qdf["Subject_ID"][final_qdf["PCL_t7"]>=44])
+cases = np.array(final_qdf["Subject_ID"][final_qdf["PCL_total"]>=44])
 n_groups = 2
 Groups = ["CTRL", "PTSD"]
+# Check the percentage of cases in each dataset
+len(np.where((cases>100000)&(cases<200000))[0])/n_ISAF # around 32%
+len(np.where((cases>200000)&(cases<300000))[0])/n_HELSE # around 51%
+len(np.where((cases>300000)&(cases<400000))[0])/n_Base # around 66%
+# There is clearly class imbalance between studies!
+
+### Get depression scores as binary above-threshold indicators
+# BDI >= 20 is moderate depression
+# DASS-42 >= 14 is moderate depression on the depression subscale
+dep_cases = np.concatenate([np.array(qdf_ISAF7["Subject_ID"][qdf_ISAF7["BDI_t7"] >= 20]),
+                            np.array(qdf_helse["Subject_ID"][qdf_helse["DASS_D_t0"] >= 14]),
+                            np.array(qdf_base["Subject_ID"][qdf_base["DASS_D_t0"] >= 14])])
+dep_cases.sort()
+dep_cases = dep_cases[np.isin(dep_cases,Subject_id)] # only keep subjects that were received and not excluded
+
+# Check the percentage of depression cases in each dataset
+len(np.where((dep_cases>100000)&(dep_cases<200000))[0])/n_ISAF # around 34%
+len(np.where((dep_cases>200000)&(dep_cases<300000))[0])/n_HELSE # around 53%
+len(np.where((dep_cases>300000)&(dep_cases<400000))[0])/n_Base # around 72%
+
+# Normalize to the maximum depression score to combine the two scales
+# Relative scores seem to be consistent between BDI-II, DASS-42 and the clinical label
+max_BDI = 63
+max_DASS = 42
+# Get normalized depression scores for each dataset
+ISAF7_dep_score = qdf_ISAF7["BDI_t7"][qdf_ISAF7["Subject_ID"].isin(Subject_id)]/max_BDI
+Helse_dep_score = qdf_helse["DASS_D_t0"][qdf_helse["Subject_ID"].isin(Subject_id)]/max_DASS
+Base_dep_score = qdf_base["DASS_D_t0"][qdf_base["Subject_ID"].isin(Subject_id)]/max_DASS
+Norm_dep_score = np.concatenate([ISAF7_dep_score.to_numpy(),Helse_dep_score.to_numpy(),Base_dep_score.to_numpy()])
+
+# Check that subject ids match when using concat
+test_d1 = qdf_ISAF7["Subject_ID"][qdf_ISAF7["Subject_ID"].isin(Subject_id)]
+test_d2 = qdf_helse["Subject_ID"][qdf_helse["Subject_ID"].isin(Subject_id)]
+test_d3 = qdf_base["Subject_ID"][qdf_base["Subject_ID"].isin(Subject_id)]
+test_d4 = np.concatenate([test_d1.to_numpy(),test_d2.to_numpy(),test_d3.to_numpy()])
+assert all(np.equal(Subject_id,test_d4))
+
 # Define folder for saving features
 Feature_savepath = "./Features/"
 Stat_savepath = "./Statistics/"
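For reference, a minimal standalone sketch of the group-labelling logic added in this patch (cut-offs taken from the code: PCL total >= 44 for PTSD; BDI-II >= 20 and DASS-42 depression >= 14 for depression; normalization by the scale maxima 63 and 42). The subject IDs and scores below are made up for illustration and are not part of the study data:

import numpy as np
import pandas as pd

# Toy questionnaire data (hypothetical IDs and scores, for illustration only)
qdf = pd.DataFrame({"Subject_ID": [100101, 100102, 200201, 300301],
                    "PCL_total":  [30, 52, 61, 18],
                    "Scale":      ["BDI", "BDI", "DASS", "DASS"],
                    "Dep_score":  [25, 10, 20, 5]})

# PTSD cases: total PCL score at or above the cut-off of 44
cases = qdf.loc[qdf["PCL_total"] >= 44, "Subject_ID"].to_numpy()

# Depression cases: scale-specific cut-offs (BDI-II >= 20, DASS-42 depression >= 14)
dep_cutoff = qdf["Scale"].map({"BDI": 20, "DASS": 14})
dep_cases = qdf.loc[qdf["Dep_score"] >= dep_cutoff, "Subject_ID"].to_numpy()

# Normalized depression score: divide by the maximum of the respective scale (BDI-II: 63, DASS-42: 42)
scale_max = qdf["Scale"].map({"BDI": 63, "DASS": 42})
qdf["Norm_dep_score"] = qdf["Dep_score"] / scale_max

print(cases)                                   # [100102 200201]
print(dep_cases)                               # [100101 200201]
print(qdf["Norm_dep_score"].round(2).tolist()) # [0.4, 0.16, 0.48, 0.12]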