diff --git a/Import_data.py b/Import_data.py
new file mode 100644
index 0000000000000000000000000000000000000000..58c600b0923caa657557797162f74f5ae9abf31b
--- /dev/null
+++ b/Import_data.py
@@ -0,0 +1,44 @@
+"""
+Created on Sun Sep 1 12:13:23 2024
+
+@author: Maya Coulson Theodorsen (mcoth@dtu.dk)
+
+This script imports the CSV data file, drops participants who have not consented
+to their data being used for research, filters out incomplete datasets, and keeps
+only one dataset per participant.
+
+"""
+
+# Import all necessary packages
+import pandas as pd
+
+
+# Load the CSV export, keep consenting participants, and drop incomplete/duplicate records
+def load_data(file_path):
+    # Import data from the CSV file given by file_path
+    data_original = pd.read_csv(file_path, encoding='latin-1', sep=';')
+    data = data_original
+    print('Total number of participants/measurements:', data.shape[0])
+
+    # Delete the data of individuals who do not consent to research
+    data = data.drop(data.index[data['Forskning'] == False])
+
+    # Check the counts to make sure all who have not consented are deleted
+    print(data['Forskning'].value_counts())
+
+    # Display the number of participants who have given consent
+    print('Total number of participants with consent:', data.shape[0])
+
+    # Delete incomplete datasets, drop duplicates
+    data = data[data['PCL_t0'].notna()]
+    data = data[data['DASS_D_t0'].notna()]
+    data = data[data['DASS_S_t0'].notna()]
+    data = data[data['DASS_A_t0'].notna()]
+    data = data[data['q0003_0001'].notna()]
+    data = data.drop_duplicates(subset='lbnr')
+
+    print('Total number of unique participants with complete data:', data.shape[0])
+
+    data_complete = data.copy()
+    data_complete = data_complete.reset_index()
+
+    return data_complete
\ No newline at end of file
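
Usage note: a minimal sketch of how load_data could be called from another script now that it reads from its file_path parameter. The call site and the file name 'data6_9_2023.csv' (taken from the path hard-coded in the original version) are illustrative assumptions, not part of this diff.

    # Hypothetical call site, not part of Import_data.py.
    # The file name is an assumption based on the original hard-coded path.
    from Import_data import load_data

    data_complete = load_data('data6_9_2023.csv')
    print(data_complete.head())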