""" Created on Sun Sep 1 12:13:23 2024 @author: Maya Coulson Theodorsen (mcoth@dtu.dk) This script imports the CSV data file, drops participants who have not consented to their data being used for research, filters incomplete datasets, keeps only one dataset for each participant """ #Import all necessary packages import pandas as pd # Define import of data def load_data(file_path): # Import data data_original = pd.read_csv("data6_9_2023.csv", encoding='latin-1', sep = ';') data = data_original print('Total amount of participants/measurements:', data.shape[0]) # Delete the data of individuals who do not consent to research data = data.drop(data.index[data['Forskning'] == False]) #Check the counts to make sure all who haven't consented are deleted data['Forskning'].value_counts() #Display amout that have given consent print('Total amount of participants wtih consent:', data.shape[0]) # Delete incomplete datasets, drop duplicates data = data[data['PCL_t0'].notna()] data = data[data['DASS_D_t0'].notna()] data = data[data['DASS_S_t0'].notna()] data = data[data['DASS_A_t0'].notna()] data = data[data['q0003_0001'].notna()] data = data.drop_duplicates(subset='lbnr') print('Total amount of unique participants wtih complete data:', data.shape[0]) data_complete = data.copy() data_complete = data_complete.reset_index() return data_complete