Skip to content
Snippets Groups Projects
Import_data.py 1.42 KiB
Newer Older
mcoth's avatar
mcoth committed
"""
Created on Sun Sep  1 12:13:23 2024

@author: Maya Coulson Theodorsen (mcoth@dtu.dk)

This script imports the CSV data file, drops participants who have not consented
to their data being used for research, filters out incomplete datasets, and keeps
only one dataset for each participant.

"""

#Import all necessary packages
import pandas as pd

# Define import of data
def load_data(file_path):
    """Load the CSV data file and return one complete row per participant.

    The file is read as a semicolon-separated, latin-1 encoded CSV. Rows are
    then filtered in three steps, and the remaining row count is printed
    after each step:

    1. Rows whose 'Forskning' (research consent) value is False are removed.
    2. Rows missing any of the required baseline columns are removed.
    3. Only the first row for each 'lbnr' value is kept.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The filtered data with a fresh 0..n-1 index. The row labels from the
        original file are kept in a column named 'index'.

    Raises
    ------
    FileNotFoundError
        If `file_path` does not exist (raised by `pandas.read_csv`).
    KeyError
        If any of the expected columns is missing from the file.
    """
    # Read from the path supplied by the caller rather than a hard-coded
    # file name, so the function works regardless of the working directory.
    data = pd.read_csv(file_path, encoding='latin-1', sep=';')
    print('Total amount of participants/measurements:', data.shape[0])

    # Remove only rows where consent is explicitly False; rows with a missing
    # consent value are kept. The element-wise `!= False` comparison is
    # intentional (`is not False` would not work on a Series).
    data = data[data['Forskning'] != False]  # noqa: E712
    print('Total amount of participants wtih consent:', data.shape[0])

    # A dataset counts as complete only if all of these columns are filled in.
    required_columns = [
        'PCL_t0',
        'DASS_D_t0',
        'DASS_S_t0',
        'DASS_A_t0',
        'q0003_0001',
    ]
    data = data.dropna(subset=required_columns)

    # Keep the first remaining row for each participant number.
    data = data.drop_duplicates(subset='lbnr')
    print('Total amount of unique participants wtih complete data:', data.shape[0])

    # reset_index() returns a new frame; without drop=True the old row labels
    # are preserved as an 'index' column.
    return data.reset_index()