Skip to content
Snippets Groups Projects
Import_data.py 1.42 KiB
Newer Older
  • Learn to ignore specific revisions
  • mcoth's avatar
    mcoth committed
    """
    Created on Sun Sep  1 12:13:23 2024
    
    @author: Maya Coulson Theodorsen (mcoth@dtu.dk)
    
    This script imports the CSV data file, drops participants who have not consented
    to their data being used for research, filters out incomplete datasets, and keeps
    only one dataset for each participant.
    
    """
    
    #Import all necessary packages
    import pandas as pd
    
    # Define import of data
    def load_data(file_path):
        """Load the participant CSV and return the cleaned, de-duplicated data.

        The file is read as a semicolon-separated, latin-1 encoded CSV. Rows are
        then filtered in three steps, and the remaining row count is printed
        after each stage:

        1. Rows where ``Forskning`` equals ``False`` (no consent to research)
           are removed.
        2. Rows with a missing value in any of ``PCL_t0``, ``DASS_D_t0``,
           ``DASS_S_t0``, ``DASS_A_t0`` or ``q0003_0001`` are removed.
        3. Only the first row for each ``lbnr`` value is kept.

        Parameters
        ----------
        file_path : str or path-like
            Location of the CSV file to read.

        Returns
        -------
        pandas.DataFrame
            The filtered rows with a fresh 0..n-1 index. The row labels from the
            raw file are preserved in an ``index`` column.
        """
        # Read from the path supplied by the caller (previously a hard-coded
        # file name was used and ``file_path`` was silently ignored).
        data = pd.read_csv(file_path, encoding='latin-1', sep=';')
        print('Total amount of participants/measurements:', data.shape[0])

        # Delete the data of individuals who do not consent to research.
        # Compare explicitly against False rather than using truthiness so that
        # rows with a missing consent value are treated exactly as before
        # (they are not equal to False and are therefore kept).
        data = data[data['Forskning'] != False]  # noqa: E712

        # Display amount that have given consent
        print('Total amount of participants wtih consent:', data.shape[0])

        # Delete incomplete datasets: every baseline measure must be present.
        required_columns = [
            'PCL_t0',
            'DASS_D_t0',
            'DASS_S_t0',
            'DASS_A_t0',
            'q0003_0001',
        ]
        data = data.dropna(subset=required_columns)

        # Keep only one dataset (the first occurrence) per participant number.
        data = data.drop_duplicates(subset='lbnr')

        print('Total amount of unique participants wtih complete data:', data.shape[0])

        # reset_index() returns a new frame and keeps the old labels as an
        # 'index' column, matching the previous output layout.
        return data.reset_index()