#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Sep 1 13:50:11 2024

@author: Maya Coulson Theodorsen (mcoth@dtu.dk)

Driver script for the analysis. It calls, in order, the project helpers
load_data, sort_data, perform_pca, perform_clustering, compare_clusters,
total_descriptives and cluster_descriptives, and leaves their results as
module-level variables. The ``#%%`` comments are cell markers, so each step
can also be run on its own in an IDE that supports them.
"""

import os
import sys

# Single place to change where the project modules and the data file live.
DATA_DIR = '/Volumes/T7'

os.chdir(DATA_DIR)
# Must be appended before the custom-function imports below so that they
# can be resolved from DATA_DIR.
sys.path.append(DATA_DIR)  # Path

# Import custom functions
from Import_data import load_data
from Sort_data import sort_data
from Perform_pca import perform_pca
from Perform_clustering import perform_clustering
from Compare_clusters import compare_clusters
from Descriptives import total_descriptives, cluster_descriptives

# Import all necessary packages
import numpy as np
import pandas as pd

# Plotting
import matplotlib.pyplot as plt
import seaborn as sns

# PCA
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn import metrics
from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity, calculate_kmo

# Clustering
from scipy.cluster.hierarchy import dendrogram, linkage
from yellowbrick.cluster import KElbowVisualizer, SilhouetteVisualizer

# Statistical tests
import pingouin as pg
import scipy.stats as stats
import statsmodels.api as sm
import scikit_posthocs as sp
from sklearn.preprocessing import StandardScaler
from scipy.stats import bartlett, levene, chi2_contingency
from pingouin import normality, kruskal, homoscedasticity
from itertools import combinations
from statsmodels.stats.multitest import multipletests

# Turn off warnings
# NOTE(review): this silences every warning category for the rest of the
# run, including convergence and deprecation warnings from the libraries
# above - confirm that this is intended.
import warnings
warnings.filterwarnings("ignore")

#%% Import data using my Import_data function file
data_complete = load_data(f"{DATA_DIR}/data6_9_2023.csv")
# NOTE(review): `data` is rebound by the first return value of sort_data in
# the next cell, so this slice is only visible when this cell is run alone.
data = data_complete.loc[:, 'q0010_0001': 'q0014_0007']

#%% Call the sort_data function
(
    data,
    DASS,
    PCL,
    questionnaireClusters,
    questionnaireClusters_std,
    std_data,
    columnNames,
    PCAcolumns,
    data_complete,
) = sort_data(data_complete)

#%% Call the perform_pca function
pca, loadings, principleComponents = perform_pca(std_data, PCAcolumns, columnNames)

#%% Call the perform_clustering function
PC234, LABELS, clusterNames = perform_clustering(
    std_data,
    principleComponents,
    data_complete,
    questionnaireClusters,
    questionnaireClusters_std,
)

#%% Call the function to compare clusters across all variables
p_values, posthoc_p_values, categorical_variables, continuous_variables = compare_clusters(
    data_complete, questionnaireClusters
)
# Global pandas display option: show floats with 10 decimal places.
pd.options.display.float_format = '{:.10f}'.format
p_values = pd.DataFrame(p_values)
posthoc_p_values = pd.DataFrame(posthoc_p_values)

#%% Descriptive stats for total N and each k
cluster_column = 'clusters'

# Passed as `sorter` to both descriptives functions below.
sorter = [
    'Sex (male)', 'Age', 'Civil status (single)', 'Children', 'Unemployed',
    'Self-rated health', 'Psychoanaleptica', 'Psycholeptica', 'Excessive alcohol intake',
    'Current drug usage', 'Suicidal history', 'Probable childhood ADHD', 'Exposed to war', 'combat',
    'PCL Intrusion', 'PCL Avoidance', 'PCL Numbing', 'PCL Hyperarousal', 'DASS Anxiety',
    'DASS Depression', 'DASS Stress', 'PCL total score', 'Probable PTSD diagnosis', 'Total traumas',
    'Total unique traumas',
]

# Passed as `binary_variables` to both descriptives functions below.
binary_variables = [
    'PTSD_t0_DSMIV', 'q0002', 'q0006', 'civil_status', 'Psychoanaleptica',
    'Psycholeptica', 'binge', 'q0033_0001', 'ADHD_total_GROUP_t0', 'drugs',
    'Military_trauma', 'combat', 'Unemployed',
]

descriptives_total = total_descriptives(
    data_complete,
    questionnaireClusters,
    categorical_variables,
    continuous_variables,
    binary_variables,
    sorter,
)

descriptives_cluster = cluster_descriptives(
    data_complete,
    questionnaireClusters,
    categorical_variables,
    continuous_variables,
    cluster_column,
    binary_variables,
    sorter,
)