Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
Symptom clustering analysis
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
mcoth
Symptom clustering analysis
Commits
e057b366
Commit
e057b366
authored
4 months ago
by
mcoth
Browse files
Options
Downloads
Patches
Plain Diff
Upload New File
parent
b26eb067
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Pipeline
#38562
passed with warnings
4 months ago
Stage: test
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
Main_script.py
+90
-0
90 additions, 0 deletions
Main_script.py
with
90 additions
and
0 deletions
Main_script.py
0 → 100644
+
90
−
0
View file @
e057b366
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Sep 1 13:50:11 2024
@author: Maya Coulson Theodorsen (mcoth@dtu.dk)
This is the main script used for the analysis. Running this file alone is
sufficient, as long as the custom functions below are also imported.
"""
import
os
os
.
chdir
(
'
/Volumes/T7/
'
)
import
sys
sys
.
path
.
append
(
'
/Volumes/T7
'
)
# Path
# Import custom functions
from
Import_data
import
load_data
from
Sort_data
import
sort_data
from
Perform_pca
import
perform_pca
from
Perform_clustering
import
perform_clustering
from
Compare_clusters
import
compare_clusters
from
Descriptives
import
total_descriptives
,
cluster_descriptives
#Import all necessary packages
import
numpy
as
np
import
pandas
as
pd
# Plotting
import
matplotlib.pyplot
as
plt
import
seaborn
as
sns
# PCA
from
sklearn.decomposition
import
PCA
from
sklearn.cluster
import
KMeans
from
sklearn
import
metrics
from
factor_analyzer.factor_analyzer
import
calculate_bartlett_sphericity
,
calculate_kmo
#Clustering
from
scipy.cluster.hierarchy
import
dendrogram
,
linkage
from
yellowbrick.cluster
import
KElbowVisualizer
,
SilhouetteVisualizer
# Statistical tests
import
pingouin
as
pg
import
scipy.stats
as
stats
import
statsmodels.api
as
sm
import
scikit_posthocs
as
sp
from
sklearn.preprocessing
import
StandardScaler
from
scipy.stats
import
bartlett
,
levene
,
chi2_contingency
from
pingouin
import
normality
,
kruskal
,
homoscedasticity
from
itertools
import
combinations
from
statsmodels.stats.multitest
import
multipletests
# Turn off warnings
import
warnings
warnings
.
filterwarnings
(
"
ignore
"
)
#%% Import data using my Import_data function file
# Read the full dataset through the project-local load_data helper.
data_complete = load_data("/Volumes/T7/data6_9_2023.csv")

# Label-based column slice: keeps every column from 'q0010_0001' through
# 'q0014_0007' (both ends included, as .loc slices are inclusive).
# NOTE(review): `data` is rebound by the sort_data cell that follows, so this
# selection is only visible when the cells are run interactively one by one.
first_item, last_item = 'q0010_0001', 'q0014_0007'
data = data_complete.loc[:, first_item:last_item]
#%% Call the sort_data function
# sort_data (from Sort_data) takes the complete dataset and hands back nine
# objects; `data` and `data_complete` are rebound to the returned versions.
(
    data,
    DASS,
    PCL,
    questionnaireClusters,
    questionnaireClusters_std,
    std_data,
    columnNames,
    PCAcolumns,
    data_complete,
) = sort_data(data_complete)
#%% Call the perform_pca function
# perform_pca (from Perform_pca) receives the standardized data plus the two
# column-name collections produced by sort_data and returns three objects.
(
    pca,
    loadings,
    principleComponents,
) = perform_pca(
    std_data,
    PCAcolumns,
    columnNames,
)
#%% Call the perform_clustering function
# perform_clustering (from Perform_clustering) is fed the standardized data,
# the PCA output and the questionnaire tables, and returns three objects.
(
    PC234,
    LABELS,
    clusterNames,
) = perform_clustering(
    std_data,
    principleComponents,
    data_complete,
    questionnaireClusters,
    questionnaireClusters_std,
)
#%% Call the function to compare clusters across all variables
# compare_clusters (from Compare_clusters) returns the test p-values, the
# post-hoc p-values and the two variable-name collections reused further down.
(
    p_values,
    posthoc_p_values,
    categorical_variables,
    continuous_variables,
) = compare_clusters(data_complete, questionnaireClusters)

# Print floats with ten fixed decimals so very small p-values are not shown
# in scientific notation.
pd.options.display.float_format = '{:.10f}'.format

# Wrap both result objects in DataFrames for display / further handling.
p_values = pd.DataFrame(p_values)
posthoc_p_values = pd.DataFrame(posthoc_p_values)
#%% Descriptive stats for total N and each k
# Column names handed to the descriptives helpers as the binary variables.
binary_variables = [
    'PTSD_t0_DSMIV',
    'q0002',
    'q0006',
    'civil_status',
    'Psychoanaleptica',
    'Psycholeptica',
    'binge',
    'q0033_0001',
    'ADHD_total_GROUP_t0',
    'drugs',
    'Military_trauma',
    'combat',
    'Unemployed',
]

# Display labels passed as `sorter` to both helpers.
# NOTE(review): presumably the row order of the output tables -- confirm
# against the Descriptives module.
sorter = [
    'Sex (male)',
    'Age',
    'Civil status (single)',
    'Children',
    'Unemployed',
    'Self-rated health',
    'Psychoanaleptica',
    'Psycholeptica',
    'Excessive alcohol intake',
    'Current drug usage',
    'Suicidal history',
    'Probable childhood ADHD',
    'Exposed to war',
    'combat',
    'PCL Intrusion',
    'PCL Avoidance',
    'PCL Numbing',
    'PCL Hyperarousal',
    'DASS Anxiety',
    'DASS Depression',
    'DASS Stress',
    'PCL total score',
    'Probable PTSD diagnosis',
    'Total traumas',
    'Total unique traumas',
]

# Descriptives over the whole sample.
descriptives_total = total_descriptives(
    data_complete,
    questionnaireClusters,
    categorical_variables,
    continuous_variables,
    binary_variables,
    sorter,
)

# Descriptives per cluster; cluster_column names the column to split on.
cluster_column = 'clusters'
descriptives_cluster = cluster_descriptives(
    data_complete,
    questionnaireClusters,
    categorical_variables,
    continuous_variables,
    cluster_column,
    binary_variables,
    sorter,
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment