-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain_OverviewStats.py
84 lines (68 loc) · 3.75 KB
/
main_OverviewStats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import pandas as pd
# Updated paths
MOOD_TRACKING_SHEET_PATH = '/home/jgopal/NAS/Analysis/AudioFacialEEG/Behavioral Labeling/Mood_Tracking.xlsx'
OUTPUT_SPREADSHEET_PATH = '/home/jgopal/NAS/Analysis/AudioFacialEEG/Behavioral Labeling/Mood_Tracking_Overview.xlsx'
# Expanded list of patients
patients_to_include = [
'S_199', 'S_201', 'S_202', 'S_203', 'S_205', 'S_206', 'S_207', 'S_208', 'S_209', 'S_10b',
'S_211', 'S_212', 'S_214', 'S_199b', 'S_174', 'S_217', 'S_218', 'S_219', 'S_221', 'S_224',
'S_226', 'S_227', 'S_230', 'S_231', 'S_233', 'S_234', 'S_236', 'S_237', 'S_238', 'S_239'
]
# Read the Excel file
xls = pd.ExcelFile(MOOD_TRACKING_SHEET_PATH)
# Prepare a list to store the overview data for each patient
overview_data = []
# Process each patient
for sheet_name in patients_to_include:
if sheet_name in xls.sheet_names:
# Read data for the patient
df = pd.read_excel(xls, sheet_name=sheet_name)
# Get columns related to moods (e.g., Mood, Depression, Anxiety)
mood_columns = [col for col in df.columns if col.lower() in ['mood', 'depression', 'anxiety', 'hunger', 'pain']]
# Skip if no mood columns are found
if not mood_columns:
overview_data.append({
'Patient': sheet_name,
**{f"Num_Self_Reports_{mood}": 0 for mood in mood_columns},
**{f"Num_Distinct_Scores_{mood}": 0 for mood in mood_columns}
})
continue
# Initialize a dictionary to store the patient's data
patient_data = {'Patient': sheet_name}
# Process each mood column
for mood in mood_columns:
# Drop NA and empty values for the mood
valid_mood_data = df[mood].dropna().astype(str).str.strip()
valid_mood_data = valid_mood_data[valid_mood_data != '']
# Calculate self-reports and distinct scores
num_self_reports = len(valid_mood_data)
num_distinct_scores = valid_mood_data.nunique()
# Store the results
patient_data[f"Num_Self_Reports_{mood}"] = num_self_reports
patient_data[f"Num_Distinct_Scores_{mood}"] = num_distinct_scores
# Append the patient's data to the overview list
overview_data.append(patient_data)
else:
# Append empty row for patients with no data
overview_data.append({
'Patient': sheet_name,
**{f"Num_Self_Reports_{mood}": 0 for mood in ['Mood', 'Depression', 'Anxiety', 'Hunger', 'Pain']},
**{f"Num_Distinct_Scores_{mood}": 0 for mood in ['Mood', 'Depression', 'Anxiety', 'Hunger', 'Pain']}
})
# Create a DataFrame from the overview data
overview_df = pd.DataFrame(overview_data)
# Save the DataFrame to an Excel file
overview_df.to_excel(OUTPUT_SPREADSHEET_PATH, index=False)
# Print overview numbers with updated statistics
print("Overview Numbers:")
print(f"Total Patients Processed: {len(patients_to_include)}")
print(f"Patients with Data: {overview_df.filter(like='Num_Self_Reports').sum(axis=1).gt(0).sum()}")
for mood in ['Mood', 'Depression', 'Anxiety', 'Hunger', 'Pain']:
mood_columns = overview_df.filter(like=f"Num_Distinct_Scores_{mood}").columns
if not mood_columns.empty:
print(f"\n{mood} Statistics:")
print(f"Mean Num Distinct Scores: {overview_df[mood_columns].mean().mean():.2f}")
print(f"Median Num Distinct Scores: {overview_df[mood_columns].median().median():.2f}")
total_score_columns = overview_df.filter(like=f"Num_Self_Reports_{mood}").columns
print(f"Mean Num Total Scores: {overview_df[total_score_columns].mean().mean():.2f}")
print(f"Median Num Total Scores: {overview_df[total_score_columns].median().median():.2f}")