-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
52 lines (42 loc) · 1.52 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import os
from pathlib import Path
from src.data_preparation import load_and_clean_data
from src.data_analysis import (
analyze_data,
print_analysis,
analyze_genre_preferences,
print_genre_analysis,
analyze_keywords,
print_keyword_analysis
)
from src.feature_engineering import create_base_features
from src.model_training import train_host_model, print_model_metrics, setup_logging
def main():
    """Run the movie rating prediction pipeline as a script entry point.

    Resolves ``data/raw/Sneakpod Punkte.csv`` relative to the directory of
    this file. If that file does not exist, an error message is printed and
    the function returns without calling any pipeline step.

    Otherwise the steps run in this order: ``setup_logging``,
    ``load_and_clean_data``, ``create_base_features``, ``analyze_data`` /
    ``print_analysis``, ``analyze_genre_preferences`` /
    ``print_genre_analysis``, and finally ``train_host_model`` /
    ``print_model_metrics`` once per host column.

    Returns:
        None.
    """
    data_path = Path(__file__).parent / "data" / "raw" / "Sneakpod Punkte.csv"

    print("Movie Rating Predictor")
    print("=====================")

    # Guard clause: none of the steps below can run without the raw data.
    if not data_path.exists():
        print(f"Fehler: Keine Daten gefunden unter {data_path}")
        return

    # Logging is configured only once we know there is work to do.
    setup_logging()

    # Load and clean the data (the loader is handed a plain string path).
    df = load_and_clean_data(str(data_path))

    # Feature engineering.
    features = create_base_features(df)

    # General data analysis.
    analysis = analyze_data(df)
    print_analysis(df, features, analysis)

    # Genre analysis.
    genre_stats = analyze_genre_preferences(df)
    print_genre_analysis(genre_stats)

    # Train one model per host; each host's ratings are a column of df.
    for host in ("Christoph", "Robert", "Stefan"):
        ratings = df[host]
        # Skip hosts whose column is empty once missing values are dropped.
        if len(ratings.dropna()) == 0:
            continue
        # The fitted model itself is not used here, only its metrics.
        metrics, _ = train_host_model(features, ratings, host)
        print_model_metrics(metrics, host)
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()