# calorie_burnt_prediction.py
# -*- coding: utf-8 -*-
"""calorie burnt prediction.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1NEM7bC21J1ab5esZjGGTdINN5-LA68qP
importing dependencies
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn import metrics
"""data collection and processing"""
#import the calories dataset to pandas dataframes
calories = pd.read_csv('/content/calories.csv')
#print first rows of dataframes
calories.head()
#import excercise dataset to pandas dataframe
excercise_data = pd.read_csv('/content/exercise.csv')
#checking first 5 rows of excercise_data
excercise_data.head()
"""combining the two dataframes"""
calories_data = pd.concat([excercise_data, calories['Calories']], axis=1)
calories_data.head()
#checking the num of rows n cols
calories_data.shape
#gettin more info abt the data
calories_data.info()
#checkin for null values
calories_data.isnull().sum()
"""data analysis"""
#get some stats measures abt the data
calories_data.describe()
"""Data visualization"""
sns.set()
#plotting gender column in count plot
sns.countplot(calories_data['Gender'])
#findin the distribution of age cols
sns.distplot(calories_data['Age'])
#findin the distribution of height cols
sns.distplot(calories_data['Height'])
#findin the distribution of weight cols
sns.distplot(calories_data['Weight'])
#findin the distribution of heart rate cols
sns.distplot(calories_data['Heart_Rate'])
"""Finding the correlation in dataset
postive correlation
negative correlation
"""
correlation = calories_data.corr(numeric_only=True)
#constructing to understand the correlation
plt.figure(figsize=(10,10))
sns.heatmap(correlation, cbar=True, square=True, fmt='.1f', annot=True, annot_kws={'size':8}, cmap='Blues')
"""converting the txt data to numerical values"""
calories_data.replace({"Gender":{'male':0,'female':1}}, inplace=True)
calories_data.head()
"""separating features and target"""
X = calories_data.drop(columns=['User_ID','Calories'],axis=1)
Y = calories_data['Calories']
print(X)
print(Y)
"""splitting the data into training and test data"""
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size=0.2, random_state=2)
print(X.shape, X_train.shape, X_test.shape)
"""model training
XGBoost regressor
"""
#loading model
model = xgb.XGBRegressor()
#training the model with X_train
model.fit(X_train, Y_train)
"""Evaluation
Prediction on Test Data
"""
test_data_prediction = model.predict(X_test)
print(test_data_prediction)
"""Mean Absolute Error"""
mae = metrics.mean_absolute_error(Y_test, test_data_prediction)
print("Mean Absolute Error = ", mae)