-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathensembling.py
54 lines (43 loc) · 1.58 KB
/
ensembling.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# -*- coding: utf-8 -*-
"""
@author: fornax
"""
from __future__ import division, print_function
import numpy as np
import pandas as pd
import utils
def sub(df, d):
"""
Function computes and substitutes ensembles of models for designated power lines.
Ensembling is done via a simply averaging of predictions.
:param df: data frame with predictions for each power line
:param d: dictionary having power line names as keys, predictions to ensemble
as values
:return: data frame with predictions ensembled
"""
for p_col in d.keys():
cols = [d[p_col][i][p_col] for i in range(len(d[p_col]))]
df[p_col] = np.mean(cols, axis=0)
return df
# whether to use a "mask" for NPWD2551 (see README for an explanation)
USE_MASK_2551 = False
# read all predictions
xgb = pd.read_csv('results_4y/xgb.csv', index_col=0)
nn_2451 = pd.read_csv('results_4y/nn_2451.csv', index_col=0)
nn_2851 = pd.read_csv('results_4y/nn_2851.csv', index_col=0)
# ensemble XGB and NN for two power lines
to_sub = {}
to_sub['NPWD2451'] = [xgb, nn_2451]
to_sub['NPWD2851'] = [xgb, nn_2851]
pout = xgb.copy()
pout = sub(pout, to_sub)
# apply mask for NPWD2551
if USE_MASK_2551:
Y = pd.read_csv('merged_data/dataset1.csv', index_col=0,
usecols=['ut_ms', 'm_year', 'mission_time', 'ATTT_current_305C_305O_305P_306C_306P'])
Y = utils.linear_interpolate(Y.mission_time, Y)
Y_test = Y.loc[Y.m_year == 3]
mask = 1-Y_test['ATTT_current_305C_305O_305P_306C_306P'].values
pout.NPWD2551 = pout.NPWD2551 * mask
# save the result
pout.to_csv('results_4y/ensemble.csv')