Adding RMSE With Uncertainty loss to CatBoost #1044

Merged: 31 commits, Jul 6, 2022

Commits (31)

6e4d847  first commit (jonasracine, May 5, 2022)
e5808a4  back to it (jonasracine, May 24, 2022)
63216d7  commit before writing class (jonasracine, Jun 2, 2022)
04c5e7f  added catboost to init (jonasracine, Jun 8, 2022)
fc53fad  catboost works same as lgbm (jonasracine, Jun 13, 2022)
9501fbd  late (jonasracine, Jun 13, 2022)
2b6f970  catboost requirement (jonasracine, Jun 13, 2022)
a931254  Merge branch 'master' into jonas_catboost_861 (hrzn, Jun 14, 2022)
7f122cc  renaming branch (jonasracine, Jun 15, 2022)
e345aac  tests pass for catboost (jonasracine, Jun 16, 2022)
391225d  removed if main (jonasracine, Jun 16, 2022)
7e433d7  Merge branch 'master' into jonas_catboost_861 (jonasracine, Jun 16, 2022)
836bb91  test should pass (jonasracine, Jun 16, 2022)
41a0f92  cleaned up catboost logs (jonasracine, Jun 16, 2022)
9a4015e  Merge branch 'jonas_catboost_861' of github.com:unit8co/darts into jo… (jonasracine, Jun 16, 2022)
43f6ae7  Merge branch 'master' into jonas_catboost_861 (hrzn, Jun 17, 2022)
41acaea  rmse_w_uncertainty almost passes tests (jonasracine, Jun 20, 2022)
0232dcf  some merging (jonasracine, Jun 20, 2022)
d7e611d  some cleaning (jonasracine, Jun 21, 2022)
726dedc  tests pass for all cases (jonasracine, Jun 26, 2022)
76222d5  cleaned up and multivariate sampling (jonasracine, Jun 26, 2022)
7dd2b4d  all good (jonasracine, Jun 26, 2022)
7bf0eec  removed catboost info (jonasracine, Jun 26, 2022)
4ecf777  some comments and cleaning (jonasracine, Jun 27, 2022)
1ebd3c0  Merge branch 'master' into feat/rmse-uncertainty-catboost (jonasracine, Jun 27, 2022)
919d5a0  retry sorting (jonasracine, Jun 28, 2022)
abe095d  switched to univariate normal sampling (jonasracine, Jun 30, 2022)
acdb11b  changed to num_samples (jonasracine, Jul 1, 2022)
b50b83b  pre shape wrangling (jonasracine, Jul 6, 2022)
2733a51  no shape wrangling, done cleaning the PR (jonasracine, Jul 6, 2022)
34f7094  Merge branch 'master' into feat/rmse-uncertainty-catboost (hrzn, Jul 6, 2022)
28 changes: 13 additions & 15 deletions darts/models/forecasting/catboost_model.py
@@ -4,13 +4,12 @@

This is a wrapper that enables using the CatBoost regressor as a model
"""

from typing import List, Optional, Sequence, Tuple, Union

import numpy as np
from catboost import CatBoostRegressor

from darts.logging import get_logger, raise_if
from darts.logging import get_logger
from darts.models.forecasting.regression_model import RegressionModel, _LikelihoodMixin
from darts.timeseries import TimeSeries

@@ -50,8 +49,10 @@ def __init__(
horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
be useful if the covariates don't extend far enough into the future.
likelihood
Can be set to `quantile` or 'poisson'. If set, the model will be probabilistic, allowing sampling at
prediction time.
Can be set to 'quantile', 'poisson' or 'gaussian'. If set, the model will be probabilistic,
allowing sampling at prediction time. When set to 'gaussian', the model will use CatBoost's
'RMSEWithUncertainty' loss function. With this loss function, CatBoost returns a (mean, variance)
pair, which captures the data (aleatoric) uncertainty.
quantiles
Fit the model to these quantiles if the `likelihood` is set to `quantile`.
random_state
@@ -68,21 +69,13 @@ def __init__(
self.likelihood = likelihood
self.quantiles = None

if "loss_function" in kwargs.keys():
raise_if(
kwargs["loss_function"] == "RMSEWithUncertainty",
"The loss function RMSEWithUncertainty is not supported by darts.",
)
elif "objective" in kwargs.keys():
raise_if(
kwargs["objective"] == "RMSEWithUncertainty",
"The objective RMSEWithUncertainty is not supported by darts.",
)
self.output_chunk_length = output_chunk_length

# to be extended to RMSEWithUncertainty
likelihood_map = {
"quantile": None,
"poisson": "Poisson",
"gaussian": "RMSEWithUncertainty",
"RMSEWithUncertainty": "RMSEWithUncertainty",
}

available_likelihoods = list(likelihood_map.keys())
@@ -193,10 +186,15 @@ def fit(
def _predict_and_sample(
self, x: np.ndarray, num_samples: int, **kwargs
) -> np.ndarray:
"""Override of RegressionModel's predict method,
to allow for the probabilistic case
"""
if self.likelihood == "quantile":
return self._predict_quantiles(x, num_samples, **kwargs)
elif self.likelihood == "poisson":
return self._predict_poisson(x, num_samples, **kwargs)
elif self.likelihood in ["gaussian", "RMSEWithUncertainty"]:
return self._predict_normal(x, num_samples, **kwargs)
else:
return super()._predict_and_sample(x, num_samples, **kwargs)

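
For reference, a minimal usage sketch of the new likelihood option (illustrative only; it assumes a darts release containing this PR and uses the bundled AirPassengers dataset):

    from darts.datasets import AirPassengersDataset
    from darts.models import CatBoostModel

    series = AirPassengersDataset().load()

    # likelihood="gaussian" makes CatBoost train with its RMSEWithUncertainty loss
    model = CatBoostModel(lags=12, likelihood="gaussian", random_state=42)
    model.fit(series)

    # num_samples > 1 takes the probabilistic path (_predict_normal above);
    # num_samples=1 returns only the predicted mean
    forecast = model.predict(n=6, num_samples=100)
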
73 changes: 72 additions & 1 deletion darts/models/forecasting/regression_model.py
@@ -29,6 +29,7 @@

import numpy as np
import pandas as pd
from catboost import CatBoostRegressor
from sklearn.linear_model import LinearRegression
from sklearn.multioutput import MultiOutputRegressor

@@ -417,6 +418,15 @@ def fit(
self.model = MultiOutputRegressor(
self.model, n_jobs=n_jobs_multioutput_wrapper
)
elif isinstance(self.model, CatBoostRegressor):
if (
self.model.get_params()["loss_function"]
== "RMSEWithUncertainty"
):
self.model = MultiOutputRegressor(
self.model, n_jobs=n_jobs_multioutput_wrapper
)

# warn if n_jobs_multioutput_wrapper was provided but not used
if (
not isinstance(self.model, MultiOutputRegressor)
@@ -635,7 +645,7 @@ def __str__(self):

class _LikelihoodMixin:
"""
A class containing functions supporting quantile and poisson regression, to be used as a mixin for some
A class containing functions supporting quantile, poisson and gaussian regression, to be used as a mixin for some
`RegressionModel` subclasses.
"""

@@ -700,6 +710,67 @@ def _predict_quantiles(

return sampled

def _predict_normal(self, x: np.ndarray, num_samples: int, **kwargs) -> np.ndarray:
"""Method intended for CatBoost's RMSEWithUncertainty loss. Returns samples
drawn from the model's [mean, variance] output pairs.
X is of shape (n_series * n_samples, n_regression_features)
"""
k = x.shape[0]

# model_output shape:
# if univariate & output_chunk_length = 1: (num_samples, 2)
# else: (2, num_samples, n_components * output_chunk_length)
# where the axis with 2 dims is mu, sigma
model_output = self.model.predict(x, **kwargs)
output_dim = len(model_output.shape)

# deterministic case: we return the mean only
if num_samples == 1:
# univariate & single-chunk output
if output_dim <= 2:
output_slice = model_output[:, 0]
else:
output_slice = model_output[0, :, :]

return output_slice.reshape(k, self.output_chunk_length, -1)

# probabilistic case
# univariate & single-chunk output
if output_dim <= 2:
# embedding well shaped 2D output into 3D
model_output = np.expand_dims(model_output, axis=0)

else:
# we transpose to get mu, sigma couples on last axis
# shape becomes: (n_components * output_chunk_length, num_samples, 2)
model_output = model_output.transpose()

return self._normal_sampling(model_output, num_samples)

def _normal_sampling(self, model_output: np.ndarray, n_samples: int) -> np.ndarray:
"""Sampling method for CatBoost's [mean, variance] output.
model_output is of shape (n_components * output_chunk_length, n_samples, 2),
where the last 2 dimensions are mu and sigma.
"""
shape = model_output.shape
chunk_len = self.output_chunk_length

# treating each component separately
mu_sigma_list = [model_output[i, :, :] for i in range(shape[0])]

list_of_samples = [
self._rng.normal(
mu_sigma[:, 0], # mean vector
mu_sigma[:, 1],  # scale (standard deviation) of each sample
)
for mu_sigma in mu_sigma_list
]

samples_transposed = np.array(list_of_samples).transpose()
samples_reshaped = samples_transposed.reshape(n_samples, chunk_len, -1)

return samples_reshaped

def _predict_poisson(self, x: np.ndarray, num_samples: int, **kwargs) -> np.ndarray:
"""
X is of shape (n_series * n_samples, n_regression_features)
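
As a standalone illustration of the sampling step added above, here is a NumPy-only sketch with made-up shapes (not the darts code itself); it mirrors what _normal_sampling does with the (n_components * output_chunk_length, num_samples, 2) array of [mu, sigma] rows:

    import numpy as np

    rng = np.random.default_rng(0)
    num_samples, output_chunk_length, n_components = 100, 2, 3

    # last axis holds [mu, sigma] for each (component, chunk step) row
    model_output = rng.random((n_components * output_chunk_length, num_samples, 2))

    # one Normal draw per row, then transpose/reshape as in _normal_sampling
    list_of_samples = [rng.normal(row[:, 0], row[:, 1]) for row in model_output]
    samples = np.array(list_of_samples).transpose()  # (num_samples, components * chunk)
    samples = samples.reshape(num_samples, output_chunk_length, -1)
    print(samples.shape)  # (100, 2, 3)
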
31 changes: 31 additions & 0 deletions darts/tests/models/forecasting/test_regression_models.py
@@ -180,6 +180,11 @@ class RegressionModelsTestCase(DartsBaseTestClass):
likelihood="poisson",
random_state=42,
)
NormalCatBoostModel = partialclass(
CatBoostModel,
likelihood="gaussian",
random_state=42,
)
QuantileLightGBMModel = partialclass(
LightGBMModel,
likelihood="quantile",
@@ -207,6 +212,7 @@ class RegressionModelsTestCase(DartsBaseTestClass):
PoissonLightGBMModel,
PoissonLinearRegressionModel,
PoissonCatBoostModel,
NormalCatBoostModel,
]
)

@@ -222,6 +228,7 @@ class RegressionModelsTestCase(DartsBaseTestClass):
0.4, # PoissonLightGBMModel
0.4, # PoissonLinearRegressionModel
1e-01, # PoissonCatBoostModel
1e-05, # NormalCatBoostModel
]
multivariate_accuracies = [
0.3,
@@ -235,6 +242,7 @@
0.4,
0.4,
0.15,
1e-05,
]
multivariate_multiseries_accuracies = [
0.05,
@@ -248,6 +256,7 @@
0.4,
0.4,
1e-01,
1e-03,
]

# dummy feature and target TimeSeries instances
@@ -991,6 +1000,23 @@ def test_gradient_boosted_model_with_eval_set(self, lgb_fit_patch):
assert lgb_fit_patch.call_args[1]["eval_set"] is not None
assert lgb_fit_patch.call_args[1]["early_stopping_rounds"] == 2

@patch.object(darts.models.forecasting.catboost_model.CatBoostRegressor, "fit")
def test_catboost_model_with_eval_set(self, lgb_fit_patch):
"""Test whether these evaluation set parameters are passed to CatBoostRegressor"""
model = CatBoostModel(lags=4, lags_past_covariates=2)
model.fit(
series=self.sine_univariate1,
past_covariates=self.sine_multivariate1,
val_series=self.sine_univariate1,
val_past_covariates=self.sine_multivariate1,
early_stopping_rounds=2,
)

lgb_fit_patch.assert_called_once()

assert lgb_fit_patch.call_args[1]["eval_set"] is not None
assert lgb_fit_patch.call_args[1]["early_stopping_rounds"] == 2

class ProbabilisticRegressionModelsTestCase(DartsBaseTestClass):
models_cls_kwargs_errs = [
(
@@ -1033,6 +1059,11 @@ class ProbabilisticRegressionModelsTestCase(DartsBaseTestClass):
{"lags": 2, "likelihood": "poisson", "random_state": 42},
0.6,
),
(
CatBoostModel,
{"lags": 2, "likelihood": "gaussian", "random_state": 42},
0.05,
),
(
LinearRegressionModel,
{"lags": 2, "likelihood": "quantile", "random_state": 42},
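
Finally, a hedged sketch of what the new probabilistic CatBoost entry roughly exercises (simplified; the actual assertions and tolerances live in ProbabilisticRegressionModelsTestCase):

    import numpy as np
    from darts.models import CatBoostModel
    from darts.utils.timeseries_generation import sine_timeseries

    series = sine_timeseries(length=100)
    model = CatBoostModel(lags=2, likelihood="gaussian", random_state=42)
    model.fit(series[:80])

    pred = model.predict(n=20, num_samples=200)
    assert pred.n_samples == 200  # stochastic forecast carries a sample dimension

    # the sample mean should track the (noise-free) sine reasonably well
    vals = pred.all_values()  # shape: (20 time steps, 1 component, 200 samples)
    print(np.abs(vals.mean(axis=2) - series[80:].values()).mean())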