Skip to content

Add default params_to_tune for LabelEncoderTransform #1242

Merged
merged 5 commits on Apr 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add default `params_to_tune` for `MedianOutliersTransform`, `DensityOutliersTransform` and `PredictionIntervalOutliersTransform` ([#1231](https://github.com/tinkoff-ai/etna/pull/1231))
- Add default `params_to_tune` for `TimeSeriesImputerTransform` ([#1232](https://github.com/tinkoff-ai/etna/pull/1232))
- Add default `params_to_tune` for `DifferencingTransform`, `MedianTransform`, `MaxTransform`, `MinTransform`, `QuantileTransform`, `StdTransform`, `MeanTransform`, `MADTransform`, `MinMaxDifferenceTransform`, `SumTransform`, `BoxCoxTransform`, `YeoJohnsonTransform`, `MaxAbsScalerTransform`, `MinMaxScalerTransform`, `RobustScalerTransform` and `StandardScalerTransform` ([#1233](https://github.com/tinkoff-ai/etna/pull/1233))
- Add default `params_to_tune` for `LabelEncoderTransform` ([#1242](https://github.com/tinkoff-ai/etna/pull/1242))
### Fixed
- Fix bug in `GaleShapleyFeatureSelectionTransform` with wrong number of remaining features ([#1110](https://github.com/tinkoff-ai/etna/pull/1110))
- `ProphetModel` fails with additional seasonality set ([#1157](https://github.com/tinkoff-ai/etna/pull/1157))
Expand All @@ -68,6 +69,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Fix `DifferencingTransform` to raise error on new segments during `transform` and `inverse_transform` in inplace mode ([#1141](https://github.com/tinkoff-ai/etna/pull/1141))
- Teach `DifferencingTransform` to `inverse_transform` with NaNs ([#1155](https://github.com/tinkoff-ai/etna/pull/1155))
- Teach `BaseMixin.set_params` to work with nested `list` and `tuple` ([#1201](https://github.com/tinkoff-ai/etna/pull/1201))
-
### Removed
- `sample_acf_plot`, `sample_pacf_plot`, `CatBoostModelPerSegment`, `CatBoostModelMultiSegment` ([#1118](https://github.com/tinkoff-ai/etna/pull/1118))
- `PytorchForecastingTransform` ([#971](https://github.com/tinkoff-ai/etna/pull/971))
Expand Down
20 changes: 20 additions & 0 deletions etna/transforms/encoders/categorical.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from enum import Enum
from typing import Dict
from typing import List
from typing import Optional

Expand All @@ -8,9 +9,14 @@
from sklearn.utils._encode import _check_unknown
from sklearn.utils._encode import _encode

from etna import SETTINGS
from etna.datasets import TSDataset
from etna.transforms.base import IrreversibleTransform

if SETTINGS.auto_required:
from optuna.distributions import BaseDistribution
from optuna.distributions import CategoricalDistribution


class ImputerMode(str, Enum):
"""Enum for different imputation strategy."""
Expand Down Expand Up @@ -130,6 +136,20 @@ def _get_column_name(self) -> str:
return self.out_column
return self.__repr__()

def params_to_tune(self) -> Dict[str, "BaseDistribution"]:
    """Get default grid for tuning hyperparameters.

    This grid tunes only ``strategy`` parameter. Other parameters are expected to be set by the user.

    Returns
    -------
    :
        Grid to tune.
    """
    # Only the imputation strategy for unseen labels is tuned; "new_value" maps
    # unknown categories to -1, "mean" maps them to the mean of seen targets.
    return {
        "strategy": CategoricalDistribution(["new_value", "mean"]),
    }


class OneHotEncoderTransform(IrreversibleTransform):
"""Encode categorical feature as a one-hot numeric features.
Expand Down
14 changes: 14 additions & 0 deletions tests/test_transforms/test_encoders/test_categorical_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from etna.transforms import FilterFeaturesTransform
from etna.transforms.encoders.categorical import LabelEncoderTransform
from etna.transforms.encoders.categorical import OneHotEncoderTransform
from tests.test_transforms.utils import assert_sampling_is_valid
from tests.test_transforms.utils import assert_transformation_equals_loaded_original


Expand Down Expand Up @@ -350,3 +351,16 @@ def test_save_load_ohe(dtype):
def test_get_regressors_info_not_fitted(transform):
    """Requesting regressors info from an unfitted transform should raise ValueError."""
    expected_message = "Fit the transform to get the correct regressors info!"
    with pytest.raises(ValueError, match=expected_message):
        _ = transform.get_regressors_info()


def test_params_to_tune_ohe():
    """OneHotEncoderTransform exposes an empty default tuning grid."""
    ohe_transform = OneHotEncoderTransform(in_column="regressor_0")
    grid = ohe_transform.params_to_tune()
    assert len(grid) == 0


def test_params_to_tune_label_encoder(ts_for_label_encoding):
    """LabelEncoderTransform exposes a non-empty tuning grid and sampling from it is valid."""
    ts, _ = ts_for_label_encoding
    for idx in range(3):
        encoder = LabelEncoderTransform(in_column=f"regressor_{idx}", out_column="test")
        grid = encoder.params_to_tune()
        assert len(grid) > 0
        assert_sampling_is_valid(transform=encoder, ts=ts)
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,8 @@ def test_fit_transform_with_nans(ts_diff_endings):
def test_save_load(almost_constant_ts):
    """A saved-then-loaded MeanSegmentEncoderTransform behaves like the original."""
    encoder = MeanSegmentEncoderTransform()
    assert_transformation_equals_loaded_original(transform=encoder, ts=almost_constant_ts)


def test_params_to_tune():
    """MeanSegmentEncoderTransform exposes an empty default tuning grid."""
    grid = MeanSegmentEncoderTransform().params_to_tune()
    assert len(grid) == 0
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,8 @@ def test_new_segments_error(simple_ts):
def test_save_load(example_tsds):
    """A saved-then-loaded SegmentEncoderTransform behaves like the original."""
    encoder = SegmentEncoderTransform()
    assert_transformation_equals_loaded_original(transform=encoder, ts=example_tsds)


def test_params_to_tune():
    """SegmentEncoderTransform exposes an empty default tuning grid."""
    grid = SegmentEncoderTransform().params_to_tune()
    assert len(grid) == 0