From e72057a35553dda51bb22b28f40e0ebe1ebe5f04 Mon Sep 17 00:00:00 2001 From: Julia Shenshina Date: Wed, 27 Jul 2022 15:30:02 +0300 Subject: [PATCH 1/4] Fix constant_value in imputer --- etna/transforms/missing_values/imputation.py | 12 ++++-- .../test_impute_transform.py | 42 +++++++++++++++---- 2 files changed, 43 insertions(+), 11 deletions(-) diff --git a/etna/transforms/missing_values/imputation.py b/etna/transforms/missing_values/imputation.py index 85450a2f0..cc22e3a71 100644 --- a/etna/transforms/missing_values/imputation.py +++ b/etna/transforms/missing_values/imputation.py @@ -39,7 +39,7 @@ def __init__( window: int, seasonality: int, default_value: Optional[float], - constant_value: int = 0, + constant_value: float = 0, ): """ Create instance of _OneSegmentTimeSeriesImputerTransform. @@ -74,6 +74,8 @@ def __init__( the length of the seasonality default_value: value which will be used to impute the NaNs left after applying the imputer with the chosen strategy + constant_value: + value to fill gaps in "constant" strategy Raises ------ @@ -86,7 +88,7 @@ def __init__( self.window = window self.seasonality = seasonality self.default_value = default_value - self.fill_value: Optional[int] = None + self.fill_value: Optional[float] = None self.nan_timestamps: Optional[List[pd.Timestamp]] = None def fit(self, df: pd.DataFrame) -> "_OneSegmentTimeSeriesImputerTransform": @@ -227,7 +229,7 @@ def __init__( window: int = -1, seasonality: int = 1, default_value: Optional[float] = None, - constant_value: int = 0, + constant_value: float = 0, ): """ Create instance of TimeSeriesImputerTransform. @@ -262,6 +264,8 @@ def __init__( the length of the seasonality default_value: value which will be used to impute the NaNs left after applying the imputer with the chosen strategy + constant_value: + value to fill gaps in "constant" strategy Raises ------ @@ -273,6 +277,7 @@ def __init__( self.window = window self.seasonality = seasonality self.default_value = default_value + self.constant_value = constant_value super().__init__( transform=_OneSegmentTimeSeriesImputerTransform( in_column=self.in_column, @@ -280,6 +285,7 @@ def __init__( window=self.window, seasonality=self.seasonality, default_value=self.default_value, + constant_value=self.constant_value, ) ) diff --git a/tests/test_transforms/test_missing_values/test_impute_transform.py b/tests/test_transforms/test_missing_values/test_impute_transform.py index 5b969bc62..a53011c65 100644 --- a/tests/test_transforms/test_missing_values/test_impute_transform.py +++ b/tests/test_transforms/test_missing_values/test_impute_transform.py @@ -78,25 +78,37 @@ def test_all_missing_impute_fail_two_segments(df_all_missing_two_segments: pd.Da _ = imputer.fit_transform(df_all_missing_two_segments) -def test_one_missing_value_zero(df_with_missing_value_x_index: pd.DataFrame): - """Check that imputer with constant-strategy with zero value correctly in case of one missing value in data.""" +@pytest.mark.parametrize("constant_value", (0, 42)) +def test_one_missing_value_zero(df_with_missing_value_x_index: pd.DataFrame, constant_value: float): + """Check that imputer with constant-strategy works correctly in case of one missing value in data.""" df, idx = df_with_missing_value_x_index imputer = _OneSegmentTimeSeriesImputerTransform( - in_column="target", strategy="constant", window=-1, seasonality=1, default_value=None + in_column="target", + strategy="constant", + window=-1, + seasonality=1, + default_value=None, + constant_value=constant_value, ) result = imputer.fit_transform(df)["target"] - assert result.loc[idx] == 0 + assert result.loc[idx] == constant_value assert not result.isna().any() -def test_range_missing_zero(df_with_missing_range_x_index: pd.DataFrame): - """Check that imputer with constant-strategy with zero value works correctly in case of range of missing values in data.""" +@pytest.mark.parametrize("constant_value", (0, 42)) +def test_range_missing_constant(df_with_missing_range_x_index: pd.DataFrame, constant_value: float): + """Check that imputer with constant-strategy works correctly in case of range of missing values in data.""" df, rng = df_with_missing_range_x_index imputer = _OneSegmentTimeSeriesImputerTransform( - in_column="target", strategy="constant", window=-1, seasonality=1, default_value=None + in_column="target", + strategy="constant", + window=-1, + seasonality=1, + default_value=None, + constant_value=constant_value, ) result = imputer.fit_transform(df)["target"] - expected_series = pd.Series(index=rng, data=[0 for _ in rng], name="target") + expected_series = pd.Series(index=rng, data=[constant_value for _ in rng], name="target") np.testing.assert_array_almost_equal(result.loc[rng].reset_index(drop=True), expected_series) assert not result.isna().any() @@ -360,3 +372,17 @@ def test_fit_transform_nans_at_the_end(fill_strategy, ts_diff_endings): imputer = TimeSeriesImputerTransform(in_column="target", strategy=fill_strategy) ts_diff_endings.fit_transform([imputer]) assert (ts_diff_endings[:, :, "target"].isna()).sum().sum() == 0 + + +@pytest.mark.parametrize("constant_value", (0, 32)) +def test_constant_fill_strategy(df_with_missing_range_x_index_two_segments: pd.DataFrame, constant_value: float): + raw_df, rng = df_with_missing_range_x_index_two_segments + inferred_freq = pd.infer_freq(raw_df.index[-5:]) + ts = TSDataset(raw_df, freq=inferred_freq) + imputer = TimeSeriesImputerTransform( + in_column="target", strategy="constant", constant_value=constant_value, default_value=constant_value - 1 + ) + ts.fit_transform([imputer]) + df = ts.to_pandas(flatten=False) + for segment in ["segment_1", "segment_2"]: + np.testing.assert_array_equal(df.loc[rng][segment]["target"].values, [constant_value] * 5) From 3852f5b6d6a3bf8da7d28b3bb4ddb4118dbb313a Mon Sep 17 00:00:00 2001 From: Julia Shenshina Date: Wed, 27 Jul 2022 15:33:04 +0300 Subject: [PATCH 2/4] Upd CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8b5523630..c656f3b3d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,7 +34,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - - ### Fixed -- +- Fix missing `constant_value` in `TimeSeriesImputerTransform` ([#819](https://github.com/tinkoff-ai/etna/pull/819)) - - - Make in-sample predictions of SARIMAXModel non-dynamic in all cases ([#812](https://github.com/tinkoff-ai/etna/pull/812)) From 3a79182cf1444dc08117496c5372b15a56aa1009 Mon Sep 17 00:00:00 2001 From: Julia Shenshina Date: Fri, 29 Jul 2022 09:12:32 +0300 Subject: [PATCH 3/4] Fix test name, fix deprecation warning --- etna/transforms/missing_values/imputation.py | 2 +- .../test_missing_values/test_impute_transform.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/etna/transforms/missing_values/imputation.py b/etna/transforms/missing_values/imputation.py index cc22e3a71..eff65e180 100644 --- a/etna/transforms/missing_values/imputation.py +++ b/etna/transforms/missing_values/imputation.py @@ -112,7 +112,7 @@ def fit(self, df: pd.DataFrame) -> "_OneSegmentTimeSeriesImputerTransform": self.nan_timestamps = series[series.isna()].index if self.strategy == ImputerMode.zero: warnings.warn( - "zero strategy will be removed in etna 1.12.0. Use constant strategy instead.", + "zero strategy will be removed in etna 2.0.0. Use constant strategy instead.", DeprecationWarning, stacklevel=2, ) diff --git a/tests/test_transforms/test_missing_values/test_impute_transform.py b/tests/test_transforms/test_missing_values/test_impute_transform.py index a53011c65..ea9154ff7 100644 --- a/tests/test_transforms/test_missing_values/test_impute_transform.py +++ b/tests/test_transforms/test_missing_values/test_impute_transform.py @@ -79,7 +79,7 @@ def test_all_missing_impute_fail_two_segments(df_all_missing_two_segments: pd.Da @pytest.mark.parametrize("constant_value", (0, 42)) -def test_one_missing_value_zero(df_with_missing_value_x_index: pd.DataFrame, constant_value: float): +def test_one_missing_value_constant(df_with_missing_value_x_index: pd.DataFrame, constant_value: float): """Check that imputer with constant-strategy works correctly in case of one missing value in data.""" df, idx = df_with_missing_value_x_index imputer = _OneSegmentTimeSeriesImputerTransform( From 31b5909980a6071891a1ff1c57f99bae0cdd4259 Mon Sep 17 00:00:00 2001 From: Julia Shenshina Date: Fri, 29 Jul 2022 09:49:43 +0300 Subject: [PATCH 4/4] Fix init order --- etna/transforms/missing_values/imputation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etna/transforms/missing_values/imputation.py b/etna/transforms/missing_values/imputation.py index eff65e180..a429307ab 100644 --- a/etna/transforms/missing_values/imputation.py +++ b/etna/transforms/missing_values/imputation.py @@ -84,10 +84,10 @@ def __init__( """ self.in_column = in_column self.strategy = ImputerMode(strategy) - self.constant_value = constant_value self.window = window self.seasonality = seasonality self.default_value = default_value + self.constant_value = constant_value self.fill_value: Optional[float] = None self.nan_timestamps: Optional[List[pd.Timestamp]] = None