From 88627b787bf577aad988298d5c5ee30b079f8118 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 19 Feb 2024 18:44:49 +0000 Subject: [PATCH 1/3] Handle some mypy errors and increase test coverage --- mlos_core/mlos_core/optimizers/optimizer.py | 8 +- .../tests/optimizers/one_hot_test.py | 90 ++++++++++++++++--- 2 files changed, 83 insertions(+), 15 deletions(-) diff --git a/mlos_core/mlos_core/optimizers/optimizer.py b/mlos_core/mlos_core/optimizers/optimizer.py index 1d4e5762af5..d4e8759e2a9 100644 --- a/mlos_core/mlos_core/optimizers/optimizer.py +++ b/mlos_core/mlos_core/optimizers/optimizer.py @@ -8,7 +8,7 @@ import collections from abc import ABCMeta, abstractmethod -from typing import List, Optional, Tuple +from typing import List, Optional, Tuple, Union import ConfigSpace import numpy as np @@ -237,7 +237,7 @@ def _from_1hot(self, config: npt.NDArray) -> pd.DataFrame: j += 1 return pd.DataFrame(df_dict) - def _to_1hot(self, config: pd.DataFrame) -> npt.NDArray: + def _to_1hot(self, config: Union[pd.DataFrame, pd.Series]) -> npt.NDArray: """ Convert pandas DataFrame to one-hot-encoded numpy array. """ @@ -253,10 +253,14 @@ def _to_1hot(self, config: pd.DataFrame) -> npt.NDArray: j = 0 for param in self.optimizer_parameter_space.values(): if config.ndim > 1: + assert isinstance(config, pd.DataFrame) col = config.columns.get_loc(param.name) + assert isinstance(col, int) val = config.iloc[i, col] else: + assert isinstance(config, pd.Series) col = config.index.get_loc(param.name) + assert isinstance(col, int) val = config.iloc[col] if isinstance(param, ConfigSpace.CategoricalHyperparameter): offset = param.choices.index(val) diff --git a/mlos_core/mlos_core/tests/optimizers/one_hot_test.py b/mlos_core/mlos_core/tests/optimizers/one_hot_test.py index 748940fb40c..255b01290cf 100644 --- a/mlos_core/mlos_core/tests/optimizers/one_hot_test.py +++ b/mlos_core/mlos_core/tests/optimizers/one_hot_test.py @@ -32,7 +32,7 @@ def data_frame() -> pd.DataFrame: @pytest.fixture -def one_hot() -> npt.NDArray: +def one_hot_data_frame() -> npt.NDArray: """ One-hot encoding of the `data_frame` above. The columns follow the order of the hyperparameters in `configuration_space`. @@ -44,25 +44,69 @@ def one_hot() -> npt.NDArray: ]) -def test_to_1hot(configuration_space: CS.ConfigurationSpace, - data_frame: pd.DataFrame, one_hot: npt.NDArray) -> None: +@pytest.fixture +def series() -> pd.Series: + """ + Toy series corresponding to the `configuration_space` hyperparameters. + The columns are deliberately *not* in alphabetic order. + """ + return pd.Series({ + 'y': 'b', + 'x': 0.4, + 'z': 3, + }) + + +@pytest.fixture +def one_hot_series() -> npt.NDArray: + """ + One-hot encoding of the `series` above. + The columns follow the order of the hyperparameters in `configuration_space`. + """ + return np.array([ + [0.4, 0.0, 1.0, 0.0, 3], + ]) + + +def test_to_1hot_data_frame(configuration_space: CS.ConfigurationSpace, + data_frame: pd.DataFrame, one_hot_data_frame: npt.NDArray) -> None: + """ + Toy problem to test one-hot encoding of dataframe. + """ + optimizer = SmacOptimizer(parameter_space=configuration_space) + assert optimizer._to_1hot(data_frame) == pytest.approx(one_hot_data_frame) + + +def test_to_1hot_series(configuration_space: CS.ConfigurationSpace, + series: pd.Series, one_hot_series: npt.NDArray) -> None: """ - Toy problem to test one-hot encoding. + Toy problem to test one-hot encoding of series. """ optimizer = SmacOptimizer(parameter_space=configuration_space) - assert optimizer._to_1hot(data_frame) == pytest.approx(one_hot) + assert optimizer._to_1hot(series) == pytest.approx(one_hot_series) -def test_from_1hot(configuration_space: CS.ConfigurationSpace, - data_frame: pd.DataFrame, one_hot: npt.NDArray) -> None: +def test_from_1hot_data_frame(configuration_space: CS.ConfigurationSpace, + data_frame: pd.DataFrame, one_hot_data_frame: npt.NDArray) -> None: """ - Toy problem to test one-hot decoding. + Toy problem to test one-hot decoding of dataframe. """ optimizer = SmacOptimizer(parameter_space=configuration_space) - assert optimizer._from_1hot(one_hot).to_dict() == data_frame.to_dict() + assert optimizer._from_1hot(one_hot_data_frame).to_dict() == data_frame.to_dict() -def test_round_trip(configuration_space: CS.ConfigurationSpace, data_frame: pd.DataFrame) -> None: +def test_from_1hot_series(configuration_space: CS.ConfigurationSpace, + series: pd.Series, one_hot_series: npt.NDArray) -> None: + """ + Toy problem to test one-hot decoding of series. + """ + optimizer = SmacOptimizer(parameter_space=configuration_space) + df = optimizer._from_1hot(one_hot_series) + assert df.shape[0] == 1, f"Unexpected number of rows ({df.shape[0]} != 1)" + assert df.iloc[0].to_dict() == series.to_dict() + + +def test_round_trip_data_frame(configuration_space: CS.ConfigurationSpace, data_frame: pd.DataFrame) -> None: """ Round-trip test for one-hot-encoding and then decoding a data frame. """ @@ -73,10 +117,30 @@ def test_round_trip(configuration_space: CS.ConfigurationSpace, data_frame: pd.D assert (df_round_trip.z == data_frame.z).all() -def test_round_trip_reverse(configuration_space: CS.ConfigurationSpace, one_hot: npt.NDArray) -> None: +def test_round_trip_series(configuration_space: CS.ConfigurationSpace, series: pd.DataFrame) -> None: + """ + Round-trip test for one-hot-encoding and then decoding a series. + """ + optimizer = SmacOptimizer(parameter_space=configuration_space) + series_round_trip = optimizer._from_1hot(optimizer._to_1hot(series)) + assert series_round_trip.x.to_numpy() == pytest.approx(series.x) + assert (series_round_trip.y == series.y).all() + assert (series_round_trip.z == series.z).all() + + +def test_round_trip_reverse_data_frame(configuration_space: CS.ConfigurationSpace, one_hot_data_frame: npt.NDArray) -> None: + """ + Round-trip test for one-hot-decoding and then encoding of a numpy array. + """ + optimizer = SmacOptimizer(parameter_space=configuration_space) + round_trip = optimizer._to_1hot(optimizer._from_1hot(one_hot_data_frame)) + assert round_trip == pytest.approx(one_hot_data_frame) + + +def test_round_trip_reverse_series(configuration_space: CS.ConfigurationSpace, one_hot_series: npt.NDArray) -> None: """ Round-trip test for one-hot-decoding and then encoding of a numpy array. """ optimizer = SmacOptimizer(parameter_space=configuration_space) - round_trip = optimizer._to_1hot(optimizer._from_1hot(one_hot)) - assert round_trip == pytest.approx(one_hot) + round_trip = optimizer._to_1hot(optimizer._from_1hot(one_hot_series)) + assert round_trip == pytest.approx(one_hot_series) From eef754d1a4fedc08daa06eefb370ca7cf2405aa6 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 19 Feb 2024 19:18:43 +0000 Subject: [PATCH 2/3] pylint --- mlos_core/mlos_core/tests/optimizers/one_hot_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlos_core/mlos_core/tests/optimizers/one_hot_test.py b/mlos_core/mlos_core/tests/optimizers/one_hot_test.py index 255b01290cf..0c36859b685 100644 --- a/mlos_core/mlos_core/tests/optimizers/one_hot_test.py +++ b/mlos_core/mlos_core/tests/optimizers/one_hot_test.py @@ -101,9 +101,9 @@ def test_from_1hot_series(configuration_space: CS.ConfigurationSpace, Toy problem to test one-hot decoding of series. """ optimizer = SmacOptimizer(parameter_space=configuration_space) - df = optimizer._from_1hot(one_hot_series) - assert df.shape[0] == 1, f"Unexpected number of rows ({df.shape[0]} != 1)" - assert df.iloc[0].to_dict() == series.to_dict() + one_hot_df = optimizer._from_1hot(one_hot_series) + assert one_hot_df.shape[0] == 1, f"Unexpected number of rows ({one_hot_df.shape[0]} != 1)" + assert one_hot_df.iloc[0].to_dict() == series.to_dict() def test_round_trip_data_frame(configuration_space: CS.ConfigurationSpace, data_frame: pd.DataFrame) -> None: From 983876e6ba63a6be70fd203c36e48d47d4484506 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 19 Feb 2024 20:52:14 +0000 Subject: [PATCH 3/3] address some warnings from pandas about pyarrow being required in the future --- conda-envs/mlos-3.10.yml | 1 + conda-envs/mlos-3.11.yml | 1 + conda-envs/mlos-3.8.yml | 1 + conda-envs/mlos-3.9.yml | 1 + conda-envs/mlos-windows.yml | 1 + conda-envs/mlos.yml | 1 + 6 files changed, 6 insertions(+) diff --git a/conda-envs/mlos-3.10.yml b/conda-envs/mlos-3.10.yml index dad480d2fbb..7d7e909e151 100644 --- a/conda-envs/mlos-3.10.yml +++ b/conda-envs/mlos-3.10.yml @@ -35,6 +35,7 @@ dependencies: - types-pytest-lazy-fixture - types-requests - types-setuptools + - pyarrow - "--editable ../mlos_core[full-tests]" - "--editable ../mlos_bench[full-tests]" - "--editable ../mlos_viz[full-tests]" diff --git a/conda-envs/mlos-3.11.yml b/conda-envs/mlos-3.11.yml index 3e7590a562d..7fa88c65d16 100644 --- a/conda-envs/mlos-3.11.yml +++ b/conda-envs/mlos-3.11.yml @@ -35,6 +35,7 @@ dependencies: - types-pytest-lazy-fixture - types-requests - types-setuptools + - pyarrow - "--editable ../mlos_core[full-tests]" - "--editable ../mlos_bench[full-tests]" - "--editable ../mlos_viz[full-tests]" diff --git a/conda-envs/mlos-3.8.yml b/conda-envs/mlos-3.8.yml index 8360168837c..0a426f7250e 100644 --- a/conda-envs/mlos-3.8.yml +++ b/conda-envs/mlos-3.8.yml @@ -35,6 +35,7 @@ dependencies: - types-pytest-lazy-fixture - types-requests - types-setuptools + - pyarrow - "--editable ../mlos_core[full-tests]" - "--editable ../mlos_bench[full-tests]" - "--editable ../mlos_viz[full-tests]" diff --git a/conda-envs/mlos-3.9.yml b/conda-envs/mlos-3.9.yml index 2ac95f127fd..bbc1bc58e37 100644 --- a/conda-envs/mlos-3.9.yml +++ b/conda-envs/mlos-3.9.yml @@ -35,6 +35,7 @@ dependencies: - types-pytest-lazy-fixture - types-requests - types-setuptools + - pyarrow - "--editable ../mlos_core[full-tests]" - "--editable ../mlos_bench[full-tests]" - "--editable ../mlos_viz[full-tests]" diff --git a/conda-envs/mlos-windows.yml b/conda-envs/mlos-windows.yml index 13089222e75..ad2ab72a314 100644 --- a/conda-envs/mlos-windows.yml +++ b/conda-envs/mlos-windows.yml @@ -39,6 +39,7 @@ dependencies: - types-pytest-lazy-fixture - types-requests - types-setuptools + - pyarrow - "--editable ../mlos_core[full-tests]" - "--editable ../mlos_bench[full-tests]" - "--editable ../mlos_viz[full-tests]" diff --git a/conda-envs/mlos.yml b/conda-envs/mlos.yml index 445832f8747..b6aefb7ee02 100644 --- a/conda-envs/mlos.yml +++ b/conda-envs/mlos.yml @@ -33,6 +33,7 @@ dependencies: - types-pytest-lazy-fixture - types-requests - types-setuptools + - pyarrow - "--editable ../mlos_core[full-tests]" - "--editable ../mlos_bench[full-tests]" - "--editable ../mlos_viz[full-tests]"