Commit 9e168f2

add get_feature_names_out method

nicklamiller committed Jun 1, 2024
1 parent 46b18bc commit 9e168f2

Showing 2 changed files with 49 additions and 0 deletions.
6 changes: 6 additions & 0 deletions python-package/lightgbm/sklearn.py
@@ -1013,6 +1013,12 @@ def predict(
            **predict_params,
        )

    def get_feature_names_out(self) -> np.ndarray:
        """:obj:`array` of shape = [n_features]: Get output features of fitted model."""
        if not self.__sklearn_is_fitted__():
            raise LGBMNotFittedError("Output features cannot be determined. Need to call fit beforehand.")
        return self.feature_names_in_

    predict.__doc__ = _lgbmmodel_doc_predict.format(
        description="Return the predicted value for each sample.",
        X_shape="numpy array, pandas DataFrame, H2O DataTable's Frame, scipy.sparse, list of lists of int or float of shape = [n_samples, n_features]",
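For context, a minimal usage sketch of the new method once this change is available. The data, column names, and n_estimators value below are illustrative and not part of the commit; with a pandas DataFrame input, the column names recorded in feature_names_in_ are what get_feature_names_out returns.

import numpy as np
import pandas as pd
import lightgbm as lgb

# Illustrative data; DataFrame column names become feature_names_in_ after fit.
rng = np.random.default_rng(0)
X = pd.DataFrame(rng.random((100, 3)), columns=["f0", "f1", "f2"])
y = rng.integers(0, 2, size=100)

clf = lgb.LGBMClassifier(n_estimators=5).fit(X, y)
clf.get_feature_names_out()  # expected to return array(['f0', 'f1', 'f2'])
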
43 changes: 43 additions & 0 deletions tests/python_package_test/test_sklearn.py
@@ -1317,6 +1317,49 @@ def test_getting_feature_names_in_pd_input():
    np.testing.assert_array_equal(est.feature_names_in_, X.columns)


def test_get_feature_names_out_np_input():
    # input is a numpy array, which doesn't have feature names. LightGBM adds
    # feature names to the fitted model, which is inconsistent with sklearn's behavior
    X, y = load_digits(n_class=2, return_X_y=True)
    est = lgb.LGBMModel(n_estimators=5, objective="binary")
    clf = lgb.LGBMClassifier(n_estimators=5)
    reg = lgb.LGBMRegressor(n_estimators=5)
    rnk = lgb.LGBMRanker(n_estimators=5)
    models = (est, clf, reg, rnk)
    group = np.full(shape=(X.shape[0] // 2,), fill_value=2)  # Just an example group

    for model in models:
        with pytest.raises(lgb.compat.LGBMNotFittedError):
            check_is_fitted(model)
        if isinstance(model, lgb.LGBMRanker):
            model.fit(X, y, group=group)
        else:
            model.fit(X, y)
        np.testing.assert_array_equal(
            model.get_feature_names_out(), np.array([f"Column_{i}" for i in range(X.shape[1])])
        )


def test_get_feature_names_out_pd_input():
    # as_frame=True means input has column names and these should propagate to the fitted model
    X, y = load_digits(n_class=2, return_X_y=True, as_frame=True)
    est = lgb.LGBMModel(n_estimators=5, objective="binary")
    clf = lgb.LGBMClassifier(n_estimators=5)
    reg = lgb.LGBMRegressor(n_estimators=5)
    rnk = lgb.LGBMRanker(n_estimators=5)
    models = (est, clf, reg, rnk)
    group = np.full(shape=(X.shape[0] // 2,), fill_value=2)  # Just an example group

    for model in models:
        with pytest.raises(lgb.compat.LGBMNotFittedError):
            check_is_fitted(model)
        if isinstance(model, lgb.LGBMRanker):
            model.fit(X, y, group=group)
        else:
            model.fit(X, y)
        np.testing.assert_array_equal(model.get_feature_names_out(), X.columns)


@parametrize_with_checks([lgb.LGBMClassifier(), lgb.LGBMRegressor()])
def test_sklearn_integration(estimator, check):
    estimator.set_params(min_child_samples=1, min_data_in_bin=1)
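For comparison with the note in test_get_feature_names_out_np_input about numpy input: a scikit-learn transformer fitted on a plain array has no real feature names and generates placeholder names instead. A hedged sketch of that convention follows; StandardScaler is used purely for illustration and is not part of this commit.

import numpy as np
from sklearn.preprocessing import StandardScaler

X = np.random.rand(10, 3)  # plain array, so no column names are available
scaler = StandardScaler().fit(X)
scaler.get_feature_names_out()  # returns array(['x0', 'x1', 'x2'], dtype=object)

# LightGBM instead synthesizes names of the form 'Column_0', 'Column_1', ...
# for array input, which is the behavior asserted in the test above.
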
