From 87e683f80ef441f8755163f06a6d838f3f8f8912 Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Mon, 31 Jan 2022 13:59:10 +0000 Subject: [PATCH] Fix tabular with no categorical fields (#1144) --- CHANGELOG.md | 2 ++ flash/tabular/input.py | 4 ++-- .../test_data_model_integration.py | 19 ++++++++++++++----- .../regression/test_data_model_integration.py | 19 ++++++++++++++----- 4 files changed, 32 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0d8d8de713..1a10324eff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -70,6 +70,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed a bug where the `labels` property would return `None` when using `ObjectDetectionData.from_fiftyone` ([#1136](https://github.com/PyTorchLightning/lightning-flash/pull/1136)) +- Fixed a bug where `TabularData` would not work correctly with no categorical variables ([#1144](https://github.com/PyTorchLightning/lightning-flash/pull/1144)) + ### Removed - Removed the `Seq2SeqData` base class (use `TranslationData` or `SummarizationData` directly) ([#1128](https://github.com/PyTorchLightning/lightning-flash/pull/1128)) diff --git a/flash/tabular/input.py b/flash/tabular/input.py index 6221c25c47..7ca846c9f3 100644 --- a/flash/tabular/input.py +++ b/flash/tabular/input.py @@ -120,8 +120,8 @@ def preprocess( num_vars = _to_num_vars_numpy(df, parameters["numerical_fields"]) num_samples = len(df) - cat_vars = np.stack(cat_vars, 1) if len(cat_vars) else np.zeros((num_samples, 0)) - num_vars = np.stack(num_vars, 1) if len(num_vars) else np.zeros((num_samples, 0)) + cat_vars = np.stack(cat_vars, 1) if len(cat_vars) else np.zeros((num_samples, 0), dtype=np.int64) + num_vars = np.stack(num_vars, 1) if len(num_vars) else np.zeros((num_samples, 0), dtype=np.float32) return cat_vars, num_vars diff --git a/tests/tabular/classification/test_data_model_integration.py b/tests/tabular/classification/test_data_model_integration.py index 8cb84cc5e7..3294b8e22d 100644 --- a/tests/tabular/classification/test_data_model_integration.py +++ b/tests/tabular/classification/test_data_model_integration.py @@ -32,16 +32,25 @@ @pytest.mark.skipif(not _TABULAR_TESTING, reason="tabular libraries aren't installed.") @pytest.mark.parametrize( - "backbone", ["tabnet", "tabtransformer", "fttransformer", "autoint", "node", "category_embedding"] + "backbone,fields", + [ + ("tabnet", {"categorical_fields": ["category"], "numerical_fields": ["scalar_a", "scalar_b"]}), + ("tabtransformer", {"categorical_fields": ["category"], "numerical_fields": ["scalar_a", "scalar_b"]}), + ("fttransformer", {"categorical_fields": ["category"], "numerical_fields": ["scalar_a", "scalar_b"]}), + ("autoint", {"categorical_fields": ["category"], "numerical_fields": ["scalar_a", "scalar_b"]}), + ("node", {"categorical_fields": ["category"], "numerical_fields": ["scalar_a", "scalar_b"]}), + ("category_embedding", {"categorical_fields": ["category"], "numerical_fields": ["scalar_a", "scalar_b"]}), + # No categorical / numerical fields + ("tabnet", {"categorical_fields": ["category"]}), + ("tabnet", {"numerical_fields": ["scalar_a", "scalar_b"]}), + ], ) -def test_classification(backbone, tmpdir): - +def test_classification(backbone, fields, tmpdir): train_data_frame = TEST_DF_1.copy() val_data_frame = TEST_DF_1.copy() test_data_frame = TEST_DF_1.copy() data = TabularClassificationData.from_data_frame( - categorical_fields=["category"], - numerical_fields=["scalar_a", "scalar_b"], + **fields, target_fields="label", train_data_frame=train_data_frame, val_data_frame=val_data_frame, diff --git a/tests/tabular/regression/test_data_model_integration.py b/tests/tabular/regression/test_data_model_integration.py index 82582512d4..8aedf75ed2 100644 --- a/tests/tabular/regression/test_data_model_integration.py +++ b/tests/tabular/regression/test_data_model_integration.py @@ -32,16 +32,25 @@ @pytest.mark.skipif(not _TABULAR_TESTING, reason="tabular libraries aren't installed.") @pytest.mark.parametrize( - "backbone", ["tabnet", "tabtransformer", "fttransformer", "autoint", "node", "category_embedding"] + "backbone,fields", + [ + ("tabnet", {"categorical_fields": ["category"], "numerical_fields": ["scalar_a", "scalar_b"]}), + ("tabtransformer", {"categorical_fields": ["category"], "numerical_fields": ["scalar_a", "scalar_b"]}), + ("fttransformer", {"categorical_fields": ["category"], "numerical_fields": ["scalar_a", "scalar_b"]}), + ("autoint", {"categorical_fields": ["category"], "numerical_fields": ["scalar_a", "scalar_b"]}), + ("node", {"categorical_fields": ["category"], "numerical_fields": ["scalar_a", "scalar_b"]}), + ("category_embedding", {"categorical_fields": ["category"], "numerical_fields": ["scalar_a", "scalar_b"]}), + # No categorical / numerical fields + ("tabnet", {"categorical_fields": ["category"]}), + ("tabnet", {"numerical_fields": ["scalar_a", "scalar_b"]}), + ], ) -def test_regression(backbone, tmpdir): - +def test_regression(backbone, fields, tmpdir): train_data_frame = TEST_DF_1.copy() val_data_frame = TEST_DF_1.copy() test_data_frame = TEST_DF_1.copy() data = TabularRegressionData.from_data_frame( - categorical_fields=["category"], - numerical_fields=["scalar_a", "scalar_b"], + **fields, target_field="label", train_data_frame=train_data_frame, val_data_frame=val_data_frame,