From ea540d767e38f7bf00864101805dbae4a575e7ec Mon Sep 17 00:00:00 2001 From: Raza Jafri Date: Mon, 4 Dec 2023 11:51:03 -0800 Subject: [PATCH 1/4] Override the seed for version of Spark before 3.4.0 --- integration_tests/src/main/python/conftest.py | 4 +++- integration_tests/src/main/python/map_test.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/integration_tests/src/main/python/conftest.py b/integration_tests/src/main/python/conftest.py index cf21c405899..499ea0c5254 100644 --- a/integration_tests/src/main/python/conftest.py +++ b/integration_tests/src/main/python/conftest.py @@ -154,7 +154,9 @@ def pytest_runtest_setup(item): _inject_oom = item.get_closest_marker('inject_oom') datagen_overrides = item.get_closest_marker('datagen_overrides') if datagen_overrides: - _test_datagen_random_seed = datagen_overrides.kwargs.get('seed', _test_datagen_random_seed) + override_seed = datagen_overrides.kwargs.get('condition', True) + if override_seed: + _test_datagen_random_seed = datagen_overrides.kwargs.get('seed', _test_datagen_random_seed) order = item.get_closest_marker('ignore_order') if order: diff --git a/integration_tests/src/main/python/map_test.py b/integration_tests/src/main/python/map_test.py index 5daeb916e22..1dcb49ec289 100644 --- a/integration_tests/src/main/python/map_test.py +++ b/integration_tests/src/main/python/map_test.py @@ -192,7 +192,7 @@ def query_map_scalar(spark): @allow_non_gpu('WindowLocalExec') -@datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/9683') +@datagen_overrides(condition=is_before_spark_340(), seed=0, reason="https://issues.apache.org/jira/browse/SPARK-40089") @pytest.mark.parametrize('data_gen', supported_key_map_gens, ids=idfn) @pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653') def test_map_scalars_supported_key_types(data_gen): From 0d6f92d58c3403b707afe18ca840b72f27d4ab81 Mon Sep 17 00:00:00 2001 From: Raza Jafri Date: Mon, 4 Dec 2023 12:04:57 -0800 Subject: [PATCH 2/4] Signing off Signed-off-by: Raza Jafri From 348777493d8e1700b68ca0a323db7f7411887a0a Mon Sep 17 00:00:00 2001 From: Raza Jafri Date: Mon, 4 Dec 2023 17:06:50 -0800 Subject: [PATCH 3/4] addressed review comments --- integration_tests/src/main/python/map_test.py | 4 +++- integration_tests/src/main/python/spark_session.py | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/integration_tests/src/main/python/map_test.py b/integration_tests/src/main/python/map_test.py index 1dcb49ec289..b121d8802a6 100644 --- a/integration_tests/src/main/python/map_test.py +++ b/integration_tests/src/main/python/map_test.py @@ -192,7 +192,9 @@ def query_map_scalar(spark): @allow_non_gpu('WindowLocalExec') -@datagen_overrides(condition=is_before_spark_340(), seed=0, reason="https://issues.apache.org/jira/browse/SPARK-40089") +@datagen_overrides(condition=is_before_spark_314() + or (not is_before_spark_320() and is_before_spark_323()) + or (not is_before_spark_330() and is_before_spark_331()), seed=0, reason="https://issues.apache.org/jira/browse/SPARK-40089") @pytest.mark.parametrize('data_gen', supported_key_map_gens, ids=idfn) @pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653') def test_map_scalars_supported_key_types(data_gen): diff --git a/integration_tests/src/main/python/spark_session.py b/integration_tests/src/main/python/spark_session.py index aa27503c8eb..57602a9bdca 100644 --- a/integration_tests/src/main/python/spark_session.py +++ b/integration_tests/src/main/python/spark_session.py @@ -128,12 +128,18 @@ def is_before_spark_312(): def is_before_spark_313(): return spark_version() < "3.1.3" +def is_before_spark_314(): + return spark_version() < "3.1.4" + def is_before_spark_320(): return spark_version() < "3.2.0" def is_before_spark_322(): return spark_version() < "3.2.2" +def is_before_spark_323(): + return spark_version() < "3.2.3" + def is_before_spark_330(): return spark_version() < "3.3.0" From a32a6b3372a84a9a8ec37dd2311c18753fcd20a9 Mon Sep 17 00:00:00 2001 From: Raza Jafri Date: Tue, 5 Dec 2023 10:00:40 -0800 Subject: [PATCH 4/4] made seed param mandatory --- integration_tests/src/main/python/conftest.py | 7 ++++++- integration_tests/src/main/python/map_test.py | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/integration_tests/src/main/python/conftest.py b/integration_tests/src/main/python/conftest.py index 499ea0c5254..a9b2f6146ec 100644 --- a/integration_tests/src/main/python/conftest.py +++ b/integration_tests/src/main/python/conftest.py @@ -154,9 +154,14 @@ def pytest_runtest_setup(item): _inject_oom = item.get_closest_marker('inject_oom') datagen_overrides = item.get_closest_marker('datagen_overrides') if datagen_overrides: + try: + seed = datagen_overrides.kwargs["seed"] + except KeyError: + raise Exception("datagen_overrides requires an override seed value") + override_seed = datagen_overrides.kwargs.get('condition', True) if override_seed: - _test_datagen_random_seed = datagen_overrides.kwargs.get('seed', _test_datagen_random_seed) + _test_datagen_random_seed = seed order = item.get_closest_marker('ignore_order') if order: diff --git a/integration_tests/src/main/python/map_test.py b/integration_tests/src/main/python/map_test.py index b121d8802a6..550e4302210 100644 --- a/integration_tests/src/main/python/map_test.py +++ b/integration_tests/src/main/python/map_test.py @@ -192,9 +192,9 @@ def query_map_scalar(spark): @allow_non_gpu('WindowLocalExec') -@datagen_overrides(condition=is_before_spark_314() +@datagen_overrides(seed=0, condition=is_before_spark_314() or (not is_before_spark_320() and is_before_spark_323()) - or (not is_before_spark_330() and is_before_spark_331()), seed=0, reason="https://issues.apache.org/jira/browse/SPARK-40089") + or (not is_before_spark_330() and is_before_spark_331()), reason="https://issues.apache.org/jira/browse/SPARK-40089") @pytest.mark.parametrize('data_gen', supported_key_map_gens, ids=idfn) @pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653') def test_map_scalars_supported_key_types(data_gen):