From ce536575b857d571d7e52fcc30dca15fbaa9c399 Mon Sep 17 00:00:00 2001 From: Alessandro Bellina Date: Thu, 16 Nov 2023 12:41:01 -0600 Subject: [PATCH] Set seed=0 for the delta lake part roundtrip tests (#9741) Signed-off-by: Alessandro Bellina --- integration_tests/pytest.ini | 1 + integration_tests/src/main/python/delta_lake_write_test.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/integration_tests/pytest.ini b/integration_tests/pytest.ini index 02baafce3a2..64660603834 100644 --- a/integration_tests/pytest.ini +++ b/integration_tests/pytest.ini @@ -35,5 +35,6 @@ markers = regexp: Mark a test that tests regular expressions on the GPU (only works when UTF-8 is enabled) large_data_test: Mark tests with large data pyarrow_test: Mark pyarrow tests + datagen_overrides: Mark that allows overriding datagen settings (i.e. seed) for a test filterwarnings = ignore:.*pytest.mark.order.*:_pytest.warning_types.PytestUnknownMarkWarning diff --git a/integration_tests/src/main/python/delta_lake_write_test.py b/integration_tests/src/main/python/delta_lake_write_test.py index f6158624dbe..0edc417573f 100644 --- a/integration_tests/src/main/python/delta_lake_write_test.py +++ b/integration_tests/src/main/python/delta_lake_write_test.py @@ -93,6 +93,7 @@ def test_delta_write_round_trip_unmanaged(spark_tmp_path): @ignore_order @pytest.mark.parametrize("gens", parquet_part_write_gens, ids=idfn) @pytest.mark.skipif(is_before_spark_320(), reason="Delta Lake writes are not supported before Spark 3.2.x") +@datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/9738') def test_delta_part_write_round_trip_unmanaged(spark_tmp_path, gens): gen_list = [("a", RepeatSeqGen(gens, 10)), ("b", gens)] data_path = spark_tmp_path + "/DELTA_DATA" @@ -110,6 +111,7 @@ def test_delta_part_write_round_trip_unmanaged(spark_tmp_path, gens): @ignore_order @pytest.mark.parametrize("gens", parquet_part_write_gens, ids=idfn) @pytest.mark.skipif(is_before_spark_320(), reason="Delta Lake writes are not supported before Spark 3.2.x") +@datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/9738') def test_delta_multi_part_write_round_trip_unmanaged(spark_tmp_path, gens): gen_list = [("a", RepeatSeqGen(gens, 10)), ("b", gens), ("c", SetValuesGen(StringType(), ["x", "y", "z"]))] data_path = spark_tmp_path + "/DELTA_DATA"