From 8d43f8e1d09929ad304433e6ff4c87b51ac81482 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Thu, 21 Dec 2023 02:34:03 -0800 Subject: [PATCH 1/4] Don't use \\z Signed-off-by: Gera Shegalov --- .../src/main/python/conditionals_test.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/integration_tests/src/main/python/conditionals_test.py b/integration_tests/src/main/python/conditionals_test.py index be0eed865f9..e983d33e1be 100644 --- a/integration_tests/src/main/python/conditionals_test.py +++ b/integration_tests/src/main/python/conditionals_test.py @@ -137,7 +137,7 @@ def test_coalesce(data_gen): s1 = with_cpu_session( lambda spark: gen_scalar(data_gen, force_no_nulls=not isinstance(data_gen, NullGen))) # we want lots of nulls - gen = StructGen([('_c' + str(x), data_gen.copy_special_case(None, weight=1000.0)) + gen = StructGen([('_c' + str(x), data_gen.copy_special_case(None, weight=1000.0)) for x in range(0, num_cols)], nullable=False) command_args = [f.col('_c' + str(x)) for x in range(0, num_cols)] command_args.append(s1) @@ -213,7 +213,7 @@ def test_conditional_with_side_effects_cast(data_gen): ansi_enabled_conf, {'spark.rapids.sql.regexp.enabled': True}) assert_gpu_and_cpu_are_equal_collect( lambda spark : unary_op_df(spark, data_gen).selectExpr( - 'IF(a RLIKE "^[0-9]{1,5}\\z", CAST(a AS INT), 0)'), + r'IF(a RLIKE "^\\d{5,}", CAST(SUBSTR(a, 0, 5) AS INT), 0)'), conf = test_conf) @pytest.mark.parametrize('data_gen', [mk_str_gen('[0-9]{1,9}')], ids=idfn) @@ -222,11 +222,10 @@ def test_conditional_with_side_effects_case_when(data_gen): test_conf=copy_and_update( ansi_enabled_conf, {'spark.rapids.sql.regexp.enabled': True}) assert_gpu_and_cpu_are_equal_collect( - lambda spark : unary_op_df(spark, data_gen).selectExpr( - 'CASE \ - WHEN a RLIKE "^[0-9]{1,3}\\z" THEN CAST(a AS INT) \ - WHEN a RLIKE "^[0-9]{4,6}\\z" THEN CAST(a AS INT) + 123 \ - ELSE -1 END'), + lambda spark : unary_op_df(spark, data_gen).select( + f.when(f.col('a').rlike(r"^[0-9]{1,3}"), f.col('a').substr(0, 1).cast('INT'))\ + .when(f.col('a').rlike(r"^[0-9]{4,6}"), f.col('a').substr(0, 4).cast('INT') + f.lit(123))\ + .otherwise(f.lit(-1))), conf = test_conf) @pytest.mark.parametrize('data_gen', [mk_str_gen('[a-z]{0,3}')], ids=idfn) From 12aefc0a97f02e49165d3239626e5e7918c08064 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Thu, 21 Dec 2023 02:35:42 -0800 Subject: [PATCH 2/4] fixes #9992 From 2e34da1a6256179d78e1e0a283466b93574659c6 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Thu, 21 Dec 2023 13:02:54 -0800 Subject: [PATCH 3/4] Restore SQL Signed-off-by: Gera Shegalov --- integration_tests/src/main/python/conditionals_test.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/integration_tests/src/main/python/conditionals_test.py b/integration_tests/src/main/python/conditionals_test.py index e983d33e1be..00783d2139b 100644 --- a/integration_tests/src/main/python/conditionals_test.py +++ b/integration_tests/src/main/python/conditionals_test.py @@ -222,10 +222,11 @@ def test_conditional_with_side_effects_case_when(data_gen): test_conf=copy_and_update( ansi_enabled_conf, {'spark.rapids.sql.regexp.enabled': True}) assert_gpu_and_cpu_are_equal_collect( - lambda spark : unary_op_df(spark, data_gen).select( - f.when(f.col('a').rlike(r"^[0-9]{1,3}"), f.col('a').substr(0, 1).cast('INT'))\ - .when(f.col('a').rlike(r"^[0-9]{4,6}"), f.col('a').substr(0, 4).cast('INT') + f.lit(123))\ - .otherwise(f.lit(-1))), + lambda spark : unary_op_df(spark, data_gen).selectExpr( + 'CASE \ + WHEN a RLIKE "^[0-9]{6}" THEN CAST(SUBSTR(a, 0, 6) AS INT) + 123 \ + WHEN a RLIKE "^[0-9]{3}" THEN CAST(SUBSTR(a, 0, 3) AS INT) \ + ELSE -1 END'), conf = test_conf) @pytest.mark.parametrize('data_gen', [mk_str_gen('[a-z]{0,3}')], ids=idfn) From 8744364e57a16282bade2b774edbff02ca6113d4 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Thu, 21 Dec 2023 14:16:58 -0800 Subject: [PATCH 4/4] restore [0-9] Signed-off-by: Gera Shegalov --- integration_tests/src/main/python/conditionals_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration_tests/src/main/python/conditionals_test.py b/integration_tests/src/main/python/conditionals_test.py index 751ff1ca8e4..1deffe50672 100644 --- a/integration_tests/src/main/python/conditionals_test.py +++ b/integration_tests/src/main/python/conditionals_test.py @@ -214,7 +214,7 @@ def test_conditional_with_side_effects_cast(data_gen): ansi_enabled_conf, {'spark.rapids.sql.regexp.enabled': True}) assert_gpu_and_cpu_are_equal_collect( lambda spark : unary_op_df(spark, data_gen).selectExpr( - r'IF(a RLIKE "^\\d{5,}", CAST(SUBSTR(a, 0, 5) AS INT), 0)'), + 'IF(a RLIKE "^[0-9]{5,}", CAST(SUBSTR(a, 0, 5) AS INT), 0)'), conf = test_conf) @pytest.mark.parametrize('data_gen', [mk_str_gen('[0-9]{1,9}')], ids=idfn)