Skip to content

Commit

Permalink
Snowflake Dialect pt. 5 (#10528)
Browse files Browse the repository at this point in the history
- Related to #9486
- Batching of expression tests
- Fixing arithmetic by simplifying `%` and `/` operations
- Sharing more tables between tests, which sometimes improves performance and sometimes does not
- Adding sorting and other fixes to tests to make them pass: Missing_Values_Spec, Filter_Spec, Map_Spec
- Fixing warnings related to materialization of Decimal->Integer, thus fixing Join_Spec.
  • Loading branch information
radeusgd authored Jul 16, 2024
1 parent a992c8a commit a30b0c6
Show file tree
Hide file tree
Showing 14 changed files with 212 additions and 274 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2618,7 +2618,9 @@ type DB_Table
or changing Float32 to Float64 are silently ignored.
However, bigger changes, like a Binary type column getting coerced to Mixed - _will_ still be reported.
if expected_type_kind == actual_type_kind then Nothing else
warnings_builder.append (Inexact_Type_Coercion.Warning expected_type actual_type)
# If the reverse was an implicit conversion, undoing it also should not yield warnings:
if self.connection.dialect.get_type_mapping.is_implicit_conversion actual_type expected_type then Nothing else
warnings_builder.append (Inexact_Type_Coercion.Warning expected_type actual_type)

result = max_rows.attach_warning materialized_table
Problem_Behavior.Report_Warning.attach_problems_before warnings_builder.to_vector result
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -495,15 +495,15 @@ floating_point_div = Base_Generator.lift_binary_op "/" x-> y->

## PRIVATE
mod_op = Base_Generator.lift_binary_op "MOD" x-> y->
x ++ " - FLOOR(CAST(" ++ x ++ " AS float) / CAST(" ++ y ++ " AS float)) * " ++ y
x ++ " % " ++ y

## PRIVATE
decimal_div = Base_Generator.lift_binary_op "DECIMAL_DIV" x-> y->
SQL_Builder.code "CAST(" ++ x ++ " AS decimal) / CAST(" ++ y ++ " AS decimal)"
x ++ " / " ++ y

## PRIVATE
decimal_mod = Base_Generator.lift_binary_op "DECIMAL_MOD" x-> y->
x ++ " - FLOOR(CAST(" ++ x ++ " AS decimal) / CAST(" ++ y ++ " AS decimal)) * " ++ y
x ++ " % " ++ y

## PRIVATE
supported_replace_params : Hashset Replace_Params
Expand Down
36 changes: 28 additions & 8 deletions test/Snowflake_Tests/src/Snowflake_Spec.enso
Original file line number Diff line number Diff line change
Expand Up @@ -197,21 +197,36 @@ snowflake_specific_spec suite_builder default_connection db_name setup =
data.t.at "doubles" . value_type . is_floating_point . should_be_true

group_builder.specify "will report true integer types but infer smartly when materialized (small numbers become Integer in-memory, not Decimal)" <|
t1 = table_builder [["small_ints", [1, 2, 3]], ["big_ints", [2^100, 2^110, 1]]] . sort "small_ints"
t1 = table_builder [["small_ints", [1, 2, 3]]]

# Integer types are NUMBER(38, 0) in Snowflake so they are all mapped to decimal
t1.at "small_ints" . value_type . should_equal (Value_Type.Decimal 38 0)
t1.at "big_ints" . value_type . should_equal (Value_Type.Decimal 38 0)
# The fact that Integer is coerced to Decimal is an expected thing in Snowflake, so we don't warn about this.
Problems.assume_no_problems t1

in_memory = t1.read
in_memory1 = t1.read
# But when read back to in-memory, they are inferred as Integer type to avoid the BigInteger overhead
in_memory.at "small_ints" . value_type . should_equal (Value_Type.Integer Bits.Bits_64)
in_memory1.at "small_ints" . value_type . should_equal (Value_Type.Integer Bits.Bits_64)

# Again, when materialized the conversion Decimal->Integer is a feature, so it should not cause warning.
Problems.assume_no_problems in_memory1
in_memory1.at "small_ints" . to_vector . should_contain_the_same_elements_as [1, 2, 3]

t2 = table_builder [["big_ints", [2^100, 2^110, 1]]]
t2.at "big_ints" . value_type . should_equal (Value_Type.Decimal 38 0)
# For the decimal column we get a warning because the type changed:
w = Problems.expect_only_warning Inexact_Type_Coercion t2
w.requested_type . should_equal (Value_Type.Decimal Nothing 0)
w.actual_type . should_equal (Value_Type.Decimal 38 0)

in_memory2 = t2.remove_warnings.read
# If the values are actually big, the Decimal type is kept, but its precision is lost, as the in-memory BigInteger does not store it.
in_memory.at "big_ints" . value_type . should_equal (Value_Type.Decimal Nothing 0)
in_memory2.at "big_ints" . value_type . should_equal (Value_Type.Decimal Nothing 0)
# The Decimal type loses 'precision' but that is no reason to warn, so we should not see any warnings here:
Problems.assume_no_problems in_memory2

# Check correctness of values
in_memory.at "small_ints" . to_vector . should_equal [1, 2, 3]
in_memory.at "big_ints" . to_vector . should_equal [2^100, 2^110, 1]
in_memory2.at "big_ints" . to_vector . should_contain_the_same_elements_as [2^100, 2^110, 1]

group_builder.specify "correctly handles Decimal and Float types" <|
table_name = Name_Generator.random_name "DecimalFloat"
Expand All @@ -222,6 +237,11 @@ snowflake_specific_spec suite_builder default_connection db_name setup =
t1.at "d3" . value_type . should_equal (Value_Type.Decimal 38 0)
t1.at "f" . value_type . should_equal Value_Type.Float

# We expect warnings about coercing Decimal types
w1 = Problems.expect_warning Inexact_Type_Coercion t1
w1.requested_type . should_equal (Value_Type.Decimal 24 -3)
w1.actual_type . should_equal (Value_Type.Decimal 38 0)

t1.update_rows (Table.new [["d1", [1.2345678910]], ["d2", [12.3456]], ["d3", [1234567.8910]], ["f", [1.5]]]) update_action=Update_Action.Insert . should_succeed

m1 = t1.read
Expand Down Expand Up @@ -593,7 +613,7 @@ add_snowflake_specs suite_builder create_connection_fn db_name =

Common_Spec.add_specs suite_builder prefix create_connection_fn

common_selection = Common_Table_Operations.Main.Test_Selection.Config supports_case_sensitive_columns=True order_by_unicode_normalization_by_default=True allows_mixed_type_comparisons=False fixed_length_text_columns=False different_size_integer_types=False removes_trailing_whitespace_casting_from_char_to_varchar=True supports_decimal_type=True supported_replace_params=supported_replace_params run_advanced_edge_case_tests_by_default=False supports_date_time_without_timezone=True supports_nanoseconds_in_time=True
common_selection = Common_Table_Operations.Main.Test_Selection.Config supports_case_sensitive_columns=True order_by_unicode_normalization_by_default=True allows_mixed_type_comparisons=False fixed_length_text_columns=False different_size_integer_types=False removes_trailing_whitespace_casting_from_char_to_varchar=True supports_decimal_type=True supported_replace_params=supported_replace_params run_advanced_edge_case_tests_by_default=False supports_date_time_without_timezone=True supports_nanoseconds_in_time=True is_nan_comparable=True
aggregate_selection = Common_Table_Operations.Aggregate_Spec.Test_Selection.Config first_last=False first_last_row_order=False aggregation_problems=False text_concat=False
agg_in_memory_table = ((Project_Description.new enso_dev.Table_Tests).data / "data.csv") . read

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ type Names_Data
t = table_builder [["a", [1, 2, 3]], ["b", ['x', 'y', 'z']], ["c", [1.0, 2.0, 3.0]], ["d", [True, False, True]]] . sort "a"
[t]

# A holder with a single field `get` declared with `~`, i.e. as a suspended (lazy) value,
# so the expression passed to `Lazy_Ref.Value` is deferred instead of being run at construction.
# In this spec it wraps a `build_sorted_table` call; each test obtains the table via `table.get`.
# NOTE(review): presumably this defers table creation until a test actually runs - confirm.
type Lazy_Ref
Value ~get

add_specs suite_builder setup =
prefix = setup.prefix
Expand Down Expand Up @@ -845,14 +847,17 @@ add_specs suite_builder setup =

case setup.test_selection.is_nan_and_nothing_distinct of
True ->
t = build_sorted_table [["X", [1.5, 3.0, Number.positive_infinity, Number.negative_infinity, Number.nan, Nothing]], ["Y", [1, 2, 3, 4, 5, Nothing]], ["Z", ["1", "2", "3", "4", "5", Nothing]]]
table = Lazy_Ref.Value <|
build_sorted_table [["X", [1.5, 3.0, Number.positive_infinity, Number.negative_infinity, Number.nan, Nothing]], ["Y", [1, 2, 3, 4, 5, Nothing]], ["Z", ["1", "2", "3", "4", "5", Nothing]]]

group_builder.specify "should support is_nan" <|
t = table.get
t.at "X" . is_nan . to_vector . should_equal [False, False, False, False, True, Nothing]
t.at "Y" . is_nan . to_vector . should_equal [False, False, False, False, False, Nothing]
t.at "Z" . is_nan . to_vector . should_fail_with Invalid_Value_Type

group_builder.specify "should support is_infinite" <|
t = table.get
t.at "X" . is_infinite . to_vector . should_equal [False, False, True, True, False, Nothing]
t.at "Y" . is_infinite . to_vector . should_equal [False, False, False, False, False, Nothing]
t.at "Z" . is_infinite . to_vector . should_fail_with Invalid_Value_Type
Expand Down
102 changes: 65 additions & 37 deletions test/Table_Tests/src/Common_Table_Operations/Expression_Spec.enso
Original file line number Diff line number Diff line change
Expand Up @@ -13,53 +13,81 @@ from Standard.Test import all


from project.Util import all
from project.Common_Table_Operations.Util import run_default_backend
from project.Common_Table_Operations.Util import run_default_backend, build_sorted_table

main filter=Nothing = run_default_backend (add_specs detailed=True) filter

# A holder with a single field `get` declared with `~`, i.e. as a suspended (lazy) value,
# so the expression passed to `Lazy_Ref.Value` is deferred instead of being run at construction.
# In this spec it wraps the construction of `test_table`; callers access it as `test_table.get`.
# NOTE(review): presumably this defers table creation until a test actually runs - confirm.
type Lazy_Ref
Value ~get

add_specs suite_builder detailed setup =
prefix = setup.prefix
table_builder = setup.table_builder

# Create Test Table
column_a = ["A", [1, 2, 3, 4, 5]]
column_b = ["B", [1.0, 1.5, 2.5, 4, 6]]
column_c = ["C", ["Hello", "World", "Hello World!", "", Nothing]]
column_odd = ["Bad] Name", [True, False, True, False, True]]
test_table = table_builder [column_a, column_b, column_c, column_odd]
table_builder = build_sorted_table setup
column_a_description = ["A", [1, 2, 3, 4, 5]]
column_odd_description = ["Bad] Name", [True, False, True, False, True]]
test_table = Lazy_Ref.Value <|
column_b = ["B", [1.0, 1.5, 2.5, 4, 6]]
column_c = ["C", ["Hello", "World", "Hello World!", "", Nothing]]
table_builder [column_a_description, column_b, column_c, column_odd_description]

pending_datetime = if setup.test_selection.date_time.not then "Date/Time operations are not supported by this backend."

epsilon=0.0000000001

tester expression value = Test.with_clue "{expr = {"+expression+"}}: " <|
new_column = test_table.evaluate_expression expression
expected = case value of
_ : Vector -> value
_ -> Vector.new test_table.row_count _->value

values = new_column.to_vector
values . each_with_index i->v->
e = expected.at i
check_results got_values expected =
expected_vec = case expected of
_ : Vector -> expected
_ -> Vector.new test_table.get.row_count _->expected

got_values . each_with_index i->v->
e = expected_vec.at i
match = case e of
_ : Number -> case v of
_ : Number -> e.equals v epsilon
# If the backend returns Decimal for that case, we convert it to Float before comparing:
_ : Decimal -> e.equals v.to_float epsilon
_ -> Test.fail "Expected cell to be a number "+e.pretty+" but got a value of non-numeric type: "+v.pretty
_ -> e == v
if match.not then values.should_equal expected
if match.not then got_values.should_equal expected_vec

tester expression value = Test.with_clue "{expr = {"+expression+"}}: " <|
new_column = test_table.get.evaluate_expression expression
new_column.name . should_equal expression
check_results new_column.to_vector value

specify_test label group_builder action expression_test=tester pending=Nothing = case pending of
specify_test label group_builder action pending=Nothing = case pending of
Nothing ->
case detailed of
True ->
specify_tester expression value =
group_builder.specify (label + ": " + expression) <|
expression_test expression value
tester expression value
action specify_tester
False ->
group_builder.specify label (action expression_test)
# We will batch the operation for better performance.
group_builder.specify label <|
batch = Vector.build batch_builder->
add_to_batch expression value =
new_column = test_table.get.evaluate_expression expression
new_column.name . should_equal expression
batch_builder.append [batch_builder.length, new_column, value]
action add_to_batch

expr_column_name ix = "expr_"+ix.to_text
batched_expression = batch.fold test_table.get acc-> entry->
ix = entry.at 0
new_column = entry.at 1
acc.set new_column as=(expr_column_name ix) set_mode=..Add
batched_expression_without_columns = batched_expression.select_columns "expr_.*".to_regex
materialized = batched_expression_without_columns.read
batch.each entry->
ix = entry.at 0
new_column = entry.at 1
expected = entry.at 2
Test.with_clue "{expr = {"+new_column.name+"}}: " <|
got_vector = materialized.at (expr_column_name ix) . to_vector
check_results got_vector expected

_ -> group_builder.specify label Nothing pending

suite_builder.group prefix+"Expression Integer literals" group_builder->
Expand Down Expand Up @@ -96,8 +124,8 @@ add_specs suite_builder detailed setup =

suite_builder.group prefix+"Expression Text literals" group_builder->
specify_test "should be able to get a Column" group_builder expression_test->
expression_test "[A]" (column_a.at 1)
expression_test "[Bad]] Name]" (column_odd.at 1)
expression_test "[A]" column_a_description.second
expression_test "[Bad]] Name]" column_odd_description.second

group_builder.specify "should sanitize names" <|
t = table_builder [["X", ['\0', 'x', '']]]
Expand Down Expand Up @@ -348,23 +376,23 @@ add_specs suite_builder detailed setup =
expression_test "max([A], [B], 3)" [3, 3, 3, 4, 6]

suite_builder.group prefix+"Expression Errors should be handled" group_builder->
error_tester expression fail_ctor =
test_table.set (expr expression) as="NEW_COL" . should_fail_with Expression_Error
test_table.set (expr expression) as="NEW_COL" . catch . should_be_a fail_ctor
expect_error ctor expr =
expr.should_fail_with Expression_Error
expr.catch.should_be_a ctor

specify_test "should fail with Syntax_Error if badly formed" group_builder expression_test=error_tester expression_test->
expression_test "IIF [A] THEN 1 ELSE 2" Expression_Error.Syntax_Error
expression_test "A + B" Expression_Error.Syntax_Error
expression_test "#2022-31-21#" Expression_Error.Syntax_Error
group_builder.specify "should fail with Syntax_Error if badly formed" <|
expect_error Expression_Error.Syntax_Error <| test_table.get.evaluate_expression "IIF [A] THEN 1 ELSE 2"
expect_error Expression_Error.Syntax_Error <| test_table.get.evaluate_expression "A + B"
expect_error Expression_Error.Syntax_Error <| test_table.get.evaluate_expression "#2022-31-21#"

specify_test "should fail with Unsupported_Operation if not sufficient arguments" group_builder expression_test=error_tester expression_test->
expression_test "unknown([C])" Expression_Error.Unsupported_Operation
group_builder.specify "should fail with Unsupported_Operation if not sufficient arguments" <|
expect_error Expression_Error.Unsupported_Operation <| test_table.get.evaluate_expression "unknown([C])"

specify_test "should fail with Argument_Mismatch if not sufficient arguments" group_builder expression_test=error_tester expression_test->
expression_test "starts_with([C])" Expression_Error.Argument_Mismatch
group_builder.specify "should fail with Argument_Mismatch if not sufficient arguments" <|
expect_error Expression_Error.Argument_Mismatch <| test_table.get.evaluate_expression "starts_with([C])"

specify_test "should fail with Argument_Mismatch if too many arguments" group_builder expression_test=error_tester expression_test->
expression_test "is_empty([C], 'Hello')" Expression_Error.Argument_Mismatch
group_builder.specify "should fail with Argument_Mismatch if too many arguments" <|
expect_error Expression_Error.Argument_Mismatch <| test_table.get.evaluate_expression "is_empty([C], 'Hello')"

suite_builder.group prefix+"Expression Warnings should be reported" group_builder->
group_builder.specify "should report floating point equality" <|
Expand Down
Loading

0 comments on commit a30b0c6

Please sign in to comment.