diff --git a/src/snowflake/snowpark/functions.py b/src/snowflake/snowpark/functions.py index 1e795024ab..5118861e7c 100644 --- a/src/snowflake/snowpark/functions.py +++ b/src/snowflake/snowpark/functions.py @@ -3803,7 +3803,7 @@ def _concat_ws_ignore_nulls(sep: str, *cols: ColumnOrName) -> Column: """ # TODO: SNOW-1831917 create ast columns = [_to_col_if_str(c, "_concat_ws_ignore_nulls") for c in cols] - names = ",".join([c.get_name() for c in columns]) + names = ",".join([c.get_name() or f"COL{i}" for i, c in enumerate(columns)]) # The implementation of this function is as follows with example input of # sep = "," and row = [a, NULL], b, NULL, c: @@ -3815,7 +3815,7 @@ def _concat_ws_ignore_nulls(sep: str, *cols: ColumnOrName) -> Column: # [a, NULL, b, c] # 4. Filter out nulls (array_remove_nulls). # [a, b, c] - # 5. Concatenate the non-null values into a single string (concat_strings_with_sep). + # 5. Concatenate the non-null values into a single string (array_to_string). # "a,b,c" def array_remove_nulls(col: Column) -> Column: @@ -3824,21 +3824,8 @@ def array_remove_nulls(col: Column) -> Column: col, sql_expr("x -> NOT IS_NULL_VALUE(x)", _emit_ast=False) ) - def concat_strings_with_sep(col: Column) -> Column: - """ - Expects an array of strings and returns a single string - with the values concatenated with the separator. - """ - return substring( - builtin("reduce", _emit_ast=False)( - col, lit(""), sql_expr(f"(l, r) -> l || '{sep}' || r", _emit_ast=False) - ), - len(sep) + 1, - _emit_ast=False, - ) - - return concat_strings_with_sep( - array_remove_nulls( + return array_to_string( + array=array_remove_nulls( array_flatten( array_construct_compact( *[c.cast(ArrayType(), _emit_ast=False) for c in columns], @@ -3846,7 +3833,9 @@ def concat_strings_with_sep(col: Column) -> Column: ), _emit_ast=False, ) - ) + ), + separator=lit(sep, _emit_ast=False), + _emit_ast=False, ).alias(f"CONCAT_WS_IGNORE_NULLS('{sep}', {names})", _emit_ast=False) diff --git a/src/snowflake/snowpark/mock/_nop_plan.py b/src/snowflake/snowpark/mock/_nop_plan.py index 4a507967b0..b20e742954 100644 --- a/src/snowflake/snowpark/mock/_nop_plan.py +++ b/src/snowflake/snowpark/mock/_nop_plan.py @@ -152,9 +152,15 @@ def resolve_attributes( elif isinstance(plan, TableFunctionJoin): left_attributes = resolve_attributes(plan.children[0], session) - output_schema = session.udtf.get_udtf( - plan.table_function.func_name - )._output_schema + try: + output_schema = session.udtf.get_udtf( + plan.table_function.func_name + )._output_schema + except KeyError: + if session is not None and session._conn._suppress_not_implemented_error: + return [] + else: + raise if isinstance(output_schema, PandasDataFrameType): right_attributes = [ Attribute(col_name, col_type, True) diff --git a/tests/ast/data/DataFrame.join_table_function.test b/tests/ast/data/DataFrame.join_table_function.test new file mode 100644 index 0000000000..b480f22512 --- /dev/null +++ b/tests/ast/data/DataFrame.join_table_function.test @@ -0,0 +1,275 @@ +## TEST CASE + +df1 = session.create_dataframe( + [ + ["foo", "The quick brown fox jumps over the lazy dog"], + ["bar", "Lorem ipsum dolor sit amet, consectetur adipiscing elit"], + ], + schema=["name", "text"], +) + +df2 = df1.join_table_function("STRTOK_SPLIT_TO_TABLE", df1["text"], lit(" ")) + +# The following part of the tests is impossible to execute in the mock environment. + +# tokenize_text = ( +# call_table_function("STRTOK_SPLIT_TO_TABLE", df1["text"], lit(" ")) +# .over(partition_by="name") +# .over(order_by="text") +# .alias("ignored1", "ignored2", "ignored3") +# .alias("original_row_number", "token_number", "token") +# ) + +# df3 = df1.join_table_function(tokenize_text) + +# tokenize_text_ref = table_function("STRTOK_SPLIT_TO_TABLE") +# df4 = df1.join_table_function( +# tokenize_text_ref(df1["text"], lit(" ")) +# .over(partition_by="name", order_by="text") +# .alias("original_row_number", "token_number", "token") +# ) + +# df5 = df1.join_table_function( +# tokenize_text_ref(df1["text"], lit(" ,")) +# .over(partition_by="name", order_by="text") +# .alias("row_number", "token_number", "token") +# ) + +## EXPECTED UNPARSER OUTPUT + +df1 = session.create_dataframe([["foo", "The quick brown fox jumps over the lazy dog"], ["bar", "Lorem ipsum dolor sit amet, consectetur adipiscing elit"]], schema=["name", "text"]) + +df2 = df1.join_table_function("STRTOK_SPLIT_TO_TABLE", df1["text"], lit(" ")) + +## EXPECTED ENCODED AST + +interned_value_table { + string_values { + key: -1 + } + string_values { + key: 2 + value: "SRC_POSITION_TEST_MODE" + } +} +body { + assign { + expr { + create_dataframe { + data { + dataframe_data__list { + vs { + list_val { + src { + end_column: 9 + end_line: 31 + file: 2 + start_column: 14 + start_line: 25 + } + vs { + string_val { + src { + end_column: 9 + end_line: 31 + file: 2 + start_column: 14 + start_line: 25 + } + v: "foo" + } + } + vs { + string_val { + src { + end_column: 9 + end_line: 31 + file: 2 + start_column: 14 + start_line: 25 + } + v: "The quick brown fox jumps over the lazy dog" + } + } + } + } + vs { + list_val { + src { + end_column: 9 + end_line: 31 + file: 2 + start_column: 14 + start_line: 25 + } + vs { + string_val { + src { + end_column: 9 + end_line: 31 + file: 2 + start_column: 14 + start_line: 25 + } + v: "bar" + } + } + vs { + string_val { + src { + end_column: 9 + end_line: 31 + file: 2 + start_column: 14 + start_line: 25 + } + v: "Lorem ipsum dolor sit amet, consectetur adipiscing elit" + } + } + } + } + } + } + schema { + dataframe_schema__list { + vs: "name" + vs: "text" + } + } + src { + end_column: 9 + end_line: 31 + file: 2 + start_column: 14 + start_line: 25 + } + } + } + symbol { + value: "df1" + } + uid: 1 + var_id { + bitfield1: 1 + } + } +} +body { + assign { + expr { + dataframe_join_table_function { + fn { + apply_expr { + fn { + indirect_table_fn_name_ref { + name { + name { + name_flat { + name: "STRTOK_SPLIT_TO_TABLE" + } + } + } + } + } + pos_args { + dataframe_col { + col_name: "text" + df { + dataframe_ref { + id { + bitfield1: 1 + } + } + } + src { + end_column: 74 + end_line: 33 + file: 2 + start_column: 63 + start_line: 33 + } + } + } + pos_args { + apply_expr { + fn { + builtin_fn { + name { + name { + name_flat { + name: "lit" + } + } + } + } + } + pos_args { + string_val { + src { + end_column: 84 + end_line: 33 + file: 2 + start_column: 76 + start_line: 33 + } + v: " " + } + } + src { + end_column: 84 + end_line: 33 + file: 2 + start_column: 76 + start_line: 33 + } + } + } + src { + end_column: 85 + end_line: 33 + file: 2 + start_column: 14 + start_line: 33 + } + } + } + lhs { + dataframe_ref { + id { + bitfield1: 1 + } + } + } + src { + end_column: 85 + end_line: 33 + file: 2 + start_column: 14 + start_line: 33 + } + } + } + symbol { + value: "df2" + } + uid: 2 + var_id { + bitfield1: 2 + } + } +} +client_ast_version: 1 +client_language { + python_language { + version { + label: "final" + major: 3 + minor: 9 + patch: 1 + } + } +} +client_version { + major: 1 + minor: 29 +} diff --git a/tests/ast/data/DataFrame.join_table_function.test.DISABLED b/tests/ast/data/DataFrame.join_table_function.test.DISABLED deleted file mode 100644 index 88a5bf25a1..0000000000 --- a/tests/ast/data/DataFrame.join_table_function.test.DISABLED +++ /dev/null @@ -1,40 +0,0 @@ -## TEST CASE - -df1 = session.create_dataframe( - [ - ["foo", "The quick brown fox jumps over the lazy dog"], - ["bar", "Lorem ipsum dolor sit amet, consectetur adipiscing elit"], - ], - schema=["name", "text"], -) - -df2 = df1.join_table_function("STRTOK_SPLIT_TO_TABLE", df1["text"], lit(" ")) - -tokenize_text = ( - call_table_function("STRTOK_SPLIT_TO_TABLE", df1["text"], lit(" ")) - .over(partition_by="name") - .over(order_by="text") - .alias("ignored1", "ignored2", "ignored3") - .alias("original_row_number", "token_number", "token") -) - -df3 = df1.join_table_function(tokenize_text) - -df3 = df1.join_table_function(tokenize_text) - -tokenize_text_ref = table_function("STRTOK_SPLIT_TO_TABLE") -df4 = df1.join_table_function( - tokenize_text_ref(df1["text"], lit(" ")) - .over(partition_by="name", order_by="text") - .alias("original_row_number", "token_number", "token") -) - -df5 = df1.join_table_function( - tokenize_text_ref(df1["text"], lit(" ,")) - .over(partition_by="name", order_by="text") - .alias("row_number", "token_number", "token") -) - -## EXPECTED UNPARSER OUTPUT - -## EXPECTED ENCODED AST diff --git a/tests/ast/data/Dataframe.to_snowpark_pandas.test.DISABLED b/tests/ast/data/Dataframe.to_snowpark_pandas.test similarity index 90% rename from tests/ast/data/Dataframe.to_snowpark_pandas.test.DISABLED rename to tests/ast/data/Dataframe.to_snowpark_pandas.test index a2a422998c..71fa61150c 100644 --- a/tests/ast/data/Dataframe.to_snowpark_pandas.test.DISABLED +++ b/tests/ast/data/Dataframe.to_snowpark_pandas.test @@ -37,10 +37,10 @@ interned_value_table { body { assign { expr { - sp_table { + table { name { name { - sp_name_flat { + name_flat { name: "table1" } } @@ -53,7 +53,7 @@ body { start_line: 25 } variant { - sp_session_table: true + session_table: true } } } @@ -69,9 +69,9 @@ body { body { assign { expr { - sp_to_snowpark_pandas { + to_snowpark_pandas { df { - sp_dataframe_ref { + dataframe_ref { id { bitfield1: 1 } @@ -98,9 +98,9 @@ body { body { assign { expr { - sp_to_snowpark_pandas { + to_snowpark_pandas { df { - sp_dataframe_ref { + dataframe_ref { id { bitfield1: 1 } @@ -130,13 +130,13 @@ body { body { assign { expr { - sp_to_snowpark_pandas { + to_snowpark_pandas { columns { list: "B" list: "A" } df { - sp_dataframe_ref { + dataframe_ref { id { bitfield1: 1 } @@ -163,13 +163,13 @@ body { body { assign { expr { - sp_to_snowpark_pandas { + to_snowpark_pandas { columns { list: "C" list: "B" } df { - sp_dataframe_ref { + dataframe_ref { id { bitfield1: 1 } @@ -209,5 +209,5 @@ client_language { } client_version { major: 1 - minor: 27 + minor: 29 } diff --git a/tests/ast/data/Session.create_dataframe_from_pandas.test b/tests/ast/data/Session.create_dataframe_from_pandas.test new file mode 100644 index 0000000000..c358985250 --- /dev/null +++ b/tests/ast/data/Session.create_dataframe_from_pandas.test @@ -0,0 +1,183 @@ +## TEST CASE + +from snowflake.snowpark import Row + +from snowflake.snowpark.types import IntegerType, StringType, StructField + +import pandas as pd + +df = session.create_dataframe(pd.DataFrame([(1, 2, 3, 4)], columns=["a", "b", "c", "d"])) + +# Snowpark ignores provided schema for pandas Dataframe. +df2 = session.create_dataframe(pd.DataFrame([(99, 98)], columns=["a", "b"]), schema=["b", "c"]) + +## EXPECTED UNPARSER OUTPUT + +df = session.create_dataframe(pandas.DataFrame()), schema=StructType([StructField("\"a\"", LongType(), nullable=True), StructField("\"b\"", LongType(), nullable=True), StructField("\"c\"", LongType(), nullable=True), StructField("\"d\"", LongType(), nullable=True)], structured=False)) + +df2 = session.create_dataframe(pandas.DataFrame()), schema=StructType([StructField("\"a\"", LongType(), nullable=True), StructField("\"b\"", LongType(), nullable=True)], structured=False)) + +## EXPECTED ENCODED AST + +interned_value_table { + string_values { + key: -1 + } + string_values { + key: 2 + value: "SRC_POSITION_TEST_MODE" + } +} +body { + assign { + expr { + create_dataframe { + data { + dataframe_data__pandas { + v { + } + } + } + schema { + dataframe_schema__struct { + v { + fields { + list { + column_identifier { + column_name { + name: "\"a\"" + } + } + data_type { + long_type: true + } + nullable: true + } + list { + column_identifier { + column_name { + name: "\"b\"" + } + } + data_type { + long_type: true + } + nullable: true + } + list { + column_identifier { + column_name { + name: "\"c\"" + } + } + data_type { + long_type: true + } + nullable: true + } + list { + column_identifier { + column_name { + name: "\"d\"" + } + } + data_type { + long_type: true + } + nullable: true + } + } + } + } + } + src { + end_column: 97 + end_line: 31 + file: 2 + start_column: 13 + start_line: 31 + } + } + } + symbol { + value: "df" + } + uid: 1 + var_id { + bitfield1: 1 + } + } +} +body { + assign { + expr { + create_dataframe { + data { + dataframe_data__pandas { + v { + } + } + } + schema { + dataframe_schema__struct { + v { + fields { + list { + column_identifier { + column_name { + name: "\"a\"" + } + } + data_type { + long_type: true + } + nullable: true + } + list { + column_identifier { + column_name { + name: "\"b\"" + } + } + data_type { + long_type: true + } + nullable: true + } + } + } + } + } + src { + end_column: 103 + end_line: 34 + file: 2 + start_column: 14 + start_line: 34 + } + } + } + symbol { + value: "df2" + } + uid: 2 + var_id { + bitfield1: 2 + } + } +} +client_ast_version: 1 +client_language { + python_language { + version { + label: "final" + major: 3 + minor: 9 + patch: 1 + } + } +} +client_version { + major: 1 + minor: 29 +} diff --git a/tests/ast/data/Session.create_dataframe_from_pandas.test.DISABLED b/tests/ast/data/Session.create_dataframe_from_pandas.test.DISABLED deleted file mode 100644 index a7d7aeb654..0000000000 --- a/tests/ast/data/Session.create_dataframe_from_pandas.test.DISABLED +++ /dev/null @@ -1,20 +0,0 @@ -## TEST CASE - -from snowflake.snowpark import Row - -from snowflake.snowpark.types import IntegerType, StringType, StructField - -import pandas as pd - -df = session.create_dataframe(pd.DataFrame([(1, 2, 3, 4)], columns=["a", "b", "c", "d"])) - -# Snowpark ignores provided schema for pandas Dataframe. -df2 = session.create_dataframe(pd.DataFrame([(99, 98)], columns=["a", "b"]), schema=["b", "c"]) - -## EXPECTED ENCODED AST - -## EXPECTED UNPARSER OUTPUT - -res1 = session.create_dataframe(pandas.DataFrame()) - -res2 = session.create_dataframe(pandas.DataFrame()) diff --git a/tests/ast/data/functions.table_functions.test b/tests/ast/data/functions.table_functions.test new file mode 100644 index 0000000000..d8d5a7e4e1 --- /dev/null +++ b/tests/ast/data/functions.table_functions.test @@ -0,0 +1,1987 @@ +## TEST CASE + +df1 = session.create_dataframe( + [ + [1, [1, 2, 3], {"Ashi Garami": "Single Leg X"}, "Kimura"], + [2, [11, 22], {"Sankaku": "Triangle"}, "Coffee"], + ], + schema=["idx", "lists", "maps", "strs"], +) +df2 = df1.select(df1.idx, explode(df1.lists)).sort(col("idx")) + +df3 = df1.select(explode("maps").as_("primo", "secundo")).sort(col("primo")) + +df4 = session.create_dataframe( + [ + [1, [1, 2, 3], {"Ashi Garami": ["X", "Leg Entanglement"]}, "Kimura"], + [2, [11, 22], {"Sankaku": ["Triangle"]}, "Coffee"], + [3, [], {}, "empty"], + ], + schema=["idx", "lists", "maps", "strs"], +) +df5 = ( + df4.select(df4.idx, flatten(df4.lists, outer=True)) + .select("idx", "value") + .sort("idx") +) + +df6 = ( + df4.select(df4.strs, flatten(df4.maps, recursive=True)) + .select("strs", "key", "value") + .where("key is not NULL") + .sort("strs") +) + +df7 = ( + df4.select(df4.strs, flatten(df4.maps, recursive=True)) + .select("strs", "key", "value") + .where("key is NULL") + .sort("strs", "value") +) + +## EXPECTED UNPARSER OUTPUT + +df1 = session.create_dataframe([[1, [1, 2, 3], {"Ashi Garami": "Single Leg X"}, "Kimura"], [2, [11, 22], {"Sankaku": "Triangle"}, "Coffee"]], schema=["idx", "lists", "maps", "strs"]) + +df2 = df1.select(df1["idx"], (explode(df1["lists"]))) + +df2 = df2.sort(col("idx")) + +df3 = df1.select((explode("maps").alias("primo", "secundo"))) + +df3 = df3.sort(col("primo")) + +df4 = session.create_dataframe([[1, [1, 2, 3], {"Ashi Garami": ["X", "Leg Entanglement"]}, "Kimura"], [2, [11, 22], {"Sankaku": ["Triangle"]}, "Coffee"], [3, [], {}, "empty"]], schema=["idx", "lists", "maps", "strs"]) + +res6 = df4.select(df4["idx"], (flatten(df4["lists"], "", True, False, "both"))).select("idx", "value").sort("idx") + +res11 = df4.select(df4["strs"], (flatten(df4["maps"], "", False, True, "both"))).select("strs", "key", "value").filter("key is not NULL").sort("strs") + +res16 = df4.select(df4["strs"], (flatten(df4["maps"], "", False, True, "both"))).select("strs", "key", "value").filter("key is NULL").sort("strs", "value") + +## EXPECTED ENCODED AST + +interned_value_table { + string_values { + key: -1 + } + string_values { + key: 2 + value: "SRC_POSITION_TEST_MODE" + } +} +body { + assign { + expr { + create_dataframe { + data { + dataframe_data__list { + vs { + list_val { + src { + end_column: 9 + end_line: 31 + file: 2 + start_column: 14 + start_line: 25 + } + vs { + int64_val { + src { + end_column: 9 + end_line: 31 + file: 2 + start_column: 14 + start_line: 25 + } + v: 1 + } + } + vs { + list_val { + src { + end_column: 9 + end_line: 31 + file: 2 + start_column: 14 + start_line: 25 + } + vs { + int64_val { + src { + end_column: 9 + end_line: 31 + file: 2 + start_column: 14 + start_line: 25 + } + v: 1 + } + } + vs { + int64_val { + src { + end_column: 9 + end_line: 31 + file: 2 + start_column: 14 + start_line: 25 + } + v: 2 + } + } + vs { + int64_val { + src { + end_column: 9 + end_line: 31 + file: 2 + start_column: 14 + start_line: 25 + } + v: 3 + } + } + } + } + vs { + seq_map_val { + kvs { + vs { + string_val { + src { + end_column: 9 + end_line: 31 + file: 2 + start_column: 14 + start_line: 25 + } + v: "Ashi Garami" + } + } + vs { + string_val { + src { + end_column: 9 + end_line: 31 + file: 2 + start_column: 14 + start_line: 25 + } + v: "Single Leg X" + } + } + } + src { + end_column: 9 + end_line: 31 + file: 2 + start_column: 14 + start_line: 25 + } + } + } + vs { + string_val { + src { + end_column: 9 + end_line: 31 + file: 2 + start_column: 14 + start_line: 25 + } + v: "Kimura" + } + } + } + } + vs { + list_val { + src { + end_column: 9 + end_line: 31 + file: 2 + start_column: 14 + start_line: 25 + } + vs { + int64_val { + src { + end_column: 9 + end_line: 31 + file: 2 + start_column: 14 + start_line: 25 + } + v: 2 + } + } + vs { + list_val { + src { + end_column: 9 + end_line: 31 + file: 2 + start_column: 14 + start_line: 25 + } + vs { + int64_val { + src { + end_column: 9 + end_line: 31 + file: 2 + start_column: 14 + start_line: 25 + } + v: 11 + } + } + vs { + int64_val { + src { + end_column: 9 + end_line: 31 + file: 2 + start_column: 14 + start_line: 25 + } + v: 22 + } + } + } + } + vs { + seq_map_val { + kvs { + vs { + string_val { + src { + end_column: 9 + end_line: 31 + file: 2 + start_column: 14 + start_line: 25 + } + v: "Sankaku" + } + } + vs { + string_val { + src { + end_column: 9 + end_line: 31 + file: 2 + start_column: 14 + start_line: 25 + } + v: "Triangle" + } + } + } + src { + end_column: 9 + end_line: 31 + file: 2 + start_column: 14 + start_line: 25 + } + } + } + vs { + string_val { + src { + end_column: 9 + end_line: 31 + file: 2 + start_column: 14 + start_line: 25 + } + v: "Coffee" + } + } + } + } + } + } + schema { + dataframe_schema__list { + vs: "idx" + vs: "lists" + vs: "maps" + vs: "strs" + } + } + src { + end_column: 9 + end_line: 31 + file: 2 + start_column: 14 + start_line: 25 + } + } + } + symbol { + value: "df1" + } + uid: 1 + var_id { + bitfield1: 1 + } + } +} +body { + assign { + expr { + apply_expr { + fn { + builtin_fn { + name { + name { + name_flat { + name: "explode" + } + } + } + } + } + pos_args { + dataframe_col { + col_name: "lists" + df { + dataframe_ref { + id { + bitfield1: 1 + } + } + } + src { + end_column: 51 + end_line: 32 + file: 2 + start_column: 42 + start_line: 32 + } + } + } + src { + end_column: 52 + end_line: 32 + file: 2 + start_column: 34 + start_line: 32 + } + } + } + symbol { + } + uid: 2 + var_id { + bitfield1: 2 + } + } +} +body { + assign { + expr { + dataframe_select__columns { + cols { + args { + dataframe_col { + col_name: "idx" + df { + dataframe_ref { + id { + bitfield1: 1 + } + } + } + src { + end_column: 32 + end_line: 32 + file: 2 + start_column: 25 + start_line: 32 + } + } + } + args { + apply_expr { + fn { + indirect_table_fn_id_ref { + id { + bitfield1: 2 + } + } + } + src { + end_column: 53 + end_line: 32 + file: 2 + start_column: 14 + start_line: 32 + } + } + } + variadic: true + } + df { + dataframe_ref { + id { + bitfield1: 1 + } + } + } + src { + end_column: 53 + end_line: 32 + file: 2 + start_column: 14 + start_line: 32 + } + } + } + symbol { + value: "df2" + } + uid: 3 + var_id { + bitfield1: 3 + } + } +} +body { + assign { + expr { + dataframe_sort { + cols { + args { + apply_expr { + fn { + builtin_fn { + name { + name { + name_flat { + name: "col" + } + } + } + } + } + pos_args { + string_val { + src { + end_column: 69 + end_line: 32 + file: 2 + start_column: 59 + start_line: 32 + } + v: "idx" + } + } + src { + end_column: 69 + end_line: 32 + file: 2 + start_column: 59 + start_line: 32 + } + } + } + variadic: true + } + df { + dataframe_ref { + id { + bitfield1: 3 + } + } + } + src { + end_column: 70 + end_line: 32 + file: 2 + start_column: 14 + start_line: 32 + } + } + } + symbol { + value: "df2" + } + uid: 4 + var_id { + bitfield1: 4 + } + } +} +body { + assign { + expr { + table_fn_call_alias { + aliases { + args { + string_val { + src { + end_column: 64 + end_line: 34 + file: 2 + start_column: 25 + start_line: 34 + } + v: "primo" + } + } + args { + string_val { + src { + end_column: 64 + end_line: 34 + file: 2 + start_column: 25 + start_line: 34 + } + v: "secundo" + } + } + variadic: true + } + lhs { + apply_expr { + fn { + builtin_fn { + name { + name { + name_flat { + name: "explode" + } + } + } + } + } + pos_args { + string_val { + src { + end_column: 40 + end_line: 34 + file: 2 + start_column: 25 + start_line: 34 + } + v: "maps" + } + } + src { + end_column: 40 + end_line: 34 + file: 2 + start_column: 25 + start_line: 34 + } + } + } + src { + end_column: 64 + end_line: 34 + file: 2 + start_column: 25 + start_line: 34 + } + } + } + symbol { + } + uid: 5 + var_id { + bitfield1: 5 + } + } +} +body { + assign { + expr { + dataframe_select__columns { + cols { + args { + apply_expr { + fn { + indirect_table_fn_id_ref { + id { + bitfield1: 5 + } + } + } + src { + end_column: 65 + end_line: 34 + file: 2 + start_column: 14 + start_line: 34 + } + } + } + variadic: true + } + df { + dataframe_ref { + id { + bitfield1: 1 + } + } + } + src { + end_column: 65 + end_line: 34 + file: 2 + start_column: 14 + start_line: 34 + } + } + } + symbol { + value: "df3" + } + uid: 6 + var_id { + bitfield1: 6 + } + } +} +body { + assign { + expr { + dataframe_sort { + cols { + args { + apply_expr { + fn { + builtin_fn { + name { + name { + name_flat { + name: "col" + } + } + } + } + } + pos_args { + string_val { + src { + end_column: 83 + end_line: 34 + file: 2 + start_column: 71 + start_line: 34 + } + v: "primo" + } + } + src { + end_column: 83 + end_line: 34 + file: 2 + start_column: 71 + start_line: 34 + } + } + } + variadic: true + } + df { + dataframe_ref { + id { + bitfield1: 6 + } + } + } + src { + end_column: 84 + end_line: 34 + file: 2 + start_column: 14 + start_line: 34 + } + } + } + symbol { + value: "df3" + } + uid: 7 + var_id { + bitfield1: 7 + } + } +} +body { + assign { + expr { + create_dataframe { + data { + dataframe_data__list { + vs { + list_val { + src { + end_column: 9 + end_line: 43 + file: 2 + start_column: 14 + start_line: 36 + } + vs { + int64_val { + src { + end_column: 9 + end_line: 43 + file: 2 + start_column: 14 + start_line: 36 + } + v: 1 + } + } + vs { + list_val { + src { + end_column: 9 + end_line: 43 + file: 2 + start_column: 14 + start_line: 36 + } + vs { + int64_val { + src { + end_column: 9 + end_line: 43 + file: 2 + start_column: 14 + start_line: 36 + } + v: 1 + } + } + vs { + int64_val { + src { + end_column: 9 + end_line: 43 + file: 2 + start_column: 14 + start_line: 36 + } + v: 2 + } + } + vs { + int64_val { + src { + end_column: 9 + end_line: 43 + file: 2 + start_column: 14 + start_line: 36 + } + v: 3 + } + } + } + } + vs { + seq_map_val { + kvs { + vs { + string_val { + src { + end_column: 9 + end_line: 43 + file: 2 + start_column: 14 + start_line: 36 + } + v: "Ashi Garami" + } + } + vs { + list_val { + src { + end_column: 9 + end_line: 43 + file: 2 + start_column: 14 + start_line: 36 + } + vs { + string_val { + src { + end_column: 9 + end_line: 43 + file: 2 + start_column: 14 + start_line: 36 + } + v: "X" + } + } + vs { + string_val { + src { + end_column: 9 + end_line: 43 + file: 2 + start_column: 14 + start_line: 36 + } + v: "Leg Entanglement" + } + } + } + } + } + src { + end_column: 9 + end_line: 43 + file: 2 + start_column: 14 + start_line: 36 + } + } + } + vs { + string_val { + src { + end_column: 9 + end_line: 43 + file: 2 + start_column: 14 + start_line: 36 + } + v: "Kimura" + } + } + } + } + vs { + list_val { + src { + end_column: 9 + end_line: 43 + file: 2 + start_column: 14 + start_line: 36 + } + vs { + int64_val { + src { + end_column: 9 + end_line: 43 + file: 2 + start_column: 14 + start_line: 36 + } + v: 2 + } + } + vs { + list_val { + src { + end_column: 9 + end_line: 43 + file: 2 + start_column: 14 + start_line: 36 + } + vs { + int64_val { + src { + end_column: 9 + end_line: 43 + file: 2 + start_column: 14 + start_line: 36 + } + v: 11 + } + } + vs { + int64_val { + src { + end_column: 9 + end_line: 43 + file: 2 + start_column: 14 + start_line: 36 + } + v: 22 + } + } + } + } + vs { + seq_map_val { + kvs { + vs { + string_val { + src { + end_column: 9 + end_line: 43 + file: 2 + start_column: 14 + start_line: 36 + } + v: "Sankaku" + } + } + vs { + list_val { + src { + end_column: 9 + end_line: 43 + file: 2 + start_column: 14 + start_line: 36 + } + vs { + string_val { + src { + end_column: 9 + end_line: 43 + file: 2 + start_column: 14 + start_line: 36 + } + v: "Triangle" + } + } + } + } + } + src { + end_column: 9 + end_line: 43 + file: 2 + start_column: 14 + start_line: 36 + } + } + } + vs { + string_val { + src { + end_column: 9 + end_line: 43 + file: 2 + start_column: 14 + start_line: 36 + } + v: "Coffee" + } + } + } + } + vs { + list_val { + src { + end_column: 9 + end_line: 43 + file: 2 + start_column: 14 + start_line: 36 + } + vs { + int64_val { + src { + end_column: 9 + end_line: 43 + file: 2 + start_column: 14 + start_line: 36 + } + v: 3 + } + } + vs { + list_val { + src { + end_column: 9 + end_line: 43 + file: 2 + start_column: 14 + start_line: 36 + } + } + } + vs { + seq_map_val { + src { + end_column: 9 + end_line: 43 + file: 2 + start_column: 14 + start_line: 36 + } + } + } + vs { + string_val { + src { + end_column: 9 + end_line: 43 + file: 2 + start_column: 14 + start_line: 36 + } + v: "empty" + } + } + } + } + } + } + schema { + dataframe_schema__list { + vs: "idx" + vs: "lists" + vs: "maps" + vs: "strs" + } + } + src { + end_column: 9 + end_line: 43 + file: 2 + start_column: 14 + start_line: 36 + } + } + } + symbol { + value: "df4" + } + uid: 8 + var_id { + bitfield1: 8 + } + } +} +body { + assign { + expr { + apply_expr { + fn { + builtin_fn { + name { + name { + name_flat { + name: "flatten" + } + } + } + } + } + pos_args { + dataframe_col { + col_name: "lists" + df { + dataframe_ref { + id { + bitfield1: 8 + } + } + } + src { + end_column: 49 + end_line: 45 + file: 2 + start_column: 40 + start_line: 45 + } + } + } + pos_args { + string_val { + src { + end_column: 62 + end_line: 45 + file: 2 + start_column: 32 + start_line: 45 + } + } + } + pos_args { + bool_val { + src { + end_column: 62 + end_line: 45 + file: 2 + start_column: 32 + start_line: 45 + } + v: true + } + } + pos_args { + bool_val { + src { + end_column: 62 + end_line: 45 + file: 2 + start_column: 32 + start_line: 45 + } + } + } + pos_args { + string_val { + src { + end_column: 62 + end_line: 45 + file: 2 + start_column: 32 + start_line: 45 + } + v: "both" + } + } + src { + end_column: 62 + end_line: 45 + file: 2 + start_column: 32 + start_line: 45 + } + } + } + symbol { + } + uid: 9 + var_id { + bitfield1: 9 + } + } +} +body { + assign { + expr { + dataframe_select__columns { + cols { + args { + dataframe_col { + col_name: "idx" + df { + dataframe_ref { + id { + bitfield1: 8 + } + } + } + src { + end_column: 30 + end_line: 45 + file: 2 + start_column: 23 + start_line: 45 + } + } + } + args { + apply_expr { + fn { + indirect_table_fn_id_ref { + id { + bitfield1: 9 + } + } + } + src { + end_column: 63 + end_line: 45 + file: 2 + start_column: 12 + start_line: 45 + } + } + } + variadic: true + } + df { + dataframe_ref { + id { + bitfield1: 8 + } + } + } + src { + end_column: 63 + end_line: 45 + file: 2 + start_column: 12 + start_line: 45 + } + } + } + symbol { + } + uid: 10 + var_id { + bitfield1: 10 + } + } +} +body { + assign { + expr { + dataframe_select__columns { + cols { + args { + string_val { + src { + end_column: 35 + end_line: 46 + file: 2 + start_column: 13 + start_line: 46 + } + v: "idx" + } + } + args { + string_val { + src { + end_column: 35 + end_line: 46 + file: 2 + start_column: 13 + start_line: 46 + } + v: "value" + } + } + variadic: true + } + df { + dataframe_ref { + id { + bitfield1: 10 + } + } + } + src { + end_column: 35 + end_line: 46 + file: 2 + start_column: 13 + start_line: 46 + } + } + } + symbol { + } + uid: 11 + var_id { + bitfield1: 11 + } + } +} +body { + assign { + expr { + dataframe_sort { + cols { + args { + string_val { + src { + end_column: 24 + end_line: 47 + file: 2 + start_column: 13 + start_line: 47 + } + v: "idx" + } + } + variadic: true + } + df { + dataframe_ref { + id { + bitfield1: 11 + } + } + } + src { + end_column: 24 + end_line: 47 + file: 2 + start_column: 13 + start_line: 47 + } + } + } + symbol { + } + uid: 12 + var_id { + bitfield1: 12 + } + } +} +body { + assign { + expr { + apply_expr { + fn { + builtin_fn { + name { + name { + name_flat { + name: "flatten" + } + } + } + } + } + pos_args { + dataframe_col { + col_name: "maps" + df { + dataframe_ref { + id { + bitfield1: 8 + } + } + } + src { + end_column: 49 + end_line: 51 + file: 2 + start_column: 41 + start_line: 51 + } + } + } + pos_args { + string_val { + src { + end_column: 66 + end_line: 51 + file: 2 + start_column: 33 + start_line: 51 + } + } + } + pos_args { + bool_val { + src { + end_column: 66 + end_line: 51 + file: 2 + start_column: 33 + start_line: 51 + } + } + } + pos_args { + bool_val { + src { + end_column: 66 + end_line: 51 + file: 2 + start_column: 33 + start_line: 51 + } + v: true + } + } + pos_args { + string_val { + src { + end_column: 66 + end_line: 51 + file: 2 + start_column: 33 + start_line: 51 + } + v: "both" + } + } + src { + end_column: 66 + end_line: 51 + file: 2 + start_column: 33 + start_line: 51 + } + } + } + symbol { + } + uid: 13 + var_id { + bitfield1: 13 + } + } +} +body { + assign { + expr { + dataframe_select__columns { + cols { + args { + dataframe_col { + col_name: "strs" + df { + dataframe_ref { + id { + bitfield1: 8 + } + } + } + src { + end_column: 31 + end_line: 51 + file: 2 + start_column: 23 + start_line: 51 + } + } + } + args { + apply_expr { + fn { + indirect_table_fn_id_ref { + id { + bitfield1: 13 + } + } + } + src { + end_column: 67 + end_line: 51 + file: 2 + start_column: 12 + start_line: 51 + } + } + } + variadic: true + } + df { + dataframe_ref { + id { + bitfield1: 8 + } + } + } + src { + end_column: 67 + end_line: 51 + file: 2 + start_column: 12 + start_line: 51 + } + } + } + symbol { + } + uid: 14 + var_id { + bitfield1: 14 + } + } +} +body { + assign { + expr { + dataframe_select__columns { + cols { + args { + string_val { + src { + end_column: 43 + end_line: 52 + file: 2 + start_column: 13 + start_line: 52 + } + v: "strs" + } + } + args { + string_val { + src { + end_column: 43 + end_line: 52 + file: 2 + start_column: 13 + start_line: 52 + } + v: "key" + } + } + args { + string_val { + src { + end_column: 43 + end_line: 52 + file: 2 + start_column: 13 + start_line: 52 + } + v: "value" + } + } + variadic: true + } + df { + dataframe_ref { + id { + bitfield1: 14 + } + } + } + src { + end_column: 43 + end_line: 52 + file: 2 + start_column: 13 + start_line: 52 + } + } + } + symbol { + } + uid: 15 + var_id { + bitfield1: 15 + } + } +} +body { + assign { + expr { + dataframe_filter { + condition { + sql_expr { + sql: "key is not NULL" + src { + end_column: 37 + end_line: 53 + file: 2 + start_column: 13 + start_line: 53 + } + } + } + df { + dataframe_ref { + id { + bitfield1: 15 + } + } + } + src { + end_column: 37 + end_line: 53 + file: 2 + start_column: 13 + start_line: 53 + } + } + } + symbol { + } + uid: 16 + var_id { + bitfield1: 16 + } + } +} +body { + assign { + expr { + dataframe_sort { + cols { + args { + string_val { + src { + end_column: 25 + end_line: 54 + file: 2 + start_column: 13 + start_line: 54 + } + v: "strs" + } + } + variadic: true + } + df { + dataframe_ref { + id { + bitfield1: 16 + } + } + } + src { + end_column: 25 + end_line: 54 + file: 2 + start_column: 13 + start_line: 54 + } + } + } + symbol { + } + uid: 17 + var_id { + bitfield1: 17 + } + } +} +body { + assign { + expr { + apply_expr { + fn { + builtin_fn { + name { + name { + name_flat { + name: "flatten" + } + } + } + } + } + pos_args { + dataframe_col { + col_name: "maps" + df { + dataframe_ref { + id { + bitfield1: 8 + } + } + } + src { + end_column: 49 + end_line: 58 + file: 2 + start_column: 41 + start_line: 58 + } + } + } + pos_args { + string_val { + src { + end_column: 66 + end_line: 58 + file: 2 + start_column: 33 + start_line: 58 + } + } + } + pos_args { + bool_val { + src { + end_column: 66 + end_line: 58 + file: 2 + start_column: 33 + start_line: 58 + } + } + } + pos_args { + bool_val { + src { + end_column: 66 + end_line: 58 + file: 2 + start_column: 33 + start_line: 58 + } + v: true + } + } + pos_args { + string_val { + src { + end_column: 66 + end_line: 58 + file: 2 + start_column: 33 + start_line: 58 + } + v: "both" + } + } + src { + end_column: 66 + end_line: 58 + file: 2 + start_column: 33 + start_line: 58 + } + } + } + symbol { + } + uid: 18 + var_id { + bitfield1: 18 + } + } +} +body { + assign { + expr { + dataframe_select__columns { + cols { + args { + dataframe_col { + col_name: "strs" + df { + dataframe_ref { + id { + bitfield1: 8 + } + } + } + src { + end_column: 31 + end_line: 58 + file: 2 + start_column: 23 + start_line: 58 + } + } + } + args { + apply_expr { + fn { + indirect_table_fn_id_ref { + id { + bitfield1: 18 + } + } + } + src { + end_column: 67 + end_line: 58 + file: 2 + start_column: 12 + start_line: 58 + } + } + } + variadic: true + } + df { + dataframe_ref { + id { + bitfield1: 8 + } + } + } + src { + end_column: 67 + end_line: 58 + file: 2 + start_column: 12 + start_line: 58 + } + } + } + symbol { + } + uid: 19 + var_id { + bitfield1: 19 + } + } +} +body { + assign { + expr { + dataframe_select__columns { + cols { + args { + string_val { + src { + end_column: 43 + end_line: 59 + file: 2 + start_column: 13 + start_line: 59 + } + v: "strs" + } + } + args { + string_val { + src { + end_column: 43 + end_line: 59 + file: 2 + start_column: 13 + start_line: 59 + } + v: "key" + } + } + args { + string_val { + src { + end_column: 43 + end_line: 59 + file: 2 + start_column: 13 + start_line: 59 + } + v: "value" + } + } + variadic: true + } + df { + dataframe_ref { + id { + bitfield1: 19 + } + } + } + src { + end_column: 43 + end_line: 59 + file: 2 + start_column: 13 + start_line: 59 + } + } + } + symbol { + } + uid: 20 + var_id { + bitfield1: 20 + } + } +} +body { + assign { + expr { + dataframe_filter { + condition { + sql_expr { + sql: "key is NULL" + src { + end_column: 33 + end_line: 60 + file: 2 + start_column: 13 + start_line: 60 + } + } + } + df { + dataframe_ref { + id { + bitfield1: 20 + } + } + } + src { + end_column: 33 + end_line: 60 + file: 2 + start_column: 13 + start_line: 60 + } + } + } + symbol { + } + uid: 21 + var_id { + bitfield1: 21 + } + } +} +body { + assign { + expr { + dataframe_sort { + cols { + args { + string_val { + src { + end_column: 34 + end_line: 61 + file: 2 + start_column: 13 + start_line: 61 + } + v: "strs" + } + } + args { + string_val { + src { + end_column: 34 + end_line: 61 + file: 2 + start_column: 13 + start_line: 61 + } + v: "value" + } + } + variadic: true + } + df { + dataframe_ref { + id { + bitfield1: 21 + } + } + } + src { + end_column: 34 + end_line: 61 + file: 2 + start_column: 13 + start_line: 61 + } + } + } + symbol { + } + uid: 22 + var_id { + bitfield1: 22 + } + } +} +client_ast_version: 1 +client_language { + python_language { + version { + label: "final" + major: 3 + minor: 9 + patch: 1 + } + } +} +client_version { + major: 1 + minor: 29 +} diff --git a/tests/ast/data/functions.table_functions.test.DISABLED b/tests/ast/data/functions.table_functions.test.DISABLED deleted file mode 100644 index 7ae852b684..0000000000 --- a/tests/ast/data/functions.table_functions.test.DISABLED +++ /dev/null @@ -1,44 +0,0 @@ -## TEST CASE - -df1 = session.create_dataframe( - [ - [1, [1, 2, 3], {"Ashi Garami": "Single Leg X"}, "Kimura"], - [2, [11, 22], {"Sankaku": "Triangle"}, "Coffee"], - ], - schema=["idx", "lists", "maps", "strs"], -) -df2 = df1.select(df1.idx, explode(df1.lists)).sort(col("idx")) - -df3 = df1.select(explode("maps").as_("primo", "secundo")).sort(col("primo")) - -df4 = session.create_dataframe( - [ - [1, [1, 2, 3], {"Ashi Garami": ["X", "Leg Entanglement"]}, "Kimura"], - [2, [11, 22], {"Sankaku": ["Triangle"]}, "Coffee"], - [3, [], {}, "empty"], - ], - schema=["idx", "lists", "maps", "strs"], -) -df5 = ( - df4.select(df4.idx, flatten(df4.lists, outer=True)) - .select("idx", "value") - .sort("idx") -) - -df6 = ( - df.select(df.strs, flatten(df.maps, recursive=True)) - .select("strs", "key", "value") - .where("key is not NULL") - .sort("strs") -) - -df7 = ( - df.select(df.strs, flatten(df.maps, recursive=True)) - .select("strs", "key", "value") - .where("key is NULL") - .sort("strs", "value") -) - -## EXPECTED ENCODED AST - -## EXPECTED UNPARSER OUTPUT diff --git a/tests/integ/test_function.py b/tests/integ/test_function.py index 8050252cd2..ff8fdcdbd1 100644 --- a/tests/integ/test_function.py +++ b/tests/integ/test_function.py @@ -110,6 +110,7 @@ lit, ln, log, + month, months_between, negate, not_, @@ -159,6 +160,7 @@ vector_cosine_distance, vector_inner_product, vector_l2_distance, + year, ) from snowflake.snowpark.types import ( ArrayType, @@ -420,6 +422,33 @@ def check_concat_ws_ignore_nulls_output(session): ], ) + df = session.create_dataframe( + [(datetime.date(2021, 12, 21),), (datetime.date(1969, 12, 31),)], + schema=["year_month"], + ) + + Utils.check_answer( + df.select( + _concat_ws_ignore_nulls("-", year("year_month"), month("year_month")) + ), + [ + Row("2021-12"), + Row("1969-12"), + ], + ) + + Utils.check_answer( + df.select( + _concat_ws_ignore_nulls( + "-", year("year_month"), month("year_month") + ).alias("year_month") + ), + [ + Row(YEAR_MONTH="2021-12"), + Row(YEAR_MONTH="1969-12"), + ], + ) + if structured_type_semantics: if not structured_types_supported(session, False): pytest.skip("Structured type support required.")