Skip to content

Commit

Permalink
Merge branch 'snowflakedb:main' into fhe-SNOW-1936603-fix-limit-bug
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-fhe authored Feb 27, 2025
2 parents d352bef + a764a70 commit 580661a
Show file tree
Hide file tree
Showing 10 changed files with 2,502 additions and 137 deletions.
25 changes: 7 additions & 18 deletions src/snowflake/snowpark/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3803,7 +3803,7 @@ def _concat_ws_ignore_nulls(sep: str, *cols: ColumnOrName) -> Column:
"""
# TODO: SNOW-1831917 create ast
columns = [_to_col_if_str(c, "_concat_ws_ignore_nulls") for c in cols]
names = ",".join([c.get_name() for c in columns])
names = ",".join([c.get_name() or f"COL{i}" for i, c in enumerate(columns)])

# The implementation of this function is as follows with example input of
# sep = "," and row = [a, NULL], b, NULL, c:
Expand All @@ -3815,7 +3815,7 @@ def _concat_ws_ignore_nulls(sep: str, *cols: ColumnOrName) -> Column:
# [a, NULL, b, c]
# 4. Filter out nulls (array_remove_nulls).
# [a, b, c]
# 5. Concatenate the non-null values into a single string (concat_strings_with_sep).
# 5. Concatenate the non-null values into a single string (array_to_string).
# "a,b,c"

def array_remove_nulls(col: Column) -> Column:
Expand All @@ -3824,29 +3824,18 @@ def array_remove_nulls(col: Column) -> Column:
col, sql_expr("x -> NOT IS_NULL_VALUE(x)", _emit_ast=False)
)

def concat_strings_with_sep(col: Column) -> Column:
    """
    Join an array of strings into a single string using ``sep``.

    Args:
        col: Column holding an array of strings. The caller passes the
            result of ``array_remove_nulls``, so nulls are already removed.

    Returns:
        A Column with the array elements concatenated, separated by ``sep``.
    """
    # ``sep`` ends up inside a single-quoted SQL string constant in the
    # lambda below, so backslashes and single quotes must be escaped.
    # Without this, a separator such as "'" yields invalid (or injected) SQL.
    escaped_sep = sep.replace("\\", "\\\\").replace("'", "''")

    # The fold starts from "" and puts the separator in front of every
    # element, producing "<sep>a<sep>b<sep>c".
    joined = builtin("reduce", _emit_ast=False)(
        col,
        lit(""),
        sql_expr(f"(l, r) -> l || '{escaped_sep}' || r", _emit_ast=False),
    )

    # Strip the leading separator. The offset uses the raw ``sep`` length
    # because that is what the escaped SQL literal evaluates to at runtime.
    return substring(joined, len(sep) + 1, _emit_ast=False)

return concat_strings_with_sep(
array_remove_nulls(
return array_to_string(
array=array_remove_nulls(
array_flatten(
array_construct_compact(
*[c.cast(ArrayType(), _emit_ast=False) for c in columns],
_emit_ast=False,
),
_emit_ast=False,
)
)
),
separator=lit(sep, _emit_ast=False),
_emit_ast=False,
).alias(f"CONCAT_WS_IGNORE_NULLS('{sep}', {names})", _emit_ast=False)


Expand Down
12 changes: 9 additions & 3 deletions src/snowflake/snowpark/mock/_nop_plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,9 +152,15 @@ def resolve_attributes(

elif isinstance(plan, TableFunctionJoin):
left_attributes = resolve_attributes(plan.children[0], session)
output_schema = session.udtf.get_udtf(
plan.table_function.func_name
)._output_schema
try:
output_schema = session.udtf.get_udtf(
plan.table_function.func_name
)._output_schema
except KeyError:
if session is not None and session._conn._suppress_not_implemented_error:
return []
else:
raise
if isinstance(output_schema, PandasDataFrameType):
right_attributes = [
Attribute(col_name, col_type, True)
Expand Down
275 changes: 275 additions & 0 deletions tests/ast/data/DataFrame.join_table_function.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,275 @@
## TEST CASE

df1 = session.create_dataframe(
[
["foo", "The quick brown fox jumps over the lazy dog"],
["bar", "Lorem ipsum dolor sit amet, consectetur adipiscing elit"],
],
schema=["name", "text"],
)

df2 = df1.join_table_function("STRTOK_SPLIT_TO_TABLE", df1["text"], lit(" "))

# The remaining cases cannot be executed in the mock environment, so they are kept commented out.

# tokenize_text = (
# call_table_function("STRTOK_SPLIT_TO_TABLE", df1["text"], lit(" "))
# .over(partition_by="name")
# .over(order_by="text")
# .alias("ignored1", "ignored2", "ignored3")
# .alias("original_row_number", "token_number", "token")
# )

# df3 = df1.join_table_function(tokenize_text)

# tokenize_text_ref = table_function("STRTOK_SPLIT_TO_TABLE")
# df4 = df1.join_table_function(
# tokenize_text_ref(df1["text"], lit(" "))
# .over(partition_by="name", order_by="text")
# .alias("original_row_number", "token_number", "token")
# )

# df5 = df1.join_table_function(
# tokenize_text_ref(df1["text"], lit(" ,"))
# .over(partition_by="name", order_by="text")
# .alias("row_number", "token_number", "token")
# )

## EXPECTED UNPARSER OUTPUT

df1 = session.create_dataframe([["foo", "The quick brown fox jumps over the lazy dog"], ["bar", "Lorem ipsum dolor sit amet, consectetur adipiscing elit"]], schema=["name", "text"])

df2 = df1.join_table_function("STRTOK_SPLIT_TO_TABLE", df1["text"], lit(" "))

## EXPECTED ENCODED AST

interned_value_table {
string_values {
key: -1
}
string_values {
key: 2
value: "SRC_POSITION_TEST_MODE"
}
}
body {
assign {
expr {
create_dataframe {
data {
dataframe_data__list {
vs {
list_val {
src {
end_column: 9
end_line: 31
file: 2
start_column: 14
start_line: 25
}
vs {
string_val {
src {
end_column: 9
end_line: 31
file: 2
start_column: 14
start_line: 25
}
v: "foo"
}
}
vs {
string_val {
src {
end_column: 9
end_line: 31
file: 2
start_column: 14
start_line: 25
}
v: "The quick brown fox jumps over the lazy dog"
}
}
}
}
vs {
list_val {
src {
end_column: 9
end_line: 31
file: 2
start_column: 14
start_line: 25
}
vs {
string_val {
src {
end_column: 9
end_line: 31
file: 2
start_column: 14
start_line: 25
}
v: "bar"
}
}
vs {
string_val {
src {
end_column: 9
end_line: 31
file: 2
start_column: 14
start_line: 25
}
v: "Lorem ipsum dolor sit amet, consectetur adipiscing elit"
}
}
}
}
}
}
schema {
dataframe_schema__list {
vs: "name"
vs: "text"
}
}
src {
end_column: 9
end_line: 31
file: 2
start_column: 14
start_line: 25
}
}
}
symbol {
value: "df1"
}
uid: 1
var_id {
bitfield1: 1
}
}
}
body {
assign {
expr {
dataframe_join_table_function {
fn {
apply_expr {
fn {
indirect_table_fn_name_ref {
name {
name {
name_flat {
name: "STRTOK_SPLIT_TO_TABLE"
}
}
}
}
}
pos_args {
dataframe_col {
col_name: "text"
df {
dataframe_ref {
id {
bitfield1: 1
}
}
}
src {
end_column: 74
end_line: 33
file: 2
start_column: 63
start_line: 33
}
}
}
pos_args {
apply_expr {
fn {
builtin_fn {
name {
name {
name_flat {
name: "lit"
}
}
}
}
}
pos_args {
string_val {
src {
end_column: 84
end_line: 33
file: 2
start_column: 76
start_line: 33
}
v: " "
}
}
src {
end_column: 84
end_line: 33
file: 2
start_column: 76
start_line: 33
}
}
}
src {
end_column: 85
end_line: 33
file: 2
start_column: 14
start_line: 33
}
}
}
lhs {
dataframe_ref {
id {
bitfield1: 1
}
}
}
src {
end_column: 85
end_line: 33
file: 2
start_column: 14
start_line: 33
}
}
}
symbol {
value: "df2"
}
uid: 2
var_id {
bitfield1: 2
}
}
}
client_ast_version: 1
client_language {
python_language {
version {
label: "final"
major: 3
minor: 9
patch: 1
}
}
}
client_version {
major: 1
minor: 29
}
40 changes: 0 additions & 40 deletions tests/ast/data/DataFrame.join_table_function.test.DISABLED

This file was deleted.

Loading

0 comments on commit 580661a

Please sign in to comment.