Merge branch 'snowflakedb:main' into fhe-SNOW-1936603-fix-limit-bug

snowflakedb · Feb 27, 2025 · 580661a · 580661a
2 parents d352bef + a764a70
commit 580661a
Show file tree

Hide file tree

Showing 10 changed files with 2,502 additions and 137 deletions.
diff --git a/src/snowflake/snowpark/functions.py b/src/snowflake/snowpark/functions.py
@@ -3803,7 +3803,7 @@ def _concat_ws_ignore_nulls(sep: str, *cols: ColumnOrName) -> Column:
     """
     # TODO: SNOW-1831917 create ast
     columns = [_to_col_if_str(c, "_concat_ws_ignore_nulls") for c in cols]
-    names = ",".join([c.get_name() for c in columns])
+    names = ",".join([c.get_name() or f"COL{i}" for i, c in enumerate(columns)])
 
     # The implementation of this function is as follows with example input of
     # sep = "," and row = [a, NULL], b, NULL, c:
@@ -3815,7 +3815,7 @@ def _concat_ws_ignore_nulls(sep: str, *cols: ColumnOrName) -> Column:
     #   [a, NULL, b, c]
     # 4. Filter out nulls (array_remove_nulls).
     #   [a, b, c]
-    # 5. Concatenate the non-null values into a single string (concat_strings_with_sep).
+    # 5. Concatenate the non-null values into a single string (array_to_string).
     #   "a,b,c"
 
     def array_remove_nulls(col: Column) -> Column:
@@ -3824,29 +3824,18 @@ def array_remove_nulls(col: Column) -> Column:
             col, sql_expr("x -> NOT IS_NULL_VALUE(x)", _emit_ast=False)
         )
 
-    def concat_strings_with_sep(col: Column) -> Column:
-        """
-        Expects an array of strings and returns a single string
-        with the values concatenated with the separator.
-        """
-        return substring(
-            builtin("reduce", _emit_ast=False)(
-                col, lit(""), sql_expr(f"(l, r) -> l || '{sep}' || r", _emit_ast=False)
-            ),
-            len(sep) + 1,
-            _emit_ast=False,
-        )
-
-    return concat_strings_with_sep(
-        array_remove_nulls(
+    return array_to_string(
+        array=array_remove_nulls(
             array_flatten(
                 array_construct_compact(
                     *[c.cast(ArrayType(), _emit_ast=False) for c in columns],
                     _emit_ast=False,
                 ),
                 _emit_ast=False,
             )
-        )
+        ),
+        separator=lit(sep, _emit_ast=False),
+        _emit_ast=False,
     ).alias(f"CONCAT_WS_IGNORE_NULLS('{sep}', {names})", _emit_ast=False)
 
 

diff --git a/src/snowflake/snowpark/mock/_nop_plan.py b/src/snowflake/snowpark/mock/_nop_plan.py
@@ -152,9 +152,15 @@ def resolve_attributes(
 
     elif isinstance(plan, TableFunctionJoin):
         left_attributes = resolve_attributes(plan.children[0], session)
-        output_schema = session.udtf.get_udtf(
-            plan.table_function.func_name
-        )._output_schema
+        try:
+            output_schema = session.udtf.get_udtf(
+                plan.table_function.func_name
+            )._output_schema
+        except KeyError:
+            if session is not None and session._conn._suppress_not_implemented_error:
+                return []
+            else:
+                raise
         if isinstance(output_schema, PandasDataFrameType):
             right_attributes = [
                 Attribute(col_name, col_type, True)

diff --git a/tests/ast/data/DataFrame.join_table_function.test b/tests/ast/data/DataFrame.join_table_function.test
@@ -0,0 +1,275 @@
+## TEST CASE
+
+df1 = session.create_dataframe(
+    [
+        ["foo", "The quick brown fox jumps over the lazy dog"],
+        ["bar", "Lorem ipsum dolor sit amet, consectetur adipiscing elit"],
+    ],
+    schema=["name", "text"],
+)
+
+df2 = df1.join_table_function("STRTOK_SPLIT_TO_TABLE", df1["text"], lit(" "))
+
+# The following part of the test cannot be executed in the mock environment.
+
+# tokenize_text = (
+#     call_table_function("STRTOK_SPLIT_TO_TABLE", df1["text"], lit(" "))
+#     .over(partition_by="name")
+#     .over(order_by="text")
+#     .alias("ignored1", "ignored2", "ignored3")
+#     .alias("original_row_number", "token_number", "token")
+# )
+
+# df3 = df1.join_table_function(tokenize_text)
+
+# tokenize_text_ref = table_function("STRTOK_SPLIT_TO_TABLE")
+# df4 = df1.join_table_function(
+#     tokenize_text_ref(df1["text"], lit(" "))
+#     .over(partition_by="name", order_by="text")
+#     .alias("original_row_number", "token_number", "token")
+# )
+
+# df5 = df1.join_table_function(
+#     tokenize_text_ref(df1["text"], lit(" ,"))
+#     .over(partition_by="name", order_by="text")
+#     .alias("row_number", "token_number", "token")
+# )
+
+## EXPECTED UNPARSER OUTPUT
+
+df1 = session.create_dataframe([["foo", "The quick brown fox jumps over the lazy dog"], ["bar", "Lorem ipsum dolor sit amet, consectetur adipiscing elit"]], schema=["name", "text"])
+
+df2 = df1.join_table_function("STRTOK_SPLIT_TO_TABLE", df1["text"], lit(" "))
+
+## EXPECTED ENCODED AST
+
+interned_value_table {
+  string_values {
+    key: -1
+  }
+  string_values {
+    key: 2
+    value: "SRC_POSITION_TEST_MODE"
+  }
+}
+body {
+  assign {
+    expr {
+      create_dataframe {
+        data {
+          dataframe_data__list {
+            vs {
+              list_val {
+                src {
+                  end_column: 9
+                  end_line: 31
+                  file: 2
+                  start_column: 14
+                  start_line: 25
+                }
+                vs {
+                  string_val {
+                    src {
+                      end_column: 9
+                      end_line: 31
+                      file: 2
+                      start_column: 14
+                      start_line: 25
+                    }
+                    v: "foo"
+                  }
+                }
+                vs {
+                  string_val {
+                    src {
+                      end_column: 9
+                      end_line: 31
+                      file: 2
+                      start_column: 14
+                      start_line: 25
+                    }
+                    v: "The quick brown fox jumps over the lazy dog"
+                  }
+                }
+              }
+            }
+            vs {
+              list_val {
+                src {
+                  end_column: 9
+                  end_line: 31
+                  file: 2
+                  start_column: 14
+                  start_line: 25
+                }
+                vs {
+                  string_val {
+                    src {
+                      end_column: 9
+                      end_line: 31
+                      file: 2
+                      start_column: 14
+                      start_line: 25
+                    }
+                    v: "bar"
+                  }
+                }
+                vs {
+                  string_val {
+                    src {
+                      end_column: 9
+                      end_line: 31
+                      file: 2
+                      start_column: 14
+                      start_line: 25
+                    }
+                    v: "Lorem ipsum dolor sit amet, consectetur adipiscing elit"
+                  }
+                }
+              }
+            }
+          }
+        }
+        schema {
+          dataframe_schema__list {
+            vs: "name"
+            vs: "text"
+          }
+        }
+        src {
+          end_column: 9
+          end_line: 31
+          file: 2
+          start_column: 14
+          start_line: 25
+        }
+      }
+    }
+    symbol {
+      value: "df1"
+    }
+    uid: 1
+    var_id {
+      bitfield1: 1
+    }
+  }
+}
+body {
+  assign {
+    expr {
+      dataframe_join_table_function {
+        fn {
+          apply_expr {
+            fn {
+              indirect_table_fn_name_ref {
+                name {
+                  name {
+                    name_flat {
+                      name: "STRTOK_SPLIT_TO_TABLE"
+                    }
+                  }
+                }
+              }
+            }
+            pos_args {
+              dataframe_col {
+                col_name: "text"
+                df {
+                  dataframe_ref {
+                    id {
+                      bitfield1: 1
+                    }
+                  }
+                }
+                src {
+                  end_column: 74
+                  end_line: 33
+                  file: 2
+                  start_column: 63
+                  start_line: 33
+                }
+              }
+            }
+            pos_args {
+              apply_expr {
+                fn {
+                  builtin_fn {
+                    name {
+                      name {
+                        name_flat {
+                          name: "lit"
+                        }
+                      }
+                    }
+                  }
+                }
+                pos_args {
+                  string_val {
+                    src {
+                      end_column: 84
+                      end_line: 33
+                      file: 2
+                      start_column: 76
+                      start_line: 33
+                    }
+                    v: " "
+                  }
+                }
+                src {
+                  end_column: 84
+                  end_line: 33
+                  file: 2
+                  start_column: 76
+                  start_line: 33
+                }
+              }
+            }
+            src {
+              end_column: 85
+              end_line: 33
+              file: 2
+              start_column: 14
+              start_line: 33
+            }
+          }
+        }
+        lhs {
+          dataframe_ref {
+            id {
+              bitfield1: 1
+            }
+          }
+        }
+        src {
+          end_column: 85
+          end_line: 33
+          file: 2
+          start_column: 14
+          start_line: 33
+        }
+      }
+    }
+    symbol {
+      value: "df2"
+    }
+    uid: 2
+    var_id {
+      bitfield1: 2
+    }
+  }
+}
+client_ast_version: 1
+client_language {
+  python_language {
+    version {
+      label: "final"
+      major: 3
+      minor: 9
+      patch: 1
+    }
+  }
+}
+client_version {
+  major: 1
+  minor: 29
+}
diff --git a/tests/ast/data/DataFrame.join_table_function.test.DISABLED b/tests/ast/data/DataFrame.join_table_function.test.DISABLED