Improving Expression Support for In Database (#5790)

- Adjust Excel Workbook write behaviour. - Support Nothing / Null constants. - Deduce the type of arithmetic operations and `iif`. - Allow Date_Time constants, treating as local timezone. - Removed the `to_column_name` and `ensure_sane_name` code.
enso-org · Mar 3, 2023 · 01fc34c · 01fc34c
1 parent ee981d2
commit 01fc34c
Show file tree

Hide file tree

Showing 15 changed files with 173 additions and 109 deletions.
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso
@@ -360,7 +360,9 @@ type Column
         op = case other of
             _ : Column -> if self.sql_type.is_definitely_numeric || other.sql_type.is_definitely_numeric then 'ADD_NUMBER' else 'ADD_TEXT'
             _ -> if self.sql_type.is_definitely_numeric then 'ADD_NUMBER' else 'ADD_TEXT'
-        self.make_binary_op op other
+        new_type = if op == 'ADD_TEXT' then self.sql_type else
+            SQL_Type.merge_type self.sql_type (SQL_Type.approximate_type other)
+        self.make_binary_op op other new_type=new_type
 
     ## UNSTABLE
 
@@ -373,7 +375,9 @@ type Column
        element of `self`.  If `other` is a column, the operation is performed
        pairwise between corresponding elements of `self` and `other`.
     - : Column | Any -> Column
-    - self other = self.make_binary_op "-" other
+    - self other =
+        new_type = SQL_Type.merge_type self.sql_type (SQL_Type.approximate_type other)
+        self.make_binary_op "-" other new_type=new_type
 
     ## UNSTABLE
 
@@ -386,7 +390,9 @@ type Column
        element of `self`.  If `other` is a column, the operation is performed
        pairwise between corresponding elements of `self` and `other`.
     * : Column | Any -> Column
-    * self other = self.make_binary_op "*" other
+    * self other =
+        new_type = SQL_Type.merge_type self.sql_type (SQL_Type.approximate_type other)
+        self.make_binary_op "*" other new_type=new_type
 
     ## ALIAS Divide Columns
 
@@ -452,7 +458,9 @@ type Column
              example_mod = Examples.integer_column % 3
     % : Column | Any -> Column
     % self other =
-        self.make_binary_op "%" other
+        new_type = SQL_Type.merge_type self.sql_type (SQL_Type.approximate_type other)
+        op = if new_type == SQL_Type.integer then "%" else "mod"
+        self.make_binary_op op other new_type=new_type
 
     ## ALIAS Power
 
@@ -480,7 +488,8 @@ type Column
 
              example_div = Examples.decimal_column ^ Examples.integer_column
     ^ : Column | Any -> Column
-    ^ self other = self.make_binary_op '^' other
+    ^ self other =
+        self.make_binary_op '^' other new_type=SQL_Type.double
 
     ## UNSTABLE
 
@@ -494,7 +503,7 @@ type Column
        operation is performed pairwise between corresponding elements of `self`
        and `other`.
     && : Column | Any -> Column
-    && self other = self.make_binary_op "AND" other
+    && self other = self.make_binary_op "AND" other new_type=SQL_Type.boolean
 
     ## UNSTABLE
 
@@ -508,7 +517,7 @@ type Column
        operation is performed pairwise between corresponding elements of `self`
        and `other`.
     || : Column | Any -> Column
-    || self other = self.make_binary_op "OR" other
+    || self other = self.make_binary_op "OR" other new_type=SQL_Type.boolean
 
     ## UNSTABLE
 
@@ -537,8 +546,10 @@ type Column
            common type.
         left_type = get_approximate_type when_true self.sql_type
         right_type = get_approximate_type when_false self.sql_type
-        if left_type != right_type then Error.throw (Illegal_Argument.Error "when_true and when_false types do not match") else
-            self.make_op "IIF" [when_true, when_false] new_type=left_type
+        new_type = SQL_Type.merge_type left_type right_type
+
+        if new_type.is_error then Error.throw (Illegal_Argument.Error "when_true and when_false types do not match") else
+            self.make_op "IIF" [when_true, when_false] new_type=new_type
 
     ## Returns a column of first non-`Nothing` value on each row of `self` and
        `values` list.
@@ -555,8 +566,9 @@ type Column
     coalesce : (Any | Vector Any) -> Column
     coalesce self values = case values of
         _ : Vector ->
-            if values.any (v->(self.sql_type != get_approximate_type v self.sql_type)) then Error.throw (Illegal_Argument.Error "self and values types do not all match") else
-                self.make_op "COALESCE" values new_type=self.sql_type
+            fold_type = values.fold self.sql_type c->v-> SQL_Type.merge_type c (get_approximate_type v self.sql_type)
+            if fold_type.is_error then Error.throw (Illegal_Argument.Error "self and values types do not all match") else
+                self.make_op "COALESCE" values new_type=fold_type
         _ : Array -> self.coalesce (Vector.from_polyglot_array values)
         _ -> self.coalesce [values]
 
@@ -574,8 +586,9 @@ type Column
     min : (Any | Vector Any) -> Column
     min self values = case values of
         _ : Vector ->
-            if values.any (v->(self.sql_type != get_approximate_type v self.sql_type)) then Error.throw (Illegal_Argument.Error "self and values types do not all match") else
-                self.make_op "ROW_MIN" values new_type=self.sql_type
+            fold_type = values.fold self.sql_type c->v-> SQL_Type.merge_type c (get_approximate_type v self.sql_type)
+            if fold_type.is_error then Error.throw (Illegal_Argument.Error "self and values types do not all match") else
+                self.make_op "ROW_MIN" values new_type=fold_type
         _ : Array -> self.min (Vector.from_polyglot_array values)
         _ -> self.min [values]
 
@@ -593,8 +606,9 @@ type Column
     max : (Any | Vector Any) -> Column
     max self values = case values of
         _ : Vector ->
-            if values.any (v->(self.sql_type != get_approximate_type v self.sql_type)) then Error.throw (Illegal_Argument.Error "self and values types do not all match") else
-                self.make_op "ROW_MAX" values new_type=self.sql_type
+            fold_type = values.fold self.sql_type c->v-> SQL_Type.merge_type c (get_approximate_type v self.sql_type)
+            if fold_type.is_error then Error.throw (Illegal_Argument.Error "self and values types do not all match") else
+                self.make_op "ROW_MAX" values new_type=fold_type
         _ : Array -> self.max (Vector.from_polyglot_array values)
         _ -> self.max [values]
 
@@ -662,7 +676,7 @@ type Column
        Arguments:
        - new_name: The name to rename `self` column to.
     rename : Text -> Column ! Unsupported_Name
-    rename self new_name = Helpers.ensure_name_is_sane new_name <|
+    rename self new_name =
         Column.Value new_name self.connection self.sql_type self.expression self.context
 
     ## UNSTABLE

diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/SQL_Type.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/SQL_Type.enso
@@ -1,6 +1,8 @@
 from Standard.Base import all
 import Standard.Base.Error.Illegal_Argument.Illegal_Argument
 
+import project.Data.Column.Column
+
 polyglot java import java.sql.Types
 
 ## Represents an internal SQL data-type.
@@ -30,6 +32,10 @@ type SQL_Type
     bigint : SQL_Type
     bigint = SQL_Type.Value Types.BIGINT "BIGINT"
 
+    ## The SQL representation of the `TINYINT` type.
+    tinyint : SQL_Type
+    tinyint = SQL_Type.Value Types.TINYINT "TINYINT"
+
     ## The SQL representation of the `SMALLINT` type.
     smallint : SQL_Type
     smallint = SQL_Type.Value Types.SMALLINT "SMALLINT"
@@ -51,12 +57,6 @@ type SQL_Type
     numeric = SQL_Type.Value Types.NUMERIC "NUMERIC"
 
     ## The SQL type representing one of the supported textual types.
-    varchar : SQL_Type
-    varchar = SQL_Type.Value Types.VARCHAR "VARCHAR"
-
-    ## UNSTABLE
-       The SQL type representing one of the supported textual types.
-
        It seems that JDBC treats the `TEXT` and `VARCHAR` types as interchangeable.
     text : SQL_Type
     text = SQL_Type.Value Types.VARCHAR "VARCHAR"
@@ -77,18 +77,38 @@ type SQL_Type
     date_time : SQL_Type
     date_time = SQL_Type.Value Types.TIMESTAMP_WITH_TIMEZONE "TIMESTAMP"
 
+    ## The SQL type representing a null column.
+    null : SQL_Type
+    null = SQL_Type.Value Types.NULL "NULL"
+
     ## ADVANCED
        Given an Enso value gets the approximate SQL type.
     approximate_type : Any -> SQL_Type ! Illegal_Argument
     approximate_type value = case value of
+        _ : Column -> value.sql_type
         _ : Boolean -> SQL_Type.boolean
-        _ : Integer -> SQL_Type.integer
+        _ : Integer -> if value.abs >= 2^32 then SQL_Type.bigint else SQL_Type.integer
         _ : Decimal -> SQL_Type.double
-        _ : Text -> SQL_Type.varchar
+        _ : Text -> SQL_Type.text
         _ : Date -> SQL_Type.date
-        _ : Time_Of_Day -> SQL_Type.time_of_day
+        _ : Time_Of_Day -> SQL_Type.time
         _ : Date_Time -> SQL_Type.date_time
-            _ -> Error.throw (Illegal_Argument.Error "Unsupported type.")
+        Nothing -> SQL_Type.null
+        _ -> Error.throw (Illegal_Argument.Error "Unsupported type.")
+
+    ## PRIVATE
+       Returns the SQL type that is the result of applying an  operation to the
+       two given types.
+    merge_type : SQL_Type -> SQL_Type -> SQL_Type ! Illegal_Argument
+    merge_type left right =
+        if left.typeid == right.typeid then left else
+            if left.is_null.not && right.is_null then left else
+                if left.is_null && right.is_null.not then right else
+                    case left.is_definitely_numeric && right.is_definitely_numeric of
+                        True -> if left.is_definitely_integer && right.is_definitely_integer then merge_integer_type left right else
+                            merge_number_type left right
+                        False -> if left.is_definitely_text && right.is_definitely_text then SQL_Type.text else
+                            Error.throw (Illegal_Argument.Error "Unmatched types for operation.")
 
     ## PRIVATE
 
@@ -141,3 +161,29 @@ type SQL_Type
     is_likely_text : Boolean
     is_likely_text self =
        self.is_definitely_text || self.name.contains "text" Case_Sensitivity.Insensitive
+
+    ## PRIVATE
+    is_null : Boolean
+    is_null self = self.typeid == Types.NULL
+
+## PRIVATE
+   Joins two integer SQL types into the larger one.
+merge_integer_type : SQL_Type -> SQL_Type -> SQL_Type
+merge_integer_type left right =
+    integer_types = [Types.TINYINT, Types.SMALLINT, Types.INTEGER, Types.BIGINT]
+    left_index = integer_types.index_of left.typeid
+    right_index = integer_types.index_of right.typeid
+    new_index = left_index.max right_index
+    [SQL_Type.tinyint, SQL_Type.smallint, SQL_Type.integer, SQL_Type.bigint].at new_index
+
+## PRIVATE
+   Joins two numeric SQL types into the larger one.
+   One of the types must be non-integer (otherwise use merge_integer_type).
+merge_number_type :  SQL_Type -> SQL_Type -> SQL_Type
+merge_number_type left right = if left.is_definitely_integer then merge_number_type right left else
+    numeric_types = [Types.NUMERIC, Types.DECIMAL, Types.FLOAT, Types.REAL, Types.DOUBLE]
+    left_index = numeric_types.index_of left.typeid
+    right_index = numeric_types.index_of right.typeid
+    if right_index.is_nothing then left else
+        new_index = left_index.max right_index
+        [SQL_Type.numeric, SQL_Type.decimal, SQL_Type.real, SQL_Type.real, SQL_Type.double].at new_index
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso
@@ -630,7 +630,7 @@ type Table
             _ -> column
         renamed = if new_name.is_nothing then resolved else resolved.rename new_name
 
-        Helpers.ensure_name_is_sane renamed.name <|
+        renamed.if_not_error <|
             index = self.internal_columns.index_of (c -> c.name == renamed.name)
             to_add = case set_mode of
                 Set_Mode.Add_Or_Update -> True
@@ -673,8 +673,7 @@ type Table
             Column.Value ("Constant_" + UUID.randomUUID.to_text) self.connection new_type other self.context
         new_column = Expression.evaluate expression get_column make_constant "Standard.Database.Data.Column" "Column" Column.var_args_functions
         problems = Warning.get_all new_column . map .value
-        new_name = Expression.to_column_name expression
-        result = new_column.rename new_name
+        result = new_column.rename expression
         on_problems.attach_problems_before problems <|
             Warning.set result []
 

diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso
@@ -170,7 +170,7 @@ base_dialect =
     unary = name -> [name, make_unary_op name]
     fun = name -> [name, make_function name]
 
-    arith = [["ADD_NUMBER", make_binary_op "+"], ["ADD_TEXT", make_binary_op "||"], bin "-", bin "*", bin "/", bin "%", ["^", make_function "POWER"]]
+    arith = [["ADD_NUMBER", make_binary_op "+"], ["ADD_TEXT", make_binary_op "||"], bin "-", bin "*", bin "/", bin "%", ["mod", make_function "MOD"], ["^", make_function "POWER"]]
     logic = [bin "AND", bin "OR", unary "NOT", ["IIF", make_iif], ["TRUE", make_constant "TRUE"], ["FALSE", make_constant "FALSE"]]
     eq = lift_binary_op "==" make_equals
     neq = lift_binary_op "!=" make_not_equals

diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Helpers.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Helpers.enso
@@ -49,25 +49,6 @@ unify_vector_singleton x = case x of
     _ : Vector -> x
     _ -> [x]
 
-## PRIVATE
-
-   This is used to check if the new name is safe for use in SQL queries.
-
-   Arguments:
-   - name: The name to check for safety.
-   - action: The action to perform if the name is safe.
-
-   In a future version we will decouple the internal SQL-safe names from the
-   external names shown to the user, but as a temporary solution we only allow
-   SQL-safe names for columns.
-
-   # TODO [RW] better name handling in Tables (#1513)
-ensure_name_is_sane : Text -> (Any -> Any) -> Any ! Unsupported_Name
-ensure_name_is_sane name ~action =
-    is_safe = Pattern.matches "[A-Za-z_0-9]+" name
-    if is_safe then action else
-        Error.throw <| Unsupported_Name.Error (name + " is not a valid name for a column. Please use english letters, numbers and underscore only.")
-
 ## PRIVATE
 assume_default_locale : Locale -> Any -> Any ! Unsupported_Database_Operation
 assume_default_locale locale ~action =

diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/JDBC_Connection.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/JDBC_Connection.enso
@@ -24,6 +24,7 @@ polyglot java import java.sql.SQLException
 polyglot java import java.sql.SQLTimeoutException
 
 polyglot java import org.enso.database.JDBCProxy
+polyglot java import org.enso.database.JDBCUtils
 
 type JDBC_Connection
     ## PRIVATE
@@ -179,7 +180,11 @@ set_statement_values stmt holes =
     holes.map_with_index ix-> obj->
         position = ix + 1
         case obj.first of
-            Nothing -> stmt.setNull position obj.second.typeid
+            Nothing ->
+                ## If we really don't have a clue what this should be, we choose a varchar for a blank column.
+                sql_type = if obj.second == SQL_Type.null then SQL_Type.text else obj.second
+                stmt.setNull position sql_type.typeid
+            _ : Date_Time -> stmt.setTimestamp position (JDBCUtils.getTimestamp obj.first)
             _ -> stmt.setObject position obj.first
 
 ## PRIVATE

diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso
@@ -103,7 +103,7 @@ make_internal_generator_dialect =
     cases = [["LOWER", Base_Generator.make_function "LOWER"], ["UPPER", Base_Generator.make_function "UPPER"]]
     text = [starts_with, contains, ends_with, agg_shortest, agg_longest, make_case_sensitive]+concat_ops+cases
     counts = [agg_count_is_null, agg_count_empty, agg_count_not_empty, ["COUNT_DISTINCT", agg_count_distinct], ["COUNT_DISTINCT_INCLUDE_NULL", agg_count_distinct_include_null]]
-    arith_extensions = [is_nan, decimal_div]
+    arith_extensions = [is_nan, decimal_div, mod_op,  ["ROW_MIN", Base_Generator.make_function "LEAST"], ["ROW_MAX", Base_Generator.make_function "GREATEST"]]
     bool = [bool_or]
 
     stddev_pop = ["STDDEV_POP", Base_Generator.make_function "stddev_pop"]
@@ -303,3 +303,7 @@ bool_or = Base_Generator.lift_unary_op "BOOL_OR" arg->
 ## PRIVATE
 decimal_div = Base_Generator.lift_binary_op "/" x-> y->
     code "CAST(" ++ x ++ " AS double precision) / CAST(" ++ y ++ " AS double precision)"
+
+## PRIVATE
+mod_op = Base_Generator.lift_binary_op "mod" x-> y->
+    x ++ " - FLOOR(CAST(" ++ x ++ " AS double precision) / CAST(" ++ y ++ " AS double precision)) * " ++ y
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso
@@ -122,7 +122,7 @@ make_internal_generator_dialect =
     text = [starts_with, contains, ends_with, make_case_sensitive]+concat_ops
     counts = [agg_count_is_null, agg_count_empty, agg_count_not_empty, ["COUNT_DISTINCT", agg_count_distinct], ["COUNT_DISTINCT_INCLUDE_NULL", agg_count_distinct_include_null]]
     stats = [agg_stddev_pop, agg_stddev_samp]
-    arith_extensions = [decimal_div]
+    arith_extensions = [decimal_div, mod_op]
 
     bool = [bool_or]
     my_mappings = text + counts + stats + arith_extensions + bool
@@ -260,3 +260,7 @@ bool_or = Base_Generator.lift_unary_op "BOOL_OR" arg->
 ## PRIVATE
 decimal_div = Base_Generator.lift_binary_op "/" x-> y->
     code "CAST(" ++ x ++ " AS REAL) / CAST(" ++ y ++ " AS REAL)"
+
+## PRIVATE
+mod_op = Base_Generator.lift_binary_op "mod" x-> y->
+    x ++ " - FLOOR(CAST(" ++ x ++ " AS REAL) / CAST(" ++ y ++ " AS REAL)) * " ++ y
diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Expression.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Expression.enso
@@ -28,13 +28,6 @@ type Expression
         handle_parse_error <| handle_unsupported <| handle_arguments <|
             ExpressionVisitorImpl.evaluate expression get_column make_constant module_name type_name var_args_functions.to_array
 
-    ## PRIVATE
-       Converts an expression into a compatible column name.
-    to_column_name : Text -> Text
-    to_column_name expression =
-        expression.replace "[^A-Za-z_0-9]" "_" matcher=Regex_Matcher.Value
-
-
 type Expression_Error
     ## The expression supplied could not be parsed due to a syntax error.
     Syntax_Error message:Text line:Integer column:Integer

diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso
@@ -1136,8 +1136,7 @@ type Table
         make_constant value = Column.from_vector_repeated (UUID.randomUUID.to_text) [value] self.row_count
         new_column = Expression.evaluate expression get_column make_constant "Standard.Table.Data.Column" "Column" Column.var_args_functions
         problems = Warning.get_all new_column . map .value
-        new_name = Expression.to_column_name expression
-        result = new_column.rename new_name
+        result = new_column.rename expression
         on_problems.attach_problems_before problems <|
             Warning.set result []
 

diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Excel/Excel_Format.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Excel/Excel_Format.enso
@@ -41,7 +41,9 @@ type Excel_Format
          If set to `True`, the file is read as an Excel 95-2003 format.
          If set to `False`, the file is read as an Excel 2007+ format.
          `Infer` will attempt to deduce this from the extension of the filename.
-    Excel (section:Excel_Section=Excel_Section.Workbook) (headers:(Boolean|Infer)=Infer) (xls_format:(Boolean|Infer)=Infer)
+       - default_sheet: The default sheet to use if `section` is set to
+         `Excel_Section.Workbook`.
+    Excel (section:Excel_Section=Excel_Section.Workbook) (headers:(Boolean|Infer)=Infer) (xls_format:(Boolean|Infer)=Infer) (default_sheet:Text="EnsoSheet")
 
     ## If the File_Format supports reading from the file, return a configured instance.
     for_file : File -> Excel_Format | Nothing
@@ -90,12 +92,6 @@ type Excel_Format
             Excel_Section.Sheet_Names -> Error.throw (Illegal_Argument.Error "Sheet_Names cannot be used for `write`.")
             Excel_Section.Range_Names -> Error.throw (Illegal_Argument.Error "Range_Names cannot be used for `write`.")
             Excel_Section.Workbook ->
-                sheet_name = if file.exists.not then "Sheet1" else
-                    names = Excel_Reader.read_file file Excel_Section.Sheet_Names False on_problems format
-                    if names.length == 255 then Error.throw (Illegal_Argument.Error "Too many sheets in workbook.") else
-                        name_map = Map.from_vector (names.map n-> [n, 1])
-                        idx = 1.up_to 256 . find n-> (name_map.contains_key ("Sheet" + n.to_text) . not)
-                        "Sheet" + idx.to_text
-                Excel_Writer.write_file file table on_existing_file (Excel_Section.Worksheet sheet_name) True match_columns on_problems format
+                Excel_Writer.write_file file table on_existing_file (Excel_Section.Worksheet self.default_sheet) True match_columns on_problems format
             _ -> Excel_Writer.write_file file table on_existing_file self.section self.headers match_columns on_problems format
         r.if_not_error file