Skip to content

Commit

Permalink
Improving Expression Support for In Database (#5790)
Browse files Browse the repository at this point in the history
- Adjust Excel Workbook write behaviour.
- Support Nothing / Null constants.
- Deduce the type of arithmetic operations and `iif`.
- Allow Date_Time constants, treating as local timezone.
- Removed the `to_column_name` and `ensure_sane_name` code.
  • Loading branch information
jdunkerley authored Mar 3, 2023
1 parent ee981d2 commit 01fc34c
Show file tree
Hide file tree
Showing 15 changed files with 173 additions and 109 deletions.
46 changes: 30 additions & 16 deletions distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,9 @@ type Column
op = case other of
_ : Column -> if self.sql_type.is_definitely_numeric || other.sql_type.is_definitely_numeric then 'ADD_NUMBER' else 'ADD_TEXT'
_ -> if self.sql_type.is_definitely_numeric then 'ADD_NUMBER' else 'ADD_TEXT'
self.make_binary_op op other
new_type = if op == 'ADD_TEXT' then self.sql_type else
SQL_Type.merge_type self.sql_type (SQL_Type.approximate_type other)
self.make_binary_op op other new_type=new_type

## UNSTABLE

Expand All @@ -373,7 +375,9 @@ type Column
element of `self`. If `other` is a column, the operation is performed
pairwise between corresponding elements of `self` and `other`.
- : Column | Any -> Column
- self other = self.make_binary_op "-" other
- self other =
new_type = SQL_Type.merge_type self.sql_type (SQL_Type.approximate_type other)
self.make_binary_op "-" other new_type=new_type

## UNSTABLE

Expand All @@ -386,7 +390,9 @@ type Column
element of `self`. If `other` is a column, the operation is performed
pairwise between corresponding elements of `self` and `other`.
* : Column | Any -> Column
* self other = self.make_binary_op "*" other
* self other =
new_type = SQL_Type.merge_type self.sql_type (SQL_Type.approximate_type other)
self.make_binary_op "*" other new_type=new_type

## ALIAS Divide Columns

Expand Down Expand Up @@ -452,7 +458,9 @@ type Column
example_mod = Examples.integer_column % 3
% : Column | Any -> Column
% self other =
self.make_binary_op "%" other
new_type = SQL_Type.merge_type self.sql_type (SQL_Type.approximate_type other)
op = if new_type == SQL_Type.integer then "%" else "mod"
self.make_binary_op op other new_type=new_type

## ALIAS Power

Expand Down Expand Up @@ -480,7 +488,8 @@ type Column

example_div = Examples.decimal_column ^ Examples.integer_column
^ : Column | Any -> Column
^ self other = self.make_binary_op '^' other
^ self other =
self.make_binary_op '^' other new_type=SQL_Type.double

## UNSTABLE

Expand All @@ -494,7 +503,7 @@ type Column
operation is performed pairwise between corresponding elements of `self`
and `other`.
&& : Column | Any -> Column
&& self other = self.make_binary_op "AND" other
&& self other = self.make_binary_op "AND" other new_type=SQL_Type.boolean

## UNSTABLE

Expand All @@ -508,7 +517,7 @@ type Column
operation is performed pairwise between corresponding elements of `self`
and `other`.
|| : Column | Any -> Column
|| self other = self.make_binary_op "OR" other
|| self other = self.make_binary_op "OR" other new_type=SQL_Type.boolean

## UNSTABLE

Expand Down Expand Up @@ -537,8 +546,10 @@ type Column
common type.
left_type = get_approximate_type when_true self.sql_type
right_type = get_approximate_type when_false self.sql_type
if left_type != right_type then Error.throw (Illegal_Argument.Error "when_true and when_false types do not match") else
self.make_op "IIF" [when_true, when_false] new_type=left_type
new_type = SQL_Type.merge_type left_type right_type

if new_type.is_error then Error.throw (Illegal_Argument.Error "when_true and when_false types do not match") else
self.make_op "IIF" [when_true, when_false] new_type=new_type

## Returns a column of first non-`Nothing` value on each row of `self` and
`values` list.
Expand All @@ -555,8 +566,9 @@ type Column
coalesce : (Any | Vector Any) -> Column
coalesce self values = case values of
_ : Vector ->
if values.any (v->(self.sql_type != get_approximate_type v self.sql_type)) then Error.throw (Illegal_Argument.Error "self and values types do not all match") else
self.make_op "COALESCE" values new_type=self.sql_type
fold_type = values.fold self.sql_type c->v-> SQL_Type.merge_type c (get_approximate_type v self.sql_type)
if fold_type.is_error then Error.throw (Illegal_Argument.Error "self and values types do not all match") else
self.make_op "COALESCE" values new_type=fold_type
_ : Array -> self.coalesce (Vector.from_polyglot_array values)
_ -> self.coalesce [values]

Expand All @@ -574,8 +586,9 @@ type Column
min : (Any | Vector Any) -> Column
min self values = case values of
_ : Vector ->
if values.any (v->(self.sql_type != get_approximate_type v self.sql_type)) then Error.throw (Illegal_Argument.Error "self and values types do not all match") else
self.make_op "ROW_MIN" values new_type=self.sql_type
fold_type = values.fold self.sql_type c->v-> SQL_Type.merge_type c (get_approximate_type v self.sql_type)
if fold_type.is_error then Error.throw (Illegal_Argument.Error "self and values types do not all match") else
self.make_op "ROW_MIN" values new_type=fold_type
_ : Array -> self.min (Vector.from_polyglot_array values)
_ -> self.min [values]

Expand All @@ -593,8 +606,9 @@ type Column
max : (Any | Vector Any) -> Column
max self values = case values of
_ : Vector ->
if values.any (v->(self.sql_type != get_approximate_type v self.sql_type)) then Error.throw (Illegal_Argument.Error "self and values types do not all match") else
self.make_op "ROW_MAX" values new_type=self.sql_type
fold_type = values.fold self.sql_type c->v-> SQL_Type.merge_type c (get_approximate_type v self.sql_type)
if fold_type.is_error then Error.throw (Illegal_Argument.Error "self and values types do not all match") else
self.make_op "ROW_MAX" values new_type=fold_type
_ : Array -> self.max (Vector.from_polyglot_array values)
_ -> self.max [values]

Expand Down Expand Up @@ -662,7 +676,7 @@ type Column
Arguments:
- new_name: The name to rename `self` column to.
rename : Text -> Column ! Unsupported_Name
rename self new_name = Helpers.ensure_name_is_sane new_name <|
rename self new_name =
Column.Value new_name self.connection self.sql_type self.expression self.context

## UNSTABLE
Expand Down
66 changes: 56 additions & 10 deletions distribution/lib/Standard/Database/0.0.0-dev/src/Data/SQL_Type.enso
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from Standard.Base import all
import Standard.Base.Error.Illegal_Argument.Illegal_Argument

import project.Data.Column.Column

polyglot java import java.sql.Types

## Represents an internal SQL data-type.
Expand Down Expand Up @@ -30,6 +32,10 @@ type SQL_Type
bigint : SQL_Type
bigint = SQL_Type.Value Types.BIGINT "BIGINT"

## The SQL representation of the `TINYINT` type.
tinyint : SQL_Type
tinyint = SQL_Type.Value Types.TINYINT "TINYINT"

## The SQL representation of the `SMALLINT` type.
smallint : SQL_Type
smallint = SQL_Type.Value Types.SMALLINT "SMALLINT"
Expand All @@ -51,12 +57,6 @@ type SQL_Type
numeric = SQL_Type.Value Types.NUMERIC "NUMERIC"

## The SQL type representing one of the supported textual types.
varchar : SQL_Type
varchar = SQL_Type.Value Types.VARCHAR "VARCHAR"

## UNSTABLE
The SQL type representing one of the supported textual types.

It seems that JDBC treats the `TEXT` and `VARCHAR` types as interchangeable.
text : SQL_Type
text = SQL_Type.Value Types.VARCHAR "VARCHAR"
Expand All @@ -77,18 +77,38 @@ type SQL_Type
date_time : SQL_Type
date_time = SQL_Type.Value Types.TIMESTAMP_WITH_TIMEZONE "TIMESTAMP"

## The SQL type representing a null column.
null : SQL_Type
null = SQL_Type.Value Types.NULL "NULL"

## ADVANCED
Given an Enso value gets the approximate SQL type.
approximate_type : Any -> SQL_Type ! Illegal_Argument
approximate_type value = case value of
_ : Column -> value.sql_type
_ : Boolean -> SQL_Type.boolean
_ : Integer -> SQL_Type.integer
_ : Integer -> if value.abs >= 2^32 then SQL_Type.bigint else SQL_Type.integer
_ : Decimal -> SQL_Type.double
_ : Text -> SQL_Type.varchar
_ : Text -> SQL_Type.text
_ : Date -> SQL_Type.date
_ : Time_Of_Day -> SQL_Type.time_of_day
_ : Time_Of_Day -> SQL_Type.time
_ : Date_Time -> SQL_Type.date_time
_ -> Error.throw (Illegal_Argument.Error "Unsupported type.")
Nothing -> SQL_Type.null
_ -> Error.throw (Illegal_Argument.Error "Unsupported type.")

## PRIVATE
Returns the SQL type that is the result of applying an operation to the
two given types.
merge_type : SQL_Type -> SQL_Type -> SQL_Type ! Illegal_Argument
merge_type left right =
if left.typeid == right.typeid then left else
if left.is_null.not && right.is_null then left else
if left.is_null && right.is_null.not then right else
case left.is_definitely_numeric && right.is_definitely_numeric of
True -> if left.is_definitely_integer && right.is_definitely_integer then merge_integer_type left right else
merge_number_type left right
False -> if left.is_definitely_text && right.is_definitely_text then SQL_Type.text else
Error.throw (Illegal_Argument.Error "Unmatched types for operation.")

## PRIVATE

Expand Down Expand Up @@ -141,3 +161,29 @@ type SQL_Type
is_likely_text : Boolean
is_likely_text self =
self.is_definitely_text || self.name.contains "text" Case_Sensitivity.Insensitive

## PRIVATE
is_null : Boolean
is_null self = self.typeid == Types.NULL

## PRIVATE
Joins two integer SQL types into the larger one.
merge_integer_type : SQL_Type -> SQL_Type -> SQL_Type
merge_integer_type left right =
integer_types = [Types.TINYINT, Types.SMALLINT, Types.INTEGER, Types.BIGINT]
left_index = integer_types.index_of left.typeid
right_index = integer_types.index_of right.typeid
new_index = left_index.max right_index
[SQL_Type.tinyint, SQL_Type.smallint, SQL_Type.integer, SQL_Type.bigint].at new_index

## PRIVATE
Joins two numeric SQL types into the larger one.
One of the types must be non-integer (otherwise use merge_integer_type).
merge_number_type : SQL_Type -> SQL_Type -> SQL_Type
merge_number_type left right = if left.is_definitely_integer then merge_number_type right left else
numeric_types = [Types.NUMERIC, Types.DECIMAL, Types.FLOAT, Types.REAL, Types.DOUBLE]
left_index = numeric_types.index_of left.typeid
right_index = numeric_types.index_of right.typeid
if right_index.is_nothing then left else
new_index = left_index.max right_index
[SQL_Type.numeric, SQL_Type.decimal, SQL_Type.real, SQL_Type.real, SQL_Type.double].at new_index
Original file line number Diff line number Diff line change
Expand Up @@ -630,7 +630,7 @@ type Table
_ -> column
renamed = if new_name.is_nothing then resolved else resolved.rename new_name

Helpers.ensure_name_is_sane renamed.name <|
renamed.if_not_error <|
index = self.internal_columns.index_of (c -> c.name == renamed.name)
to_add = case set_mode of
Set_Mode.Add_Or_Update -> True
Expand Down Expand Up @@ -673,8 +673,7 @@ type Table
Column.Value ("Constant_" + UUID.randomUUID.to_text) self.connection new_type other self.context
new_column = Expression.evaluate expression get_column make_constant "Standard.Database.Data.Column" "Column" Column.var_args_functions
problems = Warning.get_all new_column . map .value
new_name = Expression.to_column_name expression
result = new_column.rename new_name
result = new_column.rename expression
on_problems.attach_problems_before problems <|
Warning.set result []

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ base_dialect =
unary = name -> [name, make_unary_op name]
fun = name -> [name, make_function name]

arith = [["ADD_NUMBER", make_binary_op "+"], ["ADD_TEXT", make_binary_op "||"], bin "-", bin "*", bin "/", bin "%", ["^", make_function "POWER"]]
arith = [["ADD_NUMBER", make_binary_op "+"], ["ADD_TEXT", make_binary_op "||"], bin "-", bin "*", bin "/", bin "%", ["mod", make_function "MOD"], ["^", make_function "POWER"]]
logic = [bin "AND", bin "OR", unary "NOT", ["IIF", make_iif], ["TRUE", make_constant "TRUE"], ["FALSE", make_constant "FALSE"]]
eq = lift_binary_op "==" make_equals
neq = lift_binary_op "!=" make_not_equals
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,25 +49,6 @@ unify_vector_singleton x = case x of
_ : Vector -> x
_ -> [x]

## PRIVATE

This is used to check if the new name is safe for use in SQL queries.

Arguments:
- name: The name to check for safety.
- action: The action to perform if the name is safe.

In a future version we will decouple the internal SQL-safe names from the
external names shown to the user, but as a temporary solution we only allow
SQL-safe names for columns.

# TODO [RW] better name handling in Tables (#1513)
ensure_name_is_sane : Text -> (Any -> Any) -> Any ! Unsupported_Name
ensure_name_is_sane name ~action =
is_safe = Pattern.matches "[A-Za-z_0-9]+" name
if is_safe then action else
Error.throw <| Unsupported_Name.Error (name + " is not a valid name for a column. Please use english letters, numbers and underscore only.")

## PRIVATE
assume_default_locale : Locale -> Any -> Any ! Unsupported_Database_Operation
assume_default_locale locale ~action =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ polyglot java import java.sql.SQLException
polyglot java import java.sql.SQLTimeoutException

polyglot java import org.enso.database.JDBCProxy
polyglot java import org.enso.database.JDBCUtils

type JDBC_Connection
## PRIVATE
Expand Down Expand Up @@ -179,7 +180,11 @@ set_statement_values stmt holes =
holes.map_with_index ix-> obj->
position = ix + 1
case obj.first of
Nothing -> stmt.setNull position obj.second.typeid
Nothing ->
## If we really don't have a clue what this should be, we choose a varchar for a blank column.
sql_type = if obj.second == SQL_Type.null then SQL_Type.text else obj.second
stmt.setNull position sql_type.typeid
_ : Date_Time -> stmt.setTimestamp position (JDBCUtils.getTimestamp obj.first)
_ -> stmt.setObject position obj.first

## PRIVATE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ make_internal_generator_dialect =
cases = [["LOWER", Base_Generator.make_function "LOWER"], ["UPPER", Base_Generator.make_function "UPPER"]]
text = [starts_with, contains, ends_with, agg_shortest, agg_longest, make_case_sensitive]+concat_ops+cases
counts = [agg_count_is_null, agg_count_empty, agg_count_not_empty, ["COUNT_DISTINCT", agg_count_distinct], ["COUNT_DISTINCT_INCLUDE_NULL", agg_count_distinct_include_null]]
arith_extensions = [is_nan, decimal_div]
arith_extensions = [is_nan, decimal_div, mod_op, ["ROW_MIN", Base_Generator.make_function "LEAST"], ["ROW_MAX", Base_Generator.make_function "GREATEST"]]
bool = [bool_or]

stddev_pop = ["STDDEV_POP", Base_Generator.make_function "stddev_pop"]
Expand Down Expand Up @@ -303,3 +303,7 @@ bool_or = Base_Generator.lift_unary_op "BOOL_OR" arg->
## PRIVATE
decimal_div = Base_Generator.lift_binary_op "/" x-> y->
code "CAST(" ++ x ++ " AS double precision) / CAST(" ++ y ++ " AS double precision)"

## PRIVATE
mod_op = Base_Generator.lift_binary_op "mod" x-> y->
x ++ " - FLOOR(CAST(" ++ x ++ " AS double precision) / CAST(" ++ y ++ " AS double precision)) * " ++ y
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ make_internal_generator_dialect =
text = [starts_with, contains, ends_with, make_case_sensitive]+concat_ops
counts = [agg_count_is_null, agg_count_empty, agg_count_not_empty, ["COUNT_DISTINCT", agg_count_distinct], ["COUNT_DISTINCT_INCLUDE_NULL", agg_count_distinct_include_null]]
stats = [agg_stddev_pop, agg_stddev_samp]
arith_extensions = [decimal_div]
arith_extensions = [decimal_div, mod_op]

bool = [bool_or]
my_mappings = text + counts + stats + arith_extensions + bool
Expand Down Expand Up @@ -260,3 +260,7 @@ bool_or = Base_Generator.lift_unary_op "BOOL_OR" arg->
## PRIVATE
decimal_div = Base_Generator.lift_binary_op "/" x-> y->
code "CAST(" ++ x ++ " AS REAL) / CAST(" ++ y ++ " AS REAL)"

## PRIVATE
mod_op = Base_Generator.lift_binary_op "mod" x-> y->
x ++ " - FLOOR(CAST(" ++ x ++ " AS REAL) / CAST(" ++ y ++ " AS REAL)) * " ++ y
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,6 @@ type Expression
handle_parse_error <| handle_unsupported <| handle_arguments <|
ExpressionVisitorImpl.evaluate expression get_column make_constant module_name type_name var_args_functions.to_array

## PRIVATE
Converts an expression into a compatible column name.
to_column_name : Text -> Text
to_column_name expression =
expression.replace "[^A-Za-z_0-9]" "_" matcher=Regex_Matcher.Value


type Expression_Error
## The expression supplied could not be parsed due to a syntax error.
Syntax_Error message:Text line:Integer column:Integer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1136,8 +1136,7 @@ type Table
make_constant value = Column.from_vector_repeated (UUID.randomUUID.to_text) [value] self.row_count
new_column = Expression.evaluate expression get_column make_constant "Standard.Table.Data.Column" "Column" Column.var_args_functions
problems = Warning.get_all new_column . map .value
new_name = Expression.to_column_name expression
result = new_column.rename new_name
result = new_column.rename expression
on_problems.attach_problems_before problems <|
Warning.set result []

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@ type Excel_Format
If set to `True`, the file is read as an Excel 95-2003 format.
If set to `False`, the file is read as an Excel 2007+ format.
`Infer` will attempt to deduce this from the extension of the filename.
Excel (section:Excel_Section=Excel_Section.Workbook) (headers:(Boolean|Infer)=Infer) (xls_format:(Boolean|Infer)=Infer)
- default_sheet: The default sheet to use if `section` is set to
`Excel_Section.Workbook`.
Excel (section:Excel_Section=Excel_Section.Workbook) (headers:(Boolean|Infer)=Infer) (xls_format:(Boolean|Infer)=Infer) (default_sheet:Text="EnsoSheet")

## If the File_Format supports reading from the file, return a configured instance.
for_file : File -> Excel_Format | Nothing
Expand Down Expand Up @@ -90,12 +92,6 @@ type Excel_Format
Excel_Section.Sheet_Names -> Error.throw (Illegal_Argument.Error "Sheet_Names cannot be used for `write`.")
Excel_Section.Range_Names -> Error.throw (Illegal_Argument.Error "Range_Names cannot be used for `write`.")
Excel_Section.Workbook ->
sheet_name = if file.exists.not then "Sheet1" else
names = Excel_Reader.read_file file Excel_Section.Sheet_Names False on_problems format
if names.length == 255 then Error.throw (Illegal_Argument.Error "Too many sheets in workbook.") else
name_map = Map.from_vector (names.map n-> [n, 1])
idx = 1.up_to 256 . find n-> (name_map.contains_key ("Sheet" + n.to_text) . not)
"Sheet" + idx.to_text
Excel_Writer.write_file file table on_existing_file (Excel_Section.Worksheet sheet_name) True match_columns on_problems format
Excel_Writer.write_file file table on_existing_file (Excel_Section.Worksheet self.default_sheet) True match_columns on_problems format
_ -> Excel_Writer.write_file file table on_existing_file self.section self.headers match_columns on_problems format
r.if_not_error file
Loading

0 comments on commit 01fc34c

Please sign in to comment.