From de09b18416fdb38a55b0f74d3f9964de7a36138d Mon Sep 17 00:00:00 2001 From: tangruilin Date: Sun, 10 Mar 2024 23:48:33 +0800 Subject: [PATCH] [task #9539] Move starts_with, to_hex, trim, upper to datafusion-functions Signed-off-by: tangruilin --- .../tests/dataframe/dataframe_functions.rs | 39 --- datafusion/expr/src/built_in_function.rs | 56 +--- datafusion/expr/src/expr_fn.rs | 16 - datafusion/functions/Cargo.toml | 6 +- datafusion/functions/src/lib.rs | 7 +- datafusion/functions/src/string/mod.rs | 287 ++++++++++++++++++ .../functions/src/string/starts_with.rs | 89 ++++++ datafusion/functions/src/string/to_hex.rs | 154 ++++++++++ datafusion/functions/src/string/trim.rs | 78 +++++ datafusion/functions/src/string/upper.rs | 66 ++++ datafusion/physical-expr/src/functions.rs | 94 ------ .../physical-expr/src/string_expressions.rs | 77 +---- datafusion/proto/proto/datafusion.proto | 8 +- datafusion/proto/src/generated/pbjson.rs | 12 - datafusion/proto/src/generated/prost.rs | 16 +- .../proto/src/logical_plan/from_proto.rs | 22 +- datafusion/proto/src/logical_plan/to_proto.rs | 4 - datafusion/sql/src/expr/mod.rs | 2 +- 18 files changed, 713 insertions(+), 320 deletions(-) create mode 100644 datafusion/functions/src/string/mod.rs create mode 100644 datafusion/functions/src/string/starts_with.rs create mode 100644 datafusion/functions/src/string/to_hex.rs create mode 100644 datafusion/functions/src/string/trim.rs create mode 100644 datafusion/functions/src/string/upper.rs diff --git a/datafusion/core/tests/dataframe/dataframe_functions.rs b/datafusion/core/tests/dataframe/dataframe_functions.rs index cea7014929101..33aeac859b3df 100644 --- a/datafusion/core/tests/dataframe/dataframe_functions.rs +++ b/datafusion/core/tests/dataframe/dataframe_functions.rs @@ -650,26 +650,6 @@ async fn test_fn_split_part() -> Result<()> { Ok(()) } -#[tokio::test] -async fn test_fn_starts_with() -> Result<()> { - let expr = starts_with(col("a"), lit("abc")); - - let expected = [ - "+---------------------------------+", - "| starts_with(test.a,Utf8(\"abc\")) |", - "+---------------------------------+", - "| true |", - "| true |", - "| false |", - "| false |", - "+---------------------------------+", - ]; - - assert_fn_batches!(expr, expected); - - Ok(()) -} - #[tokio::test] async fn test_fn_ends_with() -> Result<()> { let expr = ends_with(col("a"), lit("DEF")); @@ -749,25 +729,6 @@ async fn test_cast() -> Result<()> { Ok(()) } -#[tokio::test] -async fn test_fn_to_hex() -> Result<()> { - let expr = to_hex(col("b")); - - let expected = [ - "+----------------+", - "| to_hex(test.b) |", - "+----------------+", - "| 1 |", - "| a |", - "| a |", - "| 64 |", - "+----------------+", - ]; - assert_fn_batches!(expr, expected); - - Ok(()) -} - #[tokio::test] #[cfg(feature = "unicode_expressions")] async fn test_fn_translate() -> Result<()> { diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index cf39a244c8dbc..7e0f988e67b6e 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -182,20 +182,14 @@ pub enum BuiltinScalarFunction { Rtrim, /// split_part SplitPart, - /// starts_with - StartsWith, /// strpos Strpos, /// substr Substr, - /// to_hex - ToHex, /// make_date MakeDate, /// translate Translate, - /// trim - Trim, /// upper Upper, /// uuid @@ -331,15 +325,11 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Rpad => Volatility::Immutable, BuiltinScalarFunction::Rtrim => Volatility::Immutable, BuiltinScalarFunction::SplitPart => Volatility::Immutable, - BuiltinScalarFunction::StartsWith => Volatility::Immutable, BuiltinScalarFunction::Strpos => Volatility::Immutable, BuiltinScalarFunction::Substr => Volatility::Immutable, - BuiltinScalarFunction::ToHex => Volatility::Immutable, BuiltinScalarFunction::ToChar => Volatility::Immutable, BuiltinScalarFunction::MakeDate => Volatility::Immutable, BuiltinScalarFunction::Translate => Volatility::Immutable, - BuiltinScalarFunction::Trim => Volatility::Immutable, - BuiltinScalarFunction::Upper => Volatility::Immutable, BuiltinScalarFunction::OverLay => Volatility::Immutable, BuiltinScalarFunction::Levenshtein => Volatility::Immutable, BuiltinScalarFunction::SubstrIndex => Volatility::Immutable, @@ -470,7 +460,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::SplitPart => { utf8_to_str_type(&input_expr_types[0], "split_part") } - BuiltinScalarFunction::StartsWith => Ok(Boolean), BuiltinScalarFunction::EndsWith => Ok(Boolean), BuiltinScalarFunction::Strpos => { utf8_to_int_type(&input_expr_types[0], "strpos/instr/position") @@ -478,12 +467,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Substr => { utf8_to_str_type(&input_expr_types[0], "substr") } - BuiltinScalarFunction::ToHex => Ok(match input_expr_types[0] { - Int8 | Int16 | Int32 | Int64 => Utf8, - _ => { - return plan_err!("The to_hex function can only accept integers."); - } - }), BuiltinScalarFunction::SubstrIndex => { utf8_to_str_type(&input_expr_types[0], "substr_index") } @@ -495,10 +478,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Translate => { utf8_to_str_type(&input_expr_types[0], "translate") } - BuiltinScalarFunction::Trim => utf8_to_str_type(&input_expr_types[0], "trim"), - BuiltinScalarFunction::Upper => { - utf8_to_str_type(&input_expr_types[0], "upper") - } BuiltinScalarFunction::Factorial | BuiltinScalarFunction::Gcd @@ -619,17 +598,13 @@ impl BuiltinScalarFunction { | BuiltinScalarFunction::Lower | BuiltinScalarFunction::OctetLength | BuiltinScalarFunction::Reverse - | BuiltinScalarFunction::Upper => { - Signature::uniform(1, vec![Utf8, LargeUtf8], self.volatility()) - } BuiltinScalarFunction::Btrim | BuiltinScalarFunction::Ltrim - | BuiltinScalarFunction::Rtrim - | BuiltinScalarFunction::Trim => Signature::one_of( + | BuiltinScalarFunction::Rtrim => Signature::one_of( vec![Exact(vec![Utf8]), Exact(vec![Utf8, Utf8])], self.volatility(), ), - BuiltinScalarFunction::Chr | BuiltinScalarFunction::ToHex => { + BuiltinScalarFunction::Chr => { Signature::uniform(1, vec![Int64], self.volatility()) } BuiltinScalarFunction::Lpad | BuiltinScalarFunction::Rpad => { @@ -696,17 +671,18 @@ impl BuiltinScalarFunction { self.volatility(), ), - BuiltinScalarFunction::EndsWith - | BuiltinScalarFunction::Strpos - | BuiltinScalarFunction::StartsWith => Signature::one_of( - vec![ - Exact(vec![Utf8, Utf8]), - Exact(vec![Utf8, LargeUtf8]), - Exact(vec![LargeUtf8, Utf8]), - Exact(vec![LargeUtf8, LargeUtf8]), - ], - self.volatility(), - ), + + BuiltinScalarFunction::EndsWith | BuiltinScalarFunction::Strpos => { + Signature::one_of( + vec![ + Exact(vec![Utf8, Utf8]), + Exact(vec![Utf8, LargeUtf8]), + Exact(vec![LargeUtf8, Utf8]), + Exact(vec![LargeUtf8, LargeUtf8]), + ], + self.volatility(), + ) + } BuiltinScalarFunction::Substr => Signature::one_of( vec![ @@ -931,13 +907,9 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Rpad => &["rpad"], BuiltinScalarFunction::Rtrim => &["rtrim"], BuiltinScalarFunction::SplitPart => &["split_part"], - BuiltinScalarFunction::StartsWith => &["starts_with"], BuiltinScalarFunction::Strpos => &["strpos", "instr", "position"], BuiltinScalarFunction::Substr => &["substr"], - BuiltinScalarFunction::ToHex => &["to_hex"], BuiltinScalarFunction::Translate => &["translate"], - BuiltinScalarFunction::Trim => &["trim"], - BuiltinScalarFunction::Upper => &["upper"], BuiltinScalarFunction::Uuid => &["uuid"], BuiltinScalarFunction::Levenshtein => &["levenshtein"], BuiltinScalarFunction::SubstrIndex => &["substr_index", "substring_index"], diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index f35b663edf24d..c1e6f522b67af 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -575,12 +575,6 @@ scalar_expr!(Log10, log10, num, "base 10 logarithm of number"); scalar_expr!(Ln, ln, num, "natural logarithm (base e) of number"); scalar_expr!(Power, power, base exponent, "`base` raised to the power of `exponent`"); scalar_expr!(Atan2, atan2, y x, "inverse tangent of a division given in the argument"); -scalar_expr!( - ToHex, - to_hex, - num, - "returns the hexdecimal representation of an integer" -); scalar_expr!(Uuid, uuid, , "returns uuid v4 as a string value"); scalar_expr!(Log, log, base x, "logarithm of a `x` for a particular `base`"); @@ -725,18 +719,11 @@ scalar_expr!( "removes all characters, spaces by default, from the end of a string" ); scalar_expr!(SplitPart, split_part, string delimiter index, "splits a string based on a delimiter and picks out the desired field based on the index."); -scalar_expr!(StartsWith, starts_with, string prefix, "whether the `string` starts with the `prefix`"); scalar_expr!(EndsWith, ends_with, string suffix, "whether the `string` ends with the `suffix`"); scalar_expr!(Strpos, strpos, string substring, "finds the position from where the `substring` matches the `string`"); scalar_expr!(Substr, substr, string position, "substring from the `position` to the end"); scalar_expr!(Substr, substring, string position length, "substring from the `position` with `length` characters"); scalar_expr!(Translate, translate, string from to, "replaces the characters in `from` with the counterpart in `to`"); -scalar_expr!( - Trim, - trim, - string, - "removes all characters, space by default from the string" -); scalar_expr!(Upper, upper, string, "converts the string to upper case"); //use vec as parameter nary_scalar_expr!( @@ -1220,14 +1207,11 @@ mod test { test_nary_scalar_expr!(Rpad, rpad, string, count, characters); test_scalar_expr!(Rtrim, rtrim, string); test_scalar_expr!(SplitPart, split_part, expr, delimiter, index); - test_scalar_expr!(StartsWith, starts_with, string, characters); test_scalar_expr!(EndsWith, ends_with, string, characters); test_scalar_expr!(Strpos, strpos, string, substring); test_scalar_expr!(Substr, substr, string, position); test_scalar_expr!(Substr, substring, string, position, count); - test_scalar_expr!(ToHex, to_hex, string); test_scalar_expr!(Translate, translate, string, from, to); - test_scalar_expr!(Trim, trim, string); test_scalar_expr!(Upper, upper, string); test_scalar_expr!(ArrayPopFront, array_pop_front, array); diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml index 92c80208e35f8..bfb52d1814966 100644 --- a/datafusion/functions/Cargo.toml +++ b/datafusion/functions/Cargo.toml @@ -29,11 +29,12 @@ authors = { workspace = true } rust-version = { workspace = true } [features] +# enable string functions +string_expressions = [] # enable core functions core_expressions = [] # enable datetime functions datetime_expressions = [] -# Enable encoding by default so the doctests work. In general don't automatically enable all packages. default = [ "core_expressions", "datetime_expressions", @@ -41,7 +42,8 @@ default = [ "math_expressions", "regex_expressions", "crypto_expressions", -] + "string_expressions", +] # Enable encoding by default so the doctests work. In general don't automatically enable all packages. # enable encode/decode functions encoding_expressions = ["base64", "hex"] # enable math functions diff --git a/datafusion/functions/src/lib.rs b/datafusion/functions/src/lib.rs index 3a2eab8e5f05b..507f899446d21 100644 --- a/datafusion/functions/src/lib.rs +++ b/datafusion/functions/src/lib.rs @@ -84,6 +84,10 @@ use log::debug; #[macro_use] pub mod macros; +#[cfg(feature = "string_expressions")] +pub mod string; +make_stub_package!(string, "string_expressions"); + /// Core datafusion expressions /// Enabled via feature flag `core_expressions` #[cfg(feature = "core_expressions")] @@ -144,7 +148,8 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> { .chain(encoding::functions()) .chain(math::functions()) .chain(regex::functions()) - .chain(crypto::functions()); + .chain(crypto::functions()) + .chain(string::functions()); all_functions.try_for_each(|udf| { let existing_udf = registry.register_udf(udf)?; diff --git a/datafusion/functions/src/string/mod.rs b/datafusion/functions/src/string/mod.rs new file mode 100644 index 0000000000000..e85e0ae165f59 --- /dev/null +++ b/datafusion/functions/src/string/mod.rs @@ -0,0 +1,287 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::datatypes::DataType; +use arrow_array::{Array, ArrayRef, GenericStringArray, OffsetSizeTrait}; +use datafusion_common::{ + cast::as_generic_string_array, exec_err, plan_err, Result, ScalarValue, +}; +use datafusion_expr::{ColumnarValue, ScalarFunctionImplementation}; +use datafusion_physical_expr::functions::Hint; +use std::{ + fmt::{Display, Formatter}, + sync::Arc, +}; + +/// Creates a function to identify the optimal return type of a string function given +/// the type of its first argument. +/// +/// If the input type is `LargeUtf8` or `LargeBinary` the return type is +/// `$largeUtf8Type`, +/// +/// If the input type is `Utf8` or `Binary` the return type is `$utf8Type`, +macro_rules! get_optimal_return_type { + ($FUNC:ident, $largeUtf8Type:expr, $utf8Type:expr) => { + fn $FUNC(arg_type: &DataType, name: &str) -> Result { + Ok(match arg_type { + // LargeBinary inputs are automatically coerced to Utf8 + DataType::LargeUtf8 | DataType::LargeBinary => $largeUtf8Type, + // Binary inputs are automatically coerced to Utf8 + DataType::Utf8 | DataType::Binary => $utf8Type, + DataType::Null => DataType::Null, + DataType::Dictionary(_, value_type) => match **value_type { + DataType::LargeUtf8 | DataType::LargeBinary => $largeUtf8Type, + DataType::Utf8 | DataType::Binary => $utf8Type, + DataType::Null => DataType::Null, + _ => { + return plan_err!( + "The {} function can only accept strings, but got {:?}.", + name.to_uppercase(), + **value_type + ); + } + }, + data_type => { + return plan_err!( + "The {} function can only accept strings, but got {:?}.", + name.to_uppercase(), + data_type + ); + } + }) + } + }; +} + +// `utf8_to_str_type`: returns either a Utf8 or LargeUtf8 based on the input type size. +get_optimal_return_type!(utf8_to_str_type, DataType::LargeUtf8, DataType::Utf8); + +/// applies a unary expression to `args[0]` that is expected to be downcastable to +/// a `GenericStringArray` and returns a `GenericStringArray` (which may have a different offset) +/// # Errors +/// This function errors when: +/// * the number of arguments is not 1 +/// * the first argument is not castable to a `GenericStringArray` +pub(crate) fn unary_string_function<'a, T, O, F, R>( + args: &[&'a dyn Array], + op: F, + name: &str, +) -> Result> +where + R: AsRef, + O: OffsetSizeTrait, + T: OffsetSizeTrait, + F: Fn(&'a str) -> R, +{ + if args.len() != 1 { + return exec_err!( + "{:?} args were supplied but {} takes exactly one argument", + args.len(), + name + ); + } + + let string_array = as_generic_string_array::(args[0])?; + + // first map is the iterator, second is for the `Option<_>` + Ok(string_array.iter().map(|string| string.map(&op)).collect()) +} + +fn handle<'a, F, R>(args: &'a [ColumnarValue], op: F, name: &str) -> Result +where + R: AsRef, + F: Fn(&'a str) -> R, +{ + match &args[0] { + ColumnarValue::Array(a) => match a.data_type() { + DataType::Utf8 => { + Ok(ColumnarValue::Array(Arc::new(unary_string_function::< + i32, + i32, + _, + _, + >( + &[a.as_ref()], op, name + )?))) + } + DataType::LargeUtf8 => { + Ok(ColumnarValue::Array(Arc::new(unary_string_function::< + i64, + i64, + _, + _, + >( + &[a.as_ref()], op, name + )?))) + } + other => exec_err!("Unsupported data type {other:?} for function {name}"), + }, + ColumnarValue::Scalar(scalar) => match scalar { + ScalarValue::Utf8(a) => { + let result = a.as_ref().map(|x| (op)(x).as_ref().to_string()); + Ok(ColumnarValue::Scalar(ScalarValue::Utf8(result))) + } + ScalarValue::LargeUtf8(a) => { + let result = a.as_ref().map(|x| (op)(x).as_ref().to_string()); + Ok(ColumnarValue::Scalar(ScalarValue::LargeUtf8(result))) + } + other => exec_err!("Unsupported data type {other:?} for function {name}"), + }, + } +} + +enum TrimType { + Left, + Right, + Both, +} + +impl Display for TrimType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + TrimType::Left => write!(f, "ltrim"), + TrimType::Right => write!(f, "rtrim"), + TrimType::Both => write!(f, "btrim"), + } + } +} + +fn general_trim( + args: &[ArrayRef], + trim_type: TrimType, +) -> Result { + let func = match trim_type { + TrimType::Left => |input, pattern: &str| { + let pattern = pattern.chars().collect::>(); + str::trim_start_matches::<&[char]>(input, pattern.as_ref()) + }, + TrimType::Right => |input, pattern: &str| { + let pattern = pattern.chars().collect::>(); + str::trim_end_matches::<&[char]>(input, pattern.as_ref()) + }, + TrimType::Both => |input, pattern: &str| { + let pattern = pattern.chars().collect::>(); + str::trim_end_matches::<&[char]>( + str::trim_start_matches::<&[char]>(input, pattern.as_ref()), + pattern.as_ref(), + ) + }, + }; + + let string_array = as_generic_string_array::(&args[0])?; + + match args.len() { + 1 => { + let result = string_array + .iter() + .map(|string| string.map(|string: &str| func(string, " "))) + .collect::>(); + + Ok(Arc::new(result) as ArrayRef) + } + 2 => { + let characters_array = as_generic_string_array::(&args[1])?; + + let result = string_array + .iter() + .zip(characters_array.iter()) + .map(|(string, characters)| match (string, characters) { + (Some(string), Some(characters)) => Some(func(string, characters)), + _ => None, + }) + .collect::>(); + + Ok(Arc::new(result) as ArrayRef) + } + other => { + exec_err!( + "{trim_type} was called with {other} arguments. It requires at least 1 and at most 2." + ) + } + } +} + +pub(super) fn make_scalar_function( + inner: F, + hints: Vec, +) -> ScalarFunctionImplementation +where + F: Fn(&[ArrayRef]) -> Result + Sync + Send + 'static, +{ + Arc::new(move |args: &[ColumnarValue]| { + // first, identify if any of the arguments is an Array. If yes, store its `len`, + // as any scalar will need to be converted to an array of len `len`. + let len = args + .iter() + .fold(Option::::None, |acc, arg| match arg { + ColumnarValue::Scalar(_) => acc, + ColumnarValue::Array(a) => Some(a.len()), + }); + + let is_scalar = len.is_none(); + + let inferred_length = len.unwrap_or(1); + let args = args + .iter() + .zip(hints.iter().chain(std::iter::repeat(&Hint::Pad))) + .map(|(arg, hint)| { + // Decide on the length to expand this scalar to depending + // on the given hints. + let expansion_len = match hint { + Hint::AcceptsSingular => 1, + Hint::Pad => inferred_length, + }; + arg.clone().into_array(expansion_len) + }) + .collect::>>()?; + + let result = (inner)(&args); + if is_scalar { + // If all inputs are scalar, keeps output as scalar + let result = result.and_then(|arr| ScalarValue::try_from_array(&arr, 0)); + result.map(ColumnarValue::Scalar) + } else { + result.map(ColumnarValue::Array) + } + }) +} + +mod starts_with; +mod to_hex; +mod trim; +mod upper; +// create UDFs +make_udf_function!(starts_with::StartsWithFunc, STARTS_WITH, starts_with); +make_udf_function!(to_hex::ToHexFunc, TO_HEX, to_hex); +make_udf_function!(trim::TrimFunc, TRIM, trim); +make_udf_function!(upper::UpperFunc, UPPER, upper); + +export_functions!( + ( + starts_with, + arg1 arg2, + "Returns true if string starts with prefix."), + ( + to_hex, + arg1, + "Converts an integer to a hexadecimal string."), + (trim, + arg1, + "removes all characters, space by default from the string"), + (upper, + arg1, + "Converts a string to uppercase.")); diff --git a/datafusion/functions/src/string/starts_with.rs b/datafusion/functions/src/string/starts_with.rs new file mode 100644 index 0000000000000..1fce399d1e701 --- /dev/null +++ b/datafusion/functions/src/string/starts_with.rs @@ -0,0 +1,89 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::array::{ArrayRef, OffsetSizeTrait}; +use arrow::datatypes::DataType; +use datafusion_common::{cast::as_generic_string_array, internal_err, Result}; +use datafusion_expr::ColumnarValue; +use datafusion_expr::TypeSignature::*; +use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use std::any::Any; +use std::sync::Arc; + +use crate::string::make_scalar_function; + +/// Returns true if string starts with prefix. +/// starts_with('alphabet', 'alph') = 't' +pub fn starts_with(args: &[ArrayRef]) -> Result { + let left = as_generic_string_array::(&args[0])?; + let right = as_generic_string_array::(&args[1])?; + + let result = arrow::compute::kernels::comparison::starts_with(left, right)?; + + Ok(Arc::new(result) as ArrayRef) +} + +#[derive(Debug)] +pub(super) struct StartsWithFunc { + signature: Signature, +} +impl StartsWithFunc { + pub fn new() -> Self { + use DataType::*; + Self { + signature: Signature::one_of( + vec![ + Exact(vec![Utf8, Utf8]), + Exact(vec![Utf8, LargeUtf8]), + Exact(vec![LargeUtf8, Utf8]), + Exact(vec![LargeUtf8, LargeUtf8]), + ], + Volatility::Immutable, + ), + } + } +} + +impl ScalarUDFImpl for StartsWithFunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "starts_with" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + use DataType::*; + + Ok(Boolean) + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + match args[0].data_type() { + DataType::Utf8 => make_scalar_function(starts_with::, vec![])(args), + DataType::LargeUtf8 => { + return make_scalar_function(starts_with::, vec![])(args); + } + _ => internal_err!("Unsupported data type"), + } + } +} diff --git a/datafusion/functions/src/string/to_hex.rs b/datafusion/functions/src/string/to_hex.rs new file mode 100644 index 0000000000000..1d8f29e0907bf --- /dev/null +++ b/datafusion/functions/src/string/to_hex.rs @@ -0,0 +1,154 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::array::{ArrayRef, OffsetSizeTrait}; +use arrow::datatypes::{ + ArrowNativeType, ArrowPrimitiveType, DataType, Int32Type, Int64Type, +}; +use arrow_array::GenericStringArray; +use datafusion_common::cast::as_primitive_array; +use datafusion_common::Result; +use datafusion_common::{exec_err, plan_err}; +use datafusion_expr::ColumnarValue; +use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use std::any::Any; +use std::sync::Arc; + +use super::make_scalar_function; + +/// Converts the number to its equivalent hexadecimal representation. +/// to_hex(2147483647) = '7fffffff' +pub fn to_hex(args: &[ArrayRef]) -> Result +where + T::Native: OffsetSizeTrait, +{ + let integer_array = as_primitive_array::(&args[0])?; + + let result = integer_array + .iter() + .map(|integer| { + if let Some(value) = integer { + if let Some(value_usize) = value.to_usize() { + Ok(Some(format!("{value_usize:x}"))) + } else if let Some(value_isize) = value.to_isize() { + Ok(Some(format!("{value_isize:x}"))) + } else { + exec_err!("Unsupported data type {integer:?} for function to_hex") + } + } else { + Ok(None) + } + }) + .collect::>>()?; + + Ok(Arc::new(result) as ArrayRef) +} + +#[derive(Debug)] +pub(super) struct ToHexFunc { + signature: Signature, +} +impl ToHexFunc { + pub fn new() -> Self { + use DataType::*; + Self { + signature: Signature::uniform(1, vec![Int64], Volatility::Immutable), + } + } +} + +impl ScalarUDFImpl for ToHexFunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "to_hex" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + use DataType::*; + + Ok(match arg_types[0] { + Int8 | Int16 | Int32 | Int64 => Utf8, + _ => { + return plan_err!("The to_hex function can only accept integers."); + } + }) + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + match args[0].data_type() { + DataType::Int32 => make_scalar_function(to_hex::, vec![])(args), + DataType::Int64 => make_scalar_function(to_hex::, vec![])(args), + other => exec_err!("Unsupported data type {other:?} for function to_hex"), + } + } +} + +#[cfg(test)] +mod tests { + use arrow::{array::Int32Array, datatypes::Int32Type}; + use arrow_array::StringArray; + + use datafusion_common::cast::as_string_array; + + use super::*; + + #[test] + // Test to_hex function for zero + fn to_hex_zero() -> Result<()> { + let array = vec![0].into_iter().collect::(); + let array_ref = Arc::new(array); + let hex_value_arc = to_hex::(&[array_ref])?; + let hex_value = as_string_array(&hex_value_arc)?; + let expected = StringArray::from(vec![Some("0")]); + assert_eq!(&expected, hex_value); + + Ok(()) + } + + #[test] + // Test to_hex function for positive number + fn to_hex_positive_number() -> Result<()> { + let array = vec![100].into_iter().collect::(); + let array_ref = Arc::new(array); + let hex_value_arc = to_hex::(&[array_ref])?; + let hex_value = as_string_array(&hex_value_arc)?; + let expected = StringArray::from(vec![Some("64")]); + assert_eq!(&expected, hex_value); + + Ok(()) + } + + #[test] + // Test to_hex function for negative number + fn to_hex_negative_number() -> Result<()> { + let array = vec![-1].into_iter().collect::(); + let array_ref = Arc::new(array); + let hex_value_arc = to_hex::(&[array_ref])?; + let hex_value = as_string_array(&hex_value_arc)?; + let expected = StringArray::from(vec![Some("ffffffffffffffff")]); + assert_eq!(&expected, hex_value); + + Ok(()) + } +} diff --git a/datafusion/functions/src/string/trim.rs b/datafusion/functions/src/string/trim.rs new file mode 100644 index 0000000000000..e04a171722e34 --- /dev/null +++ b/datafusion/functions/src/string/trim.rs @@ -0,0 +1,78 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::array::{ArrayRef, OffsetSizeTrait}; +use arrow::datatypes::DataType; +use datafusion_common::exec_err; +use datafusion_common::Result; +use datafusion_expr::ColumnarValue; +use datafusion_expr::TypeSignature::*; +use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use std::any::Any; + +use crate::string::{make_scalar_function, utf8_to_str_type}; + +use super::{general_trim, TrimType}; + +/// Returns the longest string with leading and trailing characters removed. If the characters are not specified, whitespace is removed. +/// btrim('xyxtrimyyx', 'xyz') = 'trim' +pub fn btrim(args: &[ArrayRef]) -> Result { + general_trim::(args, TrimType::Both) +} + +#[derive(Debug)] +pub(super) struct TrimFunc { + signature: Signature, +} + +impl TrimFunc { + pub fn new() -> Self { + use DataType::*; + Self { + signature: Signature::one_of( + vec![Exact(vec![Utf8]), Exact(vec![Utf8, Utf8])], + Volatility::Immutable, + ), + } + } +} + +impl ScalarUDFImpl for TrimFunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "trim" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + utf8_to_str_type(&arg_types[0], "trim") + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + match args[0].data_type() { + DataType::Utf8 => make_scalar_function(btrim::, vec![])(args), + DataType::LargeUtf8 => make_scalar_function(btrim::, vec![])(args), + other => exec_err!("Unsupported data type {other:?} for function trim"), + } + } +} diff --git a/datafusion/functions/src/string/upper.rs b/datafusion/functions/src/string/upper.rs new file mode 100644 index 0000000000000..ed41487699aa9 --- /dev/null +++ b/datafusion/functions/src/string/upper.rs @@ -0,0 +1,66 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::datatypes::DataType; +use datafusion_common::Result; +use datafusion_expr::ColumnarValue; +use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use std::any::Any; + +use crate::string::utf8_to_str_type; + +use super::handle; + +#[derive(Debug)] +pub(super) struct UpperFunc { + signature: Signature, +} + +impl UpperFunc { + pub fn new() -> Self { + use DataType::*; + Self { + signature: Signature::uniform( + 1, + vec![Utf8, LargeUtf8], + Volatility::Immutable, + ), + } + } +} + +impl ScalarUDFImpl for UpperFunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "upper" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + utf8_to_str_type(&arg_types[0], "upper") + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + handle(args, |string| string.to_uppercase(), "upper") + } +} diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs index 6da3980fede73..73c6cd12babe8 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -502,17 +502,6 @@ pub fn create_physical_fun( exec_err!("Unsupported data type {other:?} for function split_part") } }), - BuiltinScalarFunction::StartsWith => Arc::new(|args| match args[0].data_type() { - DataType::Utf8 => { - make_scalar_function_inner(string_expressions::starts_with::)(args) - } - DataType::LargeUtf8 => { - make_scalar_function_inner(string_expressions::starts_with::)(args) - } - other => { - exec_err!("Unsupported data type {other:?} for function starts_with") - } - }), BuiltinScalarFunction::EndsWith => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { make_scalar_function_inner(string_expressions::ends_with::)(args) @@ -552,15 +541,6 @@ pub fn create_physical_fun( } other => exec_err!("Unsupported data type {other:?} for function substr"), }), - BuiltinScalarFunction::ToHex => Arc::new(|args| match args[0].data_type() { - DataType::Int32 => { - make_scalar_function_inner(string_expressions::to_hex::)(args) - } - DataType::Int64 => { - make_scalar_function_inner(string_expressions::to_hex::)(args) - } - other => exec_err!("Unsupported data type {other:?} for function to_hex"), - }), BuiltinScalarFunction::Translate => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { let func = invoke_if_unicode_expressions_feature_flag!( @@ -582,16 +562,6 @@ pub fn create_physical_fun( exec_err!("Unsupported data type {other:?} for function translate") } }), - BuiltinScalarFunction::Trim => Arc::new(|args| match args[0].data_type() { - DataType::Utf8 => { - make_scalar_function_inner(string_expressions::btrim::)(args) - } - DataType::LargeUtf8 => { - make_scalar_function_inner(string_expressions::btrim::)(args) - } - other => exec_err!("Unsupported data type {other:?} for function trim"), - }), - BuiltinScalarFunction::Upper => Arc::new(string_expressions::upper), BuiltinScalarFunction::Uuid => Arc::new(string_expressions::uuid), BuiltinScalarFunction::OverLay => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { @@ -1852,38 +1822,6 @@ mod tests { Utf8, StringArray ); - test_function!( - StartsWith, - &[lit("alphabet"), lit("alph"),], - Ok(Some(true)), - bool, - Boolean, - BooleanArray - ); - test_function!( - StartsWith, - &[lit("alphabet"), lit("blph"),], - Ok(Some(false)), - bool, - Boolean, - BooleanArray - ); - test_function!( - StartsWith, - &[lit(ScalarValue::Utf8(None)), lit("alph"),], - Ok(None), - bool, - Boolean, - BooleanArray - ); - test_function!( - StartsWith, - &[lit("alphabet"), lit(ScalarValue::Utf8(None)),], - Ok(None), - bool, - Boolean, - BooleanArray - ); test_function!( EndsWith, &[lit("alphabet"), lit("alph"),], @@ -2204,38 +2142,6 @@ mod tests { Utf8, StringArray ); - test_function!( - Trim, - &[lit(" trim ")], - Ok(Some("trim")), - &str, - Utf8, - StringArray - ); - test_function!( - Trim, - &[lit("trim ")], - Ok(Some("trim")), - &str, - Utf8, - StringArray - ); - test_function!( - Trim, - &[lit(" trim")], - Ok(Some("trim")), - &str, - Utf8, - StringArray - ); - test_function!( - Trim, - &[lit(ScalarValue::Utf8(None))], - Ok(None), - &str, - Utf8, - StringArray - ); test_function!( Upper, &[lit("upper")], diff --git a/datafusion/physical-expr/src/string_expressions.rs b/datafusion/physical-expr/src/string_expressions.rs index ace7ef2888a30..86c0092a220d7 100644 --- a/datafusion/physical-expr/src/string_expressions.rs +++ b/datafusion/physical-expr/src/string_expressions.rs @@ -32,16 +32,14 @@ use arrow::{ Array, ArrayRef, GenericStringArray, Int32Array, Int64Array, OffsetSizeTrait, StringArray, }, - datatypes::{ArrowNativeType, ArrowPrimitiveType, DataType}, + datatypes::DataType, }; use uuid::Uuid; use datafusion_common::utils::datafusion_strsim; use datafusion_common::Result; use datafusion_common::{ - cast::{ - as_generic_string_array, as_int64_array, as_primitive_array, as_string_array, - }, + cast::{as_generic_string_array, as_int64_array, as_string_array}, exec_err, ScalarValue, }; use datafusion_expr::ColumnarValue; @@ -526,34 +524,6 @@ pub fn ends_with(args: &[ArrayRef]) -> Result { Ok(Arc::new(result) as ArrayRef) } -/// Converts the number to its equivalent hexadecimal representation. -/// to_hex(2147483647) = '7fffffff' -pub fn to_hex(args: &[ArrayRef]) -> Result -where - T::Native: OffsetSizeTrait, -{ - let integer_array = as_primitive_array::(&args[0])?; - - let result = integer_array - .iter() - .map(|integer| { - if let Some(value) = integer { - if let Some(value_usize) = value.to_usize() { - Ok(Some(format!("{value_usize:x}"))) - } else if let Some(value_isize) = value.to_isize() { - Ok(Some(format!("{value_isize:x}"))) - } else { - exec_err!("Unsupported data type {integer:?} for function to_hex") - } - } else { - Ok(None) - } - }) - .collect::>>()?; - - Ok(Arc::new(result) as ArrayRef) -} - /// Converts the string to all upper case. /// upper('tom') = 'TOM' pub fn upper(args: &[ColumnarValue]) -> Result { @@ -709,54 +679,13 @@ pub fn levenshtein(args: &[ArrayRef]) -> Result { #[cfg(test)] mod tests { - use arrow::{array::Int32Array, datatypes::Int32Type}; + use arrow::array::Int32Array; use arrow_array::Int64Array; use datafusion_common::cast::as_int32_array; - use crate::string_expressions; - use super::*; - #[test] - // Test to_hex function for zero - fn to_hex_zero() -> Result<()> { - let array = vec![0].into_iter().collect::(); - let array_ref = Arc::new(array); - let hex_value_arc = string_expressions::to_hex::(&[array_ref])?; - let hex_value = as_string_array(&hex_value_arc)?; - let expected = StringArray::from(vec![Some("0")]); - assert_eq!(&expected, hex_value); - - Ok(()) - } - - #[test] - // Test to_hex function for positive number - fn to_hex_positive_number() -> Result<()> { - let array = vec![100].into_iter().collect::(); - let array_ref = Arc::new(array); - let hex_value_arc = string_expressions::to_hex::(&[array_ref])?; - let hex_value = as_string_array(&hex_value_arc)?; - let expected = StringArray::from(vec![Some("64")]); - assert_eq!(&expected, hex_value); - - Ok(()) - } - - #[test] - // Test to_hex function for negative number - fn to_hex_negative_number() -> Result<()> { - let array = vec![-1].into_iter().collect::(); - let array_ref = Arc::new(array); - let hex_value_arc = string_expressions::to_hex::(&[array_ref])?; - let hex_value = as_string_array(&hex_value_arc)?; - let expected = StringArray::from(vec![Some("ffffffffffffffff")]); - assert_eq!(&expected, hex_value); - - Ok(()) - } - #[test] fn to_overlay() -> Result<()> { let string = diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index c9e4e0d64ed88..5797a161a4063 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -592,18 +592,18 @@ enum ScalarFunction { // 48 was SHA384 // 49 was SHA512 SplitPart = 50; - StartsWith = 51; + // StartsWith = 51; Strpos = 52; Substr = 53; - ToHex = 54; + // ToHex = 54; // 55 was ToTimestamp // 56 was ToTimestampMillis // 57 was ToTimestampMicros // 58 was ToTimestampSeconds // 59 was Now Translate = 60; - Trim = 61; - Upper = 62; + // Trim = 61; + // Upper = 62; Coalesce = 63; Power = 64; // 65 was StructFun diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index e3e1ee966b4a6..c84a214694239 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -22928,13 +22928,9 @@ impl serde::Serialize for ScalarFunction { Self::Rpad => "Rpad", Self::Rtrim => "Rtrim", Self::SplitPart => "SplitPart", - Self::StartsWith => "StartsWith", Self::Strpos => "Strpos", Self::Substr => "Substr", - Self::ToHex => "ToHex", Self::Translate => "Translate", - Self::Trim => "Trim", - Self::Upper => "Upper", Self::Coalesce => "Coalesce", Self::Power => "Power", Self::Atan2 => "Atan2", @@ -23024,13 +23020,9 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Rpad", "Rtrim", "SplitPart", - "StartsWith", "Strpos", "Substr", - "ToHex", "Translate", - "Trim", - "Upper", "Coalesce", "Power", "Atan2", @@ -23149,13 +23141,9 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Rpad" => Ok(ScalarFunction::Rpad), "Rtrim" => Ok(ScalarFunction::Rtrim), "SplitPart" => Ok(ScalarFunction::SplitPart), - "StartsWith" => Ok(ScalarFunction::StartsWith), "Strpos" => Ok(ScalarFunction::Strpos), "Substr" => Ok(ScalarFunction::Substr), - "ToHex" => Ok(ScalarFunction::ToHex), "Translate" => Ok(ScalarFunction::Translate), - "Trim" => Ok(ScalarFunction::Trim), - "Upper" => Ok(ScalarFunction::Upper), "Coalesce" => Ok(ScalarFunction::Coalesce), "Power" => Ok(ScalarFunction::Power), "Atan2" => Ok(ScalarFunction::Atan2), diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index d7e1224043970..d54b182584b46 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -2865,18 +2865,18 @@ pub enum ScalarFunction { /// 48 was SHA384 /// 49 was SHA512 SplitPart = 50, - StartsWith = 51, + /// StartsWith = 51; Strpos = 52, Substr = 53, - ToHex = 54, + /// ToHex = 54; /// 55 was ToTimestamp /// 56 was ToTimestampMillis /// 57 was ToTimestampMicros /// 58 was ToTimestampSeconds /// 59 was Now Translate = 60, - Trim = 61, - Upper = 62, + /// Trim = 61; + /// Upper = 62; Coalesce = 63, Power = 64, /// 65 was StructFun @@ -2996,13 +2996,9 @@ impl ScalarFunction { ScalarFunction::Rpad => "Rpad", ScalarFunction::Rtrim => "Rtrim", ScalarFunction::SplitPart => "SplitPart", - ScalarFunction::StartsWith => "StartsWith", ScalarFunction::Strpos => "Strpos", ScalarFunction::Substr => "Substr", - ScalarFunction::ToHex => "ToHex", ScalarFunction::Translate => "Translate", - ScalarFunction::Trim => "Trim", - ScalarFunction::Upper => "Upper", ScalarFunction::Coalesce => "Coalesce", ScalarFunction::Power => "Power", ScalarFunction::Atan2 => "Atan2", @@ -3086,13 +3082,9 @@ impl ScalarFunction { "Rpad" => Some(Self::Rpad), "Rtrim" => Some(Self::Rtrim), "SplitPart" => Some(Self::SplitPart), - "StartsWith" => Some(Self::StartsWith), "Strpos" => Some(Self::Strpos), "Substr" => Some(Self::Substr), - "ToHex" => Some(Self::ToHex), "Translate" => Some(Self::Translate), - "Trim" => Some(Self::Trim), - "Upper" => Some(Self::Upper), "Coalesce" => Some(Self::Coalesce), "Power" => Some(Self::Power), "Atan2" => Some(Self::Atan2), diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index 024f16fd098ca..d2185444d0131 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -58,10 +58,9 @@ use datafusion_expr::{ logical_plan::{PlanType, StringifiedPlan}, lower, lpad, ltrim, nanvl, octet_length, overlay, pi, power, radians, random, repeat, replace, reverse, right, round, rpad, rtrim, signum, sin, sinh, split_part, sqrt, - starts_with, strpos, substr, substr_index, substring, to_hex, translate, trim, trunc, - upper, uuid, AggregateFunction, Between, BinaryExpr, BuiltInWindowFunction, - BuiltinScalarFunction, Case, Cast, Expr, GetFieldAccess, GetIndexedField, - GroupingSet, + starts_with, strpos, substr, substr_index, substring, translate, trunc, upper, uuid, + AggregateFunction, Between, BinaryExpr, BuiltInWindowFunction, BuiltinScalarFunction, + Case, Cast, Expr, GetFieldAccess, GetIndexedField, GroupingSet, GroupingSet::GroupingSets, JoinConstraint, JoinType, Like, Operator, TryCast, WindowFrame, WindowFrameBound, WindowFrameUnits, @@ -467,8 +466,6 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::OctetLength => Self::OctetLength, ScalarFunction::Concat => Self::Concat, ScalarFunction::Lower => Self::Lower, - ScalarFunction::Upper => Self::Upper, - ScalarFunction::Trim => Self::Trim, ScalarFunction::Ltrim => Self::Ltrim, ScalarFunction::Rtrim => Self::Rtrim, ScalarFunction::ArrayExcept => Self::ArrayExcept, @@ -506,10 +503,8 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::Right => Self::Right, ScalarFunction::Rpad => Self::Rpad, ScalarFunction::SplitPart => Self::SplitPart, - ScalarFunction::StartsWith => Self::StartsWith, ScalarFunction::Strpos => Self::Strpos, ScalarFunction::Substr => Self::Substr, - ScalarFunction::ToHex => Self::ToHex, ScalarFunction::ToChar => Self::ToChar, ScalarFunction::MakeDate => Self::MakeDate, ScalarFunction::Uuid => Self::Uuid, @@ -1501,10 +1496,6 @@ pub fn parse_expr( ScalarFunction::Lower => { Ok(lower(parse_expr(&args[0], registry, codec)?)) } - ScalarFunction::Upper => { - Ok(upper(parse_expr(&args[0], registry, codec)?)) - } - ScalarFunction::Trim => Ok(trim(parse_expr(&args[0], registry, codec)?)), ScalarFunction::Ltrim => { Ok(ltrim(parse_expr(&args[0], registry, codec)?)) } @@ -1589,10 +1580,6 @@ pub fn parse_expr( parse_expr(&args[1], registry, codec)?, parse_expr(&args[2], registry, codec)?, )), - ScalarFunction::StartsWith => Ok(starts_with( - parse_expr(&args[0], registry, codec)?, - parse_expr(&args[1], registry, codec)?, - )), ScalarFunction::EndsWith => Ok(ends_with( parse_expr(&args[0], registry, codec)?, parse_expr(&args[1], registry, codec)?, @@ -1620,9 +1607,6 @@ pub fn parse_expr( parse_expr(&args[0], registry, codec)?, parse_expr(&args[1], registry, codec)?, )), - ScalarFunction::ToHex => { - Ok(to_hex(parse_expr(&args[0], registry, codec)?)) - } ScalarFunction::MakeDate => { let args: Vec<_> = args .iter() diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index e492b96577ec9..ed850885c3eb6 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -1449,8 +1449,6 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::OctetLength => Self::OctetLength, BuiltinScalarFunction::Concat => Self::Concat, BuiltinScalarFunction::Lower => Self::Lower, - BuiltinScalarFunction::Upper => Self::Upper, - BuiltinScalarFunction::Trim => Self::Trim, BuiltinScalarFunction::Ltrim => Self::Ltrim, BuiltinScalarFunction::Rtrim => Self::Rtrim, BuiltinScalarFunction::ToChar => Self::ToChar, @@ -1490,10 +1488,8 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::Right => Self::Right, BuiltinScalarFunction::Rpad => Self::Rpad, BuiltinScalarFunction::SplitPart => Self::SplitPart, - BuiltinScalarFunction::StartsWith => Self::StartsWith, BuiltinScalarFunction::Strpos => Self::Strpos, BuiltinScalarFunction::Substr => Self::Substr, - BuiltinScalarFunction::ToHex => Self::ToHex, BuiltinScalarFunction::MakeDate => Self::MakeDate, BuiltinScalarFunction::Translate => Self::Translate, BuiltinScalarFunction::Coalesce => Self::Coalesce, diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index d45a195cb6532..48f2a7e434619 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -737,7 +737,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { Some(TrimWhereField::Leading) => BuiltinScalarFunction::Ltrim, Some(TrimWhereField::Trailing) => BuiltinScalarFunction::Rtrim, Some(TrimWhereField::Both) => BuiltinScalarFunction::Btrim, - None => BuiltinScalarFunction::Trim, + None => BuiltinScalarFunction::Btrim, }; let arg = self.sql_expr_to_logical_expr(expr, schema, planner_context)?;