diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index be10da3669b4..89c5cfcce647 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -126,10 +126,6 @@ pub enum BuiltinScalarFunction { ArrayDistinct, /// array_element ArrayElement, - /// array_empty - ArrayEmpty, - /// array_length - ArrayLength, /// array_position ArrayPosition, /// array_positions @@ -360,11 +356,9 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayAppend => Volatility::Immutable, BuiltinScalarFunction::ArraySort => Volatility::Immutable, BuiltinScalarFunction::ArrayConcat => Volatility::Immutable, - BuiltinScalarFunction::ArrayEmpty => Volatility::Immutable, BuiltinScalarFunction::ArrayDistinct => Volatility::Immutable, BuiltinScalarFunction::ArrayElement => Volatility::Immutable, BuiltinScalarFunction::ArrayExcept => Volatility::Immutable, - BuiltinScalarFunction::ArrayLength => Volatility::Immutable, BuiltinScalarFunction::ArrayPopFront => Volatility::Immutable, BuiltinScalarFunction::ArrayPopBack => Volatility::Immutable, BuiltinScalarFunction::ArrayPosition => Volatility::Immutable, @@ -527,7 +521,6 @@ impl BuiltinScalarFunction { Ok(expr_type) } - BuiltinScalarFunction::ArrayEmpty => Ok(Boolean), BuiltinScalarFunction::ArrayDistinct => Ok(input_expr_types[0].clone()), BuiltinScalarFunction::ArrayElement => match &input_expr_types[0] { List(field) @@ -537,7 +530,6 @@ impl BuiltinScalarFunction { "The {self} function can only accept List, LargeList or FixedSizeList as the first argument" ), }, - BuiltinScalarFunction::ArrayLength => Ok(UInt64), BuiltinScalarFunction::ArrayPopFront => Ok(input_expr_types[0].clone()), BuiltinScalarFunction::ArrayPopBack => Ok(input_expr_types[0].clone()), BuiltinScalarFunction::ArrayPosition => Ok(UInt64), @@ -831,15 +823,11 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayConcat => { Signature::variadic_any(self.volatility()) } - BuiltinScalarFunction::ArrayEmpty => Signature::array(self.volatility()), BuiltinScalarFunction::ArrayElement => { Signature::array_and_index(self.volatility()) } BuiltinScalarFunction::ArrayExcept => Signature::any(2, self.volatility()), BuiltinScalarFunction::Flatten => Signature::array(self.volatility()), - BuiltinScalarFunction::ArrayLength => { - Signature::variadic_any(self.volatility()) - } BuiltinScalarFunction::ArrayDistinct => Signature::array(self.volatility()), BuiltinScalarFunction::ArrayPosition => { Signature::array_and_element_and_optional_index(self.volatility()) @@ -1396,7 +1384,6 @@ impl BuiltinScalarFunction { &["array_concat", "array_cat", "list_concat", "list_cat"] } BuiltinScalarFunction::ArrayDistinct => &["array_distinct", "list_distinct"], - BuiltinScalarFunction::ArrayEmpty => &["empty"], BuiltinScalarFunction::ArrayElement => &[ "array_element", "array_extract", @@ -1405,7 +1392,6 @@ impl BuiltinScalarFunction { ], BuiltinScalarFunction::ArrayExcept => &["array_except", "list_except"], BuiltinScalarFunction::Flatten => &["flatten"], - BuiltinScalarFunction::ArrayLength => &["array_length", "list_length"], BuiltinScalarFunction::ArrayPopFront => { &["array_pop_front", "list_pop_front"] } diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index ad69208ce9c5..7e1c0e77d770 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -611,12 +611,6 @@ scalar_expr!( ); nary_scalar_expr!(ArrayConcat, array_concat, "concatenates arrays."); -scalar_expr!( - ArrayEmpty, - array_empty, - array, - "returns true for an empty array or false for a non-empty array." -); scalar_expr!( Flatten, flatten, @@ -635,12 +629,6 @@ scalar_expr!( first_array second_array, "Returns an array of the elements that appear in the first array but not in the second." ); -scalar_expr!( - ArrayLength, - array_length, - array dimension, - "returns the length of the array dimension." -); scalar_expr!( ArrayDistinct, array_distinct, @@ -1336,7 +1324,6 @@ mod test { test_scalar_expr!(ArraySort, array_sort, array, desc, null_first); test_scalar_expr!(ArrayPopFront, array_pop_front, array); test_scalar_expr!(ArrayPopBack, array_pop_back, array); - test_scalar_expr!(ArrayLength, array_length, array, dimension); test_scalar_expr!(ArrayPosition, array_position, array, element, index); test_scalar_expr!(ArrayPositions, array_positions, array, element); test_scalar_expr!(ArrayPrepend, array_prepend, array, element); diff --git a/datafusion/functions-array/src/kernels.rs b/datafusion/functions-array/src/kernels.rs index c22ddeb43a5c..3138843feb58 100644 --- a/datafusion/functions-array/src/kernels.rs +++ b/datafusion/functions-array/src/kernels.rs @@ -17,19 +17,19 @@ //! implementation kernels for array functions -use arrow::array::ListArray; use arrow::array::{ Array, ArrayRef, BooleanArray, Date32Array, Float32Array, Float64Array, GenericListArray, Int16Array, Int32Array, Int64Array, Int8Array, LargeStringArray, OffsetSizeTrait, StringArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, }; +use arrow::array::{LargeListArray, ListArray}; use arrow::buffer::OffsetBuffer; use arrow::datatypes::Field; use arrow::datatypes::UInt64Type; use arrow::datatypes::{DataType, Date32Type, IntervalMonthDayNanoType}; use datafusion_common::cast::{ - as_date32_array, as_int64_array, as_interval_mdn_array, as_large_list_array, - as_list_array, as_string_array, + as_date32_array, as_generic_list_array, as_int64_array, as_interval_mdn_array, + as_large_list_array, as_list_array, as_null_array, as_string_array, }; use datafusion_common::{exec_err, not_impl_datafusion_err, DataFusionError, Result}; use std::any::type_name; @@ -517,3 +517,102 @@ pub fn gen_range_date( )?); Ok(arr) } + +/// Array_empty SQL function +pub fn array_empty(args: &[ArrayRef]) -> Result { + if args.len() != 1 { + return exec_err!("array_empty expects one argument"); + } + + if as_null_array(&args[0]).is_ok() { + // Make sure to return Boolean type. + return Ok(Arc::new(BooleanArray::new_null(args[0].len()))); + } + let array_type = args[0].data_type(); + + match array_type { + DataType::List(_) => general_array_empty::(&args[0]), + DataType::LargeList(_) => general_array_empty::(&args[0]), + _ => exec_err!("array_empty does not support type '{array_type:?}'."), + } +} + +fn general_array_empty(array: &ArrayRef) -> Result { + let array = as_generic_list_array::(array)?; + let builder = array + .iter() + .map(|arr| arr.map(|arr| arr.len() == arr.null_count())) + .collect::(); + Ok(Arc::new(builder)) +} + +/// Returns the length of a concrete array dimension +fn compute_array_length( + arr: Option, + dimension: Option, +) -> Result> { + let mut current_dimension: i64 = 1; + let mut value = match arr { + Some(arr) => arr, + None => return Ok(None), + }; + let dimension = match dimension { + Some(value) => { + if value < 1 { + return Ok(None); + } + + value + } + None => return Ok(None), + }; + + loop { + if current_dimension == dimension { + return Ok(Some(value.len() as u64)); + } + + match value.data_type() { + DataType::List(..) => { + value = downcast_arg!(value, ListArray).value(0); + current_dimension += 1; + } + DataType::LargeList(..) => { + value = downcast_arg!(value, LargeListArray).value(0); + current_dimension += 1; + } + _ => return Ok(None), + } + } +} + +/// Dispatch array length computation based on the offset type. +fn general_array_length(array: &[ArrayRef]) -> Result { + let list_array = as_generic_list_array::(&array[0])?; + let dimension = if array.len() == 2 { + as_int64_array(&array[1])?.clone() + } else { + Int64Array::from_value(1, list_array.len()) + }; + + let result = list_array + .iter() + .zip(dimension.iter()) + .map(|(arr, dim)| compute_array_length(arr, dim)) + .collect::>()?; + + Ok(Arc::new(result) as ArrayRef) +} + +/// Array_length SQL function +pub fn array_length(args: &[ArrayRef]) -> Result { + if args.len() != 1 && args.len() != 2 { + return exec_err!("array_length expects one or two arguments"); + } + + match &args[0].data_type() { + DataType::List(_) => general_array_length::(args), + DataType::LargeList(_) => general_array_length::(args), + array_type => exec_err!("array_length does not support type '{array_type:?}'"), + } +} diff --git a/datafusion/functions-array/src/lib.rs b/datafusion/functions-array/src/lib.rs index 710f49761f32..6f0f2beca75c 100644 --- a/datafusion/functions-array/src/lib.rs +++ b/datafusion/functions-array/src/lib.rs @@ -45,6 +45,8 @@ pub mod expr_fn { pub use super::array_has::array_has_all; pub use super::array_has::array_has_any; pub use super::udf::array_dims; + pub use super::udf::array_empty; + pub use super::udf::array_length; pub use super::udf::array_ndims; pub use super::udf::array_to_string; pub use super::udf::cardinality; @@ -64,6 +66,8 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> { array_has::array_has_udf(), array_has::array_has_all_udf(), array_has::array_has_any_udf(), + udf::array_empty_udf(), + udf::array_length_udf(), ]; functions.into_iter().try_for_each(|udf| { let existing_udf = registry.register_udf(udf)?; diff --git a/datafusion/functions-array/src/udf.rs b/datafusion/functions-array/src/udf.rs index 6c695539625c..8dc4f722c08a 100644 --- a/datafusion/functions-array/src/udf.rs +++ b/datafusion/functions-array/src/udf.rs @@ -22,6 +22,7 @@ use arrow::datatypes::Field; use arrow::datatypes::IntervalUnit::MonthDayNano; use datafusion_common::exec_err; use datafusion_common::plan_err; +use datafusion_common::Result; use datafusion_expr::expr::ScalarFunction; use datafusion_expr::Expr; use datafusion_expr::TypeSignature::Exact; @@ -68,7 +69,7 @@ impl ScalarUDFImpl for ArrayToString { &self.signature } - fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result { + fn return_type(&self, arg_types: &[DataType]) -> Result { use DataType::*; Ok(match arg_types[0] { List(_) | LargeList(_) | FixedSizeList(_, _) => Utf8, @@ -78,7 +79,7 @@ impl ScalarUDFImpl for ArrayToString { }) } - fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { + fn invoke(&self, args: &[ColumnarValue]) -> Result { let args = ColumnarValue::values_to_arrays(args)?; crate::kernels::array_to_string(&args).map(ColumnarValue::Array) } @@ -129,7 +130,7 @@ impl ScalarUDFImpl for Range { &self.signature } - fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result { + fn return_type(&self, arg_types: &[DataType]) -> Result { use DataType::*; Ok(List(Arc::new(Field::new( "item", @@ -138,7 +139,7 @@ impl ScalarUDFImpl for Range { )))) } - fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { + fn invoke(&self, args: &[ColumnarValue]) -> Result { let args = ColumnarValue::values_to_arrays(args)?; match args[0].data_type() { arrow::datatypes::DataType::Int64 => { @@ -199,7 +200,7 @@ impl ScalarUDFImpl for GenSeries { &self.signature } - fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result { + fn return_type(&self, arg_types: &[DataType]) -> Result { use DataType::*; Ok(List(Arc::new(Field::new( "item", @@ -208,7 +209,7 @@ impl ScalarUDFImpl for GenSeries { )))) } - fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { + fn invoke(&self, args: &[ColumnarValue]) -> Result { let args = ColumnarValue::values_to_arrays(args)?; match args[0].data_type() { arrow::datatypes::DataType::Int64 => { @@ -263,7 +264,7 @@ impl ScalarUDFImpl for ArrayDims { &self.signature } - fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result { + fn return_type(&self, arg_types: &[DataType]) -> Result { use DataType::*; Ok(match arg_types[0] { List(_) | LargeList(_) | FixedSizeList(_, _) => { @@ -275,7 +276,7 @@ impl ScalarUDFImpl for ArrayDims { }) } - fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { + fn invoke(&self, args: &[ColumnarValue]) -> Result { let args = ColumnarValue::values_to_arrays(args)?; crate::kernels::array_dims(&args).map(ColumnarValue::Array) } @@ -319,7 +320,7 @@ impl ScalarUDFImpl for Cardinality { &self.signature } - fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result { + fn return_type(&self, arg_types: &[DataType]) -> Result { use DataType::*; Ok(match arg_types[0] { List(_) | LargeList(_) | FixedSizeList(_, _) => UInt64, @@ -329,7 +330,7 @@ impl ScalarUDFImpl for Cardinality { }) } - fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { + fn invoke(&self, args: &[ColumnarValue]) -> Result { let args = ColumnarValue::values_to_arrays(args)?; crate::kernels::cardinality(&args).map(ColumnarValue::Array) } @@ -373,7 +374,7 @@ impl ScalarUDFImpl for ArrayNdims { &self.signature } - fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result { + fn return_type(&self, arg_types: &[DataType]) -> Result { use DataType::*; Ok(match arg_types[0] { List(_) | LargeList(_) | FixedSizeList(_, _) => UInt64, @@ -383,7 +384,7 @@ impl ScalarUDFImpl for ArrayNdims { }) } - fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { + fn invoke(&self, args: &[ColumnarValue]) -> Result { let args = ColumnarValue::values_to_arrays(args)?; crate::kernels::array_ndims(&args).map(ColumnarValue::Array) } @@ -392,3 +393,111 @@ impl ScalarUDFImpl for ArrayNdims { &self.aliases } } + +make_udf_function!( + ArrayEmpty, + array_empty, + array, + "returns true for an empty array or false for a non-empty array.", + array_empty_udf +); + +#[derive(Debug)] +pub(super) struct ArrayEmpty { + signature: Signature, + aliases: Vec, +} +impl ArrayEmpty { + pub fn new() -> Self { + Self { + signature: Signature::array(Volatility::Immutable), + aliases: vec![String::from("empty")], + } + } +} + +impl ScalarUDFImpl for ArrayEmpty { + fn as_any(&self) -> &dyn Any { + self + } + fn name(&self) -> &str { + "empty" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + use DataType::*; + Ok(match arg_types[0] { + List(_) | LargeList(_) | FixedSizeList(_, _) => Boolean, + _ => { + return plan_err!("The array_empty function can only accept List/LargeList/FixedSizeList."); + } + }) + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + let args = ColumnarValue::values_to_arrays(args)?; + crate::kernels::array_empty(&args).map(ColumnarValue::Array) + } + + fn aliases(&self) -> &[String] { + &self.aliases + } +} + +make_udf_function!( + ArrayLength, + array_length, + array, + "returns the length of the array dimension.", + array_length_udf +); + +#[derive(Debug)] +pub(super) struct ArrayLength { + signature: Signature, + aliases: Vec, +} +impl ArrayLength { + pub fn new() -> Self { + Self { + signature: Signature::variadic_any(Volatility::Immutable), + aliases: vec![String::from("array_length"), String::from("list_length")], + } + } +} + +impl ScalarUDFImpl for ArrayLength { + fn as_any(&self) -> &dyn Any { + self + } + fn name(&self) -> &str { + "array_length" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + use DataType::*; + Ok(match arg_types[0] { + List(_) | LargeList(_) | FixedSizeList(_, _) => UInt64, + _ => { + return plan_err!("The array_length function can only accept List/LargeList/FixedSizeList."); + } + }) + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + let args = ColumnarValue::values_to_arrays(args)?; + crate::kernels::array_length(&args).map(ColumnarValue::Array) + } + + fn aliases(&self) -> &[String] { + &self.aliases + } +} diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index 8d2a283a0508..495de01c7615 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -17,7 +17,6 @@ //! Array expressions -use std::any::type_name; use std::collections::HashSet; use std::fmt::{Display, Formatter}; use std::sync::Arc; @@ -32,7 +31,7 @@ use arrow_buffer::{ArrowNativeType, NullBuffer}; use arrow_schema::{FieldRef, SortOptions}; use datafusion_common::cast::{ as_generic_list_array, as_generic_string_array, as_int64_array, as_large_list_array, - as_list_array, as_null_array, as_string_array, + as_list_array, as_string_array, }; use datafusion_common::utils::{array_into_list_array, list_ndims}; use datafusion_common::{ @@ -41,17 +40,6 @@ use datafusion_common::{ }; use itertools::Itertools; -macro_rules! downcast_arg { - ($ARG:expr, $ARRAY_TYPE:ident) => {{ - $ARG.as_any().downcast_ref::<$ARRAY_TYPE>().ok_or_else(|| { - DataFusionError::Internal(format!( - "could not cast to {}", - type_name::<$ARRAY_TYPE>() - )) - })? - }}; -} - /// Computes a BooleanArray indicating equality or inequality between elements in a list array and a specified element array. /// /// # Arguments @@ -153,46 +141,6 @@ fn compare_element_to_list( Ok(res) } -/// Returns the length of a concrete array dimension -fn compute_array_length( - arr: Option, - dimension: Option, -) -> Result> { - let mut current_dimension: i64 = 1; - let mut value = match arr { - Some(arr) => arr, - None => return Ok(None), - }; - let dimension = match dimension { - Some(value) => { - if value < 1 { - return Ok(None); - } - - value - } - None => return Ok(None), - }; - - loop { - if current_dimension == dimension { - return Ok(Some(value.len() as u64)); - } - - match value.data_type() { - DataType::List(..) => { - value = downcast_arg!(value, ListArray).value(0); - current_dimension += 1; - } - DataType::LargeList(..) => { - value = downcast_arg!(value, LargeListArray).value(0); - current_dimension += 1; - } - _ => return Ok(None), - } - } -} - fn check_datatypes(name: &str, args: &[&ArrayRef]) -> Result<()> { let data_type = args[0].data_type(); if !args.iter().all(|arg| { @@ -1130,34 +1078,6 @@ pub fn array_concat(args: &[ArrayRef]) -> Result { } } -/// Array_empty SQL function -pub fn array_empty(args: &[ArrayRef]) -> Result { - if args.len() != 1 { - return exec_err!("array_empty expects one argument"); - } - - if as_null_array(&args[0]).is_ok() { - // Make sure to return Boolean type. - return Ok(Arc::new(BooleanArray::new_null(args[0].len()))); - } - let array_type = args[0].data_type(); - - match array_type { - DataType::List(_) => array_empty_dispatch::(&args[0]), - DataType::LargeList(_) => array_empty_dispatch::(&args[0]), - _ => exec_err!("array_empty does not support type '{array_type:?}'."), - } -} - -fn array_empty_dispatch(array: &ArrayRef) -> Result { - let array = as_generic_list_array::(array)?; - let builder = array - .iter() - .map(|arr| arr.map(|arr| arr.len() == arr.null_count())) - .collect::(); - Ok(Arc::new(builder)) -} - /// Array_repeat SQL function pub fn array_repeat(args: &[ArrayRef]) -> Result { if args.len() != 2 { @@ -1987,37 +1907,6 @@ pub fn flatten(args: &[ArrayRef]) -> Result { // Ok(Arc::new(flattened_array) as ArrayRef) } -/// Dispatch array length computation based on the offset type. -fn array_length_dispatch(array: &[ArrayRef]) -> Result { - let list_array = as_generic_list_array::(&array[0])?; - let dimension = if array.len() == 2 { - as_int64_array(&array[1])?.clone() - } else { - Int64Array::from_value(1, list_array.len()) - }; - - let result = list_array - .iter() - .zip(dimension.iter()) - .map(|(arr, dim)| compute_array_length(arr, dim)) - .collect::>()?; - - Ok(Arc::new(result) as ArrayRef) -} - -/// Array_length SQL function -pub fn array_length(args: &[ArrayRef]) -> Result { - if args.len() != 1 && args.len() != 2 { - return exec_err!("array_length expects one or two arguments"); - } - - match &args[0].data_type() { - DataType::List(_) => array_length_dispatch::(args), - DataType::LargeList(_) => array_length_dispatch::(args), - array_type => exec_err!("array_length does not support type '{array_type:?}'"), - } -} - /// Splits string at occurrences of delimiter and returns an array of parts /// string_to_array('abc~@~def~@~ghi', '~@~') = '["abc", "def", "ghi"]' pub fn string_to_array(args: &[ArrayRef]) -> Result { diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs index eebbb1dbea75..776f6315a405 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -311,9 +311,6 @@ pub fn create_physical_fun( BuiltinScalarFunction::ArrayConcat => Arc::new(|args| { make_scalar_function_inner(array_expressions::array_concat)(args) }), - BuiltinScalarFunction::ArrayEmpty => Arc::new(|args| { - make_scalar_function_inner(array_expressions::array_empty)(args) - }), BuiltinScalarFunction::ArrayDistinct => Arc::new(|args| { make_scalar_function_inner(array_expressions::array_distinct)(args) }), @@ -323,9 +320,6 @@ pub fn create_physical_fun( BuiltinScalarFunction::ArrayExcept => Arc::new(|args| { make_scalar_function_inner(array_expressions::array_except)(args) }), - BuiltinScalarFunction::ArrayLength => Arc::new(|args| { - make_scalar_function_inner(array_expressions::array_length)(args) - }), BuiltinScalarFunction::Flatten => { Arc::new(|args| make_scalar_function_inner(array_expressions::flatten)(args)) } diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index c5b20986c324..d1fef7c1ceae 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -639,7 +639,7 @@ enum ScalarFunction { ArrayConcat = 87; // 88 was ArrayDims ArrayRepeat = 89; - ArrayLength = 90; + // 90 was ArrayLength // 91 was ArrayNdims ArrayPosition = 92; ArrayPositions = 93; @@ -662,7 +662,7 @@ enum ScalarFunction { Flatten = 112; // 113 was IsNan Iszero = 114; - ArrayEmpty = 115; + // 115 was ArrayEmpty ArrayPopBack = 116; StringToArray = 117; // 118 was ToTimestampNanos diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index b99e95740622..e4da28ed44ec 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -22401,7 +22401,6 @@ impl serde::Serialize for ScalarFunction { Self::ArrayAppend => "ArrayAppend", Self::ArrayConcat => "ArrayConcat", Self::ArrayRepeat => "ArrayRepeat", - Self::ArrayLength => "ArrayLength", Self::ArrayPosition => "ArrayPosition", Self::ArrayPositions => "ArrayPositions", Self::ArrayPrepend => "ArrayPrepend", @@ -22417,7 +22416,6 @@ impl serde::Serialize for ScalarFunction { Self::Nanvl => "Nanvl", Self::Flatten => "Flatten", Self::Iszero => "Iszero", - Self::ArrayEmpty => "ArrayEmpty", Self::ArrayPopBack => "ArrayPopBack", Self::StringToArray => "StringToArray", Self::ArrayIntersect => "ArrayIntersect", @@ -22526,7 +22524,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "ArrayAppend", "ArrayConcat", "ArrayRepeat", - "ArrayLength", "ArrayPosition", "ArrayPositions", "ArrayPrepend", @@ -22542,7 +22539,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Nanvl", "Flatten", "Iszero", - "ArrayEmpty", "ArrayPopBack", "StringToArray", "ArrayIntersect", @@ -22680,7 +22676,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "ArrayAppend" => Ok(ScalarFunction::ArrayAppend), "ArrayConcat" => Ok(ScalarFunction::ArrayConcat), "ArrayRepeat" => Ok(ScalarFunction::ArrayRepeat), - "ArrayLength" => Ok(ScalarFunction::ArrayLength), "ArrayPosition" => Ok(ScalarFunction::ArrayPosition), "ArrayPositions" => Ok(ScalarFunction::ArrayPositions), "ArrayPrepend" => Ok(ScalarFunction::ArrayPrepend), @@ -22696,7 +22691,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Nanvl" => Ok(ScalarFunction::Nanvl), "Flatten" => Ok(ScalarFunction::Flatten), "Iszero" => Ok(ScalarFunction::Iszero), - "ArrayEmpty" => Ok(ScalarFunction::ArrayEmpty), "ArrayPopBack" => Ok(ScalarFunction::ArrayPopBack), "StringToArray" => Ok(ScalarFunction::StringToArray), "ArrayIntersect" => Ok(ScalarFunction::ArrayIntersect), diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 62b3d39580ae..30b76c16bc91 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -2727,7 +2727,7 @@ pub enum ScalarFunction { ArrayConcat = 87, /// 88 was ArrayDims ArrayRepeat = 89, - ArrayLength = 90, + /// 90 was ArrayLength /// 91 was ArrayNdims ArrayPosition = 92, ArrayPositions = 93, @@ -2750,7 +2750,7 @@ pub enum ScalarFunction { Flatten = 112, /// 113 was IsNan Iszero = 114, - ArrayEmpty = 115, + /// 115 was ArrayEmpty ArrayPopBack = 116, StringToArray = 117, /// 118 was ToTimestampNanos @@ -2863,7 +2863,6 @@ impl ScalarFunction { ScalarFunction::ArrayAppend => "ArrayAppend", ScalarFunction::ArrayConcat => "ArrayConcat", ScalarFunction::ArrayRepeat => "ArrayRepeat", - ScalarFunction::ArrayLength => "ArrayLength", ScalarFunction::ArrayPosition => "ArrayPosition", ScalarFunction::ArrayPositions => "ArrayPositions", ScalarFunction::ArrayPrepend => "ArrayPrepend", @@ -2879,7 +2878,6 @@ impl ScalarFunction { ScalarFunction::Nanvl => "Nanvl", ScalarFunction::Flatten => "Flatten", ScalarFunction::Iszero => "Iszero", - ScalarFunction::ArrayEmpty => "ArrayEmpty", ScalarFunction::ArrayPopBack => "ArrayPopBack", ScalarFunction::StringToArray => "StringToArray", ScalarFunction::ArrayIntersect => "ArrayIntersect", @@ -2982,7 +2980,6 @@ impl ScalarFunction { "ArrayAppend" => Some(Self::ArrayAppend), "ArrayConcat" => Some(Self::ArrayConcat), "ArrayRepeat" => Some(Self::ArrayRepeat), - "ArrayLength" => Some(Self::ArrayLength), "ArrayPosition" => Some(Self::ArrayPosition), "ArrayPositions" => Some(Self::ArrayPositions), "ArrayPrepend" => Some(Self::ArrayPrepend), @@ -2998,7 +2995,6 @@ impl ScalarFunction { "Nanvl" => Some(Self::Nanvl), "Flatten" => Some(Self::Flatten), "Iszero" => Some(Self::Iszero), - "ArrayEmpty" => Some(Self::ArrayEmpty), "ArrayPopBack" => Some(Self::ArrayPopBack), "StringToArray" => Some(Self::StringToArray), "ArrayIntersect" => Some(Self::ArrayIntersect), diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index c26b8acbf1d5..ece3caa09475 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -47,14 +47,14 @@ use datafusion_common::{ use datafusion_expr::expr::Unnest; use datafusion_expr::window_frame::{check_window_frame, regularize_window_order_by}; use datafusion_expr::{ - acosh, array, array_append, array_concat, array_distinct, array_element, array_empty, - array_except, array_intersect, array_length, array_pop_back, array_pop_front, - array_position, array_positions, array_prepend, array_remove, array_remove_all, - array_remove_n, array_repeat, array_replace, array_replace_all, array_replace_n, - array_resize, array_slice, array_sort, array_union, arrow_typeof, ascii, asinh, atan, - atan2, atanh, bit_length, btrim, cbrt, ceil, character_length, chr, coalesce, - concat_expr, concat_ws_expr, cos, cosh, cot, current_date, current_time, date_bin, - date_part, date_trunc, degrees, digest, ends_with, exp, + acosh, array, array_append, array_concat, array_distinct, array_element, + array_except, array_intersect, array_pop_back, array_pop_front, array_position, + array_positions, array_prepend, array_remove, array_remove_all, array_remove_n, + array_repeat, array_replace, array_replace_all, array_replace_n, array_resize, + array_slice, array_sort, array_union, arrow_typeof, ascii, asinh, atan, atan2, atanh, + bit_length, btrim, cbrt, ceil, character_length, chr, coalesce, concat_expr, + concat_ws_expr, cos, cosh, cot, current_date, current_time, date_bin, date_part, + date_trunc, degrees, digest, ends_with, exp, expr::{self, InList, Sort, WindowFunction}, factorial, find_in_set, flatten, floor, from_unixtime, gcd, initcap, iszero, lcm, left, levenshtein, ln, log, log10, log2, @@ -480,12 +480,10 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::ArrayAppend => Self::ArrayAppend, ScalarFunction::ArraySort => Self::ArraySort, ScalarFunction::ArrayConcat => Self::ArrayConcat, - ScalarFunction::ArrayEmpty => Self::ArrayEmpty, ScalarFunction::ArrayExcept => Self::ArrayExcept, ScalarFunction::ArrayDistinct => Self::ArrayDistinct, ScalarFunction::ArrayElement => Self::ArrayElement, ScalarFunction::Flatten => Self::Flatten, - ScalarFunction::ArrayLength => Self::ArrayLength, ScalarFunction::ArrayPopFront => Self::ArrayPopFront, ScalarFunction::ArrayPopBack => Self::ArrayPopBack, ScalarFunction::ArrayPosition => Self::ArrayPosition, @@ -1505,10 +1503,6 @@ pub fn parse_expr( parse_expr(&args[2], registry, codec)?, parse_expr(&args[3], registry, codec)?, )), - ScalarFunction::ArrayLength => Ok(array_length( - parse_expr(&args[0], registry, codec)?, - parse_expr(&args[1], registry, codec)?, - )), ScalarFunction::ArrayDistinct => { Ok(array_distinct(parse_expr(&args[0], registry, codec)?)) } @@ -1516,9 +1510,6 @@ pub fn parse_expr( parse_expr(&args[0], registry, codec)?, parse_expr(&args[1], registry, codec)?, )), - ScalarFunction::ArrayEmpty => { - Ok(array_empty(parse_expr(&args[0], registry, codec)?)) - } ScalarFunction::ArrayUnion => Ok(array_union( parse_expr(&args[0], registry, codec)?, parse_expr(&args[1], registry, codec)?, diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index 55c8542d9745..43c8d7e4b299 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -1459,12 +1459,10 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::ArrayAppend => Self::ArrayAppend, BuiltinScalarFunction::ArraySort => Self::ArraySort, BuiltinScalarFunction::ArrayConcat => Self::ArrayConcat, - BuiltinScalarFunction::ArrayEmpty => Self::ArrayEmpty, BuiltinScalarFunction::ArrayExcept => Self::ArrayExcept, BuiltinScalarFunction::ArrayDistinct => Self::ArrayDistinct, BuiltinScalarFunction::ArrayElement => Self::ArrayElement, BuiltinScalarFunction::Flatten => Self::Flatten, - BuiltinScalarFunction::ArrayLength => Self::ArrayLength, BuiltinScalarFunction::ArrayPopFront => Self::ArrayPopFront, BuiltinScalarFunction::ArrayPopBack => Self::ArrayPopBack, BuiltinScalarFunction::ArrayPosition => Self::ArrayPosition, diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index fad50d3ecddc..ef9f2b27aa15 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -588,6 +588,17 @@ async fn roundtrip_expr_api() -> Result<()> { cardinality(array(vec![lit(1), lit(2), lit(3)])), range(lit(1), lit(10), lit(2)), gen_series(lit(1), lit(10), lit(2)), + array_has(array(vec![lit(1), lit(2), lit(3)]), lit(1)), + array_has_all( + array(vec![lit(1), lit(2), lit(3)]), + array(vec![lit(1), lit(2)]), + ), + array_has_any( + array(vec![lit(1), lit(2), lit(3)]), + array(vec![lit(1), lit(4)]), + ), + array_empty(array(vec![lit(1), lit(2), lit(3)])), + array_length(array(vec![lit(1), lit(2), lit(3)])), ]; // ensure expressions created with the expr api can be round tripped