diff --git a/datafusion/functions-nested/Cargo.toml b/datafusion/functions-nested/Cargo.toml index bdfb07031b8c..b294d0c0d1c9 100644 --- a/datafusion/functions-nested/Cargo.toml +++ b/datafusion/functions-nested/Cargo.toml @@ -46,10 +46,12 @@ arrow-buffer = { workspace = true } arrow-ord = { workspace = true } arrow-schema = { workspace = true } datafusion-common = { workspace = true } +datafusion-doc = { workspace = true } datafusion-execution = { workspace = true } datafusion-expr = { workspace = true } datafusion-functions = { workspace = true } datafusion-functions-aggregate = { workspace = true } +datafusion-macros = { workspace = true } datafusion-physical-expr-common = { workspace = true } itertools = { workspace = true, features = ["use_std"] } log = { workspace = true } diff --git a/datafusion/functions-nested/src/array_has.rs b/datafusion/functions-nested/src/array_has.rs index 499b07dafccf..cc762f4ba6b7 100644 --- a/datafusion/functions-nested/src/array_has.rs +++ b/datafusion/functions-nested/src/array_has.rs @@ -25,10 +25,11 @@ use arrow_buffer::BooleanBuffer; use datafusion_common::cast::as_generic_list_array; use datafusion_common::utils::string_utils::string_array_to_vec; use datafusion_common::{exec_err, Result, ScalarValue}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use datafusion_physical_expr_common::datum::compare_with_eq; use itertools::Itertools; @@ -57,6 +58,27 @@ make_udf_expr_and_func!(ArrayHasAny, array_has_any_udf // internal function name ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns true if the array contains the element.", + syntax_example = "array_has(array, element)", + sql_example = r#"```sql +> select array_has([1, 2, 3], 2); ++-----------------------------+ +| array_has(List([1,2,3]), 2) | 
++-----------------------------+ +| true | ++-----------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ), + argument( + name = "element", + description = "Scalar or Array expression. Can be a constant, column, or function, and any combination of array operators." + ) +)] #[derive(Debug)] pub struct ArrayHas { signature: Signature, @@ -138,41 +160,10 @@ impl ScalarUDFImpl for ArrayHas { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_has_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_array_has_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns true if the array contains the element.", - - "array_has(array, element)") - .with_sql_example( - r#"```sql -> select array_has([1, 2, 3], 2); -+-----------------------------+ -| array_has(List([1,2,3]), 2) | -+-----------------------------+ -| true | -+-----------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "element", - "Scalar or Array expression. 
Can be a constant, column, or function, and any combination of array operators.", - ) - .build() - }) -} - fn array_has_inner_for_scalar( haystack: &ArrayRef, needle: &dyn Datum, @@ -287,6 +278,27 @@ fn array_has_any_inner(args: &[ArrayRef]) -> Result { } } +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns true if all elements of sub-array exist in array.", + syntax_example = "array_has_all(array, sub-array)", + sql_example = r#"```sql +> select array_has_all([1, 2, 3, 4], [2, 3]); ++--------------------------------------------+ +| array_has_all(List([1,2,3,4]), List([2,3])) | ++--------------------------------------------+ +| true | ++--------------------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ), + argument( + name = "sub-array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." 
+ ) +)] #[derive(Debug)] pub struct ArrayHasAll { signature: Signature, @@ -337,39 +349,31 @@ impl ScalarUDFImpl for ArrayHasAll { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_has_all_doc()) + self.doc() } } -fn get_array_has_all_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns true if all elements of sub-array exist in array.", - - "array_has_all(array, sub-array)") - .with_sql_example( - r#"```sql -> select array_has_all([1, 2, 3, 4], [2, 3]); -+--------------------------------------------+ -| array_has_all(List([1,2,3,4]), List([2,3])) | -+--------------------------------------------+ -| true | -+--------------------------------------------+ +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns true if any elements exist in both arrays.", + syntax_example = "array_has_any(array, sub-array)", + sql_example = r#"```sql +> select array_has_any([1, 2, 3], [3, 4]); ++------------------------------------------+ +| array_has_any(List([1,2,3]), List([3,4])) | ++------------------------------------------+ +| true | ++------------------------------------------+ ```"#, - ) - .with_argument( - "array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "sub-array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .build() - }) -} - + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ), + argument( + name = "sub-array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." 
+ ) +)] #[derive(Debug)] pub struct ArrayHasAny { signature: Signature, @@ -420,39 +424,10 @@ impl ScalarUDFImpl for ArrayHasAny { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_has_any_doc()) + self.doc() } } -fn get_array_has_any_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns true if any elements exist in both arrays.", - - "array_has_any(array, sub-array)") - .with_sql_example( - r#"```sql -> select array_has_any([1, 2, 3], [3, 4]); -+------------------------------------------+ -| array_has_any(List([1,2,3]), List([3,4])) | -+------------------------------------------+ -| true | -+------------------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "sub-array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .build() - }) -} - /// Represents the type of comparison for array_has. 
#[derive(Debug, PartialEq, Clone, Copy)] enum ComparisonType { diff --git a/datafusion/functions-nested/src/cardinality.rs b/datafusion/functions-nested/src/cardinality.rs index 45543d1bd68b..993fdbd2d3e0 100644 --- a/datafusion/functions-nested/src/cardinality.rs +++ b/datafusion/functions-nested/src/cardinality.rs @@ -26,11 +26,12 @@ use arrow_schema::DataType::{FixedSizeList, LargeList, List, Map, UInt64}; use datafusion_common::cast::{as_large_list_array, as_list_array, as_map_array}; use datafusion_common::Result; use datafusion_common::{exec_err, plan_err}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY; +use datafusion_doc::DocSection; use datafusion_expr::{ ArrayFunctionSignature, ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; use std::sync::{Arc, OnceLock}; @@ -57,6 +58,23 @@ impl Cardinality { } } +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns the total number of elements in the array.", + syntax_example = "cardinality(array)", + sql_example = r#"```sql +> select cardinality([[1, 2, 3, 4], [5, 6, 7, 8]]); ++--------------------------------------+ +| cardinality(List([1,2,3,4,5,6,7,8])) | ++--------------------------------------+ +| 8 | ++--------------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." 
+ ) +)] #[derive(Debug)] pub(super) struct Cardinality { signature: Signature, @@ -96,37 +114,10 @@ impl ScalarUDFImpl for Cardinality { } fn documentation(&self) -> Option<&Documentation> { - Some(get_cardinality_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_cardinality_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns the total number of elements in the array.", - - "cardinality(array)") - .with_sql_example( - r#"```sql -> select cardinality([[1, 2, 3, 4], [5, 6, 7, 8]]); -+--------------------------------------+ -| cardinality(List([1,2,3,4,5,6,7,8])) | -+--------------------------------------+ -| 8 | -+--------------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .build() - }) -} - /// Cardinality SQL function pub fn cardinality_inner(args: &[ArrayRef]) -> Result { if args.len() != 1 { diff --git a/datafusion/functions-nested/src/concat.rs b/datafusion/functions-nested/src/concat.rs index 3e8a5877fb33..acd5796f2998 100644 --- a/datafusion/functions-nested/src/concat.rs +++ b/datafusion/functions-nested/src/concat.rs @@ -28,11 +28,12 @@ use datafusion_common::Result; use datafusion_common::{ cast::as_generic_list_array, exec_err, not_impl_err, plan_err, utils::list_ndims, }; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY; +use datafusion_doc::DocSection; use datafusion_expr::{ type_coercion::binary::get_wider_type, ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use crate::utils::{align_array_dimensions, check_datatypes, make_scalar_function}; @@ -44,6 +45,24 @@ make_udf_expr_and_func!( array_append_udf // internal function name ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Appends an element to the end of an array.", + 
syntax_example = "array_append(array, element)", + sql_example = r#"```sql +> select array_append([1, 2, 3], 4); ++--------------------------------------+ +| array_append(List([1,2,3]),Int64(4)) | ++--------------------------------------+ +| [1, 2, 3, 4] | ++--------------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ), + argument(name = "element", description = "Element to append to the array.") +)] #[derive(Debug)] pub struct ArrayAppend { signature: Signature, @@ -99,41 +118,10 @@ impl ScalarUDFImpl for ArrayAppend { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_append_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_array_append_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Appends an element to the end of an array.", - - "array_append(array, element)") - .with_sql_example( - r#"```sql -> select array_append([1, 2, 3], 4); -+--------------------------------------+ -| array_append(List([1,2,3]),Int64(4)) | -+--------------------------------------+ -| [1, 2, 3, 4] | -+--------------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. 
Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "element", - "Element to append to the array.", - ) - .build() - }) -} - make_udf_expr_and_func!( ArrayPrepend, array_prepend, @@ -142,6 +130,24 @@ make_udf_expr_and_func!( array_prepend_udf ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Prepends an element to the beginning of an array.", + syntax_example = "array_prepend(element, array)", + sql_example = r#"```sql +> select array_prepend(1, [2, 3, 4]); ++---------------------------------------+ +| array_prepend(Int64(1),List([2,3,4])) | ++---------------------------------------+ +| [1, 2, 3, 4] | ++---------------------------------------+ +```"#, + argument(name = "element", description = "Element to prepend to the array."), + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ) +)] #[derive(Debug)] pub struct ArrayPrepend { signature: Signature, @@ -197,41 +203,10 @@ impl ScalarUDFImpl for ArrayPrepend { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_prepend_doc()) + self.doc() } } -static DOCUMENTATION_PREPEND: OnceLock = OnceLock::new(); - -fn get_array_prepend_doc() -> &'static Documentation { - DOCUMENTATION_PREPEND.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Prepends an element to the beginning of an array.", - - "array_prepend(element, array)") - .with_sql_example( - r#"```sql -> select array_prepend(1, [2, 3, 4]); -+---------------------------------------+ -| array_prepend(Int64(1),List([2,3,4])) | -+---------------------------------------+ -| [1, 2, 3, 4] | -+---------------------------------------+ -```"#, - ) - .with_argument( - "element", - "Element to prepend to the array.", - ) - .with_argument( - "array", - "Array expression. 
Can be a constant, column, or function, and any combination of array operators.", - ) - .build() - }) -} - make_udf_expr_and_func!( ArrayConcat, array_concat, @@ -239,6 +214,27 @@ make_udf_expr_and_func!( array_concat_udf ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Concatenates arrays.", + syntax_example = "array_concat(array[, ..., array_n])", + sql_example = r#"```sql +> select array_concat([1, 2], [3, 4], [5, 6]); ++---------------------------------------------------+ +| array_concat(List([1,2]),List([3,4]),List([5,6])) | ++---------------------------------------------------+ +| [1, 2, 3, 4, 5, 6] | ++---------------------------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression to concatenate. Can be a constant, column, or function, and any combination of array operators." + ), + argument( + name = "array_n", + description = "Subsequent array column or literal array to concatenate." + ) +)] #[derive(Debug)] pub struct ArrayConcat { signature: Signature, @@ -319,39 +315,10 @@ impl ScalarUDFImpl for ArrayConcat { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_concat_doc()) + self.doc() } } -fn get_array_concat_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Concatenates arrays.", - - "array_concat(array[, ..., array_n])") - .with_sql_example( - r#"```sql -> select array_concat([1, 2], [3, 4], [5, 6]); -+---------------------------------------------------+ -| array_concat(List([1,2]),List([3,4]),List([5,6])) | -+---------------------------------------------------+ -| [1, 2, 3, 4, 5, 6] | -+---------------------------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression to concatenate. 
Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "array_n", - "Subsequent array column or literal array to concatenate.", - ) - .build() - }) -} - /// Array_concat/Array_cat SQL function pub(crate) fn array_concat_inner(args: &[ArrayRef]) -> Result { if args.is_empty() { diff --git a/datafusion/functions-nested/src/dimension.rs b/datafusion/functions-nested/src/dimension.rs index 2d2f90e9c7cb..5fd87ed4665b 100644 --- a/datafusion/functions-nested/src/dimension.rs +++ b/datafusion/functions-nested/src/dimension.rs @@ -25,14 +25,15 @@ use std::any::Any; use datafusion_common::cast::{as_large_list_array, as_list_array}; use datafusion_common::{exec_err, plan_err, Result}; +use datafusion_doc::DocSection; use crate::utils::{compute_array_dims, make_scalar_function}; use arrow_schema::DataType::{FixedSizeList, LargeList, List, UInt64}; use arrow_schema::Field; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::sync::{Arc, OnceLock}; make_udf_expr_and_func!( @@ -43,6 +44,23 @@ make_udf_expr_and_func!( array_dims_udf ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns an array of the array's dimensions.", + syntax_example = "array_dims(array)", + sql_example = r#"```sql +> select array_dims([[1, 2, 3], [4, 5, 6]]); ++---------------------------------+ +| array_dims(List([1,2,3,4,5,6])) | ++---------------------------------+ +| [2, 3] | ++---------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." 
+ ) +)] #[derive(Debug)] pub(super) struct ArrayDims { signature: Signature, @@ -94,37 +112,10 @@ impl ScalarUDFImpl for ArrayDims { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_dims_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_array_dims_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns an array of the array's dimensions.", - - "array_dims(array)") - .with_sql_example( - r#"```sql -> select array_dims([[1, 2, 3], [4, 5, 6]]); -+---------------------------------+ -| array_dims(List([1,2,3,4,5,6])) | -+---------------------------------+ -| [2, 3] | -+---------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .build() - }) -} - make_udf_expr_and_func!( ArrayNdims, array_ndims, @@ -133,6 +124,24 @@ make_udf_expr_and_func!( array_ndims_udf ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns the number of dimensions of the array.", + syntax_example = "array_ndims(array, element)", + sql_example = r#"```sql +> select array_ndims([[1, 2, 3], [4, 5, 6]]); ++----------------------------------+ +| array_ndims(List([1,2,3,4,5,6])) | ++----------------------------------+ +| 2 | ++----------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." 
+ ), + argument(name = "element", description = "Array element.") +)] #[derive(Debug)] pub(super) struct ArrayNdims { signature: Signature, @@ -181,39 +190,10 @@ impl ScalarUDFImpl for ArrayNdims { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_ndims_doc()) + self.doc() } } -fn get_array_ndims_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns the number of dimensions of the array.", - - "array_ndims(array, element)") - .with_sql_example( - r#"```sql -> select array_ndims([[1, 2, 3], [4, 5, 6]]); -+----------------------------------+ -| array_ndims(List([1,2,3,4,5,6])) | -+----------------------------------+ -| 2 | -+----------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "element", - "Array element.", - ) - .build() - }) -} - /// Array_dims SQL function pub fn array_dims_inner(args: &[ArrayRef]) -> Result { if args.len() != 1 { diff --git a/datafusion/functions-nested/src/distance.rs b/datafusion/functions-nested/src/distance.rs index 381ddeb59a0b..b365e62b8040 100644 --- a/datafusion/functions-nested/src/distance.rs +++ b/datafusion/functions-nested/src/distance.rs @@ -31,10 +31,11 @@ use datafusion_common::cast::{ use datafusion_common::utils::coerced_fixed_size_list_to_list; use datafusion_common::DataFusionError; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; use std::sync::{Arc, OnceLock}; @@ -46,6 +47,27 @@ make_udf_expr_and_func!( array_distance_udf ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns the Euclidean distance between two input arrays of 
equal length.", + syntax_example = "array_distance(array1, array2)", + sql_example = r#"```sql +> select array_distance([1, 2], [1, 4]); ++------------------------------------+ +| array_distance(List([1,2], [1,4])) | ++------------------------------------+ +| 2.0 | ++------------------------------------+ +```"#, + argument( + name = "array1", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ), + argument( + name = "array2", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ) +)] #[derive(Debug)] pub(super) struct ArrayDistance { signature: Signature, @@ -109,41 +131,10 @@ impl ScalarUDFImpl for ArrayDistance { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_distance_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_array_distance_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns the Euclidean distance between two input arrays of equal length.", - - "array_distance(array1, array2)") - .with_sql_example( - r#"```sql -> select array_distance([1, 2], [1, 4]); -+------------------------------------+ -| array_distance(List([1,2], [1,4])) | -+------------------------------------+ -| 2.0 | -+------------------------------------+ -```"#, - ) - .with_argument( - "array1", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "array2", - "Array expression. 
Can be a constant, column, or function, and any combination of array operators.", - ) - .build() - }) -} - pub fn array_distance_inner(args: &[ArrayRef]) -> Result { if args.len() != 2 { return exec_err!("array_distance expects exactly two arguments"); diff --git a/datafusion/functions-nested/src/empty.rs b/datafusion/functions-nested/src/empty.rs index 5270c84c0338..040eea73f5a5 100644 --- a/datafusion/functions-nested/src/empty.rs +++ b/datafusion/functions-nested/src/empty.rs @@ -23,10 +23,11 @@ use arrow_schema::DataType; use arrow_schema::DataType::{Boolean, FixedSizeList, LargeList, List}; use datafusion_common::cast::as_generic_list_array; use datafusion_common::{exec_err, plan_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; use std::sync::{Arc, OnceLock}; @@ -38,6 +39,23 @@ make_udf_expr_and_func!( array_empty_udf ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns 1 for an empty array or 0 for a non-empty array.", + syntax_example = "empty(array)", + sql_example = r#"```sql +> select empty([1]); ++------------------+ +| empty(List([1])) | ++------------------+ +| 0 | ++------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." 
+ ) +)] #[derive(Debug)] pub(super) struct ArrayEmpty { signature: Signature, @@ -86,37 +104,10 @@ impl ScalarUDFImpl for ArrayEmpty { } fn documentation(&self) -> Option<&Documentation> { - Some(get_empty_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_empty_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns 1 for an empty array or 0 for a non-empty array.", - - "empty(array)") - .with_sql_example( - r#"```sql -> select empty([1]); -+------------------+ -| empty(List([1])) | -+------------------+ -| 0 | -+------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .build() - }) -} - /// Array_empty SQL function pub fn array_empty_inner(args: &[ArrayRef]) -> Result { if args.len() != 1 { diff --git a/datafusion/functions-nested/src/except.rs b/datafusion/functions-nested/src/except.rs index 83c09ad7fd90..ee40d2c7b561 100644 --- a/datafusion/functions-nested/src/except.rs +++ b/datafusion/functions-nested/src/except.rs @@ -24,10 +24,11 @@ use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait}; use arrow_buffer::OffsetBuffer; use arrow_schema::{DataType, FieldRef}; use datafusion_common::{exec_err, internal_err, HashSet, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; use std::sync::{Arc, OnceLock}; @@ -39,6 +40,33 @@ make_udf_expr_and_func!( array_except_udf ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns an array of the elements that appear in the first array but not in the second.", + syntax_example = "array_except(array1, array2)", + sql_example = r#"```sql +> select array_except([1, 2, 3, 4], [5, 
6, 3, 4]); ++----------------------------------------------------+ +| array_except([1, 2, 3, 4], [5, 6, 3, 4]); | ++----------------------------------------------------+ +| [1, 2] | ++----------------------------------------------------+ +> select array_except([1, 2, 3, 4], [3, 4, 5, 6]); ++----------------------------------------------------+ +| array_except([1, 2, 3, 4], [3, 4, 5, 6]); | ++----------------------------------------------------+ +| [1, 2] | ++----------------------------------------------------+ +```"#, + argument( + name = "array1", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ), + argument( + name = "array2", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ) +)] #[derive(Debug)] pub(super) struct ArrayExcept { signature: Signature, @@ -86,47 +114,10 @@ impl ScalarUDFImpl for ArrayExcept { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_except_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_array_except_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns an array of the elements that appear in the first array but not in the second.", - - "array_except(array1, array2)") - .with_sql_example( - r#"```sql -> select array_except([1, 2, 3, 4], [5, 6, 3, 4]); -+----------------------------------------------------+ -| array_except([1, 2, 3, 4], [5, 6, 3, 4]); | -+----------------------------------------------------+ -| [1, 2] | -+----------------------------------------------------+ -> select array_except([1, 2, 3, 4], [3, 4, 5, 6]); -+----------------------------------------------------+ -| array_except([1, 2, 3, 4], [3, 4, 5, 6]); | -+----------------------------------------------------+ -| [1, 2] | -+----------------------------------------------------+ -```"#, - ) - .with_argument( 
- "array1", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "array2", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .build() - }) -} - /// Array_except SQL function pub fn array_except_inner(args: &[ArrayRef]) -> Result { if args.len() != 2 { diff --git a/datafusion/functions-nested/src/extract.rs b/datafusion/functions-nested/src/extract.rs index fc35f0076330..facbfd14cd2b 100644 --- a/datafusion/functions-nested/src/extract.rs +++ b/datafusion/functions-nested/src/extract.rs @@ -35,11 +35,12 @@ use datafusion_common::cast::as_list_array; use datafusion_common::{ exec_err, internal_datafusion_err, plan_err, DataFusionError, Result, }; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY; +use datafusion_doc::DocSection; use datafusion_expr::Expr; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; use std::sync::{Arc, OnceLock}; @@ -80,6 +81,27 @@ make_udf_expr_and_func!( array_any_value_udf ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Extracts the element with the index n from the array.", + syntax_example = "array_element(array, index)", + sql_example = r#"```sql +> select array_element([1, 2, 3, 4], 3); ++-----------------------------------------+ +| array_element(List([1,2,3,4]),Int64(3)) | ++-----------------------------------------+ +| 3 | ++-----------------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." 
+ ), + argument( + name = "index", + description = "Index to extract the element from the array." + ) +)] #[derive(Debug)] pub(super) struct ArrayElement { signature: Signature, @@ -156,41 +178,10 @@ impl ScalarUDFImpl for ArrayElement { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_element_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_array_element_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Extracts the element with the index n from the array.", - - "array_element(array, index)") - .with_sql_example( - r#"```sql -> select array_element([1, 2, 3, 4], 3); -+-----------------------------------------+ -| array_element(List([1,2,3,4]),Int64(3)) | -+-----------------------------------------+ -| 3 | -+-----------------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "index", - "Index to extract the element from the array.", - ) - .build() - }) -} - /// array_element SQL function /// /// There are two arguments for array_element, the first one is the array, the second one is the 1-indexed index. 
@@ -296,6 +287,35 @@ pub fn array_slice(array: Expr, begin: Expr, end: Expr, stride: Option) -> array_slice_udf().call(args) } +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns a slice of the array based on 1-indexed start and end positions.", + syntax_example = "array_slice(array, begin, end)", + sql_example = r#"```sql +> select array_slice([1, 2, 3, 4, 5, 6, 7, 8], 3, 6); ++--------------------------------------------------------+ +| array_slice(List([1,2,3,4,5,6,7,8]),Int64(3),Int64(6)) | ++--------------------------------------------------------+ +| [3, 4, 5, 6] | ++--------------------------------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ), + argument( + name = "begin", + description = "Index of the first element. If negative, it counts backward from the end of the array." + ), + argument( + name = "end", + description = "Index of the last element. If negative, it counts backward from the end of the array." + ), + argument( + name = "stride", + description = "Stride of the array slice. The default is 1." 
+ ) +)] #[derive(Debug)] pub(super) struct ArraySlice { signature: Signature, @@ -362,47 +382,10 @@ impl ScalarUDFImpl for ArraySlice { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_slice_doc()) + self.doc() } } -fn get_array_slice_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns a slice of the array based on 1-indexed start and end positions.", - - "array_slice(array, begin, end)") - .with_sql_example( - r#"```sql -> select array_slice([1, 2, 3, 4, 5, 6, 7, 8], 3, 6); -+--------------------------------------------------------+ -| array_slice(List([1,2,3,4,5,6,7,8]),Int64(3),Int64(6)) | -+--------------------------------------------------------+ -| [3, 4, 5, 6] | -+--------------------------------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "begin", - "Index of the first element. If negative, it counts backward from the end of the array.", - ) - .with_argument( - "end", - "Index of the last element. If negative, it counts backward from the end of the array.", - ) - .with_argument( - "stride", - "Stride of the array slice. The default is 1.", - ) - .build() - }) -} - /// array_slice SQL function /// /// We follow the behavior of array_slice in DuckDB @@ -629,6 +612,23 @@ where )?)) } +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns the array without the first element.", + syntax_example = "array_pop_front(array)", + sql_example = r#"```sql +> select array_pop_front([1, 2, 3]); ++-------------------------------+ +| array_pop_front(List([1,2,3])) | ++-------------------------------+ +| [2, 3] | ++-------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." 
+ ) +)] #[derive(Debug)] pub(super) struct ArrayPopFront { signature: Signature, @@ -673,35 +673,10 @@ impl ScalarUDFImpl for ArrayPopFront { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_pop_front_doc()) + self.doc() } } -fn get_array_pop_front_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns the array without the first element.", - - "array_pop_front(array)") - .with_sql_example( - r#"```sql -> select array_pop_front([1, 2, 3]); -+-------------------------------+ -| array_pop_front(List([1,2,3])) | -+-------------------------------+ -| [2, 3] | -+-------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .build() - }) -} - /// array_pop_front SQL function fn array_pop_front_inner(args: &[ArrayRef]) -> Result { let array_data_type = args[0].data_type(); @@ -737,6 +712,23 @@ where general_array_slice::(array, &from_array, &to_array, None) } +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns the array without the last element.", + syntax_example = "array_pop_back(array)", + sql_example = r#"```sql +> select array_pop_back([1, 2, 3]); ++-------------------------------+ +| array_pop_back(List([1,2,3])) | ++-------------------------------+ +| [1, 2] | ++-------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." 
+ ) +)] #[derive(Debug)] pub(super) struct ArrayPopBack { signature: Signature, @@ -781,35 +773,10 @@ impl ScalarUDFImpl for ArrayPopBack { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_pop_back_doc()) + self.doc() } } -fn get_array_pop_back_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns the array without the last element.", - - "array_pop_back(array)") - .with_sql_example( - r#"```sql -> select array_pop_back([1, 2, 3]); -+-------------------------------+ -| array_pop_back(List([1,2,3])) | -+-------------------------------+ -| [1, 2] | -+-------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .build() - }) -} - /// array_pop_back SQL function fn array_pop_back_inner(args: &[ArrayRef]) -> Result { if args.len() != 1 { @@ -849,6 +816,23 @@ where general_array_slice::(array, &from_array, &to_array, None) } +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns the first non-null element in the array.", + syntax_example = "array_any_value(array)", + sql_example = r#"```sql +> select array_any_value([NULL, 1, 2, 3]); ++-------------------------------------+ +| array_any_value(List([NULL,1,2,3])) | ++-------------------------------------+ +| 1 | ++-------------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
+ ) +)] #[derive(Debug)] pub(super) struct ArrayAnyValue { signature: Signature, @@ -897,35 +881,10 @@ impl ScalarUDFImpl for ArrayAnyValue { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_any_value_doc()) + self.doc() } } -fn get_array_any_value_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns the first non-null element in the array.", - - "array_any_value(array)") - .with_sql_example( - r#"```sql -> select array_any_value([NULL, 1, 2, 3]); -+-------------------------------+ -| array_any_value(List([NULL,1,2,3])) | -+-------------------------------------+ -| 1 | -+-------------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .build() - }) -} - fn array_any_value_inner(args: &[ArrayRef]) -> Result { if args.len() != 1 { return exec_err!("array_any_value expects one argument"); diff --git a/datafusion/functions-nested/src/flatten.rs b/datafusion/functions-nested/src/flatten.rs index 9d2cb8a3f667..80a716730045 100644 --- a/datafusion/functions-nested/src/flatten.rs +++ b/datafusion/functions-nested/src/flatten.rs @@ -26,10 +26,11 @@ use datafusion_common::cast::{ as_generic_list_array, as_large_list_array, as_list_array, }; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; use std::sync::{Arc, OnceLock}; @@ -41,6 +42,23 @@ make_udf_expr_and_func!( flatten_udf ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Converts an array of arrays to a flat array.\n\n- Applies to any depth of nested arrays\n- Does not change arrays that are already flat\n\nThe flattened array contains all the elements 
from all source arrays.", + syntax_example = "flatten(array)", + sql_example = r#"```sql +> select flatten([[1, 2], [3, 4]]); ++------------------------------+ +| flatten(List([1,2], [3,4])) | ++------------------------------+ +| [1, 2, 3, 4] | ++------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ) +)] #[derive(Debug)] pub struct Flatten { signature: Signature, @@ -111,35 +129,9 @@ impl ScalarUDFImpl for Flatten { } fn documentation(&self) -> Option<&Documentation> { - Some(get_flatten_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_flatten_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Converts an array of arrays to a flat array.\n\n- Applies to any depth of nested arrays\n- Does not change arrays that are already flat\n\nThe flattened array contains all the elements from all source arrays.", - - "flatten(array)") - .with_sql_example( - r#"```sql -> select flatten([[1, 2], [3, 4]]); -+------------------------------+ -| flatten(List([1,2], [3,4])) | -+------------------------------+ -| [1, 2, 3, 4] | -+------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. 
Can be a constant, column, or function, and any combination of array operators.", - ) - .build() - }) -} /// Flatten SQL function pub fn flatten_inner(args: &[ArrayRef]) -> Result { diff --git a/datafusion/functions-nested/src/length.rs b/datafusion/functions-nested/src/length.rs index 3f92cb3ebb21..508c1e77522e 100644 --- a/datafusion/functions-nested/src/length.rs +++ b/datafusion/functions-nested/src/length.rs @@ -27,10 +27,11 @@ use core::any::type_name; use datafusion_common::cast::{as_generic_list_array, as_int64_array}; use datafusion_common::DataFusionError; use datafusion_common::{exec_err, plan_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; use std::sync::{Arc, OnceLock}; @@ -42,6 +43,24 @@ make_udf_expr_and_func!( array_length_udf ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns the length of the array dimension.", + syntax_example = "array_length(array, dimension)", + sql_example = r#"```sql +> select array_length([1, 2, 3, 4, 5], 1); ++-------------------------------------------+ +| array_length(List([1,2,3,4,5]), 1) | ++-------------------------------------------+ +| 5 | ++-------------------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." 
+ ), + argument(name = "dimension", description = "Array dimension.") +)] #[derive(Debug)] pub struct ArrayLength { signature: Signature, @@ -97,41 +116,10 @@ impl ScalarUDFImpl for ArrayLength { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_length_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_array_length_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns the length of the array dimension.", - - "array_length(array, dimension)") - .with_sql_example( - r#"```sql -> select array_length([1, 2, 3, 4, 5], 1); -+-------------------------------------------+ -| array_length(List([1,2,3,4,5]), 1) | -+-------------------------------------------+ -| 5 | -+-------------------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "dimension", - "Array dimension.", - ) - .build() - }) -} - /// Array_length SQL function pub fn array_length_inner(args: &[ArrayRef]) -> Result { if args.len() != 1 && args.len() != 2 { diff --git a/datafusion/functions-nested/src/make_array.rs b/datafusion/functions-nested/src/make_array.rs index 22870dd85f0c..f5a819c4e692 100644 --- a/datafusion/functions-nested/src/make_array.rs +++ b/datafusion/functions-nested/src/make_array.rs @@ -21,6 +21,7 @@ use std::any::Any; use std::sync::{Arc, OnceLock}; use std::vec; +use crate::utils::make_scalar_function; use arrow::array::{ArrayData, Capacities, MutableArrayData}; use arrow_array::{ new_null_array, Array, ArrayRef, GenericListArray, NullArray, OffsetSizeTrait, @@ -30,16 +31,15 @@ use arrow_schema::DataType::{List, Null}; use arrow_schema::{DataType, Field}; use datafusion_common::utils::SingleRowListArrayBuilder; use datafusion_common::{plan_err, Result}; +use datafusion_doc::DocSection; use datafusion_expr::binary::{
try_type_union_resolution_with_struct, type_union_resolution, }; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY; use datafusion_expr::TypeSignature; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; - -use crate::utils::make_scalar_function; +use datafusion_macros::user_doc; make_udf_expr_and_func!( MakeArray, @@ -48,6 +48,23 @@ make_udf_expr_and_func!( make_array_udf ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns an array using the specified input expressions.", + syntax_example = "make_array(expression1[, ..., expression_n])", + sql_example = r#"```sql +> select make_array(1, 2, 3, 4, 5); ++----------------------------------------------------------+ +| make_array(Int64(1),Int64(2),Int64(3),Int64(4),Int64(5)) | ++----------------------------------------------------------+ +| [1, 2, 3, 4, 5] | ++----------------------------------------------------------+ +```"#, + argument( + name = "expression_n", + description = "Expression to include in the output array. Can be a constant, column, or function, and any combination of arithmetic or string operators."
+ ) +)] #[derive(Debug)] pub struct MakeArray { signature: Signature, @@ -139,37 +156,10 @@ impl ScalarUDFImpl for MakeArray { } fn documentation(&self) -> Option<&Documentation> { - Some(get_make_array_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_make_array_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns an array using the specified input expressions.", - - "make_array(expression1[, ..., expression_n])") - .with_sql_example( - r#"```sql -> select make_array(1, 2, 3, 4, 5); -+----------------------------------------------------------+ -| make_array(Int64(1),Int64(2),Int64(3),Int64(4),Int64(5)) | -+----------------------------------------------------------+ -| [1, 2, 3, 4, 5] | -+----------------------------------------------------------+ -```"#, - ) - .with_argument( - "expression_n", - "Expression to include in the output array.
Can be a constant, column, or function, and any combination of arithmetic or string operators.", - ) - .build() - }) -} - // Empty array is a special case that is useful for many other array functions pub(super) fn empty_array_type() -> DataType { List(Arc::new(Field::new_list_field(DataType::Int64, true))) diff --git a/datafusion/functions-nested/src/map.rs b/datafusion/functions-nested/src/map.rs index d21a19c9fb33..63aeb526ba39 100644 --- a/datafusion/functions-nested/src/map.rs +++ b/datafusion/functions-nested/src/map.rs @@ -26,11 +26,12 @@ use arrow_schema::{DataType, Field, SchemaBuilder}; use datafusion_common::utils::{fixed_size_list_to_arrays, list_to_arrays}; use datafusion_common::{exec_err, HashSet, Result, ScalarValue}; +use datafusion_doc::DocSection; use datafusion_expr::expr::ScalarFunction; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_MAP; use datafusion_expr::{ ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use crate::make_array::make_array; @@ -181,6 +182,50 @@ fn make_map_batch_internal( }) } +#[user_doc( + doc_section(label = "Map Functions"), + description = "Returns an Arrow map with the specified key-value pairs.\n\n\ + The `make_map` function creates a map from two lists: one for keys and one for values. 
Each key must be unique and non-null.", + syntax_example = "map(key, value)\nmap(key: value)\nmake_map(['key1', 'key2'], ['value1', 'value2'])", + sql_example = r#" +```sql +-- Using map function +SELECT MAP('type', 'test'); +---- +{type: test} + +SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33, null]); +---- +{POST: 41, HEAD: 33, PATCH: } + +SELECT MAP([[1,2], [3,4]], ['a', 'b']); +---- +{[1, 2]: a, [3, 4]: b} + +SELECT MAP { 'a': 1, 'b': 2 }; +---- +{a: 1, b: 2} + +-- Using make_map function +SELECT MAKE_MAP(['POST', 'HEAD'], [41, 33]); +---- +{POST: 41, HEAD: 33} + +SELECT MAKE_MAP(['key1', 'key2'], ['value1', null]); +---- +{key1: value1, key2: } +```"#, + argument( + name = "key", + description = "For `map`: Expression to be used for key. Can be a constant, column, function, or any combination of arithmetic or string operators.\n\ + For `make_map`: The list of keys to be used in the map. Each key must be unique and non-null." + ), + argument( + name = "value", + description = "For `map`: Expression to be used for value. Can be a constant, column, function, or any combination of arithmetic or string operators.\n\ + For `make_map`: The list of values to be mapped to the corresponding keys." + ) +)] #[derive(Debug)] pub struct MapFunc { signature: Signature, @@ -247,65 +292,10 @@ impl ScalarUDFImpl for MapFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_map_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_map_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_MAP, - "Returns an Arrow map with the specified key-value pairs.\n\n\ - The `make_map` function creates a map from two lists: one for keys and one for values. 
Each key must be unique and non-null.", - - "map(key, value)\nmap(key: value)\nmake_map(['key1', 'key2'], ['value1', 'value2'])" - ) - .with_sql_example( - r#" -```sql --- Using map function -SELECT MAP('type', 'test'); ----- -{type: test} - -SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33, null]); ----- -{POST: 41, HEAD: 33, PATCH: } - -SELECT MAP([[1,2], [3,4]], ['a', 'b']); ----- -{[1, 2]: a, [3, 4]: b} - -SELECT MAP { 'a': 1, 'b': 2 }; ----- -{a: 1, b: 2} - --- Using make_map function -SELECT MAKE_MAP(['POST', 'HEAD'], [41, 33]); ----- -{POST: 41, HEAD: 33} - -SELECT MAKE_MAP(['key1', 'key2'], ['value1', null]); ----- -{key1: value1, key2: } -```"#, - ) - .with_argument( - "key", - "For `map`: Expression to be used for key. Can be a constant, column, function, or any combination of arithmetic or string operators.\n\ - For `make_map`: The list of keys to be used in the map. Each key must be unique and non-null." - ) - .with_argument( - "value", - "For `map`: Expression to be used for value. Can be a constant, column, function, or any combination of arithmetic or string operators.\n\ - For `make_map`: The list of values to be mapped to the corresponding keys." 
- ) - .build() - }) -} - fn get_element_type(data_type: &DataType) -> Result<&DataType> { match data_type { DataType::List(element) => Ok(element.data_type()), diff --git a/datafusion/functions-nested/src/map_extract.rs b/datafusion/functions-nested/src/map_extract.rs index 24f396e741b2..5562415f3a2c 100644 --- a/datafusion/functions-nested/src/map_extract.rs +++ b/datafusion/functions-nested/src/map_extract.rs @@ -26,10 +26,11 @@ use arrow_buffer::OffsetBuffer; use arrow_schema::Field; use datafusion_common::{cast::as_map_array, exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_MAP; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; use std::sync::{Arc, OnceLock}; use std::vec; @@ -45,6 +46,32 @@ make_udf_expr_and_func!( map_extract_udf ); +#[user_doc( + doc_section(label = "Map Functions"), + description = "Returns a list containing the value for the given key or an empty list if the key is not present in the map.", + syntax_example = "map_extract(map, key)", + sql_example = r#"```sql +SELECT map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'a'); +---- +[1] + +SELECT map_extract(MAP {1: 'one', 2: 'two'}, 2); +---- +['two'] + +SELECT map_extract(MAP {'x': 10, 'y': NULL, 'z': 30}, 'y'); +---- +[] +```"#, + argument( + name = "map", + description = "Map expression. Can be a constant, column, or function, and any combination of map operators." + ), + argument( + name = "key", + description = "Key to extract from the map. Can be a constant, column, or function, any combination of arithmetic or string operators, or a named expression of the previously listed." 
+ ) +)] #[derive(Debug)] pub(super) struct MapExtract { signature: Signature, @@ -109,45 +136,10 @@ impl ScalarUDFImpl for MapExtract { } fn documentation(&self) -> Option<&Documentation> { - Some(get_map_extract_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_map_extract_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_MAP, - "Returns a list containing the value for the given key or an empty list if the key is not present in the map.", - "map_extract(map, key)") - .with_sql_example( - r#"```sql -SELECT map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'a'); ----- -[1] - -SELECT map_extract(MAP {1: 'one', 2: 'two'}, 2); ----- -['two'] - -SELECT map_extract(MAP {'x': 10, 'y': NULL, 'z': 30}, 'y'); ----- -[] -```"#, - ) - .with_argument( - "map", - "Map expression. Can be a constant, column, or function, and any combination of map operators.", - ) - .with_argument( - "key", - "Key to extract from the map. 
Can be a constant, column, or function, any combination of arithmetic or string operators, or a named expression of the previously listed.", - ) - .build() - }) -} - fn general_map_extract_inner( map_array: &MapArray, query_keys_array: &dyn Array, diff --git a/datafusion/functions-nested/src/map_keys.rs b/datafusion/functions-nested/src/map_keys.rs index 1d19cb8492f0..2fa0f5cbab7b 100644 --- a/datafusion/functions-nested/src/map_keys.rs +++ b/datafusion/functions-nested/src/map_keys.rs @@ -21,11 +21,12 @@ use crate::utils::{get_map_entry_field, make_scalar_function}; use arrow_array::{Array, ArrayRef, ListArray}; use arrow_schema::{DataType, Field}; use datafusion_common::{cast::as_map_array, exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_MAP; +use datafusion_doc::DocSection; use datafusion_expr::{ ArrayFunctionSignature, ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; use std::sync::{Arc, OnceLock}; @@ -37,6 +38,24 @@ make_udf_expr_and_func!( map_keys_udf ); +#[user_doc( + doc_section(label = "Map Functions"), + description = "Returns a list of all keys in the map.", + syntax_example = "map_keys(map)", + sql_example = r#"```sql +SELECT map_keys(MAP {'a': 1, 'b': NULL, 'c': 3}); +---- +[a, b, c] + +SELECT map_keys(map([100, 5], [42, 43])); +---- +[100, 5] +```"#, + argument( + name = "map", + description = "Map expression. Can be a constant, column, or function, and any combination of map operators." 
+ ) +)] #[derive(Debug)] pub(crate) struct MapKeysFunc { signature: Signature, @@ -87,37 +106,10 @@ impl ScalarUDFImpl for MapKeysFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_map_keys_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_map_keys_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_MAP, - "Returns a list of all keys in the map.", - "map_keys(map)") - .with_sql_example( - r#"```sql -SELECT map_keys(MAP {'a': 1, 'b': NULL, 'c': 3}); ----- -[a, b, c] - -SELECT map_keys(map([100, 5], [42, 43])); ----- -[100, 5] -```"#, - ) - .with_argument( - "map", - "Map expression. Can be a constant, column, or function, and any combination of map operators." - ) - .build() - }) -} - fn map_keys_inner(args: &[ArrayRef]) -> Result { if args.len() != 1 { return exec_err!("map_keys expects single argument"); diff --git a/datafusion/functions-nested/src/map_values.rs b/datafusion/functions-nested/src/map_values.rs index 816ebe74aff0..0ff621296189 100644 --- a/datafusion/functions-nested/src/map_values.rs +++ b/datafusion/functions-nested/src/map_values.rs @@ -21,11 +21,12 @@ use crate::utils::{get_map_entry_field, make_scalar_function}; use arrow_array::{Array, ArrayRef, ListArray}; use arrow_schema::{DataType, Field}; use datafusion_common::{cast::as_map_array, exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_MAP; +use datafusion_doc::DocSection; use datafusion_expr::{ ArrayFunctionSignature, ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; use std::sync::{Arc, OnceLock}; @@ -37,6 +38,24 @@ make_udf_expr_and_func!( map_values_udf ); +#[user_doc( + doc_section(label = "Map Functions"), + description = "Returns a list of all values in the map.", + syntax_example = "map_values(map)", + sql_example = r#"```sql +SELECT map_values(MAP {'a': 1, 'b': 
NULL, 'c': 3}); +---- +[1, , 3] + +SELECT map_values(map([100, 5], [42, 43])); +---- +[42, 43] +```"#, + argument( + name = "map", + description = "Map expression. Can be a constant, column, or function, and any combination of map operators." + ) +)] #[derive(Debug)] pub(crate) struct MapValuesFunc { signature: Signature, @@ -87,38 +106,10 @@ impl ScalarUDFImpl for MapValuesFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_map_values_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_map_values_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_MAP, - "Returns a list of all values in the map.", - - "map_values(map)") - .with_sql_example( - r#"```sql -SELECT map_values(MAP {'a': 1, 'b': NULL, 'c': 3}); ----- -[1, , 3] - -SELECT map_values(map([100, 5], [42, 43])); ----- -[42, 43] -```"#, - ) - .with_argument( - "map", - "Map expression. Can be a constant, column, or function, and any combination of map operators." 
- ) - .build() - }) -} - fn map_values_inner(args: &[ArrayRef]) -> Result { if args.len() != 1 { return exec_err!("map_values expects single argument"); diff --git a/datafusion/functions-nested/src/position.rs b/datafusion/functions-nested/src/position.rs index feacc7006192..a43d3eb2b945 100644 --- a/datafusion/functions-nested/src/position.rs +++ b/datafusion/functions-nested/src/position.rs @@ -19,10 +19,11 @@ use arrow_schema::DataType::{LargeList, List, UInt64}; use arrow_schema::{DataType, Field}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; + use std::any::Any; use std::sync::{Arc, OnceLock}; @@ -34,6 +35,7 @@ use datafusion_common::cast::{ as_generic_list_array, as_int64_array, as_large_list_array, as_list_array, }; use datafusion_common::{exec_err, internal_err, Result}; +use datafusion_doc::DocSection; use itertools::Itertools; use crate::utils::{compare_element_to_list, make_scalar_function}; @@ -46,6 +48,34 @@ make_udf_expr_and_func!( array_position_udf ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns the position of the first occurrence of the specified element in the array.", + syntax_example = "array_position(array, element)\narray_position(array, element, index)", + sql_example = r#"```sql +> select array_position([1, 2, 2, 3, 1, 4], 2); ++----------------------------------------------+ +| array_position(List([1,2,2,3,1,4]),Int64(2)) | ++----------------------------------------------+ +| 2 | ++----------------------------------------------+ +> select array_position([1, 2, 2, 3, 1, 4], 2, 3); ++----------------------------------------------------+ +| array_position(List([1,2,2,3,1,4]),Int64(2), Int64(3)) | ++----------------------------------------------------+ +| 3 | ++----------------------------------------------------+ +```"#, + argument( + name = "array", + description = 
"Array expression. Can be a constant, column, or function, and any combination of array operators." + ), + argument( + name = "element", + description = "Element to search for position in the array." + ), + argument(name = "index", description = "Index at which to start searching.") +)] #[derive(Debug)] pub(super) struct ArrayPosition { signature: Signature, @@ -95,51 +125,10 @@ impl ScalarUDFImpl for ArrayPosition { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_position_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_array_position_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns the position of the first occurrence of the specified element in the array.", - - "array_position(array, element)\narray_position(array, element, index)") - .with_sql_example( - r#"```sql -> select array_position([1, 2, 2, 3, 1, 4], 2); -+----------------------------------------------+ -| array_position(List([1,2,2,3,1,4]),Int64(2)) | -+----------------------------------------------+ -| 2 | -+----------------------------------------------+ -> select array_position([1, 2, 2, 3, 1, 4], 2, 3); -+----------------------------------------------------+ -| array_position(List([1,2,2,3,1,4]),Int64(2), Int64(3)) | -+----------------------------------------------------+ -| 3 | -+----------------------------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. 
Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "element", - "Element to search for position in the array.", - ) - .with_argument( - "index", - "Index at which to start searching.", - ) - .build() - }) -} - /// Array_position SQL function pub fn array_position_inner(args: &[ArrayRef]) -> Result { if args.len() < 2 || args.len() > 3 { @@ -224,6 +213,28 @@ make_udf_expr_and_func!( "searches for an element in the array, returns all occurrences.", // doc array_positions_udf // internal function name ); + +#[user_doc( + doc_section(label = "Array Functions"), + description = "Searches for an element in the array, returns all occurrences.", + syntax_example = "array_positions(array, element)", + sql_example = r#"```sql +> select array_positions([1, 2, 2, 3, 1, 4], 2); ++-----------------------------------------------+ +| array_positions(List([1,2,2,3,1,4]),Int64(2)) | ++-----------------------------------------------+ +| [2, 3] | ++-----------------------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ), + argument( + name = "element", + description = "Element to search for positions in the array."
+ ) +)] #[derive(Debug)] pub(super) struct ArrayPositions { signature: Signature, @@ -268,39 +279,10 @@ impl ScalarUDFImpl for ArrayPositions { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_positions_doc()) + self.doc() } } -fn get_array_positions_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Searches for an element in the array, returns all occurrences.", - - "array_positions(array, element)") - .with_sql_example( - r#"```sql -> select array_positions([1, 2, 2, 3, 1, 4], 2); -+-----------------------------------------------+ -| array_positions(List([1,2,2,3,1,4]),Int64(2)) | -+-----------------------------------------------+ -| [2, 3] | -+-----------------------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "element", - "Element to search for positions in the array.", - ) - .build() - }) -} - /// Array_positions SQL function pub fn array_positions_inner(args: &[ArrayRef]) -> Result { if args.len() != 2 { diff --git a/datafusion/functions-nested/src/range.rs b/datafusion/functions-nested/src/range.rs index 8344c1a261db..626bedfef6bc 100644 --- a/datafusion/functions-nested/src/range.rs +++ b/datafusion/functions-nested/src/range.rs @@ -37,10 +37,11 @@ use datafusion_common::cast::{ use datafusion_common::{ exec_datafusion_err, exec_err, internal_err, not_impl_datafusion_err, Result, }; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use itertools::Itertools; use std::any::Any; use std::cmp::Ordering; @@ -55,6 +56,39 @@ make_udf_expr_and_func!( "create a list of values in the range between start and stop", range_udf ); + +#[user_doc( + doc_section(label = 
"Array Functions"), + description = "Returns an Arrow array between start and stop with step. The range start..end contains all values with start <= x < end. It is empty if start >= end. Step cannot be 0.", + syntax_example = "range(start, stop, step)", + sql_example = r#"```sql +> select range(2, 10, 3); ++-----------------------------------+ +| range(Int64(2),Int64(10),Int64(3))| ++-----------------------------------+ +| [2, 5, 8] | ++-----------------------------------+ + +> select range(DATE '1992-09-01', DATE '1993-03-01', INTERVAL '1' MONTH); ++--------------------------------------------------------------+ +| range(DATE '1992-09-01', DATE '1993-03-01', INTERVAL '1' MONTH) | ++--------------------------------------------------------------+ +| [1992-09-01, 1992-10-01, 1992-11-01, 1992-12-01, 1993-01-01, 1993-02-01] | ++--------------------------------------------------------------+ +```"#, + argument( + name = "start", + description = "Start of the range. Ints, timestamps, dates or string types that can be coerced to Date32 are supported." + ), + argument( + name = "end", + description = "End of the range (not included). Type must be the same as start." + ), + argument( + name = "step", + description = "Increase by step (cannot be 0). Steps less than a day are supported only for timestamp ranges." + ) +)] #[derive(Debug)] pub(super) struct Range { signature: Signature, @@ -141,52 +175,10 @@ impl ScalarUDFImpl for Range { } fn documentation(&self) -> Option<&Documentation> { - Some(get_range_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_range_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns an Arrow array between start and stop with step. The range start..end contains all values with start <= x < end. It is empty if start >= end. 
Step cannot be 0.", - - "range(start, stop, step)") - .with_sql_example( - r#"```sql -> select range(2, 10, 3); -+-----------------------------------+ -| range(Int64(2),Int64(10),Int64(3))| -+-----------------------------------+ -| [2, 5, 8] | -+-----------------------------------+ - -> select range(DATE '1992-09-01', DATE '1993-03-01', INTERVAL '1' MONTH); -+--------------------------------------------------------------+ -| range(DATE '1992-09-01', DATE '1993-03-01', INTERVAL '1' MONTH) | -+--------------------------------------------------------------+ -| [1992-09-01, 1992-10-01, 1992-11-01, 1992-12-01, 1993-01-01, 1993-02-01] | -+--------------------------------------------------------------+ -```"#, - ) - .with_argument( - "start", - "Start of the range. Ints, timestamps, dates or string types that can be coerced to Date32 are supported.", - ) - .with_argument( - "end", - "End of the range (not included). Type must be the same as start.", - ) - .with_argument( - "step", - "Increase by step (cannot be 0). Steps less than a day are supported only for timestamp ranges.", - ) - .build() - }) -} - make_udf_expr_and_func!( GenSeries, gen_series, @@ -194,6 +186,32 @@ make_udf_expr_and_func!( "create a list of values in the range between start and stop, include upper bound", gen_series_udf ); + +#[user_doc( + doc_section(label = "Array Functions"), + description = "Similar to the range function, but it includes the upper bound.", + syntax_example = "generate_series(start, stop, step)", + sql_example = r#"```sql +> select generate_series(1,3); ++------------------------------------+ +| generate_series(Int64(1),Int64(3)) | ++------------------------------------+ +| [1, 2, 3] | ++------------------------------------+ +```"#, + argument( + name = "start", + description = "Start of the range. Ints, timestamps, dates or string types that can be coerced to Date32 are supported." + ), + argument( + name = "end", + description = "End of the series (included). 
Type must be the same as start." + ), + argument( + name = "step", + description = "Increase by step (cannot be 0). Steps less than a day are supported only for timestamp ranges." + ) +)] #[derive(Debug)] pub(super) struct GenSeries { signature: Signature, @@ -283,45 +301,10 @@ impl ScalarUDFImpl for GenSeries { } fn documentation(&self) -> Option<&Documentation> { - Some(get_generate_series_doc()) + self.doc() } } -static GENERATE_SERIES_DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_generate_series_doc() -> &'static Documentation { - GENERATE_SERIES_DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Similar to the range function, but it includes the upper bound.", - - "generate_series(start, stop, step)") - .with_sql_example( - r#"```sql -> select generate_series(1,3); -+------------------------------------+ -| generate_series(Int64(1),Int64(3)) | -+------------------------------------+ -| [1, 2, 3] | -+------------------------------------+ -```"#, - ) - .with_argument( - "start", - "start of the series. Ints, timestamps, dates or string types that can be coerced to Date32 are supported.", - ) - .with_argument( - "end", - "end of the series (included). Type must be the same as start.", - ) - .with_argument( - "step", - "increase by step (can not be 0). Steps less than a day are supported only for timestamp ranges.", - ) - .build() - }) -} - /// Generates an array of integers from start to stop with a given step. /// /// This function takes 1 to 3 ArrayRefs as arguments, representing start, stop, and step values. 
diff --git a/datafusion/functions-nested/src/remove.rs b/datafusion/functions-nested/src/remove.rs index e5521706bece..c17ef3965e92 100644 --- a/datafusion/functions-nested/src/remove.rs +++ b/datafusion/functions-nested/src/remove.rs @@ -27,10 +27,11 @@ use arrow_buffer::OffsetBuffer; use arrow_schema::{DataType, Field}; use datafusion_common::cast::as_int64_array; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; use std::sync::{Arc, OnceLock}; @@ -42,6 +43,27 @@ make_udf_expr_and_func!( array_remove_udf ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Removes the first element from the array equal to the given value.", + syntax_example = "array_remove(array, element)", + sql_example = r#"```sql +> select array_remove([1, 2, 2, 3, 2, 1, 4], 2); ++----------------------------------------------+ +| array_remove(List([1,2,2,3,2,1,4]),Int64(2)) | ++----------------------------------------------+ +| [1, 2, 3, 2, 1, 4] | ++----------------------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ), + argument( + name = "element", + description = "Element to be removed from the array." 
+ ) +)] #[derive(Debug)] pub(super) struct ArrayRemove { signature: Signature, @@ -87,41 +109,10 @@ impl ScalarUDFImpl for ArrayRemove { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_remove_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_array_remove_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Removes the first element from the array equal to the given value.", - - "array_remove(array, element)") - .with_sql_example( - r#"```sql -> select array_remove([1, 2, 2, 3, 2, 1, 4], 2); -+----------------------------------------------+ -| array_remove(List([1,2,2,3,2,1,4]),Int64(2)) | -+----------------------------------------------+ -| [1, 2, 3, 2, 1, 4] | -+----------------------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "element", - "Element to be removed from the array.", - ) - .build() - }) -} - make_udf_expr_and_func!( ArrayRemoveN, array_remove_n, @@ -130,6 +121,28 @@ make_udf_expr_and_func!( array_remove_n_udf ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Removes the first `max` elements from the array equal to the given value.", + syntax_example = "array_remove_n(array, element, max)", + sql_example = r#"```sql +> select array_remove_n([1, 2, 2, 3, 2, 1, 4], 2, 2); ++---------------------------------------------------------+ +| array_remove_n(List([1,2,2,3,2,1,4]),Int64(2),Int64(2)) | ++---------------------------------------------------------+ +| [1, 3, 2, 1, 4] | ++---------------------------------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." 
+ ), + argument( + name = "element", + description = "Element to be removed from the array." + ), + argument(name = "max", description = "Number of first occurrences to remove.") +)] #[derive(Debug)] pub(super) struct ArrayRemoveN { signature: Signature, @@ -175,43 +188,10 @@ impl ScalarUDFImpl for ArrayRemoveN { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_remove_n_doc()) + self.doc() } } -fn get_array_remove_n_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Removes the first `max` elements from the array equal to the given value.", - - "array_remove_n(array, element, max)") - .with_sql_example( - r#"```sql -> select array_remove_n([1, 2, 2, 3, 2, 1, 4], 2, 2); -+---------------------------------------------------------+ -| array_remove_n(List([1,2,2,3,2,1,4]),Int64(2),Int64(2)) | -+---------------------------------------------------------+ -| [1, 3, 2, 1, 4] | -+---------------------------------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. 
Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "element", - "Element to be removed from the array.", - ) - .with_argument( - "max", - "Number of first occurrences to remove.", - ) - .build() - }) -} - make_udf_expr_and_func!( ArrayRemoveAll, array_remove_all, @@ -220,6 +200,27 @@ make_udf_expr_and_func!( array_remove_all_udf ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Removes all elements from the array equal to the given value.", + syntax_example = "array_remove_all(array, element)", + sql_example = r#"```sql +> select array_remove_all([1, 2, 2, 3, 2, 1, 4], 2); ++--------------------------------------------------+ +| array_remove_all(List([1,2,2,3,2,1,4]),Int64(2)) | ++--------------------------------------------------+ +| [1, 3, 1, 4] | ++--------------------------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ), + argument( + name = "element", + description = "Element to be removed from the array." 
+ ) +)] #[derive(Debug)] pub(super) struct ArrayRemoveAll { signature: Signature, @@ -265,39 +266,10 @@ impl ScalarUDFImpl for ArrayRemoveAll { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_remove_all_doc()) + self.doc() } } -fn get_array_remove_all_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Removes all elements from the array equal to the given value.", - - "array_remove_all(array, element)") - .with_sql_example( - r#"```sql -> select array_remove_all([1, 2, 2, 3, 2, 1, 4], 2); -+--------------------------------------------------+ -| array_remove_all(List([1,2,2,3,2,1,4]),Int64(2)) | -+--------------------------------------------------+ -| [1, 3, 1, 4] | -+--------------------------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. 
Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "element", - "Element to be removed from the array.", - ) - .build() - }) -} - /// Array_remove SQL function pub fn array_remove_inner(args: &[ArrayRef]) -> Result { if args.len() != 2 { diff --git a/datafusion/functions-nested/src/repeat.rs b/datafusion/functions-nested/src/repeat.rs index 2842b91a781b..dac8826a6025 100644 --- a/datafusion/functions-nested/src/repeat.rs +++ b/datafusion/functions-nested/src/repeat.rs @@ -29,10 +29,11 @@ use arrow_schema::DataType::{LargeList, List}; use arrow_schema::{DataType, Field}; use datafusion_common::cast::{as_int64_array, as_large_list_array, as_list_array}; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; use std::sync::{Arc, OnceLock}; @@ -43,6 +44,34 @@ make_udf_expr_and_func!( "returns an array containing element `count` times.", // doc array_repeat_udf // internal function name ); + +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns an array containing element `count` times.", + syntax_example = "array_repeat(element, count)", + sql_example = r#"```sql +> select array_repeat(1, 3); ++---------------------------------+ +| array_repeat(Int64(1),Int64(3)) | ++---------------------------------+ +| [1, 1, 1] | ++---------------------------------+ +> select array_repeat([1, 2], 2); ++------------------------------------+ +| array_repeat(List([1,2]),Int64(2)) | ++------------------------------------+ +| [[1, 2], [1, 2]] | ++------------------------------------+ +```"#, + argument( + name = "element", + description = "Element expression. Can be a constant, column, or function, and any combination of array operators." 
+ ), + argument( + name = "count", + description = "Value of how many times to repeat the element." + ) +)] #[derive(Debug)] pub(super) struct ArrayRepeat { signature: Signature, @@ -91,47 +120,10 @@ impl ScalarUDFImpl for ArrayRepeat { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_repeat_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_array_repeat_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns an array containing element `count` times.", - - "array_repeat(element, count)") - .with_sql_example( - r#"```sql -> select array_repeat(1, 3); -+---------------------------------+ -| array_repeat(Int64(1),Int64(3)) | -+---------------------------------+ -| [1, 1, 1] | -+---------------------------------+ -> select array_repeat([1, 2], 2); -+------------------------------------+ -| array_repeat(List([1,2]),Int64(2)) | -+------------------------------------+ -| [[1, 2], [1, 2]] | -+------------------------------------+ -```"#, - ) - .with_argument( - "element", - "Element expression. 
Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "count", - "Value of how many times to repeat the element.", - ) - .build() - }) -} - /// Array_repeat SQL function pub fn array_repeat_inner(args: &[ArrayRef]) -> Result { if args.len() != 2 { diff --git a/datafusion/functions-nested/src/replace.rs b/datafusion/functions-nested/src/replace.rs index e971d97dbf2b..0774d14d52a1 100644 --- a/datafusion/functions-nested/src/replace.rs +++ b/datafusion/functions-nested/src/replace.rs @@ -27,10 +27,11 @@ use arrow_buffer::{BooleanBufferBuilder, NullBuffer, OffsetBuffer}; use arrow_schema::Field; use datafusion_common::cast::as_int64_array; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use crate::utils::compare_element_to_list; use crate::utils::make_scalar_function; @@ -58,6 +59,25 @@ make_udf_expr_and_func!(ArrayReplaceAll, array_replace_all_udf ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Replaces the first occurrence of the specified element with another specified element.", + syntax_example = "array_replace(array, from, to)", + sql_example = r#"```sql +> select array_replace([1, 2, 2, 3, 2, 1, 4], 2, 5); ++--------------------------------------------------------+ +| array_replace(List([1,2,2,3,2,1,4]),Int64(2),Int64(5)) | ++--------------------------------------------------------+ +| [1, 5, 2, 3, 2, 1, 4] | ++--------------------------------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." 
+ ), + argument(name = "from", description = "Initial element."), + argument(name = "to", description = "Final element.") +)] #[derive(Debug)] pub(super) struct ArrayReplace { signature: Signature, @@ -103,21 +123,15 @@ impl ScalarUDFImpl for ArrayReplace { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_replace_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_array_replace_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Replaces the first occurrence of the specified element with another specified element.", - - "array_replace(array, from, to)") - .with_sql_example( - r#"```sql +#[user_doc( + doc_section(label = "Array Functions"), + description = "Replaces the first `max` occurrences of the specified element with another specified element.", + syntax_example = "array_replace_n(array, from, to, max)", + sql_example = r#"```sql > select array_replace([1, 2, 2, 3, 2, 1, 4], 2, 5); +--------------------------------------------------------+ | array_replace(List([1,2,2,3,2,1,4]),Int64(2),Int64(5)) | @@ -125,23 +139,14 @@ fn get_array_replace_doc() -> &'static Documentation { | [1, 5, 2, 3, 2, 1, 4] | +--------------------------------------------------------+ ```"#, - ) - .with_argument( - "array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "from", - "Initial element.", - ) - .with_argument( - "to", - "Final element.", - ) - .build() - }) -} - + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." 
+ ), + argument(name = "from", description = "Initial element."), + argument(name = "to", description = "Final element."), + argument(name = "max", description = "Number of first occurrences to replace.") +)] #[derive(Debug)] pub(super) struct ArrayReplaceN { signature: Signature, @@ -187,47 +192,29 @@ impl ScalarUDFImpl for ArrayReplaceN { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_replace_n_doc()) + self.doc() } } -fn get_array_replace_n_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Replaces the first `max` occurrences of the specified element with another specified element.", - - "array_replace_n(array, from, to, max)") - .with_sql_example( - r#"```sql -> select array_replace_n([1, 2, 2, 3, 2, 1, 4], 2, 5, 2); -+-------------------------------------------------------------------+ -| array_replace_n(List([1,2,2,3,2,1,4]),Int64(2),Int64(5),Int64(2)) | -+-------------------------------------------------------------------+ -| [1, 5, 5, 3, 2, 1, 4] | -+-------------------------------------------------------------------+ +#[user_doc( + doc_section(label = "Array Functions"), + description = "Replaces all occurrences of the specified element with another specified element.", + syntax_example = "array_replace_all(array, from, to)", + sql_example = r#"```sql +> select array_replace_all([1, 2, 2, 3, 2, 1, 4], 2, 5); ++------------------------------------------------------------+ +| array_replace_all(List([1,2,2,3,2,1,4]),Int64(2),Int64(5)) | ++------------------------------------------------------------+ +| [1, 5, 5, 3, 5, 1, 4] | ++------------------------------------------------------------+ ```"#, - ) - .with_argument( - "array", - "Array expression. 
Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "from", - "Initial element.", - ) - .with_argument( - "to", - "Final element.", - ) - .with_argument( - "max", - "Number of first occurrences to replace.", - ) - .build() - }) -} - + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ), + argument(name = "from", description = "Initial element."), + argument(name = "to", description = "Final element.") +)] #[derive(Debug)] pub(super) struct ArrayReplaceAll { signature: Signature, @@ -273,43 +260,10 @@ impl ScalarUDFImpl for ArrayReplaceAll { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_replace_all_doc()) + self.doc() } } -fn get_array_replace_all_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Replaces all occurrences of the specified element with another specified element.", - - "array_replace_all(array, from, to)") - .with_sql_example( - r#"```sql -> select array_replace_all([1, 2, 2, 3, 2, 1, 4], 2, 5); -+------------------------------------------------------------+ -| array_replace_all(List([1,2,2,3,2,1,4]),Int64(2),Int64(5)) | -+------------------------------------------------------------+ -| [1, 5, 5, 3, 5, 1, 4] | -+------------------------------------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "from", - "Initial element.", - ) - .with_argument( - "to", - "Final element.", - ) - .build() - }) -} - /// For each element of `list_array[i]`, replaces up to `arr_n[i]` occurrences /// of `from_array[i]`, `to_array[i]`. 
/// diff --git a/datafusion/functions-nested/src/resize.rs b/datafusion/functions-nested/src/resize.rs index c9487dd81843..1653a5bd202d 100644 --- a/datafusion/functions-nested/src/resize.rs +++ b/datafusion/functions-nested/src/resize.rs @@ -27,10 +27,11 @@ use arrow_schema::DataType::{FixedSizeList, LargeList, List}; use arrow_schema::{DataType, FieldRef}; use datafusion_common::cast::{as_int64_array, as_large_list_array, as_list_array}; use datafusion_common::{exec_err, internal_datafusion_err, Result, ScalarValue}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; use std::sync::{Arc, OnceLock}; @@ -42,6 +43,28 @@ make_udf_expr_and_func!( array_resize_udf ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Resizes the list to contain size elements. Initializes new elements with value or empty if value is not set.", + syntax_example = "array_resize(array, size, value)", + sql_example = r#"```sql +> select array_resize([1, 2, 3], 5, 0); ++-------------------------------------+ +| array_resize(List([1,2,3],5,0)) | ++-------------------------------------+ +| [1, 2, 3, 0, 0] | ++-------------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ), + argument(name = "size", description = "New size of given array."), + argument( + name = "value", + description = "Defines new elements' value or empty if value is not set." 
+ ) +)] #[derive(Debug)] pub(super) struct ArrayResize { signature: Signature, @@ -93,45 +116,10 @@ impl ScalarUDFImpl for ArrayResize { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_resize_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_array_resize_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Resizes the list to contain size elements. Initializes new elements with value or empty if value is not set.", - - "array_resize(array, size, value)") - .with_sql_example( - r#"```sql -> select array_resize([1, 2, 3], 5, 0); -+-------------------------------------+ -| array_resize(List([1,2,3],5,0)) | -+-------------------------------------+ -| [1, 2, 3, 0, 0] | -+-------------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "size", - "New size of given array.", - ) - .with_argument( - "value", - "Defines new elements' value or empty if value is not set.", - ) - .build() - }) -} - /// array_resize SQL function pub(crate) fn array_resize_inner(arg: &[ArrayRef]) -> Result { if arg.len() < 2 || arg.len() > 3 { diff --git a/datafusion/functions-nested/src/reverse.rs b/datafusion/functions-nested/src/reverse.rs index aa898268d10b..f225d3b08fd4 100644 --- a/datafusion/functions-nested/src/reverse.rs +++ b/datafusion/functions-nested/src/reverse.rs @@ -25,10 +25,11 @@ use arrow_schema::DataType::{LargeList, List, Null}; use arrow_schema::{DataType, FieldRef}; use datafusion_common::cast::{as_large_list_array, as_list_array}; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; use 
std::sync::{Arc, OnceLock}; @@ -40,6 +41,23 @@ make_udf_expr_and_func!( array_reverse_udf ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns the array with the order of the elements reversed.", + syntax_example = "array_reverse(array)", + sql_example = r#"```sql +> select array_reverse([1, 2, 3, 4]); ++------------------------------------------------------------+ +| array_reverse(List([1, 2, 3, 4])) | ++------------------------------------------------------------+ +| [4, 3, 2, 1] | ++------------------------------------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ) +)] #[derive(Debug)] pub(super) struct ArrayReverse { signature: Signature, @@ -85,37 +103,10 @@ impl ScalarUDFImpl for ArrayReverse { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_reverse_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_array_reverse_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns the array with the order of the elements reversed.", - - "array_reverse(array)") - .with_sql_example( - r#"```sql -> select array_reverse([1, 2, 3, 4]); -+------------------------------------------------------------+ -| array_reverse(List([1, 2, 3, 4])) | -+------------------------------------------------------------+ -| [4, 3, 2, 1] | -+------------------------------------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. 
Can be a constant, column, or function, and any combination of array operators.", - ) - .build() - }) -} - /// array_reverse SQL function pub fn array_reverse_inner(arg: &[ArrayRef]) -> Result { if arg.len() != 1 { diff --git a/datafusion/functions-nested/src/set_ops.rs b/datafusion/functions-nested/src/set_ops.rs index 642fd9ad54cd..2b90ed20164a 100644 --- a/datafusion/functions-nested/src/set_ops.rs +++ b/datafusion/functions-nested/src/set_ops.rs @@ -27,10 +27,11 @@ use arrow::row::{RowConverter, SortField}; use arrow_schema::DataType::{FixedSizeList, LargeList, List, Null}; use datafusion_common::cast::{as_large_list_array, as_list_array}; use datafusion_common::{exec_err, internal_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use itertools::Itertools; use std::any::Any; use std::collections::HashSet; @@ -62,6 +63,33 @@ make_udf_expr_and_func!( array_distinct_udf ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns an array of the elements that are present in either array (all elements from both arrays) without duplicates.", + syntax_example = "array_union(array1, array2)", + sql_example = r#"```sql +> select array_union([1, 2, 3, 4], [5, 6, 3, 4]); ++----------------------------------------------------+ +| array_union([1, 2, 3, 4], [5, 6, 3, 4]); | ++----------------------------------------------------+ +| [1, 2, 3, 4, 5, 6] | ++----------------------------------------------------+ +> select array_union([1, 2, 3, 4], [5, 6, 7, 8]); ++----------------------------------------------------+ +| array_union([1, 2, 3, 4], [5, 6, 7, 8]); | ++----------------------------------------------------+ +| [1, 2, 3, 4, 5, 6, 7, 8] | ++----------------------------------------------------+ +```"#, + argument( + name = "array1", + description = "Array expression. 
Can be a constant, column, or function, and any combination of array operators." + ), + argument( + name = "array2", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ) +)] #[derive(Debug)] pub(super) struct ArrayUnion { signature: Signature, @@ -111,47 +139,37 @@ impl ScalarUDFImpl for ArrayUnion { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_union_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_array_union_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns an array of elements that are present in both arrays (all elements from both arrays) with out duplicates.", - - "array_union(array1, array2)") - .with_sql_example( - r#"```sql -> select array_union([1, 2, 3, 4], [5, 6, 3, 4]); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns an array of elements in the intersection of array1 and array2.", + syntax_example = "array_intersect(array1, array2)", + sql_example = r#"```sql +> select array_intersect([1, 2, 3, 4], [5, 6, 3, 4]); +----------------------------------------------------+ -| array_union([1, 2, 3, 4], [5, 6, 3, 4]); | +| array_intersect([1, 2, 3, 4], [5, 6, 3, 4]); | +----------------------------------------------------+ -| [1, 2, 3, 4, 5, 6] | +| [3, 4] | +----------------------------------------------------+ -> select array_union([1, 2, 3, 4], [5, 6, 7, 8]); +> select array_intersect([1, 2, 3, 4], [5, 6, 7, 8]); +----------------------------------------------------+ -| array_union([1, 2, 3, 4], [5, 6, 7, 8]); | +| array_intersect([1, 2, 3, 4], [5, 6, 7, 8]); | +----------------------------------------------------+ -| [1, 2, 3, 4, 5, 6, 7, 8] | +| [] | +----------------------------------------------------+ ```"#, - ) - .with_argument( - "array1", - "Array expression. 
Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "array2", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .build() - }) -} - + argument( + name = "array1", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ), + argument( + name = "array2", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ) +)] #[derive(Debug)] pub(super) struct ArrayIntersect { signature: Signature, @@ -201,45 +219,27 @@ impl ScalarUDFImpl for ArrayIntersect { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_intersect_doc()) + self.doc() } } -fn get_array_intersect_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns an array of elements in the intersection of array1 and array2.", - - "array_intersect(array1, array2)") - .with_sql_example( - r#"```sql -> select array_intersect([1, 2, 3, 4], [5, 6, 3, 4]); -+----------------------------------------------------+ -| array_intersect([1, 2, 3, 4], [5, 6, 3, 4]); | -+----------------------------------------------------+ -| [3, 4] | -+----------------------------------------------------+ -> select array_intersect([1, 2, 3, 4], [5, 6, 7, 8]); -+----------------------------------------------------+ -| array_intersect([1, 2, 3, 4], [5, 6, 7, 8]); | -+----------------------------------------------------+ -| [] | -+----------------------------------------------------+ +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns distinct values from the array after removing duplicates.", + syntax_example = "array_distinct(array)", + sql_example = r#"```sql +> select array_distinct([1, 3, 2, 3, 1, 2, 4]); ++---------------------------------+ +| array_distinct(List([1,2,3,4])) | 
++---------------------------------+ +| [1, 2, 3, 4] | ++---------------------------------+ ```"#, - ) - .with_argument( - "array1", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "array2", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .build() - }) -} - + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ) +)] #[derive(Debug)] pub(super) struct ArrayDistinct { signature: Signature, @@ -296,35 +296,10 @@ impl ScalarUDFImpl for ArrayDistinct { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_distinct_doc()) + self.doc() } } -fn get_array_distinct_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Returns distinct values from the array after removing duplicates.", - - "array_distinct(array)") - .with_sql_example( - r#"```sql -> select array_distinct([1, 3, 2, 3, 1, 2, 4]); -+---------------------------------+ -| array_distinct(List([1,2,3,4])) | -+---------------------------------+ -| [1, 2, 3, 4] | -+---------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. 
Can be a constant, column, or function, and any combination of array operators.", - ) - .build() - }) -} - /// array_distinct SQL function /// example: from list [1, 3, 2, 3, 1, 2, 4] to [1, 2, 3, 4] fn array_distinct_inner(args: &[ArrayRef]) -> Result { diff --git a/datafusion/functions-nested/src/sort.rs b/datafusion/functions-nested/src/sort.rs index 043fedd89bf8..df823769ccd8 100644 --- a/datafusion/functions-nested/src/sort.rs +++ b/datafusion/functions-nested/src/sort.rs @@ -25,10 +25,11 @@ use arrow_schema::DataType::{FixedSizeList, LargeList, List}; use arrow_schema::{DataType, Field, SortOptions}; use datafusion_common::cast::{as_list_array, as_string_array}; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; use std::sync::{Arc, OnceLock}; @@ -40,6 +41,31 @@ make_udf_expr_and_func!( array_sort_udf ); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Sort array.", + syntax_example = "array_sort(array, desc, nulls_first)", + sql_example = r#"```sql +> select array_sort([3, 1, 2]); ++-----------------------------+ +| array_sort(List([3,1,2])) | ++-----------------------------+ +| [1, 2, 3] | ++-----------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ), + argument( + name = "desc", + description = "Whether to sort in descending order(`ASC` or `DESC`)." + ), + argument( + name = "nulls_first", + description = "Whether to sort nulls first(`NULLS FIRST` or `NULLS LAST`)." 
+ ) +)] #[derive(Debug)] pub(super) struct ArraySort { signature: Signature, @@ -96,45 +122,10 @@ impl ScalarUDFImpl for ArraySort { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_sort_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_array_sort_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Sort array.", - - "array_sort(array, desc, nulls_first)") - .with_sql_example( - r#"```sql -> select array_sort([3, 1, 2]); -+-----------------------------+ -| array_sort(List([3,1,2])) | -+-----------------------------+ -| [1, 2, 3] | -+-----------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "desc", - "Whether to sort in descending order(`ASC` or `DESC`).", - ) - .with_argument( - "nulls_first", - "Whether to sort nulls first(`NULLS FIRST` or `NULLS LAST`).", - ) - .build() - }) -} - /// Array_sort SQL function pub fn array_sort_inner(args: &[ArrayRef]) -> Result { if args.is_empty() || args.len() > 3 { diff --git a/datafusion/functions-nested/src/string.rs b/datafusion/functions-nested/src/string.rs index 143a3d06a32a..10c58ddc70b2 100644 --- a/datafusion/functions-nested/src/string.rs +++ b/datafusion/functions-nested/src/string.rs @@ -40,11 +40,12 @@ use arrow_schema::DataType::{ }; use datafusion_common::cast::{as_large_list_array, as_list_array}; use datafusion_common::exec_err; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; use datafusion_functions::strings::StringArrayType; +use datafusion_macros::user_doc; use std::sync::{Arc, OnceLock}; macro_rules! 
call_array_function { @@ -118,6 +119,29 @@ make_udf_expr_and_func!( "converts each element to its text representation.", // doc array_to_string_udf // internal function name ); + +#[user_doc( + doc_section(label = "Array Functions"), + description = "Converts each element to its text representation.", + syntax_example = "array_to_string(array, delimiter[, null_string])", + sql_example = r#"```sql +> select array_to_string([[1, 2, 3, 4], [5, 6, 7, 8]], ','); ++----------------------------------------------------+ +| array_to_string(List([1,2,3,4,5,6,7,8]),Utf8(",")) | ++----------------------------------------------------+ +| 1,2,3,4,5,6,7,8 | ++----------------------------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ), + argument(name = "delimiter", description = "Array element separator."), + argument( + name = "null_string", + description = "Optional. String to replace null values in the array. If not provided, nulls will be handled by default behavior." 
+ ) +)] #[derive(Debug)] pub(super) struct ArrayToString { signature: Signature, @@ -172,45 +196,10 @@ impl ScalarUDFImpl for ArrayToString { } fn documentation(&self) -> Option<&Documentation> { - Some(get_array_to_string_doc()) + self.doc() } } -static DOCUMENTATION_ARRAY_TO_STRING: OnceLock = OnceLock::new(); - -fn get_array_to_string_doc() -> &'static Documentation { - DOCUMENTATION_ARRAY_TO_STRING.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Converts each element to its text representation.", - - "array_to_string(array, delimiter[, null_string])") - .with_sql_example( - r#"```sql -> select array_to_string([[1, 2, 3, 4], [5, 6, 7, 8]], ','); -+----------------------------------------------------+ -| array_to_string(List([1,2,3,4,5,6,7,8]),Utf8(",")) | -+----------------------------------------------------+ -| 1,2,3,4,5,6,7,8 | -+----------------------------------------------------+ -```"#, - ) - .with_argument( - "array", - "Array expression. Can be a constant, column, or function, and any combination of array operators.", - ) - .with_argument( - "delimiter", - "Array element separator.", - ) - .with_argument( - "null_string", - "Optional. String to replace null values in the array. If not provided, nulls will be handled by default behavior.", - ) - .build() - }) -} - make_udf_expr_and_func!( StringToArray, string_to_array, @@ -218,6 +207,32 @@ make_udf_expr_and_func!( "splits a `string` based on a `delimiter` and returns an array of parts. Any parts matching the optional `null_string` will be replaced with `NULL`", // doc string_to_array_udf // internal function name ); + +#[user_doc( + doc_section(label = "Array Functions"), + description = "Splits a string into an array of substrings based on a delimiter. 
Any substrings matching the optional `null_str` argument are replaced with NULL.", + syntax_example = "string_to_array(str, delimiter[, null_str])", + sql_example = r#"```sql +> select string_to_array('abc##def', '##'); ++-----------------------------------+ +| string_to_array(Utf8('abc##def')) | ++-----------------------------------+ +| ['abc', 'def'] | ++-----------------------------------+ +> select string_to_array('abc def', ' ', 'def'); ++---------------------------------------------+ +| string_to_array(Utf8('abc def'), Utf8(' '), Utf8('def')) | ++---------------------------------------------+ +| ['abc', NULL] | ++---------------------------------------------+ +```"#, + argument(name = "str", description = "String expression to split."), + argument(name = "delimiter", description = "Delimiter string to split on."), + argument( + name = "null_str", + description = "Substring values to be replaced with `NULL`." + ) +)] #[derive(Debug)] pub(super) struct StringToArray { signature: Signature, @@ -281,51 +296,10 @@ impl ScalarUDFImpl for StringToArray { } fn documentation(&self) -> Option<&Documentation> { - Some(get_string_to_array_doc()) + self.doc() } } -static DOCUMENTATION_STRING_TO_ARRAY: OnceLock = OnceLock::new(); - -fn get_string_to_array_doc() -> &'static Documentation { - DOCUMENTATION_STRING_TO_ARRAY.get_or_init(|| { - Documentation::builder( - DOC_SECTION_ARRAY, - "Splits a string into an array of substrings based on a delimiter. 
Any substrings matching the optional `null_str` argument are replaced with NULL.", - - "string_to_array(str, delimiter[, null_str])") - .with_sql_example( - r#"```sql -> select string_to_array('abc##def', '##'); -+-----------------------------------+ -| string_to_array(Utf8('abc##def')) | -+-----------------------------------+ -| ['abc', 'def'] | -+-----------------------------------+ -> select string_to_array('abc def', ' ', 'def'); -+---------------------------------------------+ -| string_to_array(Utf8('abc def'), Utf8(' '), Utf8('def')) | -+---------------------------------------------+ -| ['abc', NULL] | -+---------------------------------------------+ -```"#, - ) - .with_argument( - "str", - "String expression to split.", - ) - .with_argument( - "delimiter", - "Delimiter string to split on.", - ) - .with_argument( - "null_str", - "Substring values to be replaced with `NULL`.", - ) - .build() - }) -} - /// Array_to_string SQL function pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result { if args.len() < 2 || args.len() > 3 { diff --git a/datafusion/functions/src/crypto/digest.rs b/datafusion/functions/src/crypto/digest.rs index 0d2d130cdd71..19cf893ae618 100644 --- a/datafusion/functions/src/crypto/digest.rs +++ b/datafusion/functions/src/crypto/digest.rs @@ -19,13 +19,40 @@ use super::basic::{digest, utf8_or_binary_to_binary_type}; use arrow::datatypes::DataType; use datafusion_common::Result; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature::*, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; use std::sync::OnceLock; +#[user_doc( + doc_section(label = "Hashing Functions"), + description = "Computes the binary hash of an expression using the specified algorithm.", + syntax_example = "digest(expression, algorithm)", + sql_example = r#"```sql +> select digest('foo', 'sha256'); 
++------------------------------------------+ +| digest(Utf8("foo"), Utf8("sha256")) | ++------------------------------------------+ +| | ++------------------------------------------+ +```"#, + standard_argument(name = "expression", prefix = "String"), + argument( + name = "algorithm", + description = "String expression specifying algorithm to use. Must be one of: +- md5 +- sha224 +- sha256 +- sha384 +- sha512 +- blake2s +- blake2b +- blake3" + ) +)] #[derive(Debug)] pub struct DigestFunc { signature: Signature, @@ -78,43 +105,6 @@ impl ScalarUDFImpl for DigestFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_digest_doc()) + self.doc() } } - -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_digest_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_HASHING, - "Computes the binary hash of an expression using the specified algorithm.", - "digest(expression, algorithm)", - ) - .with_sql_example( - r#"```sql -> select digest('foo', 'sha256'); -+------------------------------------------+ -| digest(Utf8("foo"), Utf8("sha256")) | -+------------------------------------------+ -| | -+------------------------------------------+ -```"#, - ) - .with_standard_argument("expression", Some("String")) - .with_argument( - "algorithm", - "String expression specifying algorithm to use. 
Must be one of: - -- md5 -- sha224 -- sha256 -- sha384 -- sha512 -- blake2s -- blake2b -- blake3", - ) - .build() - }) -} diff --git a/datafusion/functions/src/crypto/md5.rs b/datafusion/functions/src/crypto/md5.rs index e6cc59a4a4f7..0925a4291c54 100644 --- a/datafusion/functions/src/crypto/md5.rs +++ b/datafusion/functions/src/crypto/md5.rs @@ -19,13 +19,28 @@ use crate::crypto::basic::md5; use arrow::datatypes::DataType; use datafusion_common::{plan_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; use std::sync::OnceLock; +#[user_doc( + doc_section(label = "Hashing Functions"), + description = "Computes an MD5 128-bit checksum for a string expression.", + syntax_example = "md5(expression)", + sql_example = r#"```sql +> select md5('foo'); ++-------------------------------------+ +| md5(Utf8("foo")) | ++-------------------------------------+ +| | ++-------------------------------------+ +```"#, + standard_argument(name = "expression", prefix = "String") +)] #[derive(Debug)] pub struct Md5Func { signature: Signature, @@ -94,30 +109,6 @@ impl ScalarUDFImpl for Md5Func { } fn documentation(&self) -> Option<&Documentation> { - Some(get_md5_doc()) + self.doc() } } - -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_md5_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_HASHING, - "Computes an MD5 128-bit checksum for a string expression.", - "md5(expression)", - ) - .with_sql_example( - r#"```sql -> select md5('foo'); -+-------------------------------------+ -| md5(Utf8("foo")) | -+-------------------------------------+ -| | -+-------------------------------------+ -```"#, - ) - .with_standard_argument("expression", Some("String")) - .build() - }) -} diff --git 
a/datafusion/functions/src/crypto/sha224.rs b/datafusion/functions/src/crypto/sha224.rs index eba22bb3de37..f16355b4d57a 100644 --- a/datafusion/functions/src/crypto/sha224.rs +++ b/datafusion/functions/src/crypto/sha224.rs @@ -19,13 +19,28 @@ use super::basic::{sha224, utf8_or_binary_to_binary_type}; use arrow::datatypes::DataType; use datafusion_common::Result; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; use std::sync::OnceLock; +#[user_doc( + doc_section(label = "Hashing Functions"), + description = "Computes the SHA-224 hash of a binary string.", + syntax_example = "sha224(expression)", + sql_example = r#"```sql +> select sha224('foo'); ++------------------------------------------+ +| sha224(Utf8("foo")) | ++------------------------------------------+ +| | ++------------------------------------------+ +```"#, + standard_argument(name = "expression", prefix = "String") +)] #[derive(Debug)] pub struct SHA224Func { signature: Signature, @@ -50,30 +65,6 @@ impl SHA224Func { } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_sha224_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_HASHING, - "Computes the SHA-224 hash of a binary string.", - "sha224(expression)", - ) - .with_sql_example( - r#"```sql -> select sha224('foo'); -+------------------------------------------+ -| sha224(Utf8("foo")) | -+------------------------------------------+ -| | -+------------------------------------------+ -```"#, - ) - .with_standard_argument("expression", Some("String")) - .build() - }) -} - impl ScalarUDFImpl for SHA224Func { fn as_any(&self) -> &dyn Any { self @@ -100,6 +91,6 @@ impl ScalarUDFImpl for SHA224Func { } fn documentation(&self) -> Option<&Documentation> { - Some(get_sha224_doc()) + self.doc() } } diff 
--git a/datafusion/functions/src/crypto/sha256.rs b/datafusion/functions/src/crypto/sha256.rs index 9343fa0af942..ec187ed6b0e8 100644 --- a/datafusion/functions/src/crypto/sha256.rs +++ b/datafusion/functions/src/crypto/sha256.rs @@ -19,13 +19,28 @@ use super::basic::{sha256, utf8_or_binary_to_binary_type}; use arrow::datatypes::DataType; use datafusion_common::Result; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; use std::sync::OnceLock; +#[user_doc( + doc_section(label = "Hashing Functions"), + description = "Computes the SHA-256 hash of a binary string.", + syntax_example = "sha256(expression)", + sql_example = r#"```sql +> select sha256('foo'); ++--------------------------------------+ +| sha256(Utf8("foo")) | ++--------------------------------------+ +| | ++--------------------------------------+ +```"#, + standard_argument(name = "expression", prefix = "String") +)] #[derive(Debug)] pub struct SHA256Func { signature: Signature, @@ -74,30 +89,6 @@ impl ScalarUDFImpl for SHA256Func { } fn documentation(&self) -> Option<&Documentation> { - Some(get_sha256_doc()) + self.doc() } } - -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_sha256_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_HASHING, - "Computes the SHA-256 hash of a binary string.", - "sha256(expression)", - ) - .with_sql_example( - r#"```sql -> select sha256('foo'); -+--------------------------------------+ -| sha256(Utf8("foo")) | -+--------------------------------------+ -| | -+--------------------------------------+ -```"#, - ) - .with_standard_argument("expression", Some("String")) - .build() - }) -} diff --git a/datafusion/functions/src/crypto/sha384.rs b/datafusion/functions/src/crypto/sha384.rs index 495036d02474..c6575a3ad2c8 
100644 --- a/datafusion/functions/src/crypto/sha384.rs +++ b/datafusion/functions/src/crypto/sha384.rs @@ -19,13 +19,28 @@ use super::basic::{sha384, utf8_or_binary_to_binary_type}; use arrow::datatypes::DataType; use datafusion_common::Result; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; use std::sync::OnceLock; +#[user_doc( + doc_section(label = "Hashing Functions"), + description = "Computes the SHA-384 hash of a binary string.", + syntax_example = "sha384(expression)", + sql_example = r#"```sql +> select sha384('foo'); ++--------------------------------------+ +| sha384(Utf8("foo")) | ++--------------------------------------+ +| | ++--------------------------------------+ +```"#, + standard_argument(name = "expression", prefix = "String") +)] #[derive(Debug)] pub struct SHA384Func { signature: Signature, @@ -74,30 +89,6 @@ impl ScalarUDFImpl for SHA384Func { } fn documentation(&self) -> Option<&Documentation> { - Some(get_sha384_doc()) + self.doc() } } - -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_sha384_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_HASHING, - "Computes the SHA-384 hash of a binary string.", - "sha384(expression)", - ) - .with_sql_example( - r#"```sql -> select sha384('foo'); -+-----------------------------------------+ -| sha384(Utf8("foo")) | -+-----------------------------------------+ -| | -+-----------------------------------------+ -```"#, - ) - .with_standard_argument("expression", Some("String")) - .build() - }) -} diff --git a/datafusion/functions/src/crypto/sha512.rs b/datafusion/functions/src/crypto/sha512.rs index 7d454ff8da35..3d0c82648cca 100644 --- a/datafusion/functions/src/crypto/sha512.rs +++ b/datafusion/functions/src/crypto/sha512.rs @@ -19,13 +19,28 @@ 
use super::basic::{sha512, utf8_or_binary_to_binary_type}; use arrow::datatypes::DataType; use datafusion_common::Result; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; use std::sync::OnceLock; +#[user_doc( + doc_section(label = "Hashing Functions"), + description = "Computes the SHA-512 hash of a binary string.", + syntax_example = "sha512(expression)", + sql_example = r#"```sql +> select sha512('foo'); ++--------------------------------------+ +| sha512(Utf8("foo")) | ++--------------------------------------+ +| | ++--------------------------------------+ +```"#, + standard_argument(name = "expression", prefix = "String") +)] #[derive(Debug)] pub struct SHA512Func { signature: Signature, @@ -74,30 +89,6 @@ impl ScalarUDFImpl for SHA512Func { } fn documentation(&self) -> Option<&Documentation> { - Some(get_sha512_doc()) + self.doc() } } - -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_sha512_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_HASHING, - "Computes the SHA-512 hash of a binary string.", - "sha512(expression)", - ) - .with_sql_example( - r#"```sql -> select sha512('foo'); -+-------------------------------------------+ -| sha512(Utf8("foo")) | -+-------------------------------------------+ -| | -+-------------------------------------------+ -```"#, - ) - .with_argument("expression", "String") - .build() - }) -} diff --git a/datafusion/functions/src/datetime/current_date.rs b/datafusion/functions/src/datetime/current_date.rs index 97d97939d329..7e0214e43d19 100644 --- a/datafusion/functions/src/datetime/current_date.rs +++ b/datafusion/functions/src/datetime/current_date.rs @@ -22,13 +22,23 @@ use arrow::datatypes::DataType::Date32; use chrono::{Datelike, NaiveDate}; use 
datafusion_common::{internal_err, Result, ScalarValue}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME; +use datafusion_doc::DocSection; use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; use datafusion_expr::{ ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::sync::OnceLock; +#[user_doc( + doc_section(label = "Time and Date Functions"), + description = r#" +Returns the current UTC date. + +The `current_date()` return value is determined at query time and will return the same date, no matter when in the query plan the function executes. +"#, + syntax_example = "current_date()" +)] #[derive(Debug)] pub struct CurrentDateFunc { signature: Signature, @@ -105,22 +115,6 @@ impl ScalarUDFImpl for CurrentDateFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_current_date_doc()) + self.doc() } } - -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_current_date_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_DATETIME, - r#" -Returns the current UTC date. - -The `current_date()` return value is determined at query time and will return the same date, no matter when in the query plan the function executes. 
-"#, - "current_date()") - .build() - }) -} diff --git a/datafusion/functions/src/datetime/current_time.rs b/datafusion/functions/src/datetime/current_time.rs index 1cd39e5777ea..9a5c3bd15a5e 100644 --- a/datafusion/functions/src/datetime/current_time.rs +++ b/datafusion/functions/src/datetime/current_time.rs @@ -22,12 +22,22 @@ use std::any::Any; use std::sync::OnceLock; use datafusion_common::{internal_err, Result, ScalarValue}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME; +use datafusion_doc::DocSection; use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; use datafusion_expr::{ ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "Time and Date Functions"), + description = r#" +Returns the current UTC time. + +The `current_time()` return value is determined at query time and will return the same time, no matter when in the query plan the function executes. +"#, + syntax_example = "current_time()" +)] #[derive(Debug)] pub struct CurrentTimeFunc { signature: Signature, @@ -93,22 +103,6 @@ impl ScalarUDFImpl for CurrentTimeFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_current_time_doc()) + self.doc() } } - -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_current_time_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_DATETIME, - r#" -Returns the current UTC time. - -The `current_time()` return value is determined at query time and will return the same time, no matter when in the query plan the function executes. 
-"#, - "current_time()") - .build() - }) -} diff --git a/datafusion/functions/src/datetime/date_bin.rs b/datafusion/functions/src/datetime/date_bin.rs index 49bf00d5c17b..aa32e4c8cf15 100644 --- a/datafusion/functions/src/datetime/date_bin.rs +++ b/datafusion/functions/src/datetime/date_bin.rs @@ -32,15 +32,72 @@ use arrow::datatypes::{DataType, TimeUnit}; use datafusion_common::cast::as_primitive_array; use datafusion_common::{exec_err, not_impl_err, plan_err, Result, ScalarValue}; +use datafusion_doc::DocSection; use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD, }; +use datafusion_macros::user_doc; use chrono::{DateTime, Datelike, Duration, Months, TimeDelta, Utc}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME; +#[user_doc( + doc_section(label = "Time and Date Functions"), + description = r#" +Calculates time intervals and returns the start of the interval nearest to the specified timestamp. Use `date_bin` to downsample time series data by grouping rows into time-based "bins" or "windows" and applying an aggregate or selector function to each window. + +For example, if you "bin" or "window" data into 15 minute intervals, an input timestamp of `2023-01-01T18:18:18Z` will be updated to the start time of the 15 minute bin it is in: `2023-01-01T18:15:00Z`. +"#, + syntax_example = "date_bin(interval, expression, origin-timestamp)", + sql_example = r#"```sql +-- Bin the timestamp into 1 day intervals +> SELECT date_bin(interval '1 day', time) as bin +FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z') t(time); ++---------------------+ +| bin | ++---------------------+ +| 2023-01-01T00:00:00 | +| 2023-01-03T00:00:00 | ++---------------------+ +2 row(s) fetched. 
+ +-- Bin the timestamp into 1 day intervals starting at 3AM on 2023-01-01 +> SELECT date_bin(interval '1 day', time, '2023-01-01T03:00:00') as bin +FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z') t(time); ++---------------------+ +| bin | ++---------------------+ +| 2023-01-01T03:00:00 | +| 2023-01-03T03:00:00 | ++---------------------+ +2 row(s) fetched. +```"#, + argument(name = "interval", description = "Bin interval."), + argument( + name = "expression", + description = "Time expression to operate on. Can be a constant, column, or function." + ), + argument( + name = "origin-timestamp", + description = "Optional. Starting point used to determine bin boundaries. If not specified defaults 1970-01-01T00:00:00Z (the UNIX epoch in UTC). + +The following intervals are supported: + +- nanoseconds +- microseconds +- milliseconds +- seconds +- minutes +- hours +- days +- weeks +- months +- years +- century +" + ) +)] #[derive(Debug)] pub struct DateBinFunc { signature: Signature, @@ -169,68 +226,10 @@ impl ScalarUDFImpl for DateBinFunc { } } fn documentation(&self) -> Option<&Documentation> { - Some(get_date_bin_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_date_bin_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_DATETIME, - r#" -Calculates time intervals and returns the start of the interval nearest to the specified timestamp. Use `date_bin` to downsample time series data by grouping rows into time-based "bins" or "windows" and applying an aggregate or selector function to each window. - -For example, if you "bin" or "window" data into 15 minute intervals, an input timestamp of `2023-01-01T18:18:18Z` will be updated to the start time of the 15 minute bin it is in: `2023-01-01T18:15:00Z`. 
-"#, - "date_bin(interval, expression, origin-timestamp)") - .with_sql_example(r#"```sql --- Bin the timestamp into 1 day intervals -> SELECT date_bin(interval '1 day', time) as bin -FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z') t(time); -+---------------------+ -| bin | -+---------------------+ -| 2023-01-01T00:00:00 | -| 2023-01-03T00:00:00 | -+---------------------+ -2 row(s) fetched. - --- Bin the timestamp into 1 day intervals starting at 3AM on 2023-01-01 -> SELECT date_bin(interval '1 day', time, '2023-01-01T03:00:00') as bin -FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z') t(time); -+---------------------+ -| bin | -+---------------------+ -| 2023-01-01T03:00:00 | -| 2023-01-03T03:00:00 | -+---------------------+ -2 row(s) fetched. -``` -"#) - .with_argument("interval", "Bin interval.") - .with_argument("expression", "Time expression to operate on. Can be a constant, column, or function.") - .with_argument("origin-timestamp", "Optional. Starting point used to determine bin boundaries. If not specified defaults 1970-01-01T00:00:00Z (the UNIX epoch in UTC). 
- -The following intervals are supported: - -- nanoseconds -- microseconds -- milliseconds -- seconds -- minutes -- hours -- days -- weeks -- months -- years -- century -") - .build() - }) -} - enum Interval { Nanoseconds(i64), Months(i64), diff --git a/datafusion/functions/src/datetime/date_part.rs b/datafusion/functions/src/datetime/date_part.rs index b8c58a11d999..2cace4a20273 100644 --- a/datafusion/functions/src/datetime/date_part.rs +++ b/datafusion/functions/src/datetime/date_part.rs @@ -36,13 +36,44 @@ use datafusion_common::cast::{ as_timestamp_nanosecond_array, as_timestamp_second_array, }; use datafusion_common::{exec_err, internal_err, ExprSchema, Result, ScalarValue}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME; +use datafusion_doc::DocSection; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD, }; - +use datafusion_macros::user_doc; + +#[user_doc( + doc_section(label = "Time and Date Functions"), + description = "Returns the specified part of the date as an integer.", + syntax_example = "date_part(part, expression)", + argument( + name = "part", + description = r#"Part of the date to return. The following date parts are supported: + + - year + - quarter (emits value in inclusive range [1, 4] based on which quartile of the year the date is in) + - month + - week (week of the year) + - day (day of the month) + - hour + - minute + - second + - millisecond + - microsecond + - nanosecond + - dow (day of the week) + - doy (day of the year) + - epoch (seconds since Unix epoch) +"# + ), + argument( + name = "expression", + description = "Time expression to operate on. Can be a constant, column, or function." 
+ ), + alternative_syntax = "extract(field FROM source)" +)] #[derive(Debug)] pub struct DatePartFunc { signature: Signature, @@ -233,7 +264,7 @@ impl ScalarUDFImpl for DatePartFunc { &self.aliases } fn documentation(&self) -> Option<&Documentation> { - Some(get_date_part_doc()) + self.doc() } } @@ -249,43 +280,6 @@ fn part_normalization(part: &str) -> &str { .unwrap_or(part) } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_date_part_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_DATETIME, - "Returns the specified part of the date as an integer.", - "date_part(part, expression)") - .with_argument( - "part", - r#"Part of the date to return. The following date parts are supported: - - - year - - quarter (emits value in inclusive range [1, 4] based on which quartile of the year the date is in) - - month - - week (week of the year) - - day (day of the month) - - hour - - minute - - second - - millisecond - - microsecond - - nanosecond - - dow (day of the week) - - doy (day of the year) - - epoch (seconds since Unix epoch) -"#, - ) - .with_argument( - "expression", - "Time expression to operate on. Can be a constant, column, or function.", - ) - .with_alternative_syntax("extract(field FROM source)") - .build() - }) -} - /// Invoke [`date_part`] on an `array` (e.g. 
Timestamp) and convert the /// result to a total number of seconds, milliseconds, microseconds or /// nanoseconds diff --git a/datafusion/functions/src/datetime/date_trunc.rs b/datafusion/functions/src/datetime/date_trunc.rs index b9f3bbf65973..6690c67b491a 100644 --- a/datafusion/functions/src/datetime/date_trunc.rs +++ b/datafusion/functions/src/datetime/date_trunc.rs @@ -33,17 +33,41 @@ use arrow::datatypes::DataType::{self, Null, Timestamp, Utf8, Utf8View}; use arrow::datatypes::TimeUnit::{self, Microsecond, Millisecond, Nanosecond, Second}; use datafusion_common::cast::as_primitive_array; use datafusion_common::{exec_err, plan_err, DataFusionError, Result, ScalarValue}; +use datafusion_doc::DocSection; use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD, }; +use datafusion_macros::user_doc; use chrono::{ DateTime, Datelike, Duration, LocalResult, NaiveDateTime, Offset, TimeDelta, Timelike, }; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME; +#[user_doc( + doc_section(label = "Time and Date Functions"), + description = "Truncates a timestamp value to a specified precision.", + syntax_example = "date_trunc(precision, expression)", + argument( + name = "precision", + description = r#"Time precision to truncate to. The following precisions are supported: + + - year / YEAR + - quarter / QUARTER + - month / MONTH + - week / WEEK + - day / DAY + - hour / HOUR + - minute / MINUTE + - second / SECOND +"# + ), + argument( + name = "expression", + description = "Time expression to operate on. Can be a constant, column, or function." 
+ ) +)] #[derive(Debug)] pub struct DateTruncFunc { signature: Signature, @@ -247,41 +271,10 @@ impl ScalarUDFImpl for DateTruncFunc { } } fn documentation(&self) -> Option<&Documentation> { - Some(get_date_trunc_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_date_trunc_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_DATETIME, - "Truncates a timestamp value to a specified precision.", - "date_trunc(precision, expression)", - ) - .with_argument( - "precision", - r#"Time precision to truncate to. The following precisions are supported: - - - year / YEAR - - quarter / QUARTER - - month / MONTH - - week / WEEK - - day / DAY - - hour / HOUR - - minute / MINUTE - - second / SECOND -"#, - ) - .with_argument( - "expression", - "Time expression to operate on. Can be a constant, column, or function.", - ) - .build() - }) -} - fn _date_trunc_coarse(granularity: &str, value: Option) -> Result> where T: Datelike + Timelike + Sub + Copy, diff --git a/datafusion/functions/src/datetime/from_unixtime.rs b/datafusion/functions/src/datetime/from_unixtime.rs index 374c744915f7..376ea7d14ef0 100644 --- a/datafusion/functions/src/datetime/from_unixtime.rs +++ b/datafusion/functions/src/datetime/from_unixtime.rs @@ -22,12 +22,31 @@ use arrow::datatypes::DataType; use arrow::datatypes::DataType::{Int64, Timestamp, Utf8}; use arrow::datatypes::TimeUnit::Second; use datafusion_common::{exec_err, internal_err, ExprSchema, Result, ScalarValue}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME; +use datafusion_doc::DocSection; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "Time and Date Functions"), + description = "Converts an integer to RFC3339 timestamp format (`YYYY-MM-DDT00:00:00.000000000Z`). 
Integers and unsigned integers are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`) and the corresponding timestamp is returned.", + syntax_example = "from_unixtime(expression[, timezone])", + sql_example = r#"```sql +> select from_unixtime(1599572549, 'America/New_York'); ++-----------------------------------------------------------+ +| from_unixtime(Int64(1599572549),Utf8("America/New_York")) | ++-----------------------------------------------------------+ +| 2020-09-08T09:42:29-04:00 | ++-----------------------------------------------------------+ +```"#, + standard_argument(name = "expression",), + argument( + name = "timezone", + description = "Optional timezone to use when converting the integer to a timestamp. If not provided, the default timezone is UTC." + ) +)] #[derive(Debug)] pub struct FromUnixtimeFunc { signature: Signature, @@ -125,35 +144,10 @@ impl ScalarUDFImpl for FromUnixtimeFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_from_unixtime_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_from_unixtime_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_DATETIME, - "Converts an integer to RFC3339 timestamp format (`YYYY-MM-DDT00:00:00.000000000Z`). Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`) return the corresponding timestamp.", - "from_unixtime(expression[, timezone])") - .with_standard_argument("expression", None) - .with_argument( - "timezone", - "Optional timezone to use when converting the integer to a timestamp.
If not provided, the default timezone is UTC.", - ) - .with_sql_example(r#"```sql -> select from_unixtime(1599572549, 'America/New_York'); -+-----------------------------------------------------------+ -| from_unixtime(Int64(1599572549),Utf8("America/New_York")) | -+-----------------------------------------------------------+ -| 2020-09-08T09:42:29-04:00 | -+-----------------------------------------------------------+ -```"#) - .build() - }) -} - #[cfg(test)] mod test { use crate::datetime::from_unixtime::FromUnixtimeFunc; diff --git a/datafusion/functions/src/datetime/make_date.rs b/datafusion/functions/src/datetime/make_date.rs index a9340f82f23d..59cbc9e0cc83 100644 --- a/datafusion/functions/src/datetime/make_date.rs +++ b/datafusion/functions/src/datetime/make_date.rs @@ -27,11 +27,46 @@ use arrow::datatypes::DataType::{Date32, Int32, Int64, UInt32, UInt64, Utf8, Utf use chrono::prelude::*; use datafusion_common::{exec_err, Result, ScalarValue}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "Time and Date Functions"), + description = "Make a date from year/month/day component parts.", + syntax_example = "make_date(year, month, day)", + sql_example = r#"```sql +> select make_date(2023, 1, 31); ++-------------------------------------------+ +| make_date(Int64(2023),Int64(1),Int64(31)) | ++-------------------------------------------+ +| 2023-01-31 | ++-------------------------------------------+ +> select make_date('2023', '01', '31'); ++-----------------------------------------------+ +| make_date(Utf8("2023"),Utf8("01"),Utf8("31")) | ++-----------------------------------------------+ +| 2023-01-31 | ++-----------------------------------------------+ +``` + +Additional examples can be found
[here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/make_date.rs) +"#, + argument( + name = "year", + description = "Year to use when making the date. Can be a constant, column or function, and any combination of arithmetic operators." + ), + argument( + name = "month", + description = "Month to use when making the date. Can be a constant, column or function, and any combination of arithmetic operators." + ), + argument( + name = "day", + description = "Day to use when making the date. Can be a constant, column or function, and any combination of arithmetic operators." + ) +)] #[derive(Debug)] pub struct MakeDateFunc { signature: Signature, @@ -156,47 +191,10 @@ impl ScalarUDFImpl for MakeDateFunc { Ok(value) } fn documentation(&self) -> Option<&Documentation> { - Some(get_make_date_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_make_date_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_DATETIME, - "Make a date from year/month/day component parts.", - "make_date(year, month, day)") - .with_argument( - "year", - " Year to use when making the date. Can be a constant, column or function, and any combination of arithmetic operators.", ) - .with_argument( - "month", - "Month to use when making the date. Can be a constant, column or function, and any combination of arithmetic operators.", - ) - .with_argument("day", "Day to use when making the date. 
Can be a constant, column or function, and any combination of arithmetic operators.") - .with_sql_example(r#"```sql -> select make_date(2023, 1, 31); -+-------------------------------------------+ -| make_date(Int64(2023),Int64(1),Int64(31)) | -+-------------------------------------------+ -| 2023-01-31 | -+-------------------------------------------+ -> select make_date('2023', '01', '31'); -+-----------------------------------------------+ -| make_date(Utf8("2023"),Utf8("01"),Utf8("31")) | -+-----------------------------------------------+ -| 2023-01-31 | -+-----------------------------------------------+ -``` - -Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/make_date.rs) -"#) - .build() - }) -} - /// Converts the year/month/day fields to an `i32` representing the days from /// the unix epoch and invokes `date_consumer_fn` with the value fn make_date_inner( diff --git a/datafusion/functions/src/datetime/now.rs b/datafusion/functions/src/datetime/now.rs index 58381473a9ab..b6c37935386f 100644 --- a/datafusion/functions/src/datetime/now.rs +++ b/datafusion/functions/src/datetime/now.rs @@ -22,12 +22,22 @@ use std::any::Any; use std::sync::OnceLock; use datafusion_common::{internal_err, ExprSchema, Result, ScalarValue}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME; +use datafusion_doc::DocSection; use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; use datafusion_expr::{ ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "Time and Date Functions"), + description = r#" +Returns the current UTC timestamp. + +The `now()` return value is determined at query time and will return the same timestamp, no matter when in the query plan the function executes. 
+"#, + syntax_example = "now()" +)] #[derive(Debug)] pub struct NowFunc { signature: Signature, @@ -93,9 +103,6 @@ impl ScalarUDFImpl for NowFunc { ScalarValue::TimestampNanosecond(now_ts, Some("+00:00".into())), ))) } - fn documentation(&self) -> Option<&Documentation> { - Some(get_to_unixtime_doc()) - } fn aliases(&self) -> &[String] { &self.aliases @@ -104,20 +111,8 @@ impl ScalarUDFImpl for NowFunc { fn is_nullable(&self, _args: &[Expr], _schema: &dyn ExprSchema) -> bool { false } -} -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_to_unixtime_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_DATETIME, - r#" -Returns the current UTC timestamp. - -The `now()` return value is determined at query time and will return the same timestamp, no matter when in the query plan the function executes. -"#, - "now()") - .build() - }) + fn documentation(&self) -> Option<&Documentation> { + self.doc() + } } diff --git a/datafusion/functions/src/datetime/to_char.rs b/datafusion/functions/src/datetime/to_char.rs index 4e3fcd1dc13a..96c0a096e3a9 100644 --- a/datafusion/functions/src/datetime/to_char.rs +++ b/datafusion/functions/src/datetime/to_char.rs @@ -29,12 +29,41 @@ use arrow::error::ArrowError; use arrow::util::display::{ArrayFormatter, DurationFormat, FormatOptions}; use datafusion_common::{exec_err, Result, ScalarValue}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME; +use datafusion_doc::DocSection; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD, }; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "Time and Date Functions"), + description = "Returns a string representation of a date, time, timestamp or duration based on a [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html). 
Unlike the PostgreSQL equivalent of this function numerical formatting is not supported.", + syntax_example = "to_char(expression, format)", + sql_example = r#"```sql +> select to_char('2023-03-01'::date, '%d-%m-%Y'); ++----------------------------------------------+ +| to_char(Utf8("2023-03-01"),Utf8("%d-%m-%Y")) | ++----------------------------------------------+ +| 01-03-2023 | ++----------------------------------------------+ +``` + +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_char.rs) +"#, + argument( + name = "expression", + description = "Expression to operate on. Can be a constant, column, or function that results in a date, time, timestamp or duration." + ), + argument( + name = "format", + description = "A [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) string to use to convert the expression." + ), + argument( + name = "day", + description = "Day to use when making the date. Can be a constant, column or function, and any combination of arithmetic operators." + ) +)] #[derive(Debug)] pub struct ToCharFunc { signature: Signature, @@ -143,42 +172,10 @@ impl ScalarUDFImpl for ToCharFunc { &self.aliases } fn documentation(&self) -> Option<&Documentation> { - Some(get_to_char_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_to_char_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_DATETIME, - "Returns a string representation of a date, time, timestamp or duration based on a [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html). Unlike the PostgreSQL equivalent of this function numerical formatting is not supported.", - "to_char(expression, format)") - .with_argument( - "expression", - " Expression to operate on. Can be a constant, column, or function that results in a date, time, timestamp or duration." 
- ) - .with_argument( - "format", - "A [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) string to use to convert the expression.", - ) - .with_argument("day", "Day to use when making the date. Can be a constant, column or function, and any combination of arithmetic operators.") - .with_sql_example(r#"```sql -> select to_char('2023-03-01'::date, '%d-%m-%Y'); -+----------------------------------------------+ -| to_char(Utf8("2023-03-01"),Utf8("%d-%m-%Y")) | -+----------------------------------------------+ -| 01-03-2023 | -+----------------------------------------------+ -``` - -Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_char.rs) -"#) - .build() - }) -} - fn _build_format_options<'a>( data_type: &DataType, format: Option<&'a str>, diff --git a/datafusion/functions/src/datetime/to_date.rs b/datafusion/functions/src/datetime/to_date.rs index e2edea843e98..e6e84e945837 100644 --- a/datafusion/functions/src/datetime/to_date.rs +++ b/datafusion/functions/src/datetime/to_date.rs @@ -40,21 +40,23 @@ Returns the corresponding date. Note: `to_date` returns Date32, which represents its values as the number of days since unix epoch(`1970-01-01`) stored as signed 32 bit value. 
The largest supported date value is `9999-12-31`.", syntax_example = "to_date('2017-05-31', '%Y-%m-%d')", - sql_example = "```sql\n\ -> select to_date('2023-01-31');\n\ -+-----------------------------+\n\ -| to_date(Utf8(\"2023-01-31\")) |\n\ -+-----------------------------+\n\ -| 2023-01-31 |\n\ -+-----------------------------+\n\ -> select to_date('2023/01/31', '%Y-%m-%d', '%Y/%m/%d');\n\ -+---------------------------------------------------------------+\n\ -| to_date(Utf8(\"2023/01/31\"),Utf8(\"%Y-%m-%d\"),Utf8(\"%Y/%m/%d\")) |\n\ -+---------------------------------------------------------------+\n\ -| 2023-01-31 |\n\ -+---------------------------------------------------------------+\n\ -```\n\n\ -Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_date.rs)", + sql_example = r#"```sql +> select to_date('2023-01-31'); ++-----------------------------+ +| to_date(Utf8("2023-01-31")) | ++-----------------------------+ +| 2023-01-31 | ++-----------------------------+ +> select to_date('2023/01/31', '%Y-%m-%d', '%Y/%m/%d'); ++---------------------------------------------------------------+ +| to_date(Utf8("2023/01/31"),Utf8("%Y-%m-%d"),Utf8("%Y/%m/%d")) | ++---------------------------------------------------------------+ +| 2023-01-31 | ++---------------------------------------------------------------+ +``` + +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_date.rs) +"#, standard_argument(name = "expression", prefix = "String"), argument( name = "format_n", diff --git a/datafusion/functions/src/datetime/to_local_time.rs b/datafusion/functions/src/datetime/to_local_time.rs index eaa91d1140ba..3db4d622737a 100644 --- a/datafusion/functions/src/datetime/to_local_time.rs +++ b/datafusion/functions/src/datetime/to_local_time.rs @@ -31,14 +31,70 @@ use arrow::datatypes::{ use chrono::{DateTime,
MappedLocalTime, Offset, TimeDelta, TimeZone, Utc}; use datafusion_common::cast::as_primitive_array; use datafusion_common::{exec_err, plan_err, DataFusionError, Result, ScalarValue}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; /// A UDF function that converts a timezone-aware timestamp to local time (with no offset or /// timezone information). In other words, this function strips off the timezone from the timestamp, /// while keep the display value of the timestamp the same. +#[user_doc( + doc_section(label = "Time and Date Functions"), + description = "Converts a timestamp with a timezone to a timestamp without a timezone (with no offset or timezone information). This function handles daylight saving time changes.", + syntax_example = "to_local_time(expression)", + sql_example = r#"```sql +> SELECT to_local_time('2024-04-01T00:00:20Z'::timestamp); ++---------------------------------------------+ +| to_local_time(Utf8("2024-04-01T00:00:20Z")) | ++---------------------------------------------+ +| 2024-04-01T00:00:20 | ++---------------------------------------------+ + +> SELECT to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels'); ++---------------------------------------------+ +| to_local_time(Utf8("2024-04-01T00:00:20Z")) | ++---------------------------------------------+ +| 2024-04-01T00:00:20 | ++---------------------------------------------+ + +> SELECT + time, + arrow_typeof(time) as type, + to_local_time(time) as to_local_time, + arrow_typeof(to_local_time(time)) as to_local_time_type +FROM ( + SELECT '2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels' AS time +); ++---------------------------+------------------------------------------------+---------------------+-----------------------------+ +| time | type | to_local_time | 
to_local_time_type | ++---------------------------+------------------------------------------------+---------------------+-----------------------------+ +| 2024-04-01T00:00:20+02:00 | Timestamp(Nanosecond, Some("Europe/Brussels")) | 2024-04-01T00:00:20 | Timestamp(Nanosecond, None) | ++---------------------------+------------------------------------------------+---------------------+-----------------------------+ + +# combine `to_local_time()` with `date_bin()` to bin on boundaries in the timezone rather +# than UTC boundaries + +> SELECT date_bin(interval '1 day', to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels')) AS date_bin; ++---------------------+ +| date_bin | ++---------------------+ +| 2024-04-01T00:00:00 | ++---------------------+ + +> SELECT date_bin(interval '1 day', to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels')) AT TIME ZONE 'Europe/Brussels' AS date_bin_with_timezone; ++---------------------------+ +| date_bin_with_timezone | ++---------------------------+ +| 2024-04-01T00:00:00+02:00 | ++---------------------------+ +```"#, + argument( + name = "expression", + description = "Time expression to operate on. Can be a constant, column, or function." + ) +)] #[derive(Debug)] pub struct ToLocalTimeFunc { signature: Signature, @@ -359,72 +415,10 @@ impl ScalarUDFImpl for ToLocalTimeFunc { } } fn documentation(&self) -> Option<&Documentation> { - Some(get_to_local_time_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_to_local_time_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_DATETIME, - "Converts a timestamp with a timezone to a timestamp without a timezone (with no offset or timezone information). This function handles daylight saving time changes.", - "to_local_time(expression)") - .with_argument( - "expression", - "Time expression to operate on. Can be a constant, column, or function." 
- ) - .with_sql_example(r#"```sql -> SELECT to_local_time('2024-04-01T00:00:20Z'::timestamp); -+---------------------------------------------+ -| to_local_time(Utf8("2024-04-01T00:00:20Z")) | -+---------------------------------------------+ -| 2024-04-01T00:00:20 | -+---------------------------------------------+ - -> SELECT to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels'); -+---------------------------------------------+ -| to_local_time(Utf8("2024-04-01T00:00:20Z")) | -+---------------------------------------------+ -| 2024-04-01T00:00:20 | -+---------------------------------------------+ - -> SELECT - time, - arrow_typeof(time) as type, - to_local_time(time) as to_local_time, - arrow_typeof(to_local_time(time)) as to_local_time_type -FROM ( - SELECT '2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels' AS time -); -+---------------------------+------------------------------------------------+---------------------+-----------------------------+ -| time | type | to_local_time | to_local_time_type | -+---------------------------+------------------------------------------------+---------------------+-----------------------------+ -| 2024-04-01T00:00:20+02:00 | Timestamp(Nanosecond, Some("Europe/Brussels")) | 2024-04-01T00:00:20 | Timestamp(Nanosecond, None) | -+---------------------------+------------------------------------------------+---------------------+-----------------------------+ - -# combine `to_local_time()` with `date_bin()` to bin on boundaries in the timezone rather -# than UTC boundaries - -> SELECT date_bin(interval '1 day', to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels')) AS date_bin; -+---------------------+ -| date_bin | -+---------------------+ -| 2024-04-01T00:00:00 | -+---------------------+ - -> SELECT date_bin(interval '1 day', to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels')) AT TIME ZONE 'Europe/Brussels' AS date_bin_with_timezone; 
-+---------------------------+ -| date_bin_with_timezone | -+---------------------------+ -| 2024-04-01T00:00:00+02:00 | -+---------------------------+ -```"#) - .build() - }) -} - #[cfg(test)] mod tests { use std::sync::Arc; diff --git a/datafusion/functions/src/datetime/to_timestamp.rs b/datafusion/functions/src/datetime/to_timestamp.rs index 430d4a970810..41173f30dd42 100644 --- a/datafusion/functions/src/datetime/to_timestamp.rs +++ b/datafusion/functions/src/datetime/to_timestamp.rs @@ -27,31 +27,181 @@ use arrow::datatypes::{ use crate::datetime::common::*; use datafusion_common::{exec_err, Result, ScalarType}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "Time and Date Functions"), + description = r#" +Converts a value to a timestamp (`YYYY-MM-DDT00:00:00Z`). Supports strings, integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats] are provided. Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp. + +Note: `to_timestamp` returns `Timestamp(Nanosecond)`. The supported range for integer input is between `-9223372037` and `9223372036`. Supported range for string input is between `1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. Please use `to_timestamp_seconds` for the input outside of supported bounds. 
+"#, + syntax_example = "to_timestamp(expression[, ..., format_n])", + sql_example = r#"```sql +> select to_timestamp('2023-01-31T09:26:56.123456789-05:00'); ++-----------------------------------------------------------+ +| to_timestamp(Utf8("2023-01-31T09:26:56.123456789-05:00")) | ++-----------------------------------------------------------+ +| 2023-01-31T14:26:56.123456789 | ++-----------------------------------------------------------+ +> select to_timestamp('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); ++--------------------------------------------------------------------------------------------------------+ +| to_timestamp(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | ++--------------------------------------------------------------------------------------------------------+ +| 2023-05-17T03:59:00.123456789 | ++--------------------------------------------------------------------------------------------------------+ +``` +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) +"#, + argument( + name = "expression", + description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators." + ), + argument( + name = "format_n", + description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned." + ) +)] #[derive(Debug)] pub struct ToTimestampFunc { signature: Signature, } +#[user_doc( + doc_section(label = "Time and Date Functions"), + description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. 
'2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers and unsigned integers are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.", + syntax_example = "to_timestamp_seconds(expression[, ..., format_n])", + sql_example = r#"```sql +> select to_timestamp_seconds('2023-01-31T09:26:56.123456789-05:00'); ++-------------------------------------------------------------------+ +| to_timestamp_seconds(Utf8("2023-01-31T09:26:56.123456789-05:00")) | ++-------------------------------------------------------------------+ +| 2023-01-31T14:26:56 | ++-------------------------------------------------------------------+ +> select to_timestamp_seconds('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); ++----------------------------------------------------------------------------------------------------------------+ +| to_timestamp_seconds(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | ++----------------------------------------------------------------------------------------------------------------+ +| 2023-05-17T03:59:00 | ++----------------------------------------------------------------------------------------------------------------+ +``` +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) +"#, + argument( + name = "expression", + description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators." + ), + argument( + name = "format_n", + description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
+ ) +)] #[derive(Debug)] pub struct ToTimestampSecondsFunc { signature: Signature, } +#[user_doc( + doc_section(label = "Time and Date Functions"), + description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers and unsigned integers are interpreted as milliseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.", + syntax_example = "to_timestamp_millis(expression[, ..., format_n])", + sql_example = r#"```sql +> select to_timestamp_millis('2023-01-31T09:26:56.123456789-05:00'); ++------------------------------------------------------------------+ +| to_timestamp_millis(Utf8("2023-01-31T09:26:56.123456789-05:00")) | ++------------------------------------------------------------------+ +| 2023-01-31T14:26:56.123 | ++------------------------------------------------------------------+ +> select to_timestamp_millis('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); ++---------------------------------------------------------------------------------------------------------------+ +| to_timestamp_millis(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | ++---------------------------------------------------------------------------------------------------------------+ +| 2023-05-17T03:59:00.123 | ++---------------------------------------------------------------------------------------------------------------+ +``` +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) +"#, + argument( + name = "expression", + description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators." 
+ ), + argument( + name = "format_n", + description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned." + ) +)] #[derive(Debug)] pub struct ToTimestampMillisFunc { signature: Signature, } +#[user_doc( + doc_section(label = "Time and Date Functions"), + description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers and unsigned integers are interpreted as microseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.", + syntax_example = "to_timestamp_micros(expression[, ..., format_n])", + sql_example = r#"```sql +> select to_timestamp_micros('2023-01-31T09:26:56.123456789-05:00'); ++------------------------------------------------------------------+ +| to_timestamp_micros(Utf8("2023-01-31T09:26:56.123456789-05:00")) | ++------------------------------------------------------------------+ +| 2023-01-31T14:26:56.123456 | ++------------------------------------------------------------------+ +> select to_timestamp_micros('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); ++---------------------------------------------------------------------------------------------------------------+ +| to_timestamp_micros(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | ++---------------------------------------------------------------------------------------------------------------+ +| 2023-05-17T03:59:00.123456 | 
++---------------------------------------------------------------------------------------------------------------+ +``` +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) +"#, + argument( + name = "expression", + description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators." + ), + argument( + name = "format_n", + description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned." + ) +)] #[derive(Debug)] pub struct ToTimestampMicrosFunc { signature: Signature, } +#[user_doc( + doc_section(label = "Time and Date Functions"), + description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`). 
Returns the corresponding timestamp.", + syntax_example = "to_timestamp_nanos(expression[, ..., format_n])", + sql_example = r#"```sql +> select to_timestamp_nanos('2023-01-31T09:26:56.123456789-05:00'); ++-----------------------------------------------------------------+ +| to_timestamp_nanos(Utf8("2023-01-31T09:26:56.123456789-05:00")) | ++-----------------------------------------------------------------+ +| 2023-01-31T14:26:56.123456789 | ++-----------------------------------------------------------------+ +> select to_timestamp_nanos('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); ++--------------------------------------------------------------------------------------------------------------+ +| to_timestamp_nanos(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | ++--------------------------------------------------------------------------------------------------------------+ +| 2023-05-17T03:59:00.123456789 | ++---------------------------------------------------------------------------------------------------------------+ +``` +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) +"#, + argument( + name = "expression", + description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators." + ), + argument( + name = "format_n", + description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned." 
+ ) +)] #[derive(Debug)] pub struct ToTimestampNanosFunc { signature: Signature, @@ -189,50 +339,10 @@ impl ScalarUDFImpl for ToTimestampFunc { } } fn documentation(&self) -> Option<&Documentation> { - Some(get_to_timestamp_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_to_timestamp_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_DATETIME, - r#" -Converts a value to a timestamp (`YYYY-MM-DDT00:00:00Z`). Supports strings, integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats] are provided. Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp. - -Note: `to_timestamp` returns `Timestamp(Nanosecond)`. The supported range for integer input is between `-9223372037` and `9223372036`. Supported range for string input is between `1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. Please use `to_timestamp_seconds` for the input outside of supported bounds. -"#, - "to_timestamp(expression[, ..., format_n])") - .with_argument( - "expression", - "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators." - ) - .with_argument( - "format_n", - "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. 
If none of the formats successfully parse the expression an error will be returned.", - ) - .with_sql_example(r#"```sql -> select to_timestamp('2023-01-31T09:26:56.123456789-05:00'); -+-----------------------------------------------------------+ -| to_timestamp(Utf8("2023-01-31T09:26:56.123456789-05:00")) | -+-----------------------------------------------------------+ -| 2023-01-31T14:26:56.123456789 | -+-----------------------------------------------------------+ -> select to_timestamp('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); -+--------------------------------------------------------------------------------------------------------+ -| to_timestamp(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | -+--------------------------------------------------------------------------------------------------------+ -| 2023-05-17T03:59:00.123456789 | -+--------------------------------------------------------------------------------------------------------+ -``` -Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) -"#) - .build() - }) -} - impl ScalarUDFImpl for ToTimestampSecondsFunc { fn as_any(&self) -> &dyn Any { self @@ -284,46 +394,10 @@ impl ScalarUDFImpl for ToTimestampSecondsFunc { } } fn documentation(&self) -> Option<&Documentation> { - Some(get_to_timestamp_seconds_doc()) + self.doc() } } -static TO_TIMESTAMP_SECONDS_DOC: OnceLock = OnceLock::new(); - -fn get_to_timestamp_seconds_doc() -> &'static Documentation { - TO_TIMESTAMP_SECONDS_DOC.get_or_init(|| { - Documentation::builder( - DOC_SECTION_DATETIME, - "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. 
Integers and unsigned integers are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.", - "to_timestamp_seconds(expression[, ..., format_n])") - .with_argument( - "expression", - "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators." - ) - .with_argument( - "format_n", - "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned.", - ) - .with_sql_example(r#"```sql -> select to_timestamp_seconds('2023-01-31T09:26:56.123456789-05:00'); -+-------------------------------------------------------------------+ -| to_timestamp_seconds(Utf8("2023-01-31T09:26:56.123456789-05:00")) | -+-------------------------------------------------------------------+ -| 2023-01-31T14:26:56 | -+-------------------------------------------------------------------+ -> select to_timestamp_seconds('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); -+----------------------------------------------------------------------------------------------------------------+ -| to_timestamp_seconds(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | -+----------------------------------------------------------------------------------------------------------------+ -| 2023-05-17T03:59:00 | -+----------------------------------------------------------------------------------------------------------------+ -``` -Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) -"#) - .build() - }) -} - impl ScalarUDFImpl for ToTimestampMillisFunc { fn as_any(&self) -> &dyn Any { self @@ -377,46 +451,10 @@ impl ScalarUDFImpl for 
ToTimestampMillisFunc { } } fn documentation(&self) -> Option<&Documentation> { - Some(get_to_timestamp_millis_doc()) + self.doc() } } -static TO_TIMESTAMP_MILLIS_DOC: OnceLock = OnceLock::new(); - -fn get_to_timestamp_millis_doc() -> &'static Documentation { - TO_TIMESTAMP_MILLIS_DOC.get_or_init(|| { - Documentation::builder( - DOC_SECTION_DATETIME, - "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers and unsigned integers are interpreted as milliseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.", - "to_timestamp_millis(expression[, ..., format_n])") - .with_argument( - "expression", - "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators." - ) - .with_argument( - "format_n", - "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. 
If none of the formats successfully parse the expression an error will be returned.", - ) - .with_sql_example(r#"```sql -> select to_timestamp_millis('2023-01-31T09:26:56.123456789-05:00'); -+------------------------------------------------------------------+ -| to_timestamp_millis(Utf8("2023-01-31T09:26:56.123456789-05:00")) | -+------------------------------------------------------------------+ -| 2023-01-31T14:26:56.123 | -+------------------------------------------------------------------+ -> select to_timestamp_millis('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); -+---------------------------------------------------------------------------------------------------------------+ -| to_timestamp_millis(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | -+---------------------------------------------------------------------------------------------------------------+ -| 2023-05-17T03:59:00.123 | -+---------------------------------------------------------------------------------------------------------------+ -``` -Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) -"#) - .build() - }) -} - impl ScalarUDFImpl for ToTimestampMicrosFunc { fn as_any(&self) -> &dyn Any { self @@ -470,46 +508,10 @@ impl ScalarUDFImpl for ToTimestampMicrosFunc { } } fn documentation(&self) -> Option<&Documentation> { - Some(get_to_timestamp_micros_doc()) + self.doc() } } -static TO_TIMESTAMP_MICROS_DOC: OnceLock = OnceLock::new(); - -fn get_to_timestamp_micros_doc() -> &'static Documentation { - TO_TIMESTAMP_MICROS_DOC.get_or_init(|| { - Documentation::builder( - DOC_SECTION_DATETIME, - "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. 
'2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as microseconds since the unix epoch (`1970-01-01T00:00:00Z`) Returns the corresponding timestamp.", - "to_timestamp_micros(expression[, ..., format_n])") - .with_argument( - "expression", - "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators." - ) - .with_argument( - "format_n", - "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned.", - ) - .with_sql_example(r#"```sql -> select to_timestamp_micros('2023-01-31T09:26:56.123456789-05:00'); -+------------------------------------------------------------------+ -| to_timestamp_micros(Utf8("2023-01-31T09:26:56.123456789-05:00")) | -+------------------------------------------------------------------+ -| 2023-01-31T14:26:56.123456 | -+------------------------------------------------------------------+ -> select to_timestamp_micros('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); -+---------------------------------------------------------------------------------------------------------------+ -| to_timestamp_micros(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | -+---------------------------------------------------------------------------------------------------------------+ -| 2023-05-17T03:59:00.123456 | -+---------------------------------------------------------------------------------------------------------------+ -``` -Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) -"#) - .build() - }) -} - 
impl ScalarUDFImpl for ToTimestampNanosFunc { fn as_any(&self) -> &dyn Any { self @@ -563,46 +565,10 @@ impl ScalarUDFImpl for ToTimestampNanosFunc { } } fn documentation(&self) -> Option<&Documentation> { - Some(get_to_timestamp_nanos_doc()) + self.doc() } } -static TO_TIMESTAMP_NANOS_DOC: OnceLock = OnceLock::new(); - -fn get_to_timestamp_nanos_doc() -> &'static Documentation { - TO_TIMESTAMP_NANOS_DOC.get_or_init(|| { - Documentation::builder( - DOC_SECTION_DATETIME, - "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.", - "to_timestamp_nanos(expression[, ..., format_n])") - .with_argument( - "expression", - "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators." - ) - .with_argument( - "format_n", - "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. 
If none of the formats successfully parse the expression an error will be returned.", - ) - .with_sql_example(r#"```sql -> select to_timestamp_nanos('2023-01-31T09:26:56.123456789-05:00'); -+-----------------------------------------------------------------+ -| to_timestamp_nanos(Utf8("2023-01-31T09:26:56.123456789-05:00")) | -+-----------------------------------------------------------------+ -| 2023-01-31T14:26:56.123456789 | -+-----------------------------------------------------------------+ -> select to_timestamp_nanos('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); -+--------------------------------------------------------------------------------------------------------------+ -| to_timestamp_nanos(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | -+--------------------------------------------------------------------------------------------------------------+ -| 2023-05-17T03:59:00.123456789 | -+---------------------------------------------------------------------------------------------------------------+ -``` -Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) -"#) - .build() - }) -} - /// Returns the return type for the to_timestamp_* function, preserving /// the timezone if it exists. 
fn return_type_for(arg: &DataType, unit: TimeUnit) -> DataType { diff --git a/datafusion/functions/src/datetime/to_unixtime.rs b/datafusion/functions/src/datetime/to_unixtime.rs index 9e6453a597c3..9ce3fa531e32 100644 --- a/datafusion/functions/src/datetime/to_unixtime.rs +++ b/datafusion/functions/src/datetime/to_unixtime.rs @@ -19,13 +19,44 @@ use super::to_timestamp::ToTimestampSecondsFunc; use crate::datetime::common::*; use arrow::datatypes::{DataType, TimeUnit}; use datafusion_common::{exec_err, Result}; +use datafusion_doc::DocSection; use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; use std::sync::OnceLock; +#[user_doc( + doc_section(label = "Time and Date Functions"), + description = "Converts a value to seconds since the unix epoch (`1970-01-01T00:00:00Z`). Supports strings, dates, timestamps and double types as input. Strings are parsed as RFC3339 (e.g. 
'2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided.", + syntax_example = "to_unixtime(expression[, ..., format_n])", + sql_example = r#" +```sql +> select to_unixtime('2020-09-08T12:00:00+00:00'); ++------------------------------------------------+ +| to_unixtime(Utf8("2020-09-08T12:00:00+00:00")) | ++------------------------------------------------+ +| 1599566400 | ++------------------------------------------------+ +> select to_unixtime('01-14-2023 01:01:30+05:30', '%q', '%d-%m-%Y %H/%M/%S', '%+', '%m-%d-%Y %H:%M:%S%#z'); ++-----------------------------------------------------------------------------------------------------------------------------+ +| to_unixtime(Utf8("01-14-2023 01:01:30+05:30"),Utf8("%q"),Utf8("%d-%m-%Y %H/%M/%S"),Utf8("%+"),Utf8("%m-%d-%Y %H:%M:%S%#z")) | ++-----------------------------------------------------------------------------------------------------------------------------+ +| 1673638290 | ++-----------------------------------------------------------------------------------------------------------------------------+ +``` +"#, + argument( + name = "expression", + description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators." + ), + argument( + name = "format_n", + description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned." 
+ ) +)] #[derive(Debug)] pub struct ToUnixtimeFunc { signature: Signature, @@ -93,40 +124,6 @@ impl ScalarUDFImpl for ToUnixtimeFunc { } } fn documentation(&self) -> Option<&Documentation> { - Some(get_to_unixtime_doc()) + self.doc() } } - -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_to_unixtime_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_DATETIME, - "Converts a value to seconds since the unix epoch (`1970-01-01T00:00:00Z`). Supports strings, dates, timestamps and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided.", - "to_unixtime(expression[, ..., format_n])") - .with_argument( - "expression", - "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators." - ).with_argument( - "format_n", - "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. 
If none of the formats successfully parse the expression an error will be returned.") - .with_sql_example(r#" -```sql -> select to_unixtime('2020-09-08T12:00:00+00:00'); -+------------------------------------------------+ -| to_unixtime(Utf8("2020-09-08T12:00:00+00:00")) | -+------------------------------------------------+ -| 1599566400 | -+------------------------------------------------+ -> select to_unixtime('01-14-2023 01:01:30+05:30', '%q', '%d-%m-%Y %H/%M/%S', '%+', '%m-%d-%Y %H:%M:%S%#z'); -+-----------------------------------------------------------------------------------------------------------------------------+ -| to_unixtime(Utf8("01-14-2023 01:01:30+05:30"),Utf8("%q"),Utf8("%d-%m-%Y %H/%M/%S"),Utf8("%+"),Utf8("%m-%d-%Y %H:%M:%S%#z")) | -+-----------------------------------------------------------------------------------------------------------------------------+ -| 1673638290 | -+-----------------------------------------------------------------------------------------------------------------------------+ -``` -"#) - .build() - }) -} diff --git a/datafusion/functions/src/encoding/inner.rs b/datafusion/functions/src/encoding/inner.rs index 42d2ff98c39d..3f88aa80a14d 100644 --- a/datafusion/functions/src/encoding/inner.rs +++ b/datafusion/functions/src/encoding/inner.rs @@ -31,14 +31,29 @@ use datafusion_common::{ }; use datafusion_common::{exec_err, ScalarValue}; use datafusion_common::{DataFusionError, Result}; +use datafusion_doc::DocSection; use datafusion_expr::{ColumnarValue, Documentation}; use std::sync::{Arc, OnceLock}; use std::{fmt, str::FromStr}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_BINARY_STRING; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use datafusion_macros::user_doc; use std::any::Any; +#[user_doc( + doc_section(label = "Binary String Functions"), + description = "Encode binary data into a textual representation.", + syntax_example = "encode(expression, format)", + argument( + name = "expression", + 
description = "Expression containing string or binary data" + ), + argument( + name = "format", + description = "Supported formats are: `base64`, `hex`" + ), + related_udf(name = "decode") +)] #[derive(Debug)] pub struct EncodeFunc { signature: Signature, @@ -58,22 +73,6 @@ impl EncodeFunc { } } -static ENCODE_DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_encode_doc() -> &'static Documentation { - ENCODE_DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_BINARY_STRING, - "Encode binary data into a textual representation.", - "encode(expression, format)", - ) - .with_argument("expression", "Expression containing string or binary data") - .with_argument("format", "Supported formats are: `base64`, `hex`") - .with_related_udf("decode") - .build() - }) -} - impl ScalarUDFImpl for EncodeFunc { fn as_any(&self) -> &dyn Any { self @@ -126,10 +125,21 @@ impl ScalarUDFImpl for EncodeFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_encode_doc()) + self.doc() } } +#[user_doc( + doc_section(label = "Binary String Functions"), + description = "Decode binary data from textual representation in string.", + syntax_example = "decode(expression, format)", + argument( + name = "expression", + description = "Expression containing encoded string data" + ), + argument(name = "format", description = "Same arguments as [encode](#encode)"), + related_udf(name = "encode") +)] #[derive(Debug)] pub struct DecodeFunc { signature: Signature, @@ -149,22 +159,6 @@ impl DecodeFunc { } } -static DECODE_DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_decode_doc() -> &'static Documentation { - DECODE_DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_BINARY_STRING, - "Decode binary data from textual representation in string.", - "decode(expression, format)", - ) - .with_argument("expression", "Expression containing encoded string data") - .with_argument("format", "Same arguments as [encode](#encode)") - 
.with_related_udf("encode") - .build() - }) -} - impl ScalarUDFImpl for DecodeFunc { fn as_any(&self) -> &dyn Any { self @@ -217,7 +211,7 @@ impl ScalarUDFImpl for DecodeFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_decode_doc()) + self.doc() } } diff --git a/datafusion/functions/src/math/cot.rs b/datafusion/functions/src/math/cot.rs index 2355696a8be7..8c2a780eb54f 100644 --- a/datafusion/functions/src/math/cot.rs +++ b/datafusion/functions/src/math/cot.rs @@ -24,10 +24,17 @@ use arrow::datatypes::{DataType, Float32Type, Float64Type}; use crate::utils::make_scalar_function; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_MATH; +use datafusion_doc::DocSection; use datafusion_expr::{ColumnarValue, Documentation}; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; - +use datafusion_macros::user_doc; + +#[user_doc( + doc_section(label = "Math Functions"), + description = "Returns the cotangent of a number.", + syntax_example = r#"cot(numeric_expression)"#, + standard_argument(name = "numeric_expression", prefix = "Numeric") +)] #[derive(Debug)] pub struct CotFunc { signature: Signature, @@ -39,20 +46,6 @@ impl Default for CotFunc { } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_cot_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_MATH, - "Returns the cotangent of a number.", - r#"cot(numeric_expression)"#, - ) - .with_standard_argument("numeric_expression", Some("Numeric")) - .build() - }) -} - impl CotFunc { pub fn new() -> Self { use DataType::*; @@ -92,7 +85,7 @@ impl ScalarUDFImpl for CotFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_cot_doc()) + self.doc() } fn invoke_batch( diff --git a/datafusion/functions/src/math/factorial.rs b/datafusion/functions/src/math/factorial.rs index 083936eb185a..f5852bcc248d 100644 --- a/datafusion/functions/src/math/factorial.rs +++ 
b/datafusion/functions/src/math/factorial.rs @@ -27,11 +27,18 @@ use arrow::datatypes::DataType::Int64; use crate::utils::make_scalar_function; use datafusion_common::{arrow_datafusion_err, exec_err, DataFusionError, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_MATH; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; - +use datafusion_macros::user_doc; + +#[user_doc( + doc_section(label = "Math Functions"), + description = "Factorial. Returns 1 if value is less than 2.", + syntax_example = "factorial(numeric_expression)", + standard_argument(name = "numeric_expression", prefix = "Numeric") +)] #[derive(Debug)] pub struct FactorialFunc { signature: Signature, @@ -77,24 +84,10 @@ impl ScalarUDFImpl for FactorialFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_factorial_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_factorial_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_MATH, - "Factorial. 
Returns 1 if value is less than 2.", - "factorial(numeric_expression)", - ) - .with_standard_argument("numeric_expression", Some("Numeric")) - .build() - }) -} - /// Factorial SQL function fn factorial(args: &[ArrayRef]) -> Result { match args[0].data_type() { diff --git a/datafusion/functions/src/math/gcd.rs b/datafusion/functions/src/math/gcd.rs index f4119cd975ab..9c1c486f2871 100644 --- a/datafusion/functions/src/math/gcd.rs +++ b/datafusion/functions/src/math/gcd.rs @@ -26,11 +26,19 @@ use arrow::datatypes::DataType::Int64; use crate::utils::make_scalar_function; use datafusion_common::{arrow_datafusion_err, exec_err, DataFusionError, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_MATH; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; - +use datafusion_macros::user_doc; + +#[user_doc( + doc_section(label = "Math Functions"), + description = "Returns the greatest common divisor of `expression_x` and `expression_y`. Returns 0 if both inputs are zero.", + syntax_example = "gcd(expression_x, expression_y)", + standard_argument(name = "expression_x", prefix = "First numeric"), + standard_argument(name = "expression_y", prefix = "Second numeric") +)] #[derive(Debug)] pub struct GcdFunc { signature: Signature, @@ -77,25 +85,10 @@ impl ScalarUDFImpl for GcdFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_gcd_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_gcd_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_MATH, - "Returns the greatest common divisor of `expression_x` and `expression_y`. 
Returns 0 if both inputs are zero.", - - "gcd(expression_x, expression_y)") - .with_standard_argument("expression_x", Some("First numeric")) - .with_standard_argument("expression_y", Some("Second numeric")) - .build() - }) -} - /// Gcd SQL function fn gcd(args: &[ArrayRef]) -> Result { match args[0].data_type() { diff --git a/datafusion/functions/src/math/iszero.rs b/datafusion/functions/src/math/iszero.rs index c1498ae36222..642751d398ae 100644 --- a/datafusion/functions/src/math/iszero.rs +++ b/datafusion/functions/src/math/iszero.rs @@ -23,14 +23,21 @@ use arrow::datatypes::DataType::{Boolean, Float32, Float64}; use arrow::datatypes::{DataType, Float32Type, Float64Type}; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_MATH; +use datafusion_doc::DocSection; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use crate::utils::make_scalar_function; +#[user_doc( + doc_section(label = "Math Functions"), + description = "Returns true if a given number is +0.0 or -0.0 otherwise returns false.", + syntax_example = "iszero(numeric_expression)", + standard_argument(name = "numeric_expression", prefix = "Numeric") +)] #[derive(Debug)] pub struct IsZeroFunc { signature: Signature, @@ -80,24 +87,10 @@ impl ScalarUDFImpl for IsZeroFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_iszero_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_iszero_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_MATH, - "Returns true if a given number is +0.0 or -0.0 otherwise returns false.", - "iszero(numeric_expression)", - ) - .with_standard_argument("numeric_expression", Some("Numeric")) - .build() - }) -} - /// Iszero SQL function pub fn iszero(args: &[ArrayRef]) -> Result { match args[0].data_type() { diff 
--git a/datafusion/functions/src/math/lcm.rs b/datafusion/functions/src/math/lcm.rs index 4e5a9b64f6f5..9b0b43ad4502 100644 --- a/datafusion/functions/src/math/lcm.rs +++ b/datafusion/functions/src/math/lcm.rs @@ -24,14 +24,22 @@ use arrow::datatypes::DataType::Int64; use arrow::error::ArrowError; use datafusion_common::{arrow_datafusion_err, exec_err, DataFusionError, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_MATH; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use super::gcd::unsigned_gcd; use crate::utils::make_scalar_function; +#[user_doc( + doc_section(label = "Math Functions"), + description = "Returns the least common multiple of `expression_x` and `expression_y`. Returns 0 if either input is zero.", + syntax_example = "lcm(expression_x, expression_y)", + standard_argument(name = "expression_x", prefix = "First numeric"), + standard_argument(name = "expression_y", prefix = "Second numeric") +)] #[derive(Debug)] pub struct LcmFunc { signature: Signature, @@ -78,25 +86,10 @@ impl ScalarUDFImpl for LcmFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_lcm_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_lcm_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_MATH, - "Returns the least common multiple of `expression_x` and `expression_y`. 
Returns 0 if either input is zero.", - - "lcm(expression_x, expression_y)") - .with_standard_argument("expression_x", Some("First numeric")) - .with_standard_argument("expression_y", Some("Second numeric")) - .build() - }) -} - /// Lcm SQL function fn lcm(args: &[ArrayRef]) -> Result { let compute_lcm = |x: i64, y: i64| { diff --git a/datafusion/functions/src/math/log.rs b/datafusion/functions/src/math/log.rs index d4bb8ec13b0b..6ec6d0fa3d16 100644 --- a/datafusion/functions/src/math/log.rs +++ b/datafusion/functions/src/math/log.rs @@ -27,15 +27,24 @@ use arrow::datatypes::{DataType, Float32Type, Float64Type}; use datafusion_common::{ exec_err, internal_err, plan_datafusion_err, plan_err, Result, ScalarValue, }; +use datafusion_doc::DocSection; use datafusion_expr::expr::ScalarFunction; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_MATH; use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; use datafusion_expr::{ lit, ColumnarValue, Documentation, Expr, ScalarUDF, TypeSignature::*, }; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; - +use datafusion_macros::user_doc; + +#[user_doc( + doc_section(label = "Math Functions"), + description = "Returns the base-x logarithm of a number. Can either provide a specified base, or if omitted then takes the base-10 of a number.", + syntax_example = r#"log(base, numeric_expression) +log(numeric_expression)"#, + standard_argument(name = "base", prefix = "Base numeric"), + standard_argument(name = "numeric_expression", prefix = "Numeric") +)] #[derive(Debug)] pub struct LogFunc { signature: Signature, @@ -47,21 +56,6 @@ impl Default for LogFunc { } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_log_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_MATH, - "Returns the base-x logarithm of a number. 
Can either provide a specified base, or if omitted then takes the base-10 of a number.", - r#"log(base, numeric_expression) -log(numeric_expression)"#) - .with_standard_argument("base", Some("Base numeric")) - .with_standard_argument("numeric_expression", Some("Numeric")) - .build() - }) -} - impl LogFunc { pub fn new() -> Self { use DataType::*; @@ -189,7 +183,7 @@ impl ScalarUDFImpl for LogFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_log_doc()) + self.doc() } /// Simplify the `log` function by the relevant rules: diff --git a/datafusion/functions/src/math/nans.rs b/datafusion/functions/src/math/nans.rs index 4cfbf0494812..b5b94183403d 100644 --- a/datafusion/functions/src/math/nans.rs +++ b/datafusion/functions/src/math/nans.rs @@ -19,14 +19,21 @@ use arrow::datatypes::{DataType, Float32Type, Float64Type}; use datafusion_common::{exec_err, Result}; +use datafusion_doc::DocSection; use datafusion_expr::{ColumnarValue, TypeSignature}; use arrow::array::{ArrayRef, AsArray, BooleanArray}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_MATH; use datafusion_expr::{Documentation, ScalarUDFImpl, Signature, Volatility}; +use datafusion_macros::user_doc; use std::any::Any; use std::sync::{Arc, OnceLock}; +#[user_doc( + doc_section(label = "Math Functions"), + description = "Returns true if a given number is +NaN or -NaN otherwise returns false.", + syntax_example = "isnan(numeric_expression)", + standard_argument(name = "numeric_expression", prefix = "Numeric") +)] #[derive(Debug)] pub struct IsNanFunc { signature: Signature, @@ -97,20 +104,6 @@ impl ScalarUDFImpl for IsNanFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_isnan_doc()) + self.doc() } } - -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_isnan_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_MATH, - "Returns true if a given number is +NaN or -NaN otherwise returns false.", - 
"isnan(numeric_expression)", - ) - .with_standard_argument("numeric_expression", Some("Numeric")) - .build() - }) -} diff --git a/datafusion/functions/src/math/nanvl.rs b/datafusion/functions/src/math/nanvl.rs index 0715dc7f7eac..44b2db9622dc 100644 --- a/datafusion/functions/src/math/nanvl.rs +++ b/datafusion/functions/src/math/nanvl.rs @@ -24,12 +24,27 @@ use arrow::array::{ArrayRef, AsArray, Float32Array, Float64Array}; use arrow::datatypes::DataType::{Float32, Float64}; use arrow::datatypes::{DataType, Float32Type, Float64Type}; use datafusion_common::{exec_err, DataFusionError, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_MATH; +use datafusion_doc::DocSection; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "Math Functions"), + description = r#"Returns the first argument if it's not _NaN_. +Returns the second argument otherwise."#, + syntax_example = "nanvl(expression_x, expression_y)", + argument( + name = "expression_x", + description = "Numeric expression to return if it's not _NaN_. Can be a constant, column, or function, and any combination of arithmetic operators." + ), + argument( + name = "expression_y", + description = "Numeric expression to return if the first expression is _NaN_. Can be a constant, column, or function, and any combination of arithmetic operators." + ) +)] #[derive(Debug)] pub struct NanvlFunc { signature: Signature, @@ -82,26 +97,10 @@ impl ScalarUDFImpl for NanvlFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_nanvl_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_nanvl_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_MATH, - r#"Returns the first argument if it's not _NaN_. 
-Returns the second argument otherwise."#, - - "nanvl(expression_x, expression_y)") - .with_argument("expression_x", "Numeric expression to return if it's not _NaN_. Can be a constant, column, or function, and any combination of arithmetic operators.") - .with_argument("expression_y", "Numeric expression to return if the first expression is _NaN_. Can be a constant, column, or function, and any combination of arithmetic operators.") - .build() - }) -} - /// Nanvl SQL function fn nanvl(args: &[ArrayRef]) -> Result { match args[0].data_type() { diff --git a/datafusion/functions/src/math/pi.rs b/datafusion/functions/src/math/pi.rs index a96ca176622d..5fba38fd0ba1 100644 --- a/datafusion/functions/src/math/pi.rs +++ b/datafusion/functions/src/math/pi.rs @@ -21,12 +21,19 @@ use std::sync::OnceLock; use arrow::datatypes::DataType; use arrow::datatypes::DataType::Float64; use datafusion_common::{internal_err, Result, ScalarValue}; +use datafusion_doc::DocSection; use datafusion_expr::scalar_doc_sections::DOC_SECTION_MATH; use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "Math Functions"), + description = "Returns an approximate value of π.", + syntax_example = "pi()" +)] #[derive(Debug)] pub struct PiFunc { signature: Signature, @@ -82,19 +89,6 @@ impl ScalarUDFImpl for PiFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_pi_doc()) + self.doc() } } - -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_pi_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_MATH, - "Returns an approximate value of π.", - "pi()", - ) - .build() - }) -} diff --git a/datafusion/functions/src/math/power.rs b/datafusion/functions/src/math/power.rs index 92dd8966b66c..a47ead87d876 100644 --- 
a/datafusion/functions/src/math/power.rs +++ b/datafusion/functions/src/math/power.rs @@ -27,12 +27,20 @@ use datafusion_common::{ arrow_datafusion_err, exec_datafusion_err, exec_err, plan_datafusion_err, DataFusionError, Result, ScalarValue, }; +use datafusion_doc::DocSection; use datafusion_expr::expr::ScalarFunction; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_MATH; use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; use datafusion_expr::{ColumnarValue, Documentation, Expr, ScalarUDF, TypeSignature}; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; - +use datafusion_macros::user_doc; + +#[user_doc( + doc_section(label = "Math Functions"), + description = "Returns a base expression raised to the power of an exponent.", + syntax_example = "power(base, exponent)", + standard_argument(name = "base", prefix = "Numeric"), + standard_argument(name = "exponent", prefix = "Exponent numeric") +)] #[derive(Debug)] pub struct PowerFunc { signature: Signature, @@ -170,25 +178,10 @@ impl ScalarUDFImpl for PowerFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_power_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_power_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_MATH, - "Returns a base expression raised to the power of an exponent.", - "power(base, exponent)", - ) - .with_standard_argument("base", Some("Numeric")) - .with_standard_argument("exponent", Some("Exponent numeric")) - .build() - }) -} - /// Return true if this function call is a call to `Log` fn is_log(func: &ScalarUDF) -> bool { func.inner().as_any().downcast_ref::().is_some() diff --git a/datafusion/functions/src/math/random.rs b/datafusion/functions/src/math/random.rs index e34db023ed9a..708a25237a69 100644 --- a/datafusion/functions/src/math/random.rs +++ b/datafusion/functions/src/math/random.rs @@ -24,10 +24,17 @@ use 
arrow::datatypes::DataType::Float64; use rand::{thread_rng, Rng}; use datafusion_common::{internal_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_MATH; +use datafusion_doc::DocSection; use datafusion_expr::ColumnarValue; use datafusion_expr::{Documentation, ScalarUDFImpl, Signature, Volatility}; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "Math Functions"), + description = r#"Returns a random float value in the range [0, 1). +The random seed is unique to each row."#, + syntax_example = "random()" +)] #[derive(Debug)] pub struct RandomFunc { signature: Signature, @@ -82,20 +89,6 @@ impl ScalarUDFImpl for RandomFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_random_doc()) + self.doc() } } - -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_random_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_MATH, - r#"Returns a random float value in the range [0, 1). 
-The random seed is unique to each row."#, - "random()", - ) - .build() - }) -} diff --git a/datafusion/functions/src/math/round.rs b/datafusion/functions/src/math/round.rs index cfbf083fcb1e..86dd84dc9760 100644 --- a/datafusion/functions/src/math/round.rs +++ b/datafusion/functions/src/math/round.rs @@ -25,13 +25,24 @@ use arrow::compute::{cast_with_options, CastOptions}; use arrow::datatypes::DataType::{Float32, Float64, Int32}; use arrow::datatypes::{DataType, Float32Type, Float64Type, Int32Type}; use datafusion_common::{exec_datafusion_err, exec_err, Result, ScalarValue}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_MATH; +use datafusion_doc::DocSection; use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; - +use datafusion_macros::user_doc; + +#[user_doc( + doc_section(label = "Math Functions"), + description = "Rounds a number to the nearest integer.", + syntax_example = "round(numeric_expression[, decimal_places])", + standard_argument(name = "numeric_expression", prefix = "Numeric"), + argument( + name = "decimal_places", + description = "Optional. The number of decimal places to round to. Defaults to 0." + ) +)] #[derive(Debug)] pub struct RoundFunc { signature: Signature, @@ -104,28 +115,10 @@ impl ScalarUDFImpl for RoundFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_round_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_round_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_MATH, - "Rounds a number to the nearest integer.", - "round(numeric_expression[, decimal_places])", - ) - .with_standard_argument("numeric_expression", Some("Numeric")) - .with_argument( - "decimal_places", - "Optional. The number of decimal places to round to. 
Defaults to 0.", - ) - .build() - }) -} - /// Round SQL function pub fn round(args: &[ArrayRef]) -> Result { if args.len() != 1 && args.len() != 2 { diff --git a/datafusion/functions/src/math/signum.rs b/datafusion/functions/src/math/signum.rs index eda9df49fbac..005d1dc0ed39 100644 --- a/datafusion/functions/src/math/signum.rs +++ b/datafusion/functions/src/math/signum.rs @@ -23,14 +23,23 @@ use arrow::datatypes::DataType::{Float32, Float64}; use arrow::datatypes::{DataType, Float32Type, Float64Type}; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_MATH; +use datafusion_doc::DocSection; use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use crate::utils::make_scalar_function; +#[user_doc( + doc_section(label = "Math Functions"), + description = r#"Returns the sign of a number. +Negative numbers return `-1`. +Zero and positive numbers return `1`."#, + syntax_example = "signum(numeric_expression)", + standard_argument(name = "numeric_expression", prefix = "Numeric") +)] #[derive(Debug)] pub struct SignumFunc { signature: Signature, @@ -89,26 +98,10 @@ impl ScalarUDFImpl for SignumFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_signum_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_signum_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_MATH, - r#"Returns the sign of a number. -Negative numbers return `-1`. 
-Zero and positive numbers return `1`."#, - "signum(numeric_expression)", - ) - .with_standard_argument("numeric_expression", Some("Numeric")) - .build() - }) -} - /// signum SQL function pub fn signum(args: &[ArrayRef]) -> Result { match args[0].data_type() { diff --git a/datafusion/functions/src/math/trunc.rs b/datafusion/functions/src/math/trunc.rs index c2787c4577d0..b4331958262d 100644 --- a/datafusion/functions/src/math/trunc.rs +++ b/datafusion/functions/src/math/trunc.rs @@ -25,13 +25,28 @@ use arrow::datatypes::DataType::{Float32, Float64}; use arrow::datatypes::{DataType, Float32Type, Float64Type, Int64Type}; use datafusion_common::ScalarValue::Int64; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_MATH; +use datafusion_doc::DocSection; use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; - +use datafusion_macros::user_doc; + +#[user_doc( + doc_section(label = "Math Functions"), + description = "Truncates a number to a whole number or truncated to the specified decimal places.", + syntax_example = "trunc(numeric_expression[, decimal_places])", + standard_argument(name = "numeric_expression", prefix = "Numeric"), + argument( + name = "decimal_places", + description = r#"Optional. The number of decimal places to + truncate to. Defaults to 0 (truncate to a whole number). If + `decimal_places` is a positive integer, truncates digits to the + right of the decimal point. 
If `decimal_places` is a negative + integer, replaces digits to the left of the decimal point with `0`."# + ) +)] #[derive(Debug)] pub struct TruncFunc { signature: Signature, @@ -109,29 +124,10 @@ impl ScalarUDFImpl for TruncFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_trunc_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_trunc_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_MATH, - "Truncates a number to a whole number or truncated to the specified decimal places.", - - "trunc(numeric_expression[, decimal_places])") - .with_standard_argument("numeric_expression", Some("Numeric")) - .with_argument("decimal_places", r#"Optional. The number of decimal places to - truncate to. Defaults to 0 (truncate to a whole number). If - `decimal_places` is a positive integer, truncates digits to the - right of the decimal point. If `decimal_places` is a negative - integer, replaces digits to the left of the decimal point with `0`."#) - .build() - }) -} - /// Truncate(numeric, decimalPrecision) and trunc(numeric) SQL function fn trunc(args: &[ArrayRef]) -> Result { if args.len() != 1 && args.len() != 2 { diff --git a/datafusion/functions/src/regex/regexpcount.rs b/datafusion/functions/src/regex/regexpcount.rs index 8f06c75b2fe9..1c5c92e802e3 100644 --- a/datafusion/functions/src/regex/regexpcount.rs +++ b/datafusion/functions/src/regex/regexpcount.rs @@ -23,17 +23,46 @@ use arrow::datatypes::{ }; use arrow::error::ArrowError; use datafusion_common::{exec_err, internal_err, Result, ScalarValue}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_REGEX; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature::Exact, TypeSignature::Uniform, Volatility, }; +use datafusion_macros::user_doc; use itertools::izip; use regex::Regex; use std::collections::hash_map::Entry; use 
std::collections::HashMap; use std::sync::{Arc, OnceLock}; +#[user_doc( + doc_section(label = "Regular Expression Functions"), + description = "Returns the number of matches that a [regular expression](https://docs.rs/regex/latest/regex/#syntax) has in a string.", + syntax_example = "regexp_count(str, regexp[, start, flags])", + sql_example = r#"```sql +> select regexp_count('abcAbAbc', 'abc', 2, 'i'); ++---------------------------------------------------------------+ +| regexp_count(Utf8("abcAbAbc"),Utf8("abc"),Int64(2),Utf8("i")) | ++---------------------------------------------------------------+ +| 1 | ++---------------------------------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + standard_argument(name = "regexp", prefix = "Regular"), + argument( + name = "start", + description = "Optional start position (the first position is 1) to search for the regular expression. Can be a constant, column, or function." + ), + argument( + name = "flags", + description = r#"Optional regular expression flags that control the behavior of the regular expression. The following flags are supported: + - **i**: case-insensitive: letters match both upper and lower case + - **m**: multi-line mode: ^ and $ match begin/end of line + - **s**: allow . 
to match \n + - **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used + - **U**: swap the meaning of x* and x*?"# + ) +)] #[derive(Debug)] pub struct RegexpCountFunc { signature: Signature, @@ -111,40 +140,10 @@ impl ScalarUDFImpl for RegexpCountFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_regexp_count_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_regexp_count_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_REGEX, - "Returns the number of matches that a [regular expression](https://docs.rs/regex/latest/regex/#syntax) has in a string.", - "regexp_count(str, regexp[, start, flags])") - .with_sql_example(r#"```sql -> select regexp_count('abcAbAbc', 'abc', 2, 'i'); -+---------------------------------------------------------------+ -| regexp_count(Utf8("abcAbAbc"),Utf8("abc"),Int64(2),Utf8("i")) | -+---------------------------------------------------------------+ -| 1 | -+---------------------------------------------------------------+ -```"#) - .with_standard_argument("str", Some("String")) - .with_standard_argument("regexp",Some("Regular")) - .with_argument("start", "- **start**: Optional start position (the first position is 1) to search for the regular expression. Can be a constant, column, or function.") - .with_argument("flags", - r#"Optional regular expression flags that control the behavior of the regular expression. The following flags are supported: - - **i**: case-insensitive: letters match both upper and lower case - - **m**: multi-line mode: ^ and $ match begin/end of line - - **s**: allow . 
to match \n - - **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used - - **U**: swap the meaning of x* and x*?"#) - .build() - }) -} - pub fn regexp_count_func(args: &[ArrayRef]) -> Result { let args_len = args.len(); if !(2..=4).contains(&args_len) { diff --git a/datafusion/functions/src/regex/regexplike.rs b/datafusion/functions/src/regex/regexplike.rs index 49e57776c7b8..60e8bfc8a0fd 100644 --- a/datafusion/functions/src/regex/regexplike.rs +++ b/datafusion/functions/src/regex/regexplike.rs @@ -25,30 +25,19 @@ use datafusion_common::exec_err; use datafusion_common::ScalarValue; use datafusion_common::{arrow_datafusion_err, plan_err}; use datafusion_common::{internal_err, DataFusionError, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_REGEX; +use datafusion_doc::DocSection; use datafusion_expr::{ColumnarValue, Documentation, TypeSignature}; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use datafusion_macros::user_doc; use std::any::Any; use std::sync::{Arc, OnceLock}; -#[derive(Debug)] -pub struct RegexpLikeFunc { - signature: Signature, -} - -impl Default for RegexpLikeFunc { - fn default() -> Self { - Self::new() - } -} - -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_regexp_like_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder(DOC_SECTION_REGEX,"Returns true if a [regular expression](https://docs.rs/regex/latest/regex/#syntax) has at least one match in a string, false otherwise.","regexp_like(str, regexp[, flags])") - .with_sql_example(r#"```sql +#[user_doc( + doc_section(label = "Regular Expression Functions"), + description = "Returns true if a [regular expression](https://docs.rs/regex/latest/regex/#syntax) has at least one match in a string, false otherwise.", + syntax_example = "regexp_like(str, regexp[, flags])", + sql_example = r#"```sql select regexp_like('Köln', '[a-zA-Z]ö[a-zA-Z]{2}'); +--------------------------------------------------------+ 
| regexp_like(Utf8("Köln"),Utf8("[a-zA-Z]ö[a-zA-Z]{2}")) | @@ -63,18 +52,32 @@ SELECT regexp_like('aBc', '(b|d)', 'i'); +--------------------------------------------------+ ``` Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs) -"#) - .with_standard_argument("str", Some("String")) - .with_standard_argument("regexp", Some("Regular")) - .with_argument("flags", - r#"Optional regular expression flags that control the behavior of the regular expression. The following flags are supported: +"#, + standard_argument(name = "str", prefix = "String"), + standard_argument(name = "regexp", prefix = "Regular"), + argument( + name = "start", + description = "- **start**: Optional start position (the first position is 1) to search for the regular expression. Can be a constant, column, or function." + ), + argument( + name = "flags", + description = r#"Optional regular expression flags that control the behavior of the regular expression. The following flags are supported: - **i**: case-insensitive: letters match both upper and lower case - **m**: multi-line mode: ^ and $ match begin/end of line - **s**: allow . 
to match \n - **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used - - **U**: swap the meaning of x* and x*?"#) - .build() - }) + - **U**: swap the meaning of x* and x*?"# + ) +)] +#[derive(Debug)] +pub struct RegexpLikeFunc { + signature: Signature, +} + +impl Default for RegexpLikeFunc { + fn default() -> Self { + Self::new() + } } impl RegexpLikeFunc { @@ -161,7 +164,7 @@ impl ScalarUDFImpl for RegexpLikeFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_regexp_like_doc()) + self.doc() } } diff --git a/datafusion/functions/src/regex/regexpmatch.rs b/datafusion/functions/src/regex/regexpmatch.rs index 8362ef2f406c..4349129dd6a0 100644 --- a/datafusion/functions/src/regex/regexpmatch.rs +++ b/datafusion/functions/src/regex/regexpmatch.rs @@ -26,12 +26,48 @@ use datafusion_common::{arrow_datafusion_err, plan_err}; use datafusion_common::{ cast::as_generic_string_array, internal_err, DataFusionError, Result, }; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_REGEX; +use datafusion_doc::DocSection; use datafusion_expr::{ColumnarValue, Documentation, TypeSignature}; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use datafusion_macros::user_doc; use std::any::Any; use std::sync::{Arc, OnceLock}; +#[user_doc( + doc_section(label = "Regular Expression Functions"), + description = "Returns the first [regular expression](https://docs.rs/regex/latest/regex/#syntax) matches in a string.", + syntax_example = "regexp_match(str, regexp[, flags])", + sql_example = r#"```sql +> select regexp_match('Köln', '[a-zA-Z]ö[a-zA-Z]{2}'); ++---------------------------------------------------------+ +| regexp_match(Utf8("Köln"),Utf8("[a-zA-Z]ö[a-zA-Z]{2}")) | ++---------------------------------------------------------+ +| [Köln] | ++---------------------------------------------------------+ +SELECT regexp_match('aBc', '(b|d)', 'i'); ++---------------------------------------------------+ +| 
regexp_match(Utf8("aBc"),Utf8("(b|d)"),Utf8("i")) | ++---------------------------------------------------+ +| [B] | ++---------------------------------------------------+ +``` +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs) +"#, + standard_argument(name = "str", prefix = "String"), + argument( + name = "regexp", + description = "Regular expression to match against. Can be a constant, column, or function." + ), + argument( + name = "flags", + description = r#"Optional regular expression flags that control the behavior of the regular expression. The following flags are supported: + - **i**: case-insensitive: letters match both upper and lower case + - **m**: multi-line mode: ^ and $ match begin/end of line + - **s**: allow . to match \n + - **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used + - **U**: swap the meaning of x* and x*?"# + ) +)] #[derive(Debug)] pub struct RegexpMatchFunc { signature: Signature, @@ -113,48 +149,10 @@ impl ScalarUDFImpl for RegexpMatchFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_regexp_match_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_regexp_match_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_REGEX, - "Returns the first [regular expression](https://docs.rs/regex/latest/regex/#syntax) matches in a string.", - "regexp_match(str, regexp[, flags])") - .with_sql_example(r#"```sql - > select regexp_match('Köln', '[a-zA-Z]ö[a-zA-Z]{2}'); - +---------------------------------------------------------+ - | regexp_match(Utf8("Köln"),Utf8("[a-zA-Z]ö[a-zA-Z]{2}")) | - +---------------------------------------------------------+ - | [Köln] | - +---------------------------------------------------------+ - SELECT regexp_match('aBc', '(b|d)', 'i'); - +---------------------------------------------------+ - | 
regexp_match(Utf8("aBc"),Utf8("(b|d)"),Utf8("i")) | - +---------------------------------------------------+ - | [B] | - +---------------------------------------------------+ -``` -Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs) -"#) - .with_standard_argument("str", Some("String")) - .with_argument("regexp","Regular expression to match against. - Can be a constant, column, or function.") - .with_argument("flags", - r#"Optional regular expression flags that control the behavior of the regular expression. The following flags are supported: - - **i**: case-insensitive: letters match both upper and lower case - - **m**: multi-line mode: ^ and $ match begin/end of line - - **s**: allow . to match \n - - **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used - - **U**: swap the meaning of x* and x*?"#) - .build() - }) -} - fn regexp_match_func(args: &[ArrayRef]) -> Result { match args[0].data_type() { DataType::Utf8 => regexp_match::(args), diff --git a/datafusion/functions/src/regex/regexpreplace.rs b/datafusion/functions/src/regex/regexpreplace.rs index 9b4a7b04552b..66ebed78797c 100644 --- a/datafusion/functions/src/regex/regexpreplace.rs +++ b/datafusion/functions/src/regex/regexpreplace.rs @@ -31,16 +31,54 @@ use datafusion_common::ScalarValue; use datafusion_common::{ cast::as_generic_string_array, internal_err, DataFusionError, Result, }; +use datafusion_doc::DocSection; use datafusion_expr::function::Hint; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_REGEX; use datafusion_expr::ColumnarValue; use datafusion_expr::TypeSignature; use datafusion_expr::{Documentation, ScalarUDFImpl, Signature, Volatility}; +use datafusion_macros::user_doc; use regex::Regex; use std::any::Any; use std::collections::HashMap; use std::sync::{Arc, LazyLock, OnceLock}; +#[user_doc( + doc_section(label = "Regular Expression Functions"), + description = "Replaces substrings in a string 
that match a [regular expression](https://docs.rs/regex/latest/regex/#syntax).", + syntax_example = "regexp_replace(str, regexp, replacement[, flags])", + sql_example = r#"```sql +> select regexp_replace('foobarbaz', 'b(..)', 'X\\1Y', 'g'); ++------------------------------------------------------------------------+ +| regexp_replace(Utf8("foobarbaz"),Utf8("b(..)"),Utf8("X\1Y"),Utf8("g")) | ++------------------------------------------------------------------------+ +| fooXarYXazY | ++------------------------------------------------------------------------+ +SELECT regexp_replace('aBc', '(b|d)', 'Ab\\1a', 'i'); ++-------------------------------------------------------------------+ +| regexp_replace(Utf8("aBc"),Utf8("(b|d)"),Utf8("Ab\1a"),Utf8("i")) | ++-------------------------------------------------------------------+ +| aAbBac | ++-------------------------------------------------------------------+ +``` +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs) +"#, + standard_argument(name = "str", prefix = "String"), + standard_argument(name = "replacement", prefix = "Replacement string"), + argument( + name = "regexp", + description = "Regular expression to match against. Can be a constant, column, or function." + ), + argument( + name = "flags", + description = r#"Optional regular expression flags that control the behavior of the regular expression. The following flags are supported: + - **g**: (global) Search globally and don't return after the first match + - **i**: case-insensitive: letters match both upper and lower case + - **m**: multi-line mode: ^ and $ match begin/end of line + - **s**: allow . 
to match \n + - **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used + - **U**: swap the meaning of x* and x*?"# + ) +)] #[derive(Debug)] pub struct RegexpReplaceFunc { signature: Signature, @@ -130,50 +167,10 @@ impl ScalarUDFImpl for RegexpReplaceFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_regexp_replace_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_regexp_replace_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_REGEX, - "Replaces substrings in a string that match a [regular expression](https://docs.rs/regex/latest/regex/#syntax).", - "regexp_replace(str, regexp, replacement[, flags])") - .with_sql_example(r#"```sql -> select regexp_replace('foobarbaz', 'b(..)', 'X\\1Y', 'g'); -+------------------------------------------------------------------------+ -| regexp_replace(Utf8("foobarbaz"),Utf8("b(..)"),Utf8("X\1Y"),Utf8("g")) | -+------------------------------------------------------------------------+ -| fooXarYXazY | -+------------------------------------------------------------------------+ -SELECT regexp_replace('aBc', '(b|d)', 'Ab\\1a', 'i'); -+-------------------------------------------------------------------+ -| regexp_replace(Utf8("aBc"),Utf8("(b|d)"),Utf8("Ab\1a"),Utf8("i")) | -+-------------------------------------------------------------------+ -| aAbBac | -+-------------------------------------------------------------------+ -``` -Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs) -"#) - .with_standard_argument("str", Some("String")) - .with_argument("regexp","Regular expression to match against. - Can be a constant, column, or function.") - .with_standard_argument("replacement", Some("Replacement string")) - .with_argument("flags", - r#"Optional regular expression flags that control the behavior of the regular expression. 
The following flags are supported: -- **g**: (global) Search globally and don't return after the first match -- **i**: case-insensitive: letters match both upper and lower case -- **m**: multi-line mode: ^ and $ match begin/end of line -- **s**: allow . to match \n -- **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used -- **U**: swap the meaning of x* and x*?"#) - .build() -}) -} - fn regexp_replace_func(args: &[ColumnarValue]) -> Result { match args[0].data_type() { DataType::Utf8 => specialize_regexp_replace::(args), diff --git a/datafusion/functions/src/string/ascii.rs b/datafusion/functions/src/string/ascii.rs index 4f615b5b2c58..d4a9d22d5488 100644 --- a/datafusion/functions/src/string/ascii.rs +++ b/datafusion/functions/src/string/ascii.rs @@ -20,12 +20,34 @@ use arrow::array::{ArrayAccessor, ArrayIter, ArrayRef, AsArray, Int32Array}; use arrow::datatypes::DataType; use arrow::error::ArrowError; use datafusion_common::{internal_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; +use datafusion_doc::DocSection; use datafusion_expr::{ColumnarValue, Documentation}; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use datafusion_macros::user_doc; use std::any::Any; use std::sync::{Arc, OnceLock}; +#[user_doc( + doc_section(label = "String Functions"), + description = "Returns the Unicode character code of the first character in a string.", + syntax_example = "ascii(str)", + sql_example = r#"```sql +> select ascii('abc'); ++--------------------+ +| ascii(Utf8("abc")) | ++--------------------+ +| 97 | ++--------------------+ +> select ascii('🚀'); ++-------------------+ +| ascii(Utf8("🚀")) | ++-------------------+ +| 128640 | ++-------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + related_udf(name = "chr") +)] #[derive(Debug)] pub struct AsciiFunc { signature: Signature, @@ -73,41 +95,10 @@ impl ScalarUDFImpl for AsciiFunc { } fn documentation(&self) -> Option<&Documentation> { 
- Some(get_ascii_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_ascii_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Returns the Unicode character code of the first character in a string.", - "ascii(str)", - ) - .with_sql_example( - r#"```sql -> select ascii('abc'); -+--------------------+ -| ascii(Utf8("abc")) | -+--------------------+ -| 97 | -+--------------------+ -> select ascii('🚀'); -+-------------------+ -| ascii(Utf8("🚀")) | -+-------------------+ -| 128640 | -+-------------------+ -```"#, - ) - .with_standard_argument("str", Some("String")) - .with_related_udf("chr") - .build() - }) -} - fn calculate_ascii<'a, V>(array: V) -> Result where V: ArrayAccessor, diff --git a/datafusion/functions/src/string/bit_length.rs b/datafusion/functions/src/string/bit_length.rs index 5a23692d85c7..6fc131fd226f 100644 --- a/datafusion/functions/src/string/bit_length.rs +++ b/datafusion/functions/src/string/bit_length.rs @@ -22,10 +22,28 @@ use std::sync::OnceLock; use crate::utils::utf8_to_int_type; use datafusion_common::{exec_err, Result, ScalarValue}; +use datafusion_doc::DocSection; use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use datafusion_expr::{ColumnarValue, Documentation, Volatility}; use datafusion_expr::{ScalarUDFImpl, Signature}; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "String Functions"), + description = "Returns the bit length of a string.", + syntax_example = "bit_length(str)", + sql_example = r#"```sql +> select bit_length('datafusion'); ++--------------------------------+ +| bit_length(Utf8("datafusion")) | ++--------------------------------+ +| 80 | ++--------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + related_udf(name = "length"), + related_udf(name = "octet_length") +)] #[derive(Debug)] pub struct BitLengthFunc { signature: Signature, @@ -92,32 +110,6 @@ 
impl ScalarUDFImpl for BitLengthFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_bit_length_doc()) + self.doc() } } - -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_bit_length_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Returns the bit length of a string.", - "bit_length(str)", - ) - .with_sql_example( - r#"```sql -> select bit_length('datafusion'); -+--------------------------------+ -| bit_length(Utf8("datafusion")) | -+--------------------------------+ -| 80 | -+--------------------------------+ -```"#, - ) - .with_standard_argument("str", Some("String")) - .with_related_udf("length") - .with_related_udf("octet_length") - .build() - }) -} diff --git a/datafusion/functions/src/string/btrim.rs b/datafusion/functions/src/string/btrim.rs index ae79bb59f9c7..3ff50deca08a 100644 --- a/datafusion/functions/src/string/btrim.rs +++ b/datafusion/functions/src/string/btrim.rs @@ -20,11 +20,12 @@ use crate::utils::{make_scalar_function, utf8_to_str_type}; use arrow::array::{ArrayRef, OffsetSizeTrait}; use arrow::datatypes::DataType; use datafusion_common::{exec_err, Result}; +use datafusion_doc::DocSection; use datafusion_expr::function::Hint; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; use std::sync::OnceLock; @@ -35,6 +36,28 @@ fn btrim(args: &[ArrayRef]) -> Result { general_trim::(args, TrimType::Both, use_string_view) } +#[user_doc( + doc_section(label = "String Functions"), + description = "Trims the specified trim string from the start and end of a string. 
If no trim string is provided, all whitespace is removed from the start and end of the input string.", + syntax_example = "btrim(str[, trim_str])", + alternative_syntax = "trim(BOTH trim_str FROM str)", + alternative_syntax = "trim(trim_str FROM str)", + sql_example = r#"```sql +> select btrim('__datafusion____', '_'); ++-------------------------------------------+ +| btrim(Utf8("__datafusion____"),Utf8("_")) | ++-------------------------------------------+ +| datafusion | ++-------------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + argument( + name = "trim_str", + description = "String expression to operate on. Can be a constant, column, or function, and any combination of operators. _Default is whitespace characters._" + ), + related_udf(name = "ltrim"), + related_udf(name = "rtrim") +)] #[derive(Debug)] pub struct BTrimFunc { signature: Signature, @@ -106,36 +129,10 @@ impl ScalarUDFImpl for BTrimFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_btrim_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_btrim_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Trims the specified trim string from the start and end of a string. If no trim string is provided, all whitespace is removed from the start and end of the input string.", - "btrim(str[, trim_str])") - .with_sql_example(r#"```sql -> select btrim('__datafusion____', '_'); -+-------------------------------------------+ -| btrim(Utf8("__datafusion____"),Utf8("_")) | -+-------------------------------------------+ -| datafusion | -+-------------------------------------------+ -```"#) - .with_standard_argument("str", Some("String")) - .with_argument("trim_str", "String expression to operate on. Can be a constant, column, or function, and any combination of operators. 
_Default is whitespace characters._") - .with_alternative_syntax("trim(BOTH trim_str FROM str)") - .with_alternative_syntax("trim(trim_str FROM str)") - .with_related_udf("ltrim") - .with_related_udf("rtrim") - .build() - }) -} - #[cfg(test)] mod tests { use arrow::array::{Array, StringArray, StringViewArray}; diff --git a/datafusion/functions/src/string/chr.rs b/datafusion/functions/src/string/chr.rs index 127b02cdf733..1479a3b9a007 100644 --- a/datafusion/functions/src/string/chr.rs +++ b/datafusion/functions/src/string/chr.rs @@ -27,9 +27,10 @@ use arrow::datatypes::DataType::Utf8; use crate::utils::make_scalar_function; use datafusion_common::cast::as_int64_array; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; +use datafusion_doc::DocSection; use datafusion_expr::{ColumnarValue, Documentation, Volatility}; use datafusion_expr::{ScalarUDFImpl, Signature}; +use datafusion_macros::user_doc; /// Returns the character with the given code. chr(0) is disallowed because text data types cannot store that character. 
/// chr(65) = 'A' @@ -60,6 +61,21 @@ pub fn chr(args: &[ArrayRef]) -> Result { Ok(Arc::new(result) as ArrayRef) } +#[user_doc( + doc_section(label = "String Functions"), + description = "Returns the character with the specified ASCII or Unicode code value.", + syntax_example = "chr(expression)", + sql_example = r#"```sql +> select chr(128640); ++--------------------+ +| chr(Int64(128640)) | ++--------------------+ +| 🚀 | ++--------------------+ +```"#, + standard_argument(name = "expression", prefix = "String"), + related_udf(name = "ascii") +)] #[derive(Debug)] pub struct ChrFunc { signature: Signature, @@ -105,31 +121,6 @@ impl ScalarUDFImpl for ChrFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_chr_doc()) + self.doc() } } - -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_chr_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Returns the character with the specified ASCII or Unicode code value.", - "chr(expression)", - ) - .with_sql_example( - r#"```sql -> select chr(128640); -+--------------------+ -| chr(Int64(128640)) | -+--------------------+ -| 🚀 | -+--------------------+ -```"#, - ) - .with_standard_argument("expression", Some("String")) - .with_related_udf("ascii") - .build() - }) -} diff --git a/datafusion/functions/src/string/concat.rs b/datafusion/functions/src/string/concat.rs index 576c891ce467..553dd3924284 100644 --- a/datafusion/functions/src/string/concat.rs +++ b/datafusion/functions/src/string/concat.rs @@ -26,12 +26,32 @@ use crate::strings::{ }; use datafusion_common::cast::{as_string_array, as_string_view_array}; use datafusion_common::{internal_err, plan_err, Result, ScalarValue}; +use datafusion_doc::DocSection; use datafusion_expr::expr::ScalarFunction; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use 
datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; use datafusion_expr::{lit, ColumnarValue, Documentation, Expr, Volatility}; use datafusion_expr::{ScalarUDFImpl, Signature}; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "String Functions"), + description = "Concatenates multiple strings together.", + syntax_example = "concat(str[, ..., str_n])", + sql_example = r#"```sql +> select concat('data', 'f', 'us', 'ion'); ++-------------------------------------------------------+ +| concat(Utf8("data"),Utf8("f"),Utf8("us"),Utf8("ion")) | ++-------------------------------------------------------+ +| datafusion | ++-------------------------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + argument( + name = "str_n", + description = "Subsequent string expressions to concatenate." + ), + related_udf(name = "concat_ws") +)] #[derive(Debug)] pub struct ConcatFunc { signature: Signature, @@ -263,36 +283,10 @@ impl ScalarUDFImpl for ConcatFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_concat_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_concat_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Concatenates multiple strings together.", - "concat(str[, ..., str_n])", - ) - .with_sql_example( - r#"```sql -> select concat('data', 'f', 'us', 'ion'); -+-------------------------------------------------------+ -| concat(Utf8("data"),Utf8("f"),Utf8("us"),Utf8("ion")) | -+-------------------------------------------------------+ -| datafusion | -+-------------------------------------------------------+ -```"#, - ) - .with_standard_argument("str", Some("String")) - .with_argument("str_n", "Subsequent string expressions to concatenate.") - .with_related_udf("concat_ws") - .build() - }) -} - pub fn simplify_concat(args: Vec) -> Result { let mut new_args = 
Vec::with_capacity(args.len()); let mut contiguous_scalar = "".to_string(); diff --git a/datafusion/functions/src/string/concat_ws.rs b/datafusion/functions/src/string/concat_ws.rs index 610c4f0be697..bbe7c6036fec 100644 --- a/datafusion/functions/src/string/concat_ws.rs +++ b/datafusion/functions/src/string/concat_ws.rs @@ -26,12 +26,36 @@ use crate::string::concat_ws; use crate::strings::{ColumnarValueRef, StringArrayBuilder}; use datafusion_common::cast::{as_string_array, as_string_view_array}; use datafusion_common::{exec_err, internal_err, plan_err, Result, ScalarValue}; +use datafusion_doc::DocSection; use datafusion_expr::expr::ScalarFunction; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; use datafusion_expr::{lit, ColumnarValue, Documentation, Expr, Volatility}; use datafusion_expr::{ScalarUDFImpl, Signature}; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "String Functions"), + description = "Concatenates multiple strings together with a specified separator.", + syntax_example = "concat_ws(separator, str[, ..., str_n])", + sql_example = r#"```sql +> select concat_ws('_', 'data', 'fusion'); ++--------------------------------------------------+ +| concat_ws(Utf8("_"),Utf8("data"),Utf8("fusion")) | ++--------------------------------------------------+ +| data_fusion | ++--------------------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + argument( + name = "separator", + description = "Separator to insert between concatenated strings." + ), + argument( + name = "str_n", + description = "Subsequent string expressions to concatenate." 
+ ), + related_udf(name = "concat") +)] #[derive(Debug)] pub struct ConcatWsFunc { signature: Signature, @@ -271,40 +295,10 @@ impl ScalarUDFImpl for ConcatWsFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_concat_ws_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_concat_ws_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Concatenates multiple strings together with a specified separator.", - "concat_ws(separator, str[, ..., str_n])", - ) - .with_sql_example( - r#"```sql -> select concat_ws('_', 'data', 'fusion'); -+--------------------------------------------------+ -| concat_ws(Utf8("_"),Utf8("data"),Utf8("fusion")) | -+--------------------------------------------------+ -| data_fusion | -+--------------------------------------------------+ -```"#, - ) - .with_argument( - "separator", - "Separator to insert between concatenated strings.", - ) - .with_standard_argument("str", Some("String")) - .with_argument("str_n", "Subsequent string expressions to concatenate.") - .with_related_udf("concat") - .build() - }) -} - fn simplify_concat_ws(delimiter: &Expr, args: &[Expr]) -> Result { match delimiter { Expr::Literal( diff --git a/datafusion/functions/src/string/contains.rs b/datafusion/functions/src/string/contains.rs index 3e5c72ac20e9..d47f8326d568 100644 --- a/datafusion/functions/src/string/contains.rs +++ b/datafusion/functions/src/string/contains.rs @@ -23,13 +23,29 @@ use arrow::datatypes::DataType::{Boolean, LargeUtf8, Utf8, Utf8View}; use datafusion_common::exec_err; use datafusion_common::DataFusionError; use datafusion_common::Result; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; use std::sync::{Arc, OnceLock}; +#[user_doc( + 
doc_section(label = "String Functions"), + description = "Return true if search_str is found within string (case-sensitive).", + syntax_example = "contains(str, search_str)", + sql_example = r#"```sql +> select contains('the quick brown fox', 'row'); ++---------------------------------------------------+ +| contains(Utf8("the quick brown fox"),Utf8("row")) | ++---------------------------------------------------+ +| true | ++---------------------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + argument(name = "search_str", description = "The string to search for in str.") +)] #[derive(Debug)] pub struct ContainsFunc { signature: Signature, @@ -75,35 +91,10 @@ impl ScalarUDFImpl for ContainsFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_contains_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_contains_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Return true if search_str is found within string (case-sensitive).", - "contains(str, search_str)", - ) - .with_sql_example( - r#"```sql -> select contains('the quick brown fox', 'row'); -+---------------------------------------------------+ -| contains(Utf8("the quick brown fox"),Utf8("row")) | -+---------------------------------------------------+ -| true | -+---------------------------------------------------+ -```"#, - ) - .with_standard_argument("str", Some("String")) - .with_argument("search_str", "The string to search for in str.") - .build() - }) -} - /// use `arrow::compute::contains` to do the calculation for contains pub fn contains(args: &[ArrayRef]) -> Result { match (args[0].data_type(), args[1].data_type()) { diff --git a/datafusion/functions/src/string/ends_with.rs b/datafusion/functions/src/string/ends_with.rs index fc7fc04f4363..9e6e0ad74b0d 100644 --- a/datafusion/functions/src/string/ends_with.rs +++ 
b/datafusion/functions/src/string/ends_with.rs @@ -23,10 +23,32 @@ use arrow::datatypes::DataType; use crate::utils::make_scalar_function; use datafusion_common::{internal_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; +use datafusion_doc::DocSection; use datafusion_expr::{ColumnarValue, Documentation, Volatility}; use datafusion_expr::{ScalarUDFImpl, Signature}; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "String Functions"), + description = "Tests if a string ends with a substring.", + syntax_example = "ends_with(str, substr)", + sql_example = r#"```sql +> select ends_with('datafusion', 'soin'); ++--------------------------------------------+ +| ends_with(Utf8("datafusion"),Utf8("soin")) | ++--------------------------------------------+ +| false | ++--------------------------------------------+ +> select ends_with('datafusion', 'sion'); ++--------------------------------------------+ +| ends_with(Utf8("datafusion"),Utf8("sion")) | ++--------------------------------------------+ +| true | ++--------------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + argument(name = "substr", description = "Substring to test for.") +)] #[derive(Debug)] pub struct EndsWithFunc { signature: Signature, @@ -79,41 +101,10 @@ impl ScalarUDFImpl for EndsWithFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_ends_with_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_ends_with_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Tests if a string ends with a substring.", - "ends_with(str, substr)", - ) - .with_sql_example( - r#"```sql -> select ends_with('datafusion', 'soin'); -+--------------------------------------------+ -| ends_with(Utf8("datafusion"),Utf8("soin")) | -+--------------------------------------------+ -| false | 
-+--------------------------------------------+ -> select ends_with('datafusion', 'sion'); -+--------------------------------------------+ -| ends_with(Utf8("datafusion"),Utf8("sion")) | -+--------------------------------------------+ -| true | -+--------------------------------------------+ -```"#, - ) - .with_standard_argument("str", Some("String")) - .with_argument("substr", "Substring to test for.") - .build() - }) -} - /// Returns true if string ends with suffix. /// ends_with('alphabet', 'abet') = 't' pub fn ends_with(args: &[ArrayRef]) -> Result { diff --git a/datafusion/functions/src/string/initcap.rs b/datafusion/functions/src/string/initcap.rs index a9090b0a6f43..48a66eed3e44 100644 --- a/datafusion/functions/src/string/initcap.rs +++ b/datafusion/functions/src/string/initcap.rs @@ -24,10 +24,27 @@ use arrow::datatypes::DataType; use crate::utils::{make_scalar_function, utf8_to_str_type}; use datafusion_common::cast::{as_generic_string_array, as_string_view_array}; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; +use datafusion_doc::DocSection; use datafusion_expr::{ColumnarValue, Documentation, Volatility}; use datafusion_expr::{ScalarUDFImpl, Signature}; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "String Functions"), + description = "Capitalizes the first character in each word in the input string. 
Words are delimited by non-alphanumeric characters.", + syntax_example = "initcap(str)", + sql_example = r#"```sql +> select initcap('apache datafusion'); ++------------------------------------+ +| initcap(Utf8("apache datafusion")) | ++------------------------------------+ +| Apache Datafusion | ++------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + related_udf(name = "lower"), + related_udf(name = "upper") +)] #[derive(Debug)] pub struct InitcapFunc { signature: Signature, @@ -80,33 +97,10 @@ impl ScalarUDFImpl for InitcapFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_initcap_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_initcap_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Capitalizes the first character in each word in the input string. Words are delimited by non-alphanumeric characters.", - "initcap(str)") - .with_sql_example(r#"```sql -> select initcap('apache datafusion'); -+------------------------------------+ -| initcap(Utf8("apache datafusion")) | -+------------------------------------+ -| Apache Datafusion | -+------------------------------------+ -```"#) - .with_standard_argument("str", Some("String")) - .with_related_udf("lower") - .with_related_udf("upper") - .build() - }) -} - /// Converts the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters. 
/// initcap('hi THOMAS') = 'Hi Thomas' fn initcap(args: &[ArrayRef]) -> Result { diff --git a/datafusion/functions/src/string/levenshtein.rs b/datafusion/functions/src/string/levenshtein.rs index 51ff428055e4..62afef483c49 100644 --- a/datafusion/functions/src/string/levenshtein.rs +++ b/datafusion/functions/src/string/levenshtein.rs @@ -25,10 +25,32 @@ use crate::utils::{make_scalar_function, utf8_to_int_type}; use datafusion_common::cast::{as_generic_string_array, as_string_view_array}; use datafusion_common::utils::datafusion_strsim; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; +use datafusion_doc::DocSection; use datafusion_expr::{ColumnarValue, Documentation}; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "String Functions"), + description = "Returns the [`Levenshtein distance`](https://en.wikipedia.org/wiki/Levenshtein_distance) between the two given strings.", + syntax_example = "levenshtein(str1, str2)", + sql_example = r#"```sql +> select levenshtein('kitten', 'sitting'); ++---------------------------------------------+ +| levenshtein(Utf8("kitten"),Utf8("sitting")) | ++---------------------------------------------+ +| 3 | ++---------------------------------------------+ +```"#, + argument( + name = "str1", + description = "String expression to compute Levenshtein distance with str2." + ), + argument( + name = "str2", + description = "String expression to compute Levenshtein distance with str1." 
+ ) +)] #[derive(Debug)] pub struct LevenshteinFunc { signature: Signature, @@ -82,32 +104,10 @@ impl ScalarUDFImpl for LevenshteinFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_levenshtein_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_levenshtein_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Returns the [`Levenshtein distance`](https://en.wikipedia.org/wiki/Levenshtein_distance) between the two given strings.", - "levenshtein(str1, str2)") - .with_sql_example(r#"```sql -> select levenshtein('kitten', 'sitting'); -+---------------------------------------------+ -| levenshtein(Utf8("kitten"),Utf8("sitting")) | -+---------------------------------------------+ -| 3 | -+---------------------------------------------+ -```"#) - .with_argument("str1", "String expression to compute Levenshtein distance with str2.") - .with_argument("str2", "String expression to compute Levenshtein distance with str1.") - .build() - }) -} - ///Returns the Levenshtein distance between the two given strings. 
/// LEVENSHTEIN('kitten', 'sitting') = 3 pub fn levenshtein(args: &[ArrayRef]) -> Result { diff --git a/datafusion/functions/src/string/lower.rs b/datafusion/functions/src/string/lower.rs index 67c80cb785b6..73284761ae56 100644 --- a/datafusion/functions/src/string/lower.rs +++ b/datafusion/functions/src/string/lower.rs @@ -22,10 +22,27 @@ use std::sync::OnceLock; use crate::string::common::to_lower; use crate::utils::utf8_to_str_type; use datafusion_common::Result; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; +use datafusion_doc::DocSection; use datafusion_expr::{ColumnarValue, Documentation}; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "String Functions"), + description = "Converts a string to lower-case.", + syntax_example = "lower(str)", + sql_example = r#"```sql +> select lower('Ångström'); ++-------------------------+ +| lower(Utf8("Ångström")) | ++-------------------------+ +| ångström | ++-------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + related_udf(name = "initcap"), + related_udf(name = "upper") +)] #[derive(Debug)] pub struct LowerFunc { signature: Signature, @@ -71,35 +88,10 @@ impl ScalarUDFImpl for LowerFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_lower_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_lower_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Converts a string to lower-case.", - "lower(str)", - ) - .with_sql_example( - r#"```sql -> select lower('Ångström'); -+-------------------------+ -| lower(Utf8("Ångström")) | -+-------------------------+ -| ångström | -+-------------------------+ -```"#, - ) - .with_standard_argument("str", Some("String")) - .with_related_udf("initcap") - .with_related_udf("upper") - .build() - }) -} #[cfg(test)] mod tests { use super::*; diff 
--git a/datafusion/functions/src/string/octet_length.rs b/datafusion/functions/src/string/octet_length.rs index 2dbfa6746d61..fbd70b0532e4 100644 --- a/datafusion/functions/src/string/octet_length.rs +++ b/datafusion/functions/src/string/octet_length.rs @@ -22,10 +22,27 @@ use std::sync::OnceLock; use crate::utils::utf8_to_int_type; use datafusion_common::{exec_err, Result, ScalarValue}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; +use datafusion_doc::DocSection; use datafusion_expr::{ColumnarValue, Documentation, Volatility}; use datafusion_expr::{ScalarUDFImpl, Signature}; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "String Functions"), + description = "Returns the length of a string in bytes.", + syntax_example = "octet_length(str)", + sql_example = r#"```sql +> select octet_length('Ångström'); ++--------------------------------+ +| octet_length(Utf8("Ångström")) | ++--------------------------------+ +| 10 | ++--------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + related_udf(name = "bit_length"), + related_udf(name = "length") +)] #[derive(Debug)] pub struct OctetLengthFunc { signature: Signature, @@ -92,36 +109,10 @@ impl ScalarUDFImpl for OctetLengthFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_octet_length_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_octet_length_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Returns the length of a string in bytes.", - "octet_length(str)", - ) - .with_sql_example( - r#"```sql -> select octet_length('Ångström'); -+--------------------------------+ -| octet_length(Utf8("Ångström")) | -+--------------------------------+ -| 10 | -+--------------------------------+ -```"#, - ) - .with_standard_argument("str", Some("String")) - .with_related_udf("bit_length") - .with_related_udf("length") - .build() - }) -} - 
#[cfg(test)] mod tests { use std::sync::Arc; diff --git a/datafusion/functions/src/string/overlay.rs b/datafusion/functions/src/string/overlay.rs index ced263456802..9569ac64d1bf 100644 --- a/datafusion/functions/src/string/overlay.rs +++ b/datafusion/functions/src/string/overlay.rs @@ -26,10 +26,34 @@ use datafusion_common::cast::{ as_generic_string_array, as_int64_array, as_string_view_array, }; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; +use datafusion_doc::DocSection; use datafusion_expr::{ColumnarValue, Documentation, TypeSignature, Volatility}; use datafusion_expr::{ScalarUDFImpl, Signature}; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "String Functions"), + description = "Returns the string which is replaced by another string from the specified position and specified count length.", + syntax_example = "overlay(str PLACING substr FROM pos [FOR count])", + sql_example = r#"```sql +> select overlay('Txxxxas' placing 'hom' from 2 for 4); ++--------------------------------------------------------+ +| overlay(Utf8("Txxxxas"),Utf8("hom"),Int64(2),Int64(4)) | ++--------------------------------------------------------+ +| Thomas | ++--------------------------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + argument(name = "substr", description = "Substring to replace in str."), + argument( + name = "pos", + description = "The start position to start the replace in str." + ), + argument( + name = "count", + description = "The count of characters to be replaced from start position of str. If not specified, will use substr length instead." 
+ ) +)] #[derive(Debug)] pub struct OverlayFunc { signature: Signature, @@ -92,34 +116,10 @@ impl ScalarUDFImpl for OverlayFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_overlay_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_overlay_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Returns the string which is replaced by another string from the specified position and specified count length.", - "overlay(str PLACING substr FROM pos [FOR count])") - .with_sql_example(r#"```sql -> select overlay('Txxxxas' placing 'hom' from 2 for 4); -+--------------------------------------------------------+ -| overlay(Utf8("Txxxxas"),Utf8("hom"),Int64(2),Int64(4)) | -+--------------------------------------------------------+ -| Thomas | -+--------------------------------------------------------+ -```"#) - .with_standard_argument("str", Some("String")) - .with_argument("substr", "Substring to replace in str.") - .with_argument("pos", "The start position to start the replace in str.") - .with_argument("count", "The count of characters to be replaced from start position of str. If not specified, will use substr length instead.") - .build() - }) -} - macro_rules! 
process_overlay { // For the three-argument case ($string_array:expr, $characters_array:expr, $pos_num:expr) => {{ diff --git a/datafusion/functions/src/string/repeat.rs b/datafusion/functions/src/string/repeat.rs index 4140a9b913ff..2a9ec2cad2c2 100644 --- a/datafusion/functions/src/string/repeat.rs +++ b/datafusion/functions/src/string/repeat.rs @@ -29,10 +29,29 @@ use arrow::datatypes::DataType::{LargeUtf8, Utf8, Utf8View}; use datafusion_common::cast::as_int64_array; use datafusion_common::types::{logical_int64, logical_string}; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; +use datafusion_doc::DocSection; use datafusion_expr::{ColumnarValue, Documentation, Volatility}; use datafusion_expr::{ScalarUDFImpl, Signature}; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "String Functions"), + description = "Returns a string with an input string repeated a specified number.", + syntax_example = "repeat(str, n)", + sql_example = r#"```sql +> select repeat('data', 3); ++-------------------------------+ +| repeat(Utf8("data"),Int64(3)) | ++-------------------------------+ +| datadatadata | ++-------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + argument( + name = "n", + description = "Number of times to repeat the input string." 
+ ) +)] #[derive(Debug)] pub struct RepeatFunc { signature: Signature, @@ -81,35 +100,10 @@ impl ScalarUDFImpl for RepeatFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_repeat_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_repeat_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Returns a string with an input string repeated a specified number.", - "repeat(str, n)", - ) - .with_sql_example( - r#"```sql -> select repeat('data', 3); -+-------------------------------+ -| repeat(Utf8("data"),Int64(3)) | -+-------------------------------+ -| datadatadata | -+-------------------------------+ -```"#, - ) - .with_standard_argument("str", Some("String")) - .with_argument("n", "Number of times to repeat the input string.") - .build() - }) -} - /// Repeats string the specified number of times. /// repeat('Pg', 4) = 'PgPgPgPg' fn repeat(args: &[ArrayRef]) -> Result { diff --git a/datafusion/functions/src/string/replace.rs b/datafusion/functions/src/string/replace.rs index 2439799f96d7..2efb752ad264 100644 --- a/datafusion/functions/src/string/replace.rs +++ b/datafusion/functions/src/string/replace.rs @@ -24,10 +24,29 @@ use arrow::datatypes::DataType; use crate::utils::{make_scalar_function, utf8_to_str_type}; use datafusion_common::cast::{as_generic_string_array, as_string_view_array}; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; +use datafusion_doc::DocSection; use datafusion_expr::{ColumnarValue, Documentation, Volatility}; use datafusion_expr::{ScalarUDFImpl, Signature}; - +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "String Functions"), + description = "Replaces all occurrences of a specified substring in a string with a new substring.", + syntax_example = "replace(str, substr, replacement)", + sql_example = r#"```sql +> select replace('ABabbaBA', 'ab', 'cd'); 
++-------------------------------------------------+ +| replace(Utf8("ABabbaBA"),Utf8("ab"),Utf8("cd")) | ++-------------------------------------------------+ +| ABcdbaBA | ++-------------------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + standard_argument( + name = "substr", + prefix = "Substring expression to replace in the input string. Substring" + ), + standard_argument(name = "replacement", prefix = "Replacement substring") +)] #[derive(Debug)] pub struct ReplaceFunc { signature: Signature, @@ -80,33 +99,10 @@ impl ScalarUDFImpl for ReplaceFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_replace_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_replace_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Replaces all occurrences of a specified substring in a string with a new substring.", - "replace(str, substr, replacement)") - .with_sql_example(r#"```sql -> select replace('ABabbaBA', 'ab', 'cd'); -+-------------------------------------------------+ -| replace(Utf8("ABabbaBA"),Utf8("ab"),Utf8("cd")) | -+-------------------------------------------------+ -| ABcdbaBA | -+-------------------------------------------------+ -```"#) - .with_standard_argument("str", Some("String")) - .with_standard_argument("substr", Some("Substring expression to replace in the input string. 
Substring")) - .with_standard_argument("replacement", Some("Replacement substring")) - .build() - }) -} - fn replace_view(args: &[ArrayRef]) -> Result { let string_array = as_string_view_array(&args[0])?; let from_array = as_string_view_array(&args[1])?; diff --git a/datafusion/functions/src/string/rtrim.rs b/datafusion/functions/src/string/rtrim.rs index b4fe8d432432..e6fb36263e92 100644 --- a/datafusion/functions/src/string/rtrim.rs +++ b/datafusion/functions/src/string/rtrim.rs @@ -23,10 +23,11 @@ use std::sync::OnceLock; use crate::string::common::*; use crate::utils::{make_scalar_function, utf8_to_str_type}; use datafusion_common::{exec_err, Result}; +use datafusion_doc::DocSection; use datafusion_expr::function::Hint; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; use datafusion_expr::{ColumnarValue, Documentation, TypeSignature, Volatility}; use datafusion_expr::{ScalarUDFImpl, Signature}; +use datafusion_macros::user_doc; /// Returns the longest string with trailing characters removed. If the characters are not specified, whitespace is removed. /// rtrim('testxxzx', 'xyz') = 'test' @@ -35,6 +36,33 @@ fn rtrim(args: &[ArrayRef]) -> Result { general_trim::(args, TrimType::Right, use_string_view) } +#[user_doc( + doc_section(label = "String Functions"), + description = "Trims the specified trim string from the end of a string. 
If no trim string is provided, all whitespace is removed from the end of the input string.", + syntax_example = "rtrim(str[, trim_str])", + alternative_syntax = "trim(TRAILING trim_str FROM str)", + sql_example = r#"```sql +> select rtrim(' datafusion '); ++-------------------------------+ +| rtrim(Utf8(" datafusion ")) | ++-------------------------------+ +| datafusion | ++-------------------------------+ +> select rtrim('___datafusion___', '_'); ++-------------------------------------------+ +| rtrim(Utf8("___datafusion___"),Utf8("_")) | ++-------------------------------------------+ +| ___datafusion | ++-------------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + argument( + name = "trim_str", + description = "String expression to trim from the end of the input string. Can be a constant, column, or function, and any combination of arithmetic operators. _Default is whitespace characters._" + ), + related_udf(name = "btrim"), + related_udf(name = "ltrim") +)] #[derive(Debug)] pub struct RtrimFunc { signature: Signature, @@ -100,41 +128,10 @@ impl ScalarUDFImpl for RtrimFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_rtrim_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_rtrim_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Trims the specified trim string from the end of a string. 
If no trim string is provided, all whitespace is removed from the end of the input string.", - "rtrim(str[, trim_str])") - .with_sql_example(r#"```sql -> select rtrim(' datafusion '); -+-------------------------------+ -| rtrim(Utf8(" datafusion ")) | -+-------------------------------+ -| datafusion | -+-------------------------------+ -> select rtrim('___datafusion___', '_'); -+-------------------------------------------+ -| rtrim(Utf8("___datafusion___"),Utf8("_")) | -+-------------------------------------------+ -| ___datafusion | -+-------------------------------------------+ -```"#) - .with_standard_argument("str", Some("String")) - .with_argument("trim_str", "String expression to trim from the end of the input string. Can be a constant, column, or function, and any combination of arithmetic operators. _Default is whitespace characters._") - .with_alternative_syntax("trim(TRAILING trim_str FROM str)") - .with_related_udf("btrim") - .with_related_udf("ltrim") - .build() - }) -} - #[cfg(test)] mod tests { use arrow::array::{Array, StringArray, StringViewArray}; diff --git a/datafusion/functions/src/string/split_part.rs b/datafusion/functions/src/string/split_part.rs index e55325db756d..50553897de35 100644 --- a/datafusion/functions/src/string/split_part.rs +++ b/datafusion/functions/src/string/split_part.rs @@ -25,12 +25,29 @@ use arrow::datatypes::DataType; use datafusion_common::cast::as_int64_array; use datafusion_common::ScalarValue; use datafusion_common::{exec_err, DataFusionError, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; +use datafusion_doc::DocSection; use datafusion_expr::{ColumnarValue, Documentation, TypeSignature, Volatility}; use datafusion_expr::{ScalarUDFImpl, Signature}; +use datafusion_macros::user_doc; use std::any::Any; use std::sync::{Arc, OnceLock}; +#[user_doc( + doc_section(label = "String Functions"), + description = "Splits a string based on a specified delimiter and returns the substring in the specified 
position.", + syntax_example = "split_part(str, delimiter, pos)", + sql_example = r#"```sql +> select split_part('1.2.3.4.5', '.', 3); ++--------------------------------------------------+ +| split_part(Utf8("1.2.3.4.5"),Utf8("."),Int64(3)) | ++--------------------------------------------------+ +| 3 | ++--------------------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + argument(name = "delimiter", description = "String or character to split on."), + argument(name = "pos", description = "Position of the part to return.") +)] #[derive(Debug)] pub struct SplitPartFunc { signature: Signature, @@ -182,33 +199,10 @@ impl ScalarUDFImpl for SplitPartFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_split_part_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_split_part_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Splits a string based on a specified delimiter and returns the substring in the specified position.", - "split_part(str, delimiter, pos)") - .with_sql_example(r#"```sql -> select split_part('1.2.3.4.5', '.', 3); -+--------------------------------------------------+ -| split_part(Utf8("1.2.3.4.5"),Utf8("."),Int64(3)) | -+--------------------------------------------------+ -| 3 | -+--------------------------------------------------+ -```"#) - .with_standard_argument("str", Some("String")) - .with_argument("delimiter", "String or character to split on.") - .with_argument("pos", "Position of the part to return.") - .build() - }) -} - /// impl pub fn split_part_impl<'a, StringArrType, DelimiterArrType, StringArrayLen>( string_array: StringArrType, diff --git a/datafusion/functions/src/string/starts_with.rs b/datafusion/functions/src/string/starts_with.rs index 36dbd8167b4e..156112bda5e6 100644 --- a/datafusion/functions/src/string/starts_with.rs +++ 
b/datafusion/functions/src/string/starts_with.rs @@ -23,9 +23,10 @@ use arrow::datatypes::DataType; use crate::utils::make_scalar_function; use datafusion_common::{internal_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; +use datafusion_doc::DocSection; use datafusion_expr::{ColumnarValue, Documentation}; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use datafusion_macros::user_doc; /// Returns true if string starts with prefix. /// starts_with('alphabet', 'alph') = 't' @@ -34,6 +35,21 @@ pub fn starts_with(args: &[ArrayRef]) -> Result { Ok(Arc::new(result) as ArrayRef) } +#[user_doc( + doc_section(label = "String Functions"), + description = "Tests if a string starts with a substring.", + syntax_example = "starts_with(str, substr)", + sql_example = r#"```sql +> select starts_with('datafusion','data'); ++----------------------------------------------+ +| starts_with(Utf8("datafusion"),Utf8("data")) | ++----------------------------------------------+ +| true | ++----------------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + argument(name = "substr", description = "Substring to test for.") +)] #[derive(Debug)] pub struct StartsWithFunc { signature: Signature, @@ -84,35 +100,10 @@ impl ScalarUDFImpl for StartsWithFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_starts_with_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_starts_with_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Tests if a string starts with a substring.", - "starts_with(str, substr)", - ) - .with_sql_example( - r#"```sql -> select starts_with('datafusion','data'); -+----------------------------------------------+ -| starts_with(Utf8("datafusion"),Utf8("data")) | -+----------------------------------------------+ -| true | -+----------------------------------------------+ -```"#, - 
) - .with_standard_argument("str", Some("String")) - .with_argument("substr", "Substring to test for.") - .build() - }) -} - #[cfg(test)] mod tests { use crate::utils::test::test_function; diff --git a/datafusion/functions/src/string/to_hex.rs b/datafusion/functions/src/string/to_hex.rs index 04907af14ade..529228694457 100644 --- a/datafusion/functions/src/string/to_hex.rs +++ b/datafusion/functions/src/string/to_hex.rs @@ -27,9 +27,10 @@ use crate::utils::make_scalar_function; use datafusion_common::cast::as_primitive_array; use datafusion_common::Result; use datafusion_common::{exec_err, plan_err}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; +use datafusion_doc::DocSection; use datafusion_expr::{ColumnarValue, Documentation}; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use datafusion_macros::user_doc; /// Converts the number to its equivalent hexadecimal representation. /// to_hex(2147483647) = '7fffffff' @@ -59,6 +60,20 @@ where Ok(Arc::new(result) as ArrayRef) } +#[user_doc( + doc_section(label = "String Functions"), + description = "Converts an integer to a hexadecimal string.", + syntax_example = "to_hex(int)", + sql_example = r#"```sql +> select to_hex(12345689); ++-------------------------+ +| to_hex(Int64(12345689)) | ++-------------------------+ +| bc6159 | ++-------------------------+ +```"#, + standard_argument(name = "int", prefix = "Integer") +)] #[derive(Debug)] pub struct ToHexFunc { signature: Signature, @@ -116,34 +131,10 @@ impl ScalarUDFImpl for ToHexFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_to_hex_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_to_hex_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Converts an integer to a hexadecimal string.", - "to_hex(int)", - ) - .with_sql_example( - r#"```sql -> select to_hex(12345689); -+-------------------------+ -| 
to_hex(Int64(12345689)) | -+-------------------------+ -| bc6159 | -+-------------------------+ -```"#, - ) - .with_standard_argument("int", Some("Integer")) - .build() - }) -} - #[cfg(test)] mod tests { use arrow::array::{Int32Array, StringArray}; diff --git a/datafusion/functions/src/string/upper.rs b/datafusion/functions/src/string/upper.rs index 1d05c42394a7..aa14c8b32652 100644 --- a/datafusion/functions/src/string/upper.rs +++ b/datafusion/functions/src/string/upper.rs @@ -19,12 +19,29 @@ use crate::string::common::to_upper; use crate::utils::utf8_to_str_type; use arrow::datatypes::DataType; use datafusion_common::Result; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; +use datafusion_doc::DocSection; use datafusion_expr::{ColumnarValue, Documentation}; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use datafusion_macros::user_doc; use std::any::Any; use std::sync::OnceLock; +#[user_doc( + doc_section(label = "String Functions"), + description = "Converts a string to upper-case.", + syntax_example = "upper(str)", + sql_example = r#"```sql +> select upper('dataFusion'); ++---------------------------+ +| upper(Utf8("dataFusion")) | ++---------------------------+ +| DATAFUSION | ++---------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + related_udf(name = "initcap"), + related_udf(name = "lower") +)] #[derive(Debug)] pub struct UpperFunc { signature: Signature, @@ -70,36 +87,10 @@ impl ScalarUDFImpl for UpperFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_upper_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_upper_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Converts a string to upper-case.", - "upper(str)", - ) - .with_sql_example( - r#"```sql -> select upper('dataFusion'); -+---------------------------+ -| upper(Utf8("dataFusion")) | -+---------------------------+ 
-| DATAFUSION | -+---------------------------+ -```"#, - ) - .with_standard_argument("str", Some("String")) - .with_related_udf("initcap") - .with_related_udf("lower") - .build() - }) -} - #[cfg(test)] mod tests { use super::*; diff --git a/datafusion/functions/src/string/uuid.rs b/datafusion/functions/src/string/uuid.rs index 6048a70bd8c5..75ac49cabd24 100644 --- a/datafusion/functions/src/string/uuid.rs +++ b/datafusion/functions/src/string/uuid.rs @@ -24,10 +24,24 @@ use arrow::datatypes::DataType::Utf8; use uuid::Uuid; use datafusion_common::{internal_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; +use datafusion_doc::DocSection; use datafusion_expr::{ColumnarValue, Documentation, Volatility}; use datafusion_expr::{ScalarUDFImpl, Signature}; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "String Functions"), + description = "Returns [`UUID v4`](https://en.wikipedia.org/wiki/Universally_unique_identifier#Version_4_(random)) string value which is unique per row.", + syntax_example = "uuid()", + sql_example = r#"```sql +> select uuid(); ++--------------------------------------+ +| uuid() | ++--------------------------------------+ +| 6ec17ef8-1934-41cc-8d59-d0c8f9eea1f0 | ++--------------------------------------+ +```"# +)] #[derive(Debug)] pub struct UuidFunc { signature: Signature, @@ -80,26 +94,6 @@ impl ScalarUDFImpl for UuidFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_uuid_doc()) + self.doc() } } - -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_uuid_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Returns [`UUID v4`](https://en.wikipedia.org/wiki/Universally_unique_identifier#Version_4_(random)) string value which is unique per row.", - "uuid()") - .with_sql_example(r#"```sql -> select uuid(); -+--------------------------------------+ -| uuid() | -+--------------------------------------+ -| 
6ec17ef8-1934-41cc-8d59-d0c8f9eea1f0 | -+--------------------------------------+ -```"#) - .build() - }) -} diff --git a/datafusion/functions/src/unicode/character_length.rs b/datafusion/functions/src/unicode/character_length.rs index 726822a8f887..00c82481c76d 100644 --- a/datafusion/functions/src/unicode/character_length.rs +++ b/datafusion/functions/src/unicode/character_length.rs @@ -22,13 +22,30 @@ use arrow::array::{ }; use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type}; use datafusion_common::Result; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; use std::sync::{Arc, OnceLock}; +#[user_doc( + doc_section(label = "String Functions"), + description = "Returns the number of characters in a string.", + syntax_example = "character_length(str)", + sql_example = r#"```sql +> select character_length('Ångström'); ++------------------------------------+ +| character_length(Utf8("Ångström")) | ++------------------------------------+ +| 8 | ++------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + related_udf(name = "bit_length"), + related_udf(name = "octet_length") +)] #[derive(Debug)] pub struct CharacterLengthFunc { signature: Signature, @@ -85,36 +102,10 @@ impl ScalarUDFImpl for CharacterLengthFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_character_length_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_character_length_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Returns the number of characters in a string.", - "character_length(str)", - ) - .with_sql_example( - r#"```sql -> select character_length('Ångström'); -+------------------------------------+ -| 
character_length(Utf8("Ångström")) | -+------------------------------------+ -| 8 | -+------------------------------------+ -```"#, - ) - .with_standard_argument("str", Some("String")) - .with_related_udf("bit_length") - .with_related_udf("octet_length") - .build() - }) -} - /// Returns number of characters in the string. /// character_length('josé') = 4 /// The implementation counts UTF-8 code points to count the number of characters diff --git a/datafusion/functions/src/unicode/find_in_set.rs b/datafusion/functions/src/unicode/find_in_set.rs index 38efb408c1d3..8cd985eec3fa 100644 --- a/datafusion/functions/src/unicode/find_in_set.rs +++ b/datafusion/functions/src/unicode/find_in_set.rs @@ -26,12 +26,31 @@ use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type}; use crate::utils::{make_scalar_function, utf8_to_int_type}; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; +use datafusion_doc::DocSection; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "String Functions"), + description = "Returns a value in the range of 1 to N if the string str is in the string list strlist consisting of N substrings.", + syntax_example = "find_in_set(str, strlist)", + sql_example = r#"```sql +> select find_in_set('b', 'a,b,c,d'); ++----------------------------------------+ +| find_in_set(Utf8("b"),Utf8("a,b,c,d")) | ++----------------------------------------+ +| 2 | ++----------------------------------------+ +```"#, + argument(name = "str", description = "String expression to find in strlist."), + argument( + name = "strlist", + description = "A string list is a string composed of substrings separated by , characters." 
+ ) +)] #[derive(Debug)] pub struct FindInSetFunc { signature: Signature, @@ -85,32 +104,10 @@ impl ScalarUDFImpl for FindInSetFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_find_in_set_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_find_in_set_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Returns a value in the range of 1 to N if the string str is in the string list strlist consisting of N substrings.", - "find_in_set(str, strlist)") - .with_sql_example(r#"```sql -> select find_in_set('b', 'a,b,c,d'); -+----------------------------------------+ -| find_in_set(Utf8("b"),Utf8("a,b,c,d")) | -+----------------------------------------+ -| 2 | -+----------------------------------------+ -```"#) - .with_argument("str", "String expression to find in strlist.") - .with_argument("strlist", "A string list is a string composed of substrings separated by , characters.") - .build() - }) -} - ///Returns a value in the range of 1 to N if the string str is in the string list strlist consisting of N substrings ///A string list is a string composed of substrings separated by , characters. 
fn find_in_set(args: &[ArrayRef]) -> Result { diff --git a/datafusion/functions/src/unicode/left.rs b/datafusion/functions/src/unicode/left.rs index ef2802340b14..e2bb038aafdb 100644 --- a/datafusion/functions/src/unicode/left.rs +++ b/datafusion/functions/src/unicode/left.rs @@ -31,12 +31,29 @@ use datafusion_common::cast::{ }; use datafusion_common::exec_err; use datafusion_common::Result; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; +use datafusion_doc::DocSection; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "String Functions"), + description = "Returns a specified number of characters from the left side of a string.", + syntax_example = "left(str, n)", + sql_example = r#"```sql +> select left('datafusion', 4); ++-----------------------------------+ +| left(Utf8("datafusion"),Int64(4)) | ++-----------------------------------+ +| data | ++-----------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + argument(name = "n", description = "Number of characters to return."), + related_udf(name = "right") +)] #[derive(Debug)] pub struct LeftFunc { signature: Signature, @@ -99,36 +116,10 @@ impl ScalarUDFImpl for LeftFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_left_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_left_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Returns a specified number of characters from the left side of a string.", - "left(str, n)", - ) - .with_sql_example( - r#"```sql -> select left('datafusion', 4); -+-----------------------------------+ -| left(Utf8("datafusion"),Int64(4)) | -+-----------------------------------+ -| data | -+-----------------------------------+ -```"#, - ) - 
.with_standard_argument("str", Some("String")) - .with_argument("n", "Number of characters to return.") - .with_related_udf("right") - .build() - }) -} - /// Returns first n characters in the string, or when n is negative, returns all but last |n| characters. /// left('abcde', 2) = 'ab' /// The implementation uses UTF-8 code points as characters diff --git a/datafusion/functions/src/unicode/lpad.rs b/datafusion/functions/src/unicode/lpad.rs index 6c8a4ec97bb0..63fc1d8c91bd 100644 --- a/datafusion/functions/src/unicode/lpad.rs +++ b/datafusion/functions/src/unicode/lpad.rs @@ -31,12 +31,33 @@ use crate::strings::StringArrayType; use crate::utils::{make_scalar_function, utf8_to_str_type}; use datafusion_common::cast::as_int64_array; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; +use datafusion_doc::DocSection; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "String Functions"), + description = "Pads the left side of a string with another string to a specified string length.", + syntax_example = "lpad(str, n[, padding_str])", + sql_example = r#"```sql +> select lpad('Dolly', 10, 'hello'); ++---------------------------------------------+ +| lpad(Utf8("Dolly"),Int64(10),Utf8("hello")) | ++---------------------------------------------+ +| helloDolly | ++---------------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + argument(name = "n", description = "String length to pad to."), + argument( + name = "padding_str", + description = "Optional string expression to pad with. Can be a constant, column, or function, and any combination of string operators.
_Default is a space._" + ), + related_udf(name = "rpad") +)] #[derive(Debug)] pub struct LPadFunc { signature: Signature, @@ -103,34 +124,10 @@ impl ScalarUDFImpl for LPadFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_lpad_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_lpad_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Pads the left side of a string with another string to a specified string length.", - "lpad(str, n[, padding_str])") - .with_sql_example(r#"```sql -> select lpad('Dolly', 10, 'hello'); -+---------------------------------------------+ -| lpad(Utf8("Dolly"),Int64(10),Utf8("hello")) | -+---------------------------------------------+ -| helloDolly | -+---------------------------------------------+ -```"#) - .with_standard_argument("str", Some("String")) - .with_argument("n", "String length to pad to.") - .with_argument("padding_str", "Optional string expression to pad with. Can be a constant, column, or function, and any combination of string operators. _Default is a space._") - .with_related_udf("rpad") - .build() - }) -} - /// Extends the string to length 'length' by prepending the characters fill (a space by default). /// If the string is already longer than length then it is truncated (on the right). 
/// lpad('hi', 5, 'xy') = 'xyxhi' diff --git a/datafusion/functions/src/unicode/reverse.rs b/datafusion/functions/src/unicode/reverse.rs index 38c1f23cbd5a..ec579e9013b7 100644 --- a/datafusion/functions/src/unicode/reverse.rs +++ b/datafusion/functions/src/unicode/reverse.rs @@ -25,12 +25,27 @@ use arrow::array::{ }; use arrow::datatypes::DataType; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use DataType::{LargeUtf8, Utf8, Utf8View}; +#[user_doc( + doc_section(label = "String Functions"), + description = "Reverses the character order of a string.", + syntax_example = "reverse(str)", + sql_example = r#"```sql +> select reverse('datafusion'); ++-----------------------------+ +| reverse(Utf8("datafusion")) | ++-----------------------------+ +| noisufatad | ++-----------------------------+ +```"#, + standard_argument(name = "str", prefix = "String") +)] #[derive(Debug)] pub struct ReverseFunc { signature: Signature, @@ -87,34 +102,10 @@ impl ScalarUDFImpl for ReverseFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_reverse_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_reverse_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Reverses the character order of a string.", - "reverse(str)", - ) - .with_sql_example( - r#"```sql -> select reverse('datafusion'); -+-----------------------------+ -| reverse(Utf8("datafusion")) | -+-----------------------------+ -| noisufatad | -+-----------------------------+ -```"#, - ) - .with_standard_argument("str", Some("String")) - .build() - }) -} - /// Reverses the order of the characters in the string. 
/// reverse('abcde') = 'edcba' /// The implementation uses UTF-8 code points as characters diff --git a/datafusion/functions/src/unicode/right.rs b/datafusion/functions/src/unicode/right.rs index 1586e23eb8aa..bca7a072181b 100644 --- a/datafusion/functions/src/unicode/right.rs +++ b/datafusion/functions/src/unicode/right.rs @@ -31,12 +31,29 @@ use datafusion_common::cast::{ }; use datafusion_common::exec_err; use datafusion_common::Result; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; +use datafusion_doc::DocSection; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "String Functions"), + description = "Returns a specified number of characters from the right side of a string.", + syntax_example = "right(str, n)", + sql_example = r#"```sql +> select right('datafusion', 6); ++------------------------------------+ +| right(Utf8("datafusion"),Int64(6)) | ++------------------------------------+ +| fusion | ++------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + argument(name = "n", description = "Number of characters to return."), + related_udf(name = "left") +)] #[derive(Debug)] pub struct RightFunc { signature: Signature, @@ -99,36 +116,10 @@ impl ScalarUDFImpl for RightFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_right_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_right_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Returns a specified number of characters from the right side of a string.", - "right(str, n)", - ) - .with_sql_example( - r#"```sql -> select right('datafusion', 6); -+------------------------------------+ -| right(Utf8("datafusion"),Int64(6)) | -+------------------------------------+ -| fusion | 
-+------------------------------------+ -```"#, - ) - .with_standard_argument("str", Some("String")) - .with_argument("n", "Number of characters to return") - .with_related_udf("left") - .build() - }) -} - /// Returns last n characters in the string, or when n is negative, returns all but first |n| characters. /// right('abcde', 2) = 'de' /// The implementation uses UTF-8 code points as characters diff --git a/datafusion/functions/src/unicode/rpad.rs b/datafusion/functions/src/unicode/rpad.rs index 6e6bde3e177c..758ff6a54080 100644 --- a/datafusion/functions/src/unicode/rpad.rs +++ b/datafusion/functions/src/unicode/rpad.rs @@ -25,17 +25,38 @@ use arrow::datatypes::DataType; use datafusion_common::cast::as_int64_array; use datafusion_common::DataFusionError; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; +use datafusion_doc::DocSection; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; use std::any::Any; use std::fmt::Write; use std::sync::{Arc, OnceLock}; use unicode_segmentation::UnicodeSegmentation; use DataType::{LargeUtf8, Utf8, Utf8View}; +#[user_doc( + doc_section(label = "String Functions"), + description = "Pads the right side of a string with another string to a specified string length.", + syntax_example = "rpad(str, n[, padding_str])", + sql_example = r#"```sql +> select rpad('datafusion', 20, '_-'); ++-----------------------------------------------+ +| rpad(Utf8("datafusion"),Int64(20),Utf8("_-")) | ++-----------------------------------------------+ +| datafusion_-_-_-_-_- | ++-----------------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + argument(name = "n", description = "String length to pad to."), + argument( + name = "padding_str", + description = "String expression to pad with. 
Can be a constant, column, or function, and any combination of string operators. _Default is a space._" + ), + related_udf(name = "lpad") +)] #[derive(Debug)] pub struct RPadFunc { signature: Signature, @@ -122,38 +143,10 @@ impl ScalarUDFImpl for RPadFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_rpad_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_rpad_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Pads the right side of a string with another string to a specified string length.", - "rpad(str, n[, padding_str])") - .with_sql_example(r#"```sql -> select rpad('datafusion', 20, '_-'); -+-----------------------------------------------+ -| rpad(Utf8("datafusion"),Int64(20),Utf8("_-")) | -+-----------------------------------------------+ -| datafusion_-_-_-_-_- | -+-----------------------------------------------+ -```"#) - .with_standard_argument( - "str", - Some("String"), - ) - .with_argument("n", "String length to pad to.") - .with_argument("padding_str", - "String expression to pad with. Can be a constant, column, or function, and any combination of string operators.
_Default is a space._") - .with_related_udf("lpad") - .build() - }) -} - pub fn rpad( args: &[ArrayRef], ) -> Result { diff --git a/datafusion/functions/src/unicode/strpos.rs b/datafusion/functions/src/unicode/strpos.rs index 5d1986e44c92..e0ffb2d49271 100644 --- a/datafusion/functions/src/unicode/strpos.rs +++ b/datafusion/functions/src/unicode/strpos.rs @@ -23,11 +23,28 @@ use crate::utils::{make_scalar_function, utf8_to_int_type}; use arrow::array::{ArrayRef, ArrowPrimitiveType, AsArray, PrimitiveArray}; use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type}; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "String Functions"), + description = "Returns the starting position of a specified substring in a string. Positions begin at 1. 
If the substring does not exist in the string, the function returns 0.", + syntax_example = "strpos(str, substr)", + alternative_syntax = "position(substr in origstr)", + sql_example = r#"```sql +> select strpos('datafusion', 'fus'); ++----------------------------------------+ +| strpos(Utf8("datafusion"),Utf8("fus")) | ++----------------------------------------+ +| 5 | ++----------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + argument(name = "substr", description = "Substring expression to search for.") +)] #[derive(Debug)] pub struct StrposFunc { signature: Signature, @@ -79,33 +96,10 @@ impl ScalarUDFImpl for StrposFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_strpos_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_strpos_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Returns the starting position of a specified substring in a string. Positions begin at 1. 
If the substring does not exist in the string, the function returns 0.", - "strpos(str, substr)") - .with_sql_example(r#"```sql -> select strpos('datafusion', 'fus'); -+----------------------------------------+ -| strpos(Utf8("datafusion"),Utf8("fus")) | -+----------------------------------------+ -| 5 | -+----------------------------------------+ -```"#) - .with_standard_argument("str", Some("String")) - .with_argument("substr", "Substring expression to search for.") - .with_alternative_syntax("position(substr in origstr)") - .build() - }) -} - fn strpos(args: &[ArrayRef]) -> Result { match (args[0].data_type(), args[1].data_type()) { (DataType::Utf8, DataType::Utf8) => { diff --git a/datafusion/functions/src/unicode/substr.rs b/datafusion/functions/src/unicode/substr.rs index 0ac050c707bf..0811c2d972c9 100644 --- a/datafusion/functions/src/unicode/substr.rs +++ b/datafusion/functions/src/unicode/substr.rs @@ -28,11 +28,35 @@ use arrow::datatypes::DataType; use arrow_buffer::{NullBufferBuilder, ScalarBuffer}; use datafusion_common::cast::as_int64_array; use datafusion_common::{exec_err, plan_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; +use datafusion_doc::DocSection; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; - +use datafusion_macros::user_doc; + +#[user_doc( + doc_section(label = "String Functions"), + description = "Extracts a substring of a specified number of characters from a specific starting position in a string.", + syntax_example = "substr(str, start_pos[, length])", + alternative_syntax = "substring(str from start_pos for length)", + sql_example = r#"```sql +> select substr('datafusion', 5, 3); ++----------------------------------------------+ +| substr(Utf8("datafusion"),Int64(5),Int64(3)) | ++----------------------------------------------+ +| fus | ++----------------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + argument( 
+ name = "start_pos", + description = "Character position to start the substring at. The first character in the string has a position of 1." + ), + argument( + name = "length", + description = "Number of characters to extract. If not specified, returns the rest of the string after the start position." + ) +)] #[derive(Debug)] pub struct SubstrFunc { signature: Signature, @@ -154,34 +178,10 @@ impl ScalarUDFImpl for SubstrFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_substr_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_substr_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - "Extracts a substring of a specified number of characters from a specific starting position in a string.", - "substr(str, start_pos[, length])") - .with_sql_example(r#"```sql -> select substr('datafusion', 5, 3); -+----------------------------------------------+ -| substr(Utf8("datafusion"),Int64(5),Int64(3)) | -+----------------------------------------------+ -| fus | -+----------------------------------------------+ -```"#) - .with_standard_argument("str", Some("String")) - .with_argument("start_pos", "Character position to start the substring at. The first character in the string has a position of 1.") - .with_argument("length", "Number of characters to extract. If not specified, returns the rest of the string after the start position.") - .with_alternative_syntax("substring(str from start_pos for length)") - .build() - }) -} - /// Extracts the substring of string starting at the start'th character, and extending for count characters if that is specified. (Same as substring(string from start for count).) 
/// substr('alphabet', 3) = 'phabet' /// substr('alphabet', 3, 2) = 'ph' diff --git a/datafusion/functions/src/unicode/substrindex.rs b/datafusion/functions/src/unicode/substrindex.rs index 825666b0455e..5eeee5961138 100644 --- a/datafusion/functions/src/unicode/substrindex.rs +++ b/datafusion/functions/src/unicode/substrindex.rs @@ -26,12 +26,43 @@ use arrow::datatypes::{DataType, Int32Type, Int64Type}; use crate::utils::{make_scalar_function, utf8_to_str_type}; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; +use datafusion_doc::DocSection; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "String Functions"), + description = r#"Returns the substring from str before count occurrences of the delimiter delim. +If count is positive, everything to the left of the final delimiter (counting from the left) is returned. +If count is negative, everything to the right of the final delimiter (counting from the right) is returned."#, + syntax_example = "substr_index(str, delim, count)", + sql_example = r#"```sql +> select substr_index('www.apache.org', '.', 1); ++---------------------------------------------------------+ +| substr_index(Utf8("www.apache.org"),Utf8("."),Int64(1)) | ++---------------------------------------------------------+ +| www | ++---------------------------------------------------------+ +> select substr_index('www.apache.org', '.', -1); ++----------------------------------------------------------+ +| substr_index(Utf8("www.apache.org"),Utf8("."),Int64(-1)) | ++----------------------------------------------------------+ +| org | ++----------------------------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + argument( + name = "delim", + description = "The string to find in str to split str."
+ ), + argument( + name = "count", + description = "The number of times to search for the delimiter. Can be either a positive or negative number." + ) +)] #[derive(Debug)] pub struct SubstrIndexFunc { signature: Signature, @@ -91,41 +122,10 @@ impl ScalarUDFImpl for SubstrIndexFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_substr_index_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_substr_index_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder( - DOC_SECTION_STRING, - r#"Returns the substring from str before count occurrences of the delimiter delim. -If count is positive, everything to the left of the final delimiter (counting from the left) is returned. -If count is negative, everything to the right of the final delimiter (counting from the right) is returned."#, - "substr_index(str, delim, count)") - .with_sql_example(r#"```sql -> select substr_index('www.apache.org', '.', 1); -+---------------------------------------------------------+ -| substr_index(Utf8("www.apache.org"),Utf8("."),Int64(1)) | -+---------------------------------------------------------+ -| www | -+---------------------------------------------------------+ -> select substr_index('www.apache.org', '.', -1); -+----------------------------------------------------------+ -| substr_index(Utf8("www.apache.org"),Utf8("."),Int64(-1)) | -+----------------------------------------------------------+ -| org | -+----------------------------------------------------------+ -```"#) - .with_standard_argument("str", Some("String")) - .with_argument("delim", "The string to find in str to split str.") - .with_argument("count", "The number of times to search for the delimiter. Can be either a positive or negative number.") - .build() - }) -} - /// Returns the substring from str before count occurrences of the delimiter delim. 
If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. /// SUBSTRING_INDEX('www.apache.org', '.', 1) = www /// SUBSTRING_INDEX('www.apache.org', '.', 2) = www.apache diff --git a/datafusion/functions/src/unicode/translate.rs b/datafusion/functions/src/unicode/translate.rs index 780603777133..160949f6adf4 100644 --- a/datafusion/functions/src/unicode/translate.rs +++ b/datafusion/functions/src/unicode/translate.rs @@ -27,12 +27,32 @@ use unicode_segmentation::UnicodeSegmentation; use crate::utils::{make_scalar_function, utf8_to_str_type}; use datafusion_common::{exec_err, Result}; -use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING; +use datafusion_doc::DocSection; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::user_doc; +#[user_doc( + doc_section(label = "String Functions"), + description = "Translates characters in a string to specified translation characters.", + syntax_example = "translate(str, chars, translation)", + sql_example = r#"```sql +> select translate('twice', 'wic', 'her'); ++--------------------------------------------------+ +| translate(Utf8("twice"),Utf8("wic"),Utf8("her")) | ++--------------------------------------------------+ +| there | ++--------------------------------------------------+ +```"#, + standard_argument(name = "str", prefix = "String"), + argument(name = "chars", description = "Characters to translate."), + argument( + name = "translation", + description = "Translation characters. Translation characters replace only characters at the same position in the **chars** string."
+ ) +)] #[derive(Debug)] pub struct TranslateFunc { signature: Signature, @@ -85,30 +105,10 @@ impl ScalarUDFImpl for TranslateFunc { } fn documentation(&self) -> Option<&Documentation> { - Some(get_translate_doc()) + self.doc() } } -static DOCUMENTATION: OnceLock = OnceLock::new(); - -fn get_translate_doc() -> &'static Documentation { - DOCUMENTATION.get_or_init(|| { - Documentation::builder(DOC_SECTION_STRING,"Translates characters in a string to specified translation characters.","translate(str, chars, translation)") - .with_sql_example(r#"```sql -> select translate('twice', 'wic', 'her'); -+--------------------------------------------------+ -| translate(Utf8("twice"),Utf8("wic"),Utf8("her")) | -+--------------------------------------------------+ -| there | -+--------------------------------------------------+ -```"#) - .with_standard_argument("str", Some("String")) - .with_argument("chars", "Characters to translate.") - .with_argument("translation", "Translation characters. Translation characters replace only characters at the same position in the **chars** string.") - .build() - }) -} - fn invoke_translate(args: &[ArrayRef]) -> Result { match args[0].data_type() { DataType::Utf8View => {