From ec5ad655f4153d93fc6eaf8e808c7990a649134b Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Tue, 6 Sep 2022 14:23:02 +0800 Subject: [PATCH 1/7] perf(expr): optimize to_char function with constant template Signed-off-by: TennyZhuang --- src/expr/src/expr/build_expr_from_prost.rs | 25 +++++++-- src/expr/src/expr/expr_to_char_const_tmpl.rs | 54 ++++++++++++++++++++ src/expr/src/expr/mod.rs | 1 + src/expr/src/vector_op/to_char.rs | 2 +- 4 files changed, 77 insertions(+), 5 deletions(-) create mode 100644 src/expr/src/expr/expr_to_char_const_tmpl.rs diff --git a/src/expr/src/expr/build_expr_from_prost.rs b/src/expr/src/expr/build_expr_from_prost.rs index a2a63d9ced4fc..6374c8cbcb00c 100644 --- a/src/expr/src/expr/build_expr_from_prost.rs +++ b/src/expr/src/expr/build_expr_from_prost.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use risingwave_common::types::DataType; +use risingwave_common::types::{DataType, ScalarImpl}; use risingwave_pb::expr::expr_node::RexNode; use risingwave_pb::expr::ExprNode; @@ -27,10 +27,12 @@ use crate::expr::expr_ternary_bytes::{ new_overlay_exp, new_replace_expr, new_split_part_expr, new_substr_start_end, new_translate_expr, }; +use crate::expr::expr_to_char_const_tmpl::{ExprToCharConstTmpl, ExprToCharConstTmplContext}; use crate::expr::expr_unary::{ new_length_default, new_ltrim_expr, new_rtrim_expr, new_trim_expr, new_unary_expr, }; -use crate::expr::{build_from_prost as expr_build_from_prost, BoxedExpression}; +use crate::expr::{build_from_prost as expr_build_from_prost, BoxedExpression, Expression}; +use crate::vector_op::to_char::compile_pattern_to_chrono; use crate::{bail, ensure, Result}; fn get_children_and_return_type(prost: &ExprNode) -> Result<(Vec, DataType)> { @@ -211,9 +213,24 @@ pub fn build_to_char_expr(prost: &ExprNode) -> Result { let (children, ret_type) = get_children_and_return_type(prost)?; ensure!(children.len() == 2); let data_expr = expr_build_from_prost(&children[0])?; + let tmpl_node = &children[1]; // TODO: Optimize for const template. - let tmpl_expr = expr_build_from_prost(&children[1])?; - Ok(new_to_char(data_expr, tmpl_expr, ret_type)) + if let RexNode::Constant(tmpl_value) = tmpl_node.get_rex_node().unwrap() + && let Ok(tmpl) = ScalarImpl::from_proto_bytes(tmpl_value.get_body(), tmpl_node.get_return_type().unwrap()) + { + let tmpl = tmpl.as_utf8(); + let pattern = compile_pattern_to_chrono(tmpl); + + Ok(ExprToCharConstTmpl { + ctx: ExprToCharConstTmplContext { + chrono_tmpl: pattern, + }, + child: data_expr, + }.boxed()) + } else { + let tmpl_expr = expr_build_from_prost(&children[1])?; + Ok(new_to_char(data_expr, tmpl_expr, ret_type)) + } } #[cfg(test)] diff --git a/src/expr/src/expr/expr_to_char_const_tmpl.rs b/src/expr/src/expr/expr_to_char_const_tmpl.rs new file mode 100644 index 0000000000000..f63b2da3d5758 --- /dev/null +++ b/src/expr/src/expr/expr_to_char_const_tmpl.rs @@ -0,0 +1,54 @@ + +use risingwave_common::array::Array; +use risingwave_common::types::{Datum, ScalarImpl}; +use risingwave_common::{types::DataType, array::{NaiveDateTimeArray, Utf8ArrayBuilder, ArrayBuilder}}; + +use super::Expression; +use itertools::Itertools; +use std::sync::Arc; + +#[derive(Debug)] +pub(crate) struct ExprToCharConstTmplContext { + pub(crate) chrono_tmpl: String, +} + +#[derive(Debug)] +pub(crate) struct ExprToCharConstTmpl { + pub(crate) child: Box, + pub(crate) ctx: ExprToCharConstTmplContext, +} + +impl Expression for ExprToCharConstTmpl { + fn return_type(&self) -> DataType { + DataType::Varchar + } + + fn eval(&self, input: &risingwave_common::array::DataChunk) -> crate::Result { + let data_arr = self.child.eval_checked(input)?; + let data_arr: &NaiveDateTimeArray = data_arr.as_ref().into(); + let mut output = Utf8ArrayBuilder::new(input.capacity()); + for (data, vis) in data_arr.iter().zip_eq(input.vis().iter()) { + if !vis { + output.append_null()?; + } else { + if let Some(data) = data { + let res = data.0.format(&self.ctx.chrono_tmpl).to_string(); + output.append(Some(res.as_str()))?; + } else { + output.append_null()?; + } + } + } + + Ok(Arc::new((output.finish()?).into())) + } + + fn eval_row(&self, input: &risingwave_common::array::Row) -> crate::Result { + let data = self.child.eval_row(input)?; + Ok(if let Some(ScalarImpl::NaiveDateTime(data)) = data { + Some(data.0.format(&self.ctx.chrono_tmpl).to_string().into()) + } else { + None + }) + } +} diff --git a/src/expr/src/expr/mod.rs b/src/expr/src/expr/mod.rs index efb26c7022cf6..e62ab8e7cf096 100644 --- a/src/expr/src/expr/mod.rs +++ b/src/expr/src/expr/mod.rs @@ -31,6 +31,7 @@ mod expr_nested_construct; mod expr_quaternary_bytes; mod expr_regexp; mod expr_ternary_bytes; +mod expr_to_char_const_tmpl; pub mod expr_unary; mod expr_vnode; mod template; diff --git a/src/expr/src/vector_op/to_char.rs b/src/expr/src/vector_op/to_char.rs index 994ed05b974b2..ead71317ebf0b 100644 --- a/src/expr/src/vector_op/to_char.rs +++ b/src/expr/src/vector_op/to_char.rs @@ -20,7 +20,7 @@ use crate::Result; /// Compile the pg pattern to chrono pattern. // TODO: Chrono can not fully support the pg format, so consider using other implementations later. -fn compile_pattern_to_chrono(tmpl: &str) -> String { +pub fn compile_pattern_to_chrono(tmpl: &str) -> String { // https://www.postgresql.org/docs/current/functions-formatting.html static PG_PATTERNS: &[&str] = &[ "HH24", "HH12", "HH", "MI", "SS", "YYYY", "YY", "IYYY", "IY", "MM", "DD", From 0ac6bbab8c80b1c86d91b15ba16a3bbb77077247 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Tue, 6 Sep 2022 14:26:28 +0800 Subject: [PATCH 2/7] fmt Signed-off-by: TennyZhuang --- src/expr/src/expr/expr_to_char_const_tmpl.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/expr/src/expr/expr_to_char_const_tmpl.rs b/src/expr/src/expr/expr_to_char_const_tmpl.rs index f63b2da3d5758..a14b645651d07 100644 --- a/src/expr/src/expr/expr_to_char_const_tmpl.rs +++ b/src/expr/src/expr/expr_to_char_const_tmpl.rs @@ -1,11 +1,10 @@ +use std::sync::Arc; -use risingwave_common::array::Array; -use risingwave_common::types::{Datum, ScalarImpl}; -use risingwave_common::{types::DataType, array::{NaiveDateTimeArray, Utf8ArrayBuilder, ArrayBuilder}}; +use itertools::Itertools; +use risingwave_common::array::{Array, ArrayBuilder, NaiveDateTimeArray, Utf8ArrayBuilder}; +use risingwave_common::types::{DataType, Datum, ScalarImpl}; use super::Expression; -use itertools::Itertools; -use std::sync::Arc; #[derive(Debug)] pub(crate) struct ExprToCharConstTmplContext { @@ -23,7 +22,10 @@ impl Expression for ExprToCharConstTmpl { DataType::Varchar } - fn eval(&self, input: &risingwave_common::array::DataChunk) -> crate::Result { + fn eval( + &self, + input: &risingwave_common::array::DataChunk, + ) -> crate::Result { let data_arr = self.child.eval_checked(input)?; let data_arr: &NaiveDateTimeArray = data_arr.as_ref().into(); let mut output = Utf8ArrayBuilder::new(input.capacity()); From 620093eaa3c297168ebc60766ab1cd8562cccbe9 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Tue, 6 Sep 2022 14:27:12 +0800 Subject: [PATCH 3/7] Update src/expr/src/expr/expr_to_char_const_tmpl.rs Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- src/expr/src/expr/expr_to_char_const_tmpl.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/expr/src/expr/expr_to_char_const_tmpl.rs b/src/expr/src/expr/expr_to_char_const_tmpl.rs index a14b645651d07..8a7178c24e545 100644 --- a/src/expr/src/expr/expr_to_char_const_tmpl.rs +++ b/src/expr/src/expr/expr_to_char_const_tmpl.rs @@ -1,4 +1,18 @@ use std::sync::Arc; +// Copyright 2022 Singularity Data +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use itertools::Itertools; use risingwave_common::array::{Array, ArrayBuilder, NaiveDateTimeArray, Utf8ArrayBuilder}; From 704d74b0ff0a2f0e37c46a2d96deb529f1175072 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Tue, 6 Sep 2022 14:34:45 +0800 Subject: [PATCH 4/7] Update src/expr/src/expr/expr_to_char_const_tmpl.rs Co-authored-by: Tao Wu --- src/expr/src/expr/expr_to_char_const_tmpl.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/expr/src/expr/expr_to_char_const_tmpl.rs b/src/expr/src/expr/expr_to_char_const_tmpl.rs index 8a7178c24e545..30cf7365975f2 100644 --- a/src/expr/src/expr/expr_to_char_const_tmpl.rs +++ b/src/expr/src/expr/expr_to_char_const_tmpl.rs @@ -1,4 +1,3 @@ -use std::sync::Arc; // Copyright 2022 Singularity Data // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,6 +12,8 @@ use std::sync::Arc; // See the License for the specific language governing permissions and // limitations under the License. +use std::sync::Arc; + use itertools::Itertools; use risingwave_common::array::{Array, ArrayBuilder, NaiveDateTimeArray, Utf8ArrayBuilder}; From 86f3f6a06b0e45c80b08540f83bb8e1801380340 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Tue, 6 Sep 2022 14:34:57 +0800 Subject: [PATCH 5/7] Update src/expr/src/expr/build_expr_from_prost.rs Co-authored-by: Bugen Zhao --- src/expr/src/expr/build_expr_from_prost.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/expr/src/expr/build_expr_from_prost.rs b/src/expr/src/expr/build_expr_from_prost.rs index 6374c8cbcb00c..01c8cc5f3874a 100644 --- a/src/expr/src/expr/build_expr_from_prost.rs +++ b/src/expr/src/expr/build_expr_from_prost.rs @@ -214,7 +214,6 @@ pub fn build_to_char_expr(prost: &ExprNode) -> Result { ensure!(children.len() == 2); let data_expr = expr_build_from_prost(&children[0])?; let tmpl_node = &children[1]; - // TODO: Optimize for const template. if let RexNode::Constant(tmpl_value) = tmpl_node.get_rex_node().unwrap() && let Ok(tmpl) = ScalarImpl::from_proto_bytes(tmpl_value.get_body(), tmpl_node.get_return_type().unwrap()) { From fc8b258c4a096ece38492c476764c1c113859a6a Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Tue, 6 Sep 2022 14:39:58 +0800 Subject: [PATCH 6/7] fmt Signed-off-by: TennyZhuang --- src/expr/src/expr/expr_to_char_const_tmpl.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/expr/src/expr/expr_to_char_const_tmpl.rs b/src/expr/src/expr/expr_to_char_const_tmpl.rs index 30cf7365975f2..8822092d2c67d 100644 --- a/src/expr/src/expr/expr_to_char_const_tmpl.rs +++ b/src/expr/src/expr/expr_to_char_const_tmpl.rs @@ -14,7 +14,6 @@ use std::sync::Arc; - use itertools::Itertools; use risingwave_common::array::{Array, ArrayBuilder, NaiveDateTimeArray, Utf8ArrayBuilder}; use risingwave_common::types::{DataType, Datum, ScalarImpl}; From c06327dee512c411957b7c73741af18394ac1706 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Tue, 6 Sep 2022 14:50:58 +0800 Subject: [PATCH 7/7] clippy Signed-off-by: TennyZhuang --- src/expr/src/expr/expr_to_char_const_tmpl.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/expr/src/expr/expr_to_char_const_tmpl.rs b/src/expr/src/expr/expr_to_char_const_tmpl.rs index 8822092d2c67d..a4d514f2c816e 100644 --- a/src/expr/src/expr/expr_to_char_const_tmpl.rs +++ b/src/expr/src/expr/expr_to_char_const_tmpl.rs @@ -46,13 +46,11 @@ impl Expression for ExprToCharConstTmpl { for (data, vis) in data_arr.iter().zip_eq(input.vis().iter()) { if !vis { output.append_null()?; + } else if let Some(data) = data { + let res = data.0.format(&self.ctx.chrono_tmpl).to_string(); + output.append(Some(res.as_str()))?; } else { - if let Some(data) = data { - let res = data.0.format(&self.ctx.chrono_tmpl).to_string(); - output.append(Some(res.as_str()))?; - } else { - output.append_null()?; - } + output.append_null()?; } }