Skip to content

Commit

Permalink
perf(expr): optimize to_char function with constant template (#5130)
Browse files Browse the repository at this point in the history
* perf(expr): optimize to_char function with constant template

Signed-off-by: TennyZhuang <[email protected]>

* fmt

Signed-off-by: TennyZhuang <[email protected]>

* Update src/expr/src/expr/expr_to_char_const_tmpl.rs

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

* Update src/expr/src/expr/expr_to_char_const_tmpl.rs

Co-authored-by: Tao Wu <[email protected]>

* Update src/expr/src/expr/build_expr_from_prost.rs

Co-authored-by: Bugen Zhao <[email protected]>

* fmt

Signed-off-by: TennyZhuang <[email protected]>

* clippy

Signed-off-by: TennyZhuang <[email protected]>

Signed-off-by: TennyZhuang <[email protected]>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Tao Wu <[email protected]>
Co-authored-by: Bugen Zhao <[email protected]>
  • Loading branch information
4 people authored Sep 6, 2022
1 parent d0a377d commit 5fd012d
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 6 deletions.
26 changes: 21 additions & 5 deletions src/expr/src/expr/build_expr_from_prost.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use risingwave_common::types::DataType;
use risingwave_common::types::{DataType, ScalarImpl};
use risingwave_pb::expr::expr_node::RexNode;
use risingwave_pb::expr::ExprNode;

Expand All @@ -27,10 +27,12 @@ use crate::expr::expr_ternary_bytes::{
new_overlay_exp, new_replace_expr, new_split_part_expr, new_substr_start_end,
new_translate_expr,
};
use crate::expr::expr_to_char_const_tmpl::{ExprToCharConstTmpl, ExprToCharConstTmplContext};
use crate::expr::expr_unary::{
new_length_default, new_ltrim_expr, new_rtrim_expr, new_trim_expr, new_unary_expr,
};
use crate::expr::{build_from_prost as expr_build_from_prost, BoxedExpression};
use crate::expr::{build_from_prost as expr_build_from_prost, BoxedExpression, Expression};
use crate::vector_op::to_char::compile_pattern_to_chrono;
use crate::{bail, ensure, Result};

fn get_children_and_return_type(prost: &ExprNode) -> Result<(Vec<ExprNode>, DataType)> {
Expand Down Expand Up @@ -211,9 +213,23 @@ pub fn build_to_char_expr(prost: &ExprNode) -> Result<BoxedExpression> {
let (children, ret_type) = get_children_and_return_type(prost)?;
ensure!(children.len() == 2);
let data_expr = expr_build_from_prost(&children[0])?;
// TODO: Optimize for const template.
let tmpl_expr = expr_build_from_prost(&children[1])?;
Ok(new_to_char(data_expr, tmpl_expr, ret_type))
let tmpl_node = &children[1];
if let RexNode::Constant(tmpl_value) = tmpl_node.get_rex_node().unwrap()
&& let Ok(tmpl) = ScalarImpl::from_proto_bytes(tmpl_value.get_body(), tmpl_node.get_return_type().unwrap())
{
let tmpl = tmpl.as_utf8();
let pattern = compile_pattern_to_chrono(tmpl);

Ok(ExprToCharConstTmpl {
ctx: ExprToCharConstTmplContext {
chrono_tmpl: pattern,
},
child: data_expr,
}.boxed())
} else {
let tmpl_expr = expr_build_from_prost(&children[1])?;
Ok(new_to_char(data_expr, tmpl_expr, ret_type))
}
}

#[cfg(test)]
Expand Down
68 changes: 68 additions & 0 deletions src/expr/src/expr/expr_to_char_const_tmpl.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// Copyright 2022 Singularity Data
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use itertools::Itertools;
use risingwave_common::array::{Array, ArrayBuilder, NaiveDateTimeArray, Utf8ArrayBuilder};
use risingwave_common::types::{DataType, Datum, ScalarImpl};

use super::Expression;

/// State computed once when the expression is built and reused for every evaluated row.
#[derive(Debug)]
pub(crate) struct ExprToCharConstTmplContext {
/// Format string handed to chrono's `format` on each evaluation. The expression builder
/// fills it with the output of `compile_pattern_to_chrono` applied to the constant template.
pub(crate) chrono_tmpl: String,
}

/// `to_char` expression specialised for a constant template: the template is compiled once
/// at build time and stored in `ctx`, so evaluation only has to format the child's values.
#[derive(Debug)]
pub(crate) struct ExprToCharConstTmpl {
/// Expression producing the values to format; its output is read as a
/// `NaiveDateTimeArray` (or a `ScalarImpl::NaiveDateTime` in row mode).
pub(crate) child: Box<dyn Expression>,
/// Pre-compiled formatting state shared by all evaluations.
pub(crate) ctx: ExprToCharConstTmplContext,
}

impl Expression for ExprToCharConstTmpl {
    /// The formatted output is always a varchar.
    fn return_type(&self) -> DataType {
        DataType::Varchar
    }

    /// Formats every visible, non-null value produced by the child expression with the
    /// pre-compiled chrono template. Invisible rows and null inputs become nulls in the
    /// output array.
    ///
    /// NOTE(review): `to_string()` panics if the underlying `Display` implementation
    /// reports an error; confirm that `compile_pattern_to_chrono` can only emit specifiers
    /// chrono is able to render.
    fn eval(
        &self,
        input: &risingwave_common::array::DataChunk,
    ) -> crate::Result<risingwave_common::array::ArrayRef> {
        let child_output = self.child.eval_checked(input)?;
        let timestamps: &NaiveDateTimeArray = child_output.as_ref().into();
        let mut builder = Utf8ArrayBuilder::new(input.capacity());

        for (value, visible) in timestamps.iter().zip_eq(input.vis().iter()) {
            match value {
                // Only visible, non-null inputs are formatted.
                Some(ts) if visible => {
                    let formatted = ts.0.format(&self.ctx.chrono_tmpl).to_string();
                    builder.append(Some(formatted.as_str()))?;
                }
                // Hidden rows and null inputs both map to a null output.
                _ => {
                    builder.append_null()?;
                }
            }
        }

        let array = builder.finish()?;
        Ok(Arc::new(array.into()))
    }

    /// Row-at-a-time variant of [`Self::eval`]: yields the formatted string when the child
    /// evaluates to a `NaiveDateTime` scalar, and `None` for any other result.
    fn eval_row(&self, input: &risingwave_common::array::Row) -> crate::Result<Datum> {
        let formatted: Datum = match self.child.eval_row(input)? {
            Some(ScalarImpl::NaiveDateTime(ts)) => {
                Some(ts.0.format(&self.ctx.chrono_tmpl).to_string().into())
            }
            _ => None,
        };
        Ok(formatted)
    }
}
1 change: 1 addition & 0 deletions src/expr/src/expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ mod expr_nested_construct;
mod expr_quaternary_bytes;
mod expr_regexp;
mod expr_ternary_bytes;
mod expr_to_char_const_tmpl;
pub mod expr_unary;
mod expr_vnode;
mod template;
Expand Down
2 changes: 1 addition & 1 deletion src/expr/src/vector_op/to_char.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use crate::Result;

/// Compile the pg pattern to chrono pattern.
// TODO: Chrono cannot fully support the pg format, so consider switching to another implementation later.
fn compile_pattern_to_chrono(tmpl: &str) -> String {
pub fn compile_pattern_to_chrono(tmpl: &str) -> String {
// https://www.postgresql.org/docs/current/functions-formatting.html
static PG_PATTERNS: &[&str] = &[
"HH24", "HH12", "HH", "MI", "SS", "YYYY", "YY", "IYYY", "IY", "MM", "DD",
Expand Down

0 comments on commit 5fd012d

Please sign in to comment.