From 363ad571b726d4cb4d089639741d41e856ee6af2 Mon Sep 17 00:00:00 2001 From: Xiangjin Date: Tue, 4 Apr 2023 23:17:03 +0800 Subject: [PATCH 1/4] fix(expr): align `position` syntax with SQL standard --- .../planner_test/tests/testdata/expr.yaml | 2 +- src/frontend/src/binder/expr/function.rs | 1 - src/frontend/src/binder/expr/mod.rs | 10 ++++++++++ src/meta/src/manager/catalog/utils.rs | 5 +++++ src/sqlparser/src/ast/mod.rs | 8 ++++++++ src/sqlparser/src/parser.rs | 20 +++++++++++++++++++ src/tests/regress/data/sql/strings.sql | 6 +++--- 7 files changed, 47 insertions(+), 5 deletions(-) diff --git a/src/frontend/planner_test/tests/testdata/expr.yaml b/src/frontend/planner_test/tests/testdata/expr.yaml index 6bce012ea8136..dbd59acf5b601 100644 --- a/src/frontend/planner_test/tests/testdata/expr.yaml +++ b/src/frontend/planner_test/tests/testdata/expr.yaml @@ -170,7 +170,7 @@ batch_plan: | BatchValues { rows: [[4:Int32]] } - sql: | - select position(replace('1','1','2'),'123') where '12' like '%1'; + select position('123' in replace('1','1','2')) where '12' like '%1'; batch_plan: | BatchValues { rows: [] } - name: case searched form with else diff --git a/src/frontend/src/binder/expr/function.rs b/src/frontend/src/binder/expr/function.rs index 5fc756400de93..e88b6c5491966 100644 --- a/src/frontend/src/binder/expr/function.rs +++ b/src/frontend/src/binder/expr/function.rs @@ -373,7 +373,6 @@ impl Binder { ("trim", raw_call(ExprType::Trim)), ("replace", raw_call(ExprType::Replace)), ("overlay", raw_call(ExprType::Overlay)), - ("position", raw_call(ExprType::Position)), ("ltrim", raw_call(ExprType::Ltrim)), ("rtrim", raw_call(ExprType::Rtrim)), ("md5", raw_call(ExprType::Md5)), diff --git a/src/frontend/src/binder/expr/mod.rs b/src/frontend/src/binder/expr/mod.rs index deab200c41234..16c3ba8e8a7a6 100644 --- a/src/frontend/src/binder/expr/mod.rs +++ b/src/frontend/src/binder/expr/mod.rs @@ -117,6 +117,7 @@ impl Binder { substring_from, substring_for, } => self.bind_substring(*expr, substring_from, substring_for), + Expr::Position { substring, string } => self.bind_position(*substring, *string), Expr::Overlay { expr, new_substring, @@ -281,6 +282,15 @@ impl Binder { FunctionCall::new(ExprType::Substr, args).map(|f| f.into()) } + fn bind_position(&mut self, substring: Expr, string: Expr) -> Result { + let args = vec![ + // Note that we reverse the order of arguments. + self.bind_expr(string)?, + self.bind_expr(substring)?, + ]; + FunctionCall::new(ExprType::Position, args).map(Into::into) + } + fn bind_overlay( &mut self, expr: Expr, diff --git a/src/meta/src/manager/catalog/utils.rs b/src/meta/src/manager/catalog/utils.rs index 0d19dd3a21542..59044bfca1908 100644 --- a/src/meta/src/manager/catalog/utils.rs +++ b/src/meta/src/manager/catalog/utils.rs @@ -251,6 +251,11 @@ impl QueryRewriter<'_> { | Expr::Nested(expr) | Expr::ArrayIndex { obj: expr, .. } => self.visit_expr(expr), + Expr::Position { substring, string } => { + self.visit_expr(substring); + self.visit_expr(string); + } + Expr::InSubquery { expr, subquery, .. } => { self.visit_expr(expr); self.visit_query(subquery); diff --git a/src/sqlparser/src/ast/mod.rs b/src/sqlparser/src/ast/mod.rs index b1f5619e4a874..2f32ea718f59a 100644 --- a/src/sqlparser/src/ast/mod.rs +++ b/src/sqlparser/src/ast/mod.rs @@ -338,6 +338,11 @@ pub enum Expr { substring_from: Option>, substring_for: Option>, }, + /// POSITION( IN ) + Position { + substring: Box, + string: Box, + }, /// OVERLAY( PLACING FROM [ FOR ]) Overlay { expr: Box, @@ -550,6 +555,9 @@ impl fmt::Display for Expr { write!(f, ")") } + Expr::Position { substring, string } => { + write!(f, "POSITION({} IN {})", substring, string) + } Expr::Overlay { expr, new_substring, diff --git a/src/sqlparser/src/parser.rs b/src/sqlparser/src/parser.rs index ea0799e463015..dcbf54bb91a0d 100644 --- a/src/sqlparser/src/parser.rs +++ b/src/sqlparser/src/parser.rs @@ -496,6 +496,7 @@ impl Parser { Keyword::EXISTS => self.parse_exists_expr(), Keyword::EXTRACT => self.parse_extract_expr(), Keyword::SUBSTRING => self.parse_substring_expr(), + Keyword::POSITION => self.parse_position_expr(), Keyword::OVERLAY => self.parse_overlay_expr(), Keyword::TRIM => self.parse_trim_expr(), Keyword::INTERVAL => self.parse_literal_interval(), @@ -928,6 +929,25 @@ impl Parser { }) } + /// POSITION( IN ) + pub fn parse_position_expr(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + + // Logically `parse_expr`, but limited to those with precedence higher than `BETWEEN`/`IN`, + // to avoid conflict with general IN operator, for example `position(a IN (b) IN (c))`. + // https://github.com/postgres/postgres/blob/REL_15_2/src/backend/parser/gram.y#L16012 + let substring = self.parse_subexpr(Precedence::Between)?; + self.expect_keyword(Keyword::IN)?; + let string = self.parse_subexpr(Precedence::Between)?; + + self.expect_token(&Token::RParen)?; + + Ok(Expr::Position { + substring: Box::new(substring), + string: Box::new(string), + }) + } + /// OVERLAY( PLACING FROM [ FOR ]) pub fn parse_overlay_expr(&mut self) -> Result { self.expect_token(&Token::LParen)?; diff --git a/src/tests/regress/data/sql/strings.sql b/src/tests/regress/data/sql/strings.sql index 1486ffb2649ea..c6a8b7d510cae 100644 --- a/src/tests/regress/data/sql/strings.sql +++ b/src/tests/regress/data/sql/strings.sql @@ -264,9 +264,9 @@ SELECT SUBSTRING('string' FROM -10 FOR -2147483646) AS "error"; \pset null '' -- E021-11 position expression ---@ SELECT POSITION('4' IN '1234567890') = '4' AS "4"; ---@ ---@ SELECT POSITION('5' IN '1234567890') = '5' AS "5"; +SELECT POSITION('4' IN '1234567890') = '4' AS "4"; + +SELECT POSITION('5' IN '1234567890') = '5' AS "5"; -- T312 character overlay function SELECT OVERLAY('abcdef' PLACING '45' FROM 4) AS "abc45f"; From b2bdc8df2623d29c12260257c9acc7b205ac59cd Mon Sep 17 00:00:00 2001 From: Xiangjin Date: Tue, 4 Apr 2023 23:45:56 +0800 Subject: [PATCH 2/4] consolidate and fix `strpos` --- proto/expr.proto | 2 +- src/expr/src/vector_op/position.rs | 43 +++++++++++++++++++----- src/expr/src/vector_op/string.rs | 30 ----------------- src/frontend/src/binder/expr/function.rs | 2 +- 4 files changed, 37 insertions(+), 40 deletions(-) diff --git a/proto/expr.proto b/proto/expr.proto index 4d27757b73803..82b965d8ebca6 100644 --- a/proto/expr.proto +++ b/proto/expr.proto @@ -102,7 +102,7 @@ message ExprNode { LPAD = 238; RPAD = 239; REVERSE = 240; - STRPOS = 241; + STRPOS = 241 [deprecated=true]; // duplicated with POSITION TO_ASCII = 242; TO_HEX = 243; QUOTE_IDENT = 244; diff --git a/src/expr/src/vector_op/position.rs b/src/expr/src/vector_op/position.rs index 9049e743a4f70..1d434aa2170a9 100644 --- a/src/expr/src/vector_op/position.rs +++ b/src/expr/src/vector_op/position.rs @@ -14,16 +14,43 @@ use risingwave_expr_macro::function; -use crate::Result; - -/// Location of specified substring +/// Returns the index of the first occurrence of the specified substring in the input string, +/// or zero if the substring is not present. +/// +/// # Example +/// +/// ```slt +/// query I +/// select position('om' in 'Thomas'); +/// ---- +/// 3 +/// +/// query I +/// select strpos('hello, world', 'lo'); +/// ---- +/// 4 +/// +/// query I +/// select strpos('high', 'ig'); +/// ---- +/// 2 +/// +/// query I +/// select strpos('abc', 'def'); +/// ---- +/// 0 /// -/// Note: According to pgsql, position will return 0 rather -1 when substr is not in the target str +/// query I +/// select strpos('床前明月光', '月光'); +/// ---- +/// 4 +/// ``` +#[function("strpos(varchar, varchar) -> int32")] // backward compatibility with old proto #[function("position(varchar, varchar) -> int32")] -pub fn position(str: &str, sub_str: &str) -> Result { +pub fn position(str: &str, sub_str: &str) -> i32 { match str.find(sub_str) { - Some(byte_idx) => Ok((str[..byte_idx].chars().count() + 1) as i32), - None => Ok(0), + Some(byte_idx) => (str[..byte_idx].chars().count() + 1) as i32, + None => 0, } } @@ -41,7 +68,7 @@ mod tests { ]; for (str, sub_str, expected) in cases { - assert_eq!(position(str, sub_str).unwrap(), expected) + assert_eq!(position(str, sub_str), expected) } } } diff --git a/src/expr/src/vector_op/string.rs b/src/expr/src/vector_op/string.rs index f5db6a3deddd1..524699bf6a043 100644 --- a/src/expr/src/vector_op/string.rs +++ b/src/expr/src/vector_op/string.rs @@ -235,36 +235,6 @@ pub fn reverse(s: &str, writer: &mut dyn Write) { } } -/// Returns the index of the first occurrence of the specified substring in the input string, -/// or zero if the substring is not present. -/// -/// # Example -/// -/// ```slt -/// query T -/// select strpos('hello, world', 'lo'); -/// ---- -/// 4 -/// -/// query T -/// select strpos('high', 'ig'); -/// ---- -/// 2 -/// -/// query T -/// select strpos('abc', 'def'); -/// ---- -/// 0 -/// ``` -#[function("strpos(varchar, varchar) -> int32")] -pub fn strpos(s: &str, substr: &str) -> i32 { - if let Some(pos) = s.find(substr) { - pos as i32 + 1 - } else { - 0 - } -} - /// Converts the input string to ASCII by dropping accents, assuming that the input string /// is encoded in one of the supported encodings (Latin1, Latin2, Latin9, or WIN1250). /// diff --git a/src/frontend/src/binder/expr/function.rs b/src/frontend/src/binder/expr/function.rs index e88b6c5491966..0c7f98d02b822 100644 --- a/src/frontend/src/binder/expr/function.rs +++ b/src/frontend/src/binder/expr/function.rs @@ -397,7 +397,7 @@ impl Binder { ("lpad", raw_call(ExprType::Lpad)), ("rpad", raw_call(ExprType::Rpad)), ("reverse", raw_call(ExprType::Reverse)), - ("strpos", raw_call(ExprType::Strpos)), + ("strpos", raw_call(ExprType::Position)), ("to_ascii", raw_call(ExprType::ToAscii)), ("to_hex", raw_call(ExprType::ToHex)), ("quote_ident", raw_call(ExprType::QuoteIdent)), From 031c2e1345f6d389411aef677facf897ccc16159 Mon Sep 17 00:00:00 2001 From: Xiangjin Date: Wed, 5 Apr 2023 00:07:09 +0800 Subject: [PATCH 3/4] buf format --- proto/expr.proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/proto/expr.proto b/proto/expr.proto index 82b965d8ebca6..c6bc5fb1ea4b1 100644 --- a/proto/expr.proto +++ b/proto/expr.proto @@ -102,7 +102,7 @@ message ExprNode { LPAD = 238; RPAD = 239; REVERSE = 240; - STRPOS = 241 [deprecated=true]; // duplicated with POSITION + STRPOS = 241 [deprecated = true]; // duplicated with POSITION TO_ASCII = 242; TO_HEX = 243; QUOTE_IDENT = 244; From 07e4410ab345abe3d76fc3d814302fea749cec0c Mon Sep 17 00:00:00 2001 From: Xiangjin Date: Wed, 5 Apr 2023 00:57:21 +0800 Subject: [PATCH 4/4] sqlsmith should generate strpos (or position-with-in) --- src/tests/sqlsmith/src/sql_gen/expr.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/sqlsmith/src/sql_gen/expr.rs b/src/tests/sqlsmith/src/sql_gen/expr.rs index 1447f6ea2ced5..52df98f17aa1b 100644 --- a/src/tests/sqlsmith/src/sql_gen/expr.rs +++ b/src/tests/sqlsmith/src/sql_gen/expr.rs @@ -550,7 +550,7 @@ fn make_general_expr(func: ExprType, exprs: Vec) -> Option { E::IsNotTrue => Some(Expr::IsNotTrue(Box::new(exprs[0].clone()))), E::IsFalse => Some(Expr::IsFalse(Box::new(exprs[0].clone()))), E::IsNotFalse => Some(Expr::IsNotFalse(Box::new(exprs[0].clone()))), - E::Position => Some(Expr::Function(make_simple_func("position", &exprs))), + E::Position => Some(Expr::Function(make_simple_func("strpos", &exprs))), E::RoundDigit => Some(Expr::Function(make_simple_func("round", &exprs))), E::Pow => Some(Expr::Function(make_simple_func("pow", &exprs))), E::Repeat => Some(Expr::Function(make_simple_func("repeat", &exprs))),