Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(expr): align position syntax and strpos semantic with SQL standard #9000

Merged
merged 5 commits into from
Apr 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion proto/expr.proto
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ message ExprNode {
LPAD = 238;
RPAD = 239;
REVERSE = 240;
STRPOS = 241;
STRPOS = 241 [deprecated = true]; // duplicated with POSITION
TO_ASCII = 242;
TO_HEX = 243;
QUOTE_IDENT = 244;
Expand Down
43 changes: 35 additions & 8 deletions src/expr/src/vector_op/position.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,43 @@

use risingwave_expr_macro::function;

use crate::Result;

/// Location of specified substring
/// Returns the index of the first occurrence of the specified substring in the input string,
/// or zero if the substring is not present.
///
/// # Example
///
/// ```slt
/// query I
/// select position('om' in 'Thomas');
/// ----
/// 3
///
/// query I
/// select strpos('hello, world', 'lo');
/// ----
/// 4
///
/// query I
/// select strpos('high', 'ig');
/// ----
/// 2
///
/// query I
/// select strpos('abc', 'def');
/// ----
/// 0
///
/// Note: According to pgsql, position will return 0 rather -1 when substr is not in the target str
/// query I
/// select strpos('床前明月光', '月光');
/// ----
/// 4
/// ```
#[function("strpos(varchar, varchar) -> int32")] // backward compatibility with old proto
#[function("position(varchar, varchar) -> int32")]
pub fn position(str: &str, sub_str: &str) -> Result<i32> {
pub fn position(str: &str, sub_str: &str) -> i32 {
match str.find(sub_str) {
Some(byte_idx) => Ok((str[..byte_idx].chars().count() + 1) as i32),
None => Ok(0),
Some(byte_idx) => (str[..byte_idx].chars().count() + 1) as i32,
None => 0,
}
}

Expand All @@ -41,7 +68,7 @@ mod tests {
];

for (str, sub_str, expected) in cases {
assert_eq!(position(str, sub_str).unwrap(), expected)
assert_eq!(position(str, sub_str), expected)
}
}
}
30 changes: 0 additions & 30 deletions src/expr/src/vector_op/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -235,36 +235,6 @@ pub fn reverse(s: &str, writer: &mut dyn Write) {
}
}

/// Returns the index of the first occurrence of the specified substring in the input string,
/// or zero if the substring is not present.
///
/// # Example
///
/// ```slt
/// query T
/// select strpos('hello, world', 'lo');
/// ----
/// 4
///
/// query T
/// select strpos('high', 'ig');
/// ----
/// 2
///
/// query T
/// select strpos('abc', 'def');
/// ----
/// 0
/// ```
#[function("strpos(varchar, varchar) -> int32")]
pub fn strpos(s: &str, substr: &str) -> i32 {
if let Some(pos) = s.find(substr) {
pos as i32 + 1
} else {
0
}
}

/// Converts the input string to ASCII by dropping accents, assuming that the input string
/// is encoded in one of the supported encodings (Latin1, Latin2, Latin9, or WIN1250).
///
Expand Down
2 changes: 1 addition & 1 deletion src/frontend/planner_test/tests/testdata/expr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@
batch_plan: |
BatchValues { rows: [[4:Int32]] }
- sql: |
select position(replace('1','1','2'),'123') where '12' like '%1';
select position('123' in replace('1','1','2')) where '12' like '%1';
batch_plan: |
BatchValues { rows: [] }
- name: case searched form with else
Expand Down
3 changes: 1 addition & 2 deletions src/frontend/src/binder/expr/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,6 @@ impl Binder {
("trim", raw_call(ExprType::Trim)),
("replace", raw_call(ExprType::Replace)),
("overlay", raw_call(ExprType::Overlay)),
("position", raw_call(ExprType::Position)),
("ltrim", raw_call(ExprType::Ltrim)),
("rtrim", raw_call(ExprType::Rtrim)),
("md5", raw_call(ExprType::Md5)),
Expand All @@ -398,7 +397,7 @@ impl Binder {
("lpad", raw_call(ExprType::Lpad)),
("rpad", raw_call(ExprType::Rpad)),
("reverse", raw_call(ExprType::Reverse)),
("strpos", raw_call(ExprType::Strpos)),
("strpos", raw_call(ExprType::Position)),
("to_ascii", raw_call(ExprType::ToAscii)),
("to_hex", raw_call(ExprType::ToHex)),
("quote_ident", raw_call(ExprType::QuoteIdent)),
Expand Down
10 changes: 10 additions & 0 deletions src/frontend/src/binder/expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ impl Binder {
substring_from,
substring_for,
} => self.bind_substring(*expr, substring_from, substring_for),
Expr::Position { substring, string } => self.bind_position(*substring, *string),
Expr::Overlay {
expr,
new_substring,
Expand Down Expand Up @@ -281,6 +282,15 @@ impl Binder {
FunctionCall::new(ExprType::Substr, args).map(|f| f.into())
}

fn bind_position(&mut self, substring: Expr, string: Expr) -> Result<ExprImpl> {
let args = vec![
// Note that we reverse the order of arguments.
self.bind_expr(string)?,
self.bind_expr(substring)?,
];
FunctionCall::new(ExprType::Position, args).map(Into::into)
}

fn bind_overlay(
&mut self,
expr: Expr,
Expand Down
5 changes: 5 additions & 0 deletions src/meta/src/manager/catalog/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,11 @@ impl QueryRewriter<'_> {
| Expr::Nested(expr)
| Expr::ArrayIndex { obj: expr, .. } => self.visit_expr(expr),

Expr::Position { substring, string } => {
self.visit_expr(substring);
self.visit_expr(string);
}

Expr::InSubquery { expr, subquery, .. } => {
self.visit_expr(expr);
self.visit_query(subquery);
Expand Down
8 changes: 8 additions & 0 deletions src/sqlparser/src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,11 @@ pub enum Expr {
substring_from: Option<Box<Expr>>,
substring_for: Option<Box<Expr>>,
},
/// POSITION(<expr> IN <expr>)
Position {
substring: Box<Expr>,
string: Box<Expr>,
},
/// OVERLAY(<expr> PLACING <expr> FROM <expr> [ FOR <expr> ])
Overlay {
expr: Box<Expr>,
Expand Down Expand Up @@ -550,6 +555,9 @@ impl fmt::Display for Expr {

write!(f, ")")
}
Expr::Position { substring, string } => {
write!(f, "POSITION({} IN {})", substring, string)
}
Expr::Overlay {
expr,
new_substring,
Expand Down
20 changes: 20 additions & 0 deletions src/sqlparser/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,7 @@ impl Parser {
Keyword::EXISTS => self.parse_exists_expr(),
Keyword::EXTRACT => self.parse_extract_expr(),
Keyword::SUBSTRING => self.parse_substring_expr(),
Keyword::POSITION => self.parse_position_expr(),
Keyword::OVERLAY => self.parse_overlay_expr(),
Keyword::TRIM => self.parse_trim_expr(),
Keyword::INTERVAL => self.parse_literal_interval(),
Expand Down Expand Up @@ -928,6 +929,25 @@ impl Parser {
})
}

/// POSITION(<expr> IN <expr>)
pub fn parse_position_expr(&mut self) -> Result<Expr, ParserError> {
self.expect_token(&Token::LParen)?;

// Logically `parse_expr`, but limited to those with precedence higher than `BETWEEN`/`IN`,
// to avoid conflict with general IN operator, for example `position(a IN (b) IN (c))`.
// https://github.com/postgres/postgres/blob/REL_15_2/src/backend/parser/gram.y#L16012
let substring = self.parse_subexpr(Precedence::Between)?;
self.expect_keyword(Keyword::IN)?;
let string = self.parse_subexpr(Precedence::Between)?;

self.expect_token(&Token::RParen)?;

Ok(Expr::Position {
substring: Box::new(substring),
string: Box::new(string),
})
}

/// OVERLAY(<expr> PLACING <expr> FROM <expr> [ FOR <expr> ])
pub fn parse_overlay_expr(&mut self) -> Result<Expr, ParserError> {
self.expect_token(&Token::LParen)?;
Expand Down
6 changes: 3 additions & 3 deletions src/tests/regress/data/sql/strings.sql
Original file line number Diff line number Diff line change
Expand Up @@ -264,9 +264,9 @@ SELECT SUBSTRING('string' FROM -10 FOR -2147483646) AS "error";
\pset null ''

-- E021-11 position expression
--@ SELECT POSITION('4' IN '1234567890') = '4' AS "4";
--@
--@ SELECT POSITION('5' IN '1234567890') = '5' AS "5";
SELECT POSITION('4' IN '1234567890') = '4' AS "4";

SELECT POSITION('5' IN '1234567890') = '5' AS "5";

-- T312 character overlay function
SELECT OVERLAY('abcdef' PLACING '45' FROM 4) AS "abc45f";
Expand Down
2 changes: 1 addition & 1 deletion src/tests/sqlsmith/src/sql_gen/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -550,7 +550,7 @@ fn make_general_expr(func: ExprType, exprs: Vec<Expr>) -> Option<Expr> {
E::IsNotTrue => Some(Expr::IsNotTrue(Box::new(exprs[0].clone()))),
E::IsFalse => Some(Expr::IsFalse(Box::new(exprs[0].clone()))),
E::IsNotFalse => Some(Expr::IsNotFalse(Box::new(exprs[0].clone()))),
E::Position => Some(Expr::Function(make_simple_func("position", &exprs))),
E::Position => Some(Expr::Function(make_simple_func("strpos", &exprs))),
E::RoundDigit => Some(Expr::Function(make_simple_func("round", &exprs))),
E::Pow => Some(Expr::Function(make_simple_func("pow", &exprs))),
E::Repeat => Some(Expr::Function(make_simple_func("repeat", &exprs))),
Expand Down