Skip to content

Commit

Permalink
fix(expr): align position syntax and strpos semantic with SQL sta…
Browse files Browse the repository at this point in the history
  • Loading branch information
xiangjinwu authored Apr 6, 2023
1 parent f1b7942 commit efe56ec
Show file tree
Hide file tree
Showing 11 changed files with 85 additions and 46 deletions.
2 changes: 1 addition & 1 deletion proto/expr.proto
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ message ExprNode {
LPAD = 238;
RPAD = 239;
REVERSE = 240;
STRPOS = 241;
STRPOS = 241 [deprecated = true]; // duplicated with POSITION
TO_ASCII = 242;
TO_HEX = 243;
QUOTE_IDENT = 244;
Expand Down
43 changes: 35 additions & 8 deletions src/expr/src/vector_op/position.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,43 @@

use risingwave_expr_macro::function;

use crate::Result;

/// Location of specified substring
/// Returns the index of the first occurrence of the specified substring in the input string,
/// or zero if the substring is not present.
///
/// # Example
///
/// ```slt
/// query I
/// select position('om' in 'Thomas');
/// ----
/// 3
///
/// query I
/// select strpos('hello, world', 'lo');
/// ----
/// 4
///
/// query I
/// select strpos('high', 'ig');
/// ----
/// 2
///
/// query I
/// select strpos('abc', 'def');
/// ----
/// 0
///
/// Note: According to pgsql, position will return 0 rather -1 when substr is not in the target str
/// query I
/// select strpos('床前明月光', '月光');
/// ----
/// 4
/// ```
#[function("strpos(varchar, varchar) -> int32")] // backward compatibility with old proto
#[function("position(varchar, varchar) -> int32")]
pub fn position(str: &str, sub_str: &str) -> Result<i32> {
pub fn position(str: &str, sub_str: &str) -> i32 {
match str.find(sub_str) {
Some(byte_idx) => Ok((str[..byte_idx].chars().count() + 1) as i32),
None => Ok(0),
Some(byte_idx) => (str[..byte_idx].chars().count() + 1) as i32,
None => 0,
}
}

Expand All @@ -41,7 +68,7 @@ mod tests {
];

for (str, sub_str, expected) in cases {
assert_eq!(position(str, sub_str).unwrap(), expected)
assert_eq!(position(str, sub_str), expected)
}
}
}
30 changes: 0 additions & 30 deletions src/expr/src/vector_op/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -235,36 +235,6 @@ pub fn reverse(s: &str, writer: &mut dyn Write) {
}
}

/// Returns the index of the first occurrence of the specified substring in the input string,
/// or zero if the substring is not present.
///
/// # Example
///
/// ```slt
/// query T
/// select strpos('hello, world', 'lo');
/// ----
/// 4
///
/// query T
/// select strpos('high', 'ig');
/// ----
/// 2
///
/// query T
/// select strpos('abc', 'def');
/// ----
/// 0
/// ```
#[function("strpos(varchar, varchar) -> int32")]
pub fn strpos(s: &str, substr: &str) -> i32 {
if let Some(pos) = s.find(substr) {
pos as i32 + 1
} else {
0
}
}

/// Converts the input string to ASCII by dropping accents, assuming that the input string
/// is encoded in one of the supported encodings (Latin1, Latin2, Latin9, or WIN1250).
///
Expand Down
2 changes: 1 addition & 1 deletion src/frontend/planner_test/tests/testdata/expr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@
batch_plan: |
BatchValues { rows: [[4:Int32]] }
- sql: |
select position(replace('1','1','2'),'123') where '12' like '%1';
select position('123' in replace('1','1','2')) where '12' like '%1';
batch_plan: |
BatchValues { rows: [] }
- name: case searched form with else
Expand Down
3 changes: 1 addition & 2 deletions src/frontend/src/binder/expr/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,6 @@ impl Binder {
("trim", raw_call(ExprType::Trim)),
("replace", raw_call(ExprType::Replace)),
("overlay", raw_call(ExprType::Overlay)),
("position", raw_call(ExprType::Position)),
("ltrim", raw_call(ExprType::Ltrim)),
("rtrim", raw_call(ExprType::Rtrim)),
("md5", raw_call(ExprType::Md5)),
Expand All @@ -398,7 +397,7 @@ impl Binder {
("lpad", raw_call(ExprType::Lpad)),
("rpad", raw_call(ExprType::Rpad)),
("reverse", raw_call(ExprType::Reverse)),
("strpos", raw_call(ExprType::Strpos)),
("strpos", raw_call(ExprType::Position)),
("to_ascii", raw_call(ExprType::ToAscii)),
("to_hex", raw_call(ExprType::ToHex)),
("quote_ident", raw_call(ExprType::QuoteIdent)),
Expand Down
10 changes: 10 additions & 0 deletions src/frontend/src/binder/expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ impl Binder {
substring_from,
substring_for,
} => self.bind_substring(*expr, substring_from, substring_for),
Expr::Position { substring, string } => self.bind_position(*substring, *string),
Expr::Overlay {
expr,
new_substring,
Expand Down Expand Up @@ -281,6 +282,15 @@ impl Binder {
FunctionCall::new(ExprType::Substr, args).map(|f| f.into())
}

fn bind_position(&mut self, substring: Expr, string: Expr) -> Result<ExprImpl> {
let args = vec![
// Note that we reverse the order of arguments.
self.bind_expr(string)?,
self.bind_expr(substring)?,
];
FunctionCall::new(ExprType::Position, args).map(Into::into)
}

fn bind_overlay(
&mut self,
expr: Expr,
Expand Down
5 changes: 5 additions & 0 deletions src/meta/src/manager/catalog/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,11 @@ impl QueryRewriter<'_> {
| Expr::Nested(expr)
| Expr::ArrayIndex { obj: expr, .. } => self.visit_expr(expr),

Expr::Position { substring, string } => {
self.visit_expr(substring);
self.visit_expr(string);
}

Expr::InSubquery { expr, subquery, .. } => {
self.visit_expr(expr);
self.visit_query(subquery);
Expand Down
8 changes: 8 additions & 0 deletions src/sqlparser/src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,11 @@ pub enum Expr {
substring_from: Option<Box<Expr>>,
substring_for: Option<Box<Expr>>,
},
/// POSITION(<expr> IN <expr>)
Position {
substring: Box<Expr>,
string: Box<Expr>,
},
/// OVERLAY(<expr> PLACING <expr> FROM <expr> [ FOR <expr> ])
Overlay {
expr: Box<Expr>,
Expand Down Expand Up @@ -550,6 +555,9 @@ impl fmt::Display for Expr {

write!(f, ")")
}
Expr::Position { substring, string } => {
write!(f, "POSITION({} IN {})", substring, string)
}
Expr::Overlay {
expr,
new_substring,
Expand Down
20 changes: 20 additions & 0 deletions src/sqlparser/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,7 @@ impl Parser {
Keyword::EXISTS => self.parse_exists_expr(),
Keyword::EXTRACT => self.parse_extract_expr(),
Keyword::SUBSTRING => self.parse_substring_expr(),
Keyword::POSITION => self.parse_position_expr(),
Keyword::OVERLAY => self.parse_overlay_expr(),
Keyword::TRIM => self.parse_trim_expr(),
Keyword::INTERVAL => self.parse_literal_interval(),
Expand Down Expand Up @@ -928,6 +929,25 @@ impl Parser {
})
}

/// POSITION(<expr> IN <expr>)
pub fn parse_position_expr(&mut self) -> Result<Expr, ParserError> {
self.expect_token(&Token::LParen)?;

// Logically `parse_expr`, but limited to those with precedence higher than `BETWEEN`/`IN`,
// to avoid conflict with general IN operator, for example `position(a IN (b) IN (c))`.
// https://github.com/postgres/postgres/blob/REL_15_2/src/backend/parser/gram.y#L16012
let substring = self.parse_subexpr(Precedence::Between)?;
self.expect_keyword(Keyword::IN)?;
let string = self.parse_subexpr(Precedence::Between)?;

self.expect_token(&Token::RParen)?;

Ok(Expr::Position {
substring: Box::new(substring),
string: Box::new(string),
})
}

/// OVERLAY(<expr> PLACING <expr> FROM <expr> [ FOR <expr> ])
pub fn parse_overlay_expr(&mut self) -> Result<Expr, ParserError> {
self.expect_token(&Token::LParen)?;
Expand Down
6 changes: 3 additions & 3 deletions src/tests/regress/data/sql/strings.sql
Original file line number Diff line number Diff line change
Expand Up @@ -264,9 +264,9 @@ SELECT SUBSTRING('string' FROM -10 FOR -2147483646) AS "error";
\pset null ''

-- E021-11 position expression
--@ SELECT POSITION('4' IN '1234567890') = '4' AS "4";
--@
--@ SELECT POSITION('5' IN '1234567890') = '5' AS "5";
SELECT POSITION('4' IN '1234567890') = '4' AS "4";

SELECT POSITION('5' IN '1234567890') = '5' AS "5";

-- T312 character overlay function
SELECT OVERLAY('abcdef' PLACING '45' FROM 4) AS "abc45f";
Expand Down
2 changes: 1 addition & 1 deletion src/tests/sqlsmith/src/sql_gen/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -550,7 +550,7 @@ fn make_general_expr(func: ExprType, exprs: Vec<Expr>) -> Option<Expr> {
E::IsNotTrue => Some(Expr::IsNotTrue(Box::new(exprs[0].clone()))),
E::IsFalse => Some(Expr::IsFalse(Box::new(exprs[0].clone()))),
E::IsNotFalse => Some(Expr::IsNotFalse(Box::new(exprs[0].clone()))),
E::Position => Some(Expr::Function(make_simple_func("position", &exprs))),
E::Position => Some(Expr::Function(make_simple_func("strpos", &exprs))),
E::RoundDigit => Some(Expr::Function(make_simple_func("round", &exprs))),
E::Pow => Some(Expr::Function(make_simple_func("pow", &exprs))),
E::Repeat => Some(Expr::Function(make_simple_func("repeat", &exprs))),
Expand Down

0 comments on commit efe56ec

Please sign in to comment.