From 2877163e19d87546cc99cf3b18d0829b52eba04d Mon Sep 17 00:00:00 2001 From: Michael Victor Zink Date: Fri, 21 Feb 2025 14:46:51 -0800 Subject: [PATCH] Parse signed/unsigned integer data type in MySQL CAST MySQL doesn't have the same set of possible CAST types as for e.g. column definitions. For example, it raises a syntax error for `CAST(1 AS INTEGER SIGNED)` and instead expects `CAST(1 AS SIGNED INTEGER)`. We retain the current somewhat permissive datatype parsing behavior (e.g. allowing `CAST(1 AS BIGINT)` even though MySQL would raise a syntax error), and add two datatypes for this specific case (`SIGNED [INTEGER]` and `UNSIGNED [INTEGER]`). Closes #1589 --- src/ast/data_type.rs | 26 ++++++++++++++++++++++++++ src/ast/mod.rs | 5 +++-- src/keywords.rs | 1 + src/parser/mod.rs | 8 ++++++++ tests/sqlparser_mysql.rs | 18 ++++++++++++++++++ 5 files changed, 56 insertions(+), 2 deletions(-) diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index cae8ca8f0..5ec4283b9 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -238,6 +238,26 @@ pub enum DataType { UnsignedBigInt(Option), /// Unsigned Int8 with optional display width e.g. INT8 UNSIGNED or INT8(11) UNSIGNED UnsignedInt8(Option), + /// Signed integer as used in [MySQL CAST] target types, with optional `INTEGER` suffix: + /// `SIGNED [INTEGER]` + /// + /// Note that this doesn't accept a display width and is reversed from the syntax used in column + /// definitions ([`DataType::Int`]): `INTEGER [SIGNED]` + /// + /// Semantically equivalent to `BIGINT`. 
+ /// + /// [MySQL CAST]: https://dev.mysql.com/doc/refman/8.4/en/cast-functions.html + Signed(bool), + /// Unsigned integer as used in [MySQL CAST] target types, with optional `INTEGER` suffix: + /// `UNSIGNED [INTEGER]` + /// + /// Note that this doesn't accept a display width and is reversed from the syntax used in + /// column definitions ([`DataType::UnsignedInteger`]): `INTEGER [UNSIGNED]` + /// + /// Semantically equivalent to `BIGINT UNSIGNED`. + /// + /// [MySQL CAST]: https://dev.mysql.com/doc/refman/8.4/en/cast-functions.html + Unsigned(bool), /// Float4 as alias for Real in [postgresql] /// /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html @@ -515,6 +535,12 @@ impl fmt::Display for DataType { DataType::UInt256 => { write!(f, "UInt256") } + DataType::Signed(integer) => { + write!(f, "SIGNED{}", if *integer { " INTEGER" } else { "" }) + } + DataType::Unsigned(integer) => { + write!(f, "UNSIGNED{}", if *integer { " INTEGER" } else { "" }) + } DataType::Real => write!(f, "REAL"), DataType::Float4 => write!(f, "FLOAT4"), DataType::Float32 => write!(f, "Float32"), diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 2b9016d9a..7b8ddb219 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -798,8 +798,9 @@ pub enum Expr { kind: CastKind, expr: Box, data_type: DataType, - // Optional CAST(string_expression AS type FORMAT format_string_expression) as used by BigQuery - // https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax + /// Optional CAST(string_expression AS type FORMAT format_string_expression) as used by [BigQuery] + /// + /// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax format: Option, }, /// AT a timestamp to a different timezone e.g. 
`FROM_UNIXTIME(0) AT TIME ZONE 'UTC-06:00'` diff --git a/src/keywords.rs b/src/keywords.rs index d62a038b8..020b404ed 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -790,6 +790,7 @@ define_keywords!( SHARE, SHARING, SHOW, + SIGNED, SIMILAR, SKIP, SLOW, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 69268bc51..2926e12d8 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9110,6 +9110,14 @@ impl<'a> Parser<'a> { let columns = self.parse_returns_table_columns()?; Ok(DataType::Table(columns)) } + Keyword::SIGNED => { + let integer = self.parse_keyword(Keyword::INTEGER); + Ok(DataType::Signed(integer)) + } + Keyword::UNSIGNED => { + let integer = self.parse_keyword(Keyword::INTEGER); + Ok(DataType::Unsigned(integer)) + } _ => { self.prev_token(); let type_name = self.parse_object_name(false)?; diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 44c8350fa..600b4f842 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -3255,3 +3255,21 @@ fn parse_looks_like_single_line_comment() { "UPDATE account SET balance = balance WHERE account_id = 5752", ); } + +#[test] +fn parse_cast_integers() { + mysql().verified_expr("CAST(foo AS UNSIGNED)"); + mysql().verified_expr("CAST(foo AS SIGNED)"); + mysql().verified_expr("CAST(foo AS UNSIGNED INTEGER)"); + mysql().verified_expr("CAST(foo AS SIGNED INTEGER)"); + + mysql() + .run_parser_method("CAST(foo AS UNSIGNED(3))", |p| p.parse_expr()) + .expect_err("CAST doesn't allow display width"); + mysql() + .run_parser_method("CAST(foo AS UNSIGNED(3) INTEGER)", |p| p.parse_expr()) + .expect_err("CAST doesn't allow display width"); + mysql() + .run_parser_method("CAST(foo AS UNSIGNED INTEGER(3))", |p| p.parse_expr()) + .expect_err("CAST doesn't allow display width"); +}