From 8bad4070f20d094cae80922383376a5bda7b390f Mon Sep 17 00:00:00 2001 From: Xiangjin Date: Tue, 18 Oct 2022 22:19:23 +0800 Subject: [PATCH 1/7] parse AT TIME ZONE --- src/sqlparser/src/ast/mod.rs | 10 ++++++++ src/sqlparser/src/parser.rs | 30 ++++++++++++++++++++++++ src/sqlparser/tests/testdata/select.yaml | 5 ++++ 3 files changed, 45 insertions(+) diff --git a/src/sqlparser/src/ast/mod.rs b/src/sqlparser/src/ast/mod.rs index b311030e71efc..ebfcdadc2597f 100644 --- a/src/sqlparser/src/ast/mod.rs +++ b/src/sqlparser/src/ast/mod.rs @@ -258,6 +258,12 @@ pub enum Expr { expr: Box, data_type: DataType, }, + /// AT TIME ZONE converts `timestamp without time zone` to/from `timestamp with time zone` with + /// explicitly specified zone + AtTimeZone { + timestamp: Box, + time_zone: String, + }, /// EXTRACT(DateTimeField FROM ) Extract { field: String, expr: Box }, /// SUBSTRING( [FROM ] [FOR ]) @@ -391,6 +397,10 @@ impl fmt::Display for Expr { } Expr::Cast { expr, data_type } => write!(f, "CAST({} AS {})", expr, data_type), Expr::TryCast { expr, data_type } => write!(f, "TRY_CAST({} AS {})", expr, data_type), + Expr::AtTimeZone { + timestamp, + time_zone, + } => write!(f, "{} AT TIME ZONE '{}'", timestamp, time_zone), Expr::Extract { field, expr } => write!(f, "EXTRACT({} FROM {})", field, expr), Expr::Collate { expr, collation } => write!(f, "{} COLLATE {}", expr, collation), Expr::Nested(ast) => write!(f, "({})", ast), diff --git a/src/sqlparser/src/parser.rs b/src/sqlparser/src/parser.rs index f4bf3b468af5d..598da1308e036 100644 --- a/src/sqlparser/src/parser.rs +++ b/src/sqlparser/src/parser.rs @@ -121,6 +121,7 @@ pub struct Parser { impl Parser { const BETWEEN_PREC: u8 = 20; const PLUS_MINUS_PREC: u8 = 30; + const TIME_ZONE_PREC: u8 = 20; const UNARY_NOT_PREC: u8 = 15; /// Parse the specified tokens @@ -1079,6 +1080,23 @@ impl Parser { ) } } + Keyword::AT => { + if self.parse_keywords(&[Keyword::TIME, Keyword::ZONE]) { + let time_zone = self.next_token(); + match 
time_zone { + Token::SingleQuotedString(time_zone) => Ok(Expr::AtTimeZone { + timestamp: Box::new(expr), + time_zone, + }), + tok => self.expected( + "Expected Token::SingleQuotedString after AT TIME ZONE", + tok, + ), + } + } else { + self.expected("Expected Token::Word after AT", tok) + } + } Keyword::NOT | Keyword::IN | Keyword::BETWEEN => { self.prev_token(); let negated = self.parse_keyword(Keyword::NOT); @@ -1179,6 +1197,18 @@ impl Parser { Token::Word(w) if w.keyword == Keyword::OR => Ok(5), Token::Word(w) if w.keyword == Keyword::AND => Ok(10), Token::Word(w) if w.keyword == Keyword::XOR => Ok(24), + + Token::Word(w) if w.keyword == Keyword::AT => { + match (self.peek_nth_token(1), self.peek_nth_token(2)) { + (Token::Word(w), Token::Word(w2)) + if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE => + { + Ok(Self::TIME_ZONE_PREC) + } + _ => Ok(0), + } + } + Token::Word(w) if w.keyword == Keyword::NOT => match self.peek_nth_token(1) { // The precedence of NOT varies depending on keyword that // follows it. 
If it is followed by IN, BETWEEN, or LIKE, diff --git a/src/sqlparser/tests/testdata/select.yaml b/src/sqlparser/tests/testdata/select.yaml index 441b6457d05ef..be713b9f91612 100644 --- a/src/sqlparser/tests/testdata/select.yaml +++ b/src/sqlparser/tests/testdata/select.yaml @@ -75,3 +75,8 @@ - input: SELECT 1, WHERE true error_msg: "sql parser error: syntax error at or near \"WHERE\"" + +- input: SELECT timestamp with time zone '2022-10-01 12:00:00Z' AT TIME ZONE 'US/Pacific' + formatted_sql: SELECT TIMESTAMP WITH TIME ZONE '2022-10-01 12:00:00Z' AT TIME ZONE 'US/Pacific' + formatted_ast: | + Query(Query { with: None, body: Select(Select { distinct: All, projection: [UnnamedExpr(AtTimeZone { timestamp: TypedString { data_type: Timestamp(true), value: "2022-10-01 12:00:00Z" }, time_zone: "US/Pacific" })], from: [], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None }) From 9a6331a80a4efc9c39f2a6f8ca27c302eac0e22a Mon Sep 17 00:00:00 2001 From: Xiangjin Date: Thu, 20 Oct 2022 14:59:28 +0800 Subject: [PATCH 2/7] expr: AtTimeZone --- proto/expr.proto | 1 + src/expr/src/expr/expr_binary_nonnull.rs | 30 ++++++++++++++++++++ src/expr/src/expr/mod.rs | 2 +- src/expr/src/vector_op/timestampz.rs | 12 +++++++- src/frontend/src/binder/expr/mod.rs | 10 +++++++ src/frontend/src/expr/type_inference/func.rs | 2 ++ 6 files changed, 55 insertions(+), 2 deletions(-) diff --git a/proto/expr.proto b/proto/expr.proto index beec5b6347ec3..1538f0487b92e 100644 --- a/proto/expr.proto +++ b/proto/expr.proto @@ -41,6 +41,7 @@ message ExprNode { EXTRACT = 101; TUMBLE_START = 103; TO_TIMESTAMP = 104; + AT_TIME_ZONE = 105; // other functions CAST = 201; SUBSTR = 202; diff --git a/src/expr/src/expr/expr_binary_nonnull.rs b/src/expr/src/expr/expr_binary_nonnull.rs index b753e24e38edf..2b279e0d6c537 100644 --- a/src/expr/src/expr/expr_binary_nonnull.rs +++ b/src/expr/src/expr/expr_binary_nonnull.rs @@ -31,6 +31,7 @@ use 
crate::vector_op::extract::{ use crate::vector_op::like::like_default; use crate::vector_op::position::position; use crate::vector_op::round::round_digits; +use crate::vector_op::timestampz::{timestamp_at_time_zone, timestampz_at_time_zone}; use crate::vector_op::tumble::{ tumble_start_date, tumble_start_date_time, tumble_start_timestampz, }; @@ -354,6 +355,34 @@ fn build_extract_expr( Ok(expr) } +fn build_at_time_zone_expr( + ret: DataType, + l: BoxedExpression, + r: BoxedExpression, +) -> Result { + let expr: BoxedExpression = match l.return_type() { + DataType::Timestamp => Box::new(BinaryExpression::< + NaiveDateTimeArray, + Utf8Array, + I64Array, + _, + >::new(l, r, ret, timestamp_at_time_zone)), + DataType::Timestampz => Box::new(BinaryExpression::< + I64Array, + Utf8Array, + NaiveDateTimeArray, + _, + >::new(l, r, ret, timestampz_at_time_zone)), + _ => { + return Err(ExprError::UnsupportedFunction(format!( + "{:?} AT TIME ZONE is not supported yet!", + l.return_type() + ))) + } + }; + Ok(expr) +} + pub fn new_binary_expr( expr_type: Type, ret: DataType, @@ -515,6 +544,7 @@ pub fn new_binary_expr( } } Type::Extract => build_extract_expr(ret, l, r)?, + Type::AtTimeZone => build_at_time_zone_expr(ret, l, r)?, Type::RoundDigit => Box::new( BinaryExpression::::new( l, diff --git a/src/expr/src/expr/mod.rs b/src/expr/src/expr/mod.rs index 84e388d45cae5..9e34a39208864 100644 --- a/src/expr/src/expr/mod.rs +++ b/src/expr/src/expr/mod.rs @@ -106,7 +106,7 @@ pub fn build_from_prost(prost: &ExprNode) -> Result { Equal | NotEqual | LessThan | LessThanOrEqual | GreaterThan | GreaterThanOrEqual | Add | Subtract | Multiply | Divide | Modulus | Extract | RoundDigit | TumbleStart | Position | BitwiseShiftLeft | BitwiseShiftRight | BitwiseAnd | BitwiseOr | BitwiseXor - | ConcatOp => build_binary_expr_prost(prost), + | ConcatOp | AtTimeZone => build_binary_expr_prost(prost), And | Or | IsDistinctFrom | IsNotDistinctFrom | ArrayAccess => { 
build_nullable_binary_expr_prost(prost) } diff --git a/src/expr/src/vector_op/timestampz.rs b/src/expr/src/vector_op/timestampz.rs index 15698e750d954..e009849adc57f 100644 --- a/src/expr/src/vector_op/timestampz.rs +++ b/src/expr/src/vector_op/timestampz.rs @@ -13,7 +13,7 @@ // limitations under the License. use num_traits::ToPrimitive; -use risingwave_common::types::OrderedF64; +use risingwave_common::types::{NaiveDateTimeWrapper, OrderedF64}; use crate::{ExprError, Result}; @@ -25,3 +25,13 @@ pub fn f64_sec_to_timestampz(elem: OrderedF64) -> Result { .to_i64() .ok_or(ExprError::NumericOutOfRange) } + +#[inline(always)] +pub fn timestamp_at_time_zone(input: NaiveDateTimeWrapper, time_zone: &str) -> Result { + todo!() +} + +#[inline(always)] +pub fn timestampz_at_time_zone(input: i64, time_zone: &str) -> Result { + todo!() +} diff --git a/src/frontend/src/binder/expr/mod.rs b/src/frontend/src/binder/expr/mod.rs index 4b2cbeda7dff8..7401c2398d52e 100644 --- a/src/frontend/src/binder/expr/mod.rs +++ b/src/frontend/src/binder/expr/mod.rs @@ -104,6 +104,10 @@ impl Binder { } => self.bind_in_list(*expr, list, negated), // special syntax for date/time Expr::Extract { field, expr } => self.bind_extract(field, *expr), + Expr::AtTimeZone { + timestamp, + time_zone, + } => self.bind_at_time_zone(*timestamp, time_zone), // special syntaxt for string Expr::Trim { expr, trim_where } => self.bind_trim(*expr, trim_where), Expr::Substring { @@ -144,6 +148,12 @@ impl Binder { .into()) } + pub(super) fn bind_at_time_zone(&mut self, input: Expr, time_zone: String) -> Result { + let input = self.bind_expr(input)?; + let time_zone = self.bind_string(time_zone)?.into(); + FunctionCall::new(ExprType::AtTimeZone, vec![input, time_zone]).map(Into::into) + } + pub(super) fn bind_in_list( &mut self, expr: Expr, diff --git a/src/frontend/src/expr/type_inference/func.rs b/src/frontend/src/expr/type_inference/func.rs index cc6a784f382df..65494e41afbf6 100644 --- 
a/src/frontend/src/expr/type_inference/func.rs +++ b/src/frontend/src/expr/type_inference/func.rs @@ -813,6 +813,8 @@ fn build_type_derive_map() -> FuncSigMap { T::Timestampz, ); map.insert(E::ToTimestamp, vec![T::Float64], T::Timestampz); + map.insert(E::AtTimeZone, vec![T::Timestamp, T::Varchar], T::Timestampz); + map.insert(E::AtTimeZone, vec![T::Timestampz, T::Varchar], T::Timestamp); // string expressions for e in [E::Trim, E::Ltrim, E::Rtrim, E::Lower, E::Upper, E::Md5] { From c309dd6b1d734f0d152f81abb5cb043a2ee6970c Mon Sep 17 00:00:00 2001 From: Xiangjin Date: Fri, 21 Oct 2022 13:20:28 +0800 Subject: [PATCH 3/7] time zone parsing and conversion --- Cargo.lock | 67 ++++++++++++++++++++++++++++ src/expr/Cargo.toml | 1 + src/expr/src/vector_op/timestampz.rs | 32 ++++++++++++- src/workspace-hack/Cargo.toml | 6 +++ 4 files changed, 104 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0827c9bc0a496..d280ebde19d90 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1072,6 +1072,30 @@ dependencies = [ "winapi", ] +[[package]] +name = "chrono-tz" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbc529705a6e0028189c83f0a5dd9fb214105116f7e3c0eeab7ff0369766b0d1" +dependencies = [ + "chrono", + "chrono-tz-build", + "phf", + "uncased", +] + +[[package]] +name = "chrono-tz-build" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9998fb9f7e9b2111641485bf8beb32f92945f97f92a3d061f744cfef335f751" +dependencies = [ + "parse-zoneinfo", + "phf", + "phf_codegen", + "uncased", +] + [[package]] name = "ciborium" version = "0.2.0" @@ -3786,6 +3810,15 @@ dependencies = [ "syn", ] +[[package]] +name = "parse-zoneinfo" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c705f256449c60da65e11ff6626e0c16a0a0b96aaa348de61376b249bc340f41" +dependencies = [ + "regex", +] + [[package]] name = "paste" version = "1.0.8" @@ -3907,6 
+3940,26 @@ dependencies = [ "phf_shared", ] +[[package]] +name = "phf_codegen" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a56ac890c5e3ca598bbdeaa99964edb5b0258a583a9eb6ef4e89fc85d9224770" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1181c94580fa345f50f19d738aaa39c0ed30a600d95cb2d3e23f94266f14fbf" +dependencies = [ + "phf_shared", + "rand 0.8.5", +] + [[package]] name = "phf_shared" version = "0.11.1" @@ -3914,6 +3967,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1fb5f6f826b772a8d4c0394209441e7d37cbbb967ae9c7e0e8134365c9ee676" dependencies = [ "siphasher", + "uncased", ] [[package]] @@ -5063,6 +5117,7 @@ dependencies = [ "byteorder", "bytes", "chrono", + "chrono-tz", "crc32fast", "dyn-clone", "either", @@ -7064,6 +7119,15 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" +[[package]] +name = "uncased" +version = "0.9.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09b01702b0fd0b3fadcf98e098780badda8742d4f4a7676615cad90e8ac73622" +dependencies = [ + "version_check", +] + [[package]] name = "unicase" version = "2.6.0" @@ -7501,10 +7565,13 @@ dependencies = [ "parking_lot 0.12.1", "parking_lot_core 0.9.3", "petgraph", + "phf", + "phf_shared", "postgres-types", "prometheus", "prost", "rand 0.8.5", + "rand_core 0.6.3", "regex", "regex-automata", "regex-syntax", diff --git a/src/expr/Cargo.toml b/src/expr/Cargo.toml index 030a311f106bf..be812273ab838 100644 --- a/src/expr/Cargo.toml +++ b/src/expr/Cargo.toml @@ -12,6 +12,7 @@ async-trait = "0.1" byteorder = "1" bytes = "1" chrono = { version = "0.4", default-features = false, features = ["clock", "std"] } +chrono-tz = { version = "0.7", 
features = ["case-insensitive"] } crc32fast = "1" dyn-clone = "1" either = "1" diff --git a/src/expr/src/vector_op/timestampz.rs b/src/expr/src/vector_op/timestampz.rs index e009849adc57f..9877ec24b04eb 100644 --- a/src/expr/src/vector_op/timestampz.rs +++ b/src/expr/src/vector_op/timestampz.rs @@ -12,11 +12,22 @@ // See the License for the specific language governing permissions and // limitations under the License. +use chrono::{TimeZone, Utc}; +use chrono_tz::Tz; use num_traits::ToPrimitive; use risingwave_common::types::{NaiveDateTimeWrapper, OrderedF64}; use crate::{ExprError, Result}; +/// Just a wrapper to reuse the `map_err` logic. +#[inline(always)] +fn parse_time_zone(time_zone: &str) -> Result { + Tz::from_str_insensitive(time_zone).map_err(|e| ExprError::InvalidParam { + name: "time_zone", + reason: e, + }) +} + #[inline(always)] pub fn f64_sec_to_timestampz(elem: OrderedF64) -> Result { // TODO(#4515): handle +/- infinity @@ -28,10 +39,27 @@ pub fn f64_sec_to_timestampz(elem: OrderedF64) -> Result { #[inline(always)] pub fn timestamp_at_time_zone(input: NaiveDateTimeWrapper, time_zone: &str) -> Result { - todo!() + let time_zone = parse_time_zone(time_zone)?; + let instant_local = input + .0 + .and_local_timezone(time_zone) + .latest() + .ok_or_else(|| ExprError::InvalidParam { + name: "local timestamp", + reason: format!( + "fail to interpret local timestamp \"{}\" in time zone \"{}\"", + input, time_zone + ), + })?; + let usec = instant_local.timestamp_micros(); + Ok(usec) } #[inline(always)] pub fn timestampz_at_time_zone(input: i64, time_zone: &str) -> Result { - todo!() + let time_zone = parse_time_zone(time_zone)?; + let instant_utc = Utc.timestamp(input / 1_000_000, (input % 1_000_000 * 1000) as u32); + let instant_local = instant_utc.with_timezone(&time_zone); + let naive = instant_local.naive_local(); + Ok(NaiveDateTimeWrapper(naive)) } diff --git a/src/workspace-hack/Cargo.toml b/src/workspace-hack/Cargo.toml index 
e57c1c50cd289..671ac9fdbf30d 100644 --- a/src/workspace-hack/Cargo.toml +++ b/src/workspace-hack/Cargo.toml @@ -58,10 +58,13 @@ num-traits = { version = "0.2", features = ["i128", "std"] } parking_lot = { version = "0.12", features = ["arc_lock", "deadlock_detection"] } parking_lot_core = { version = "0.9", default-features = false, features = ["backtrace", "deadlock_detection", "petgraph", "thread-id"] } petgraph = { version = "0.6", features = ["graphmap", "matrix_graph", "stable_graph"] } +phf = { version = "0.11", features = ["std", "uncased"] } +phf_shared = { version = "0.11", features = ["std", "uncased"] } postgres-types = { git = "https://github.com/madsim-rs/rust-postgres.git", rev = "87ca1dc", default-features = false, features = ["chrono-04", "derive", "postgres-derive", "with-chrono-0_4"] } prometheus = { version = "0.13", features = ["libc", "process", "procfs", "protobuf"] } prost = { version = "0.11", features = ["no-recursion-limit", "prost-derive", "std"] } rand = { version = "0.8", features = ["alloc", "getrandom", "libc", "rand_chacha", "small_rng", "std", "std_rng"] } +rand_core = { version = "0.6", default-features = false, features = ["alloc", "getrandom", "std"] } regex = { version = "1", features = ["aho-corasick", "memchr", "perf", "perf-cache", "perf-dfa", "perf-inline", "perf-literal", "std", "unicode", "unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"] } regex-automata = { version = "0.1", features = ["regex-syntax", "std"] } regex-syntax = { version = "0.6", features = ["unicode", "unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"] } @@ -134,10 +137,13 @@ num-traits = { version = "0.2", features = ["i128", "std"] } parking_lot = { version = "0.12", features = ["arc_lock", "deadlock_detection"] } parking_lot_core = { version = "0.9", default-features = false, features = ["backtrace", "deadlock_detection", 
"petgraph", "thread-id"] } petgraph = { version = "0.6", features = ["graphmap", "matrix_graph", "stable_graph"] } +phf = { version = "0.11", features = ["std", "uncased"] } +phf_shared = { version = "0.11", features = ["std", "uncased"] } postgres-types = { git = "https://github.com/madsim-rs/rust-postgres.git", rev = "87ca1dc", default-features = false, features = ["chrono-04", "derive", "postgres-derive", "with-chrono-0_4"] } prometheus = { version = "0.13", features = ["libc", "process", "procfs", "protobuf"] } prost = { version = "0.11", features = ["no-recursion-limit", "prost-derive", "std"] } rand = { version = "0.8", features = ["alloc", "getrandom", "libc", "rand_chacha", "small_rng", "std", "std_rng"] } +rand_core = { version = "0.6", default-features = false, features = ["alloc", "getrandom", "std"] } regex = { version = "1", features = ["aho-corasick", "memchr", "perf", "perf-cache", "perf-dfa", "perf-inline", "perf-literal", "std", "unicode", "unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"] } regex-automata = { version = "0.1", features = ["regex-syntax", "std"] } regex-syntax = { version = "0.6", features = ["unicode", "unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"] } From 0fde2651969cde064112ae6904c8966584f9e81d Mon Sep 17 00:00:00 2001 From: Xiangjin Date: Fri, 21 Oct 2022 16:22:17 +0800 Subject: [PATCH 4/7] unit tests --- src/expr/src/vector_op/timestampz.rs | 88 ++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/src/expr/src/vector_op/timestampz.rs b/src/expr/src/vector_op/timestampz.rs index 9877ec24b04eb..9b1f8a34b4233 100644 --- a/src/expr/src/vector_op/timestampz.rs +++ b/src/expr/src/vector_op/timestampz.rs @@ -63,3 +63,91 @@ pub fn timestampz_at_time_zone(input: i64, time_zone: &str) -> Result PDT, where [02:00, 03:00) are invalid + ["2022-03-13 09:59:00Z", "2022-03-13 01:59:00", 
"2022-03-13 17:59:00", "2022-03-13 10:59:00"], + ["2022-03-13 10:00:00Z", "2022-03-13 03:00:00", "2022-03-13 18:00:00", "2022-03-13 11:00:00"], + // before and after CET -> CEST, where [02:00. 03:00) are invalid + ["2022-03-27 00:59:00Z", "2022-03-26 17:59:00", "2022-03-27 08:59:00", "2022-03-27 01:59:00"], + ["2022-03-27 01:00:00Z", "2022-03-26 18:00:00", "2022-03-27 09:00:00", "2022-03-27 03:00:00"], + // before and after CEST -> CET, where [02:00, 03:00) are ambiguous + ["2022-10-29 23:59:00Z", "2022-10-29 16:59:00", "2022-10-30 07:59:00", "2022-10-30 01:59:00"], + ["2022-10-30 02:00:00Z", "2022-10-29 19:00:00", "2022-10-30 10:00:00", "2022-10-30 03:00:00"], + // before and after PDT -> PST, where [01:00, 02:00) are ambiguous + ["2022-11-06 07:59:00Z", "2022-11-06 00:59:00", "2022-11-06 15:59:00", "2022-11-06 08:59:00"], + ["2022-11-06 10:00:00Z", "2022-11-06 02:00:00", "2022-11-06 18:00:00", "2022-11-06 11:00:00"], + ]; + for case in test_cases { + let usecs = str_to_timestampz(case[0]).unwrap(); + case.iter().skip(1).zip_eq(zones).for_each(|(local, zone)| { + let local = str_to_timestamp(local).unwrap(); + + let actual = timestampz_at_time_zone(usecs, zone).unwrap(); + assert_eq!(local, actual); + + let actual = timestamp_at_time_zone(local, zone).unwrap(); + assert_eq!(usecs, actual); + }); + } + } + + #[test] + fn test_time_zone_conversion_daylight_forward() { + for (local, zone) in [ + ("2022-03-13 02:00:00", "US/Pacific"), + ("2022-03-13 02:59:00", "US/Pacific"), + ("2022-03-27 02:00:00", "europe/zurich"), + ("2022-03-27 02:59:00", "europe/zurich"), + ] { + let local = str_to_timestamp(local).unwrap(); + + let actual = timestamp_at_time_zone(local, zone); + assert_matches!(actual, Err(_)); + } + } + + #[test] + fn test_time_zone_conversion_daylight_backward() { + #[rustfmt::skip] + let test_cases = [ + ("2022-10-30 00:00:00Z", "2022-10-30 02:00:00", "europe/zurich", false), + ("2022-10-30 00:59:00Z", "2022-10-30 02:59:00", "europe/zurich", false), + 
("2022-10-30 01:00:00Z", "2022-10-30 02:00:00", "europe/zurich", true), + ("2022-10-30 01:59:00Z", "2022-10-30 02:59:00", "europe/zurich", true), + ("2022-11-06 08:00:00Z", "2022-11-06 01:00:00", "US/Pacific", false), + ("2022-11-06 08:59:00Z", "2022-11-06 01:59:00", "US/Pacific", false), + ("2022-11-06 09:00:00Z", "2022-11-06 01:00:00", "US/Pacific", true), + ("2022-11-06 09:59:00Z", "2022-11-06 01:59:00", "US/Pacific", true), + ]; + for (instant, local, zone, preferred) in test_cases { + let usecs = str_to_timestampz(instant).unwrap(); + let local = str_to_timestamp(local).unwrap(); + + let actual = timestampz_at_time_zone(usecs, zone).unwrap(); + assert_eq!(local, actual); + + if preferred { + let actual = timestamp_at_time_zone(local, zone).unwrap(); + assert_eq!(usecs, actual) + } + } + } +} From 510fc5d15cc8f7f08d5eb59317c78f19fea85dc4 Mon Sep 17 00:00:00 2001 From: Xiangjin Date: Fri, 21 Oct 2022 16:51:02 +0800 Subject: [PATCH 5/7] e2e tests --- .../batch/functions/at_time_zone.slt.part | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 e2e_test/batch/functions/at_time_zone.slt.part diff --git a/e2e_test/batch/functions/at_time_zone.slt.part b/e2e_test/batch/functions/at_time_zone.slt.part new file mode 100644 index 0000000000000..74bea68d1b5fa --- /dev/null +++ b/e2e_test/batch/functions/at_time_zone.slt.part @@ -0,0 +1,21 @@ +# zone-independent instant -> local naive time +query T +select '2022-01-01 00:00:00Z'::timestamp with time zone AT TIME ZONE 'US/Pacific'; +---- +2021-12-31 16:00:00 + +# local naive time -> zone-independent instant +query T +select '2021-12-31 16:00:00'::timestamp AT TIME ZONE 'us/pacific'; +---- +2022-01-01 00:00:00+00:00 + +# Unlike PostgreSQL, we do not support invalid local time during daylight saving forward yet. 
+statement error +select '2022-03-13 02:00:00'::timestamp AT TIME ZONE 'us/pacific'; + +# Like PostgreSQL, an ambiguous local time during a daylight saving backward transition is interpreted using the UTC offset after the transition. +query T +select '2022-11-06 01:00:00'::timestamp AT TIME ZONE 'us/pacific'; +---- +2022-11-06 09:00:00+00:00 From 635f470781d4f4cf7e7db40782124ac5325f8b3d Mon Sep 17 00:00:00 2001 From: Xiangjin Date: Fri, 21 Oct 2022 17:25:18 +0800 Subject: [PATCH 6/7] comments --- src/expr/src/vector_op/timestampz.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/expr/src/vector_op/timestampz.rs b/src/expr/src/vector_op/timestampz.rs index 9b1f8a34b4233..4829fd6c9ab51 100644 --- a/src/expr/src/vector_op/timestampz.rs +++ b/src/expr/src/vector_op/timestampz.rs @@ -40,6 +40,13 @@ pub fn f64_sec_to_timestampz(elem: OrderedF64) -> Result { #[inline(always)] pub fn timestamp_at_time_zone(input: NaiveDateTimeWrapper, time_zone: &str) -> Result { let time_zone = parse_time_zone(time_zone)?; + // https://www.postgresql.org/docs/current/datetime-invalid-input.html + // Special cases: + // * invalid time during daylight forward + // * PostgreSQL uses UTC offset before the transition + // * We report an error (FIXME) + // * ambiguous time during daylight backward + // * We follow PostgreSQL to use UTC offset after the transition let instant_local = input .0 .and_local_timezone(time_zone) From 933a4c272f5e67d67cf92e2e18fc3afa67cd3d43 Mon Sep 17 00:00:00 2001 From: Xiangjin Date: Mon, 24 Oct 2022 10:23:15 +0800 Subject: [PATCH 7/7] parse_time_zone -> lookup_time_zone --- src/expr/src/vector_op/timestampz.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/expr/src/vector_op/timestampz.rs b/src/expr/src/vector_op/timestampz.rs index 4829fd6c9ab51..edfd5aed65a1b 100644 --- a/src/expr/src/vector_op/timestampz.rs +++ b/src/expr/src/vector_op/timestampz.rs @@ -21,7 +21,7 @@ use crate::{ExprError, Result}; /// Just a wrapper to reuse the `map_err` logic. 
#[inline(always)] -fn parse_time_zone(time_zone: &str) -> Result { +fn lookup_time_zone(time_zone: &str) -> Result { Tz::from_str_insensitive(time_zone).map_err(|e| ExprError::InvalidParam { name: "time_zone", reason: e, @@ -39,7 +39,7 @@ pub fn f64_sec_to_timestampz(elem: OrderedF64) -> Result { #[inline(always)] pub fn timestamp_at_time_zone(input: NaiveDateTimeWrapper, time_zone: &str) -> Result { - let time_zone = parse_time_zone(time_zone)?; + let time_zone = lookup_time_zone(time_zone)?; // https://www.postgresql.org/docs/current/datetime-invalid-input.html // Special cases: // * invalid time during daylight forward @@ -64,7 +64,7 @@ pub fn timestamp_at_time_zone(input: NaiveDateTimeWrapper, time_zone: &str) -> R #[inline(always)] pub fn timestampz_at_time_zone(input: i64, time_zone: &str) -> Result { - let time_zone = parse_time_zone(time_zone)?; + let time_zone = lookup_time_zone(time_zone)?; let instant_utc = Utc.timestamp(input / 1_000_000, (input % 1_000_000 * 1000) as u32); let instant_local = instant_utc.with_timezone(&time_zone); let naive = instant_local.naive_local();