Skip to content

Commit

Permalink
feat(expr): AT TIME ZONE to convert between timestamp and `timest…
Browse files Browse the repository at this point in the history
…amptz` (#5968)

* parse AT TIME ZONE

* expr: AtTimeZone

* time zone parsing and conversion

* unit tests

* e2e tests

* comments

* parse_time_zone -> lookup_time_zone

Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
  • Loading branch information
xiangjinwu and mergify[bot] authored Oct 25, 2022
1 parent 6cdabc6 commit 759b374
Show file tree
Hide file tree
Showing 13 changed files with 318 additions and 2 deletions.
67 changes: 67 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 21 additions & 0 deletions e2e_test/batch/functions/at_time_zone.slt.part
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# zone-independent instant -> local naive time
query T
select '2022-01-01 00:00:00Z'::timestamp with time zone AT TIME ZONE 'US/Pacific';
----
2021-12-31 16:00:00

# local naive time -> zone-independent instant
query T
select '2021-12-31 16:00:00'::timestamp AT TIME ZONE 'us/pacific';
----
2022-01-01 00:00:00+00:00

# Unlike PostgreSQL, we do not yet support local times that are invalid (skipped) during the daylight saving forward transition.
statement error
select '2022-03-13 02:00:00'::timestamp AT TIME ZONE 'us/pacific';

# Like PostgreSQL, an ambiguous local time during the daylight saving backward transition is interpreted using the UTC offset after the transition.
query T
select '2022-11-06 01:00:00'::timestamp AT TIME ZONE 'us/pacific';
----
2022-11-06 09:00:00+00:00
1 change: 1 addition & 0 deletions proto/expr.proto
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ message ExprNode {
EXTRACT = 101;
TUMBLE_START = 103;
TO_TIMESTAMP = 104;
AT_TIME_ZONE = 105;
// other functions
CAST = 201;
SUBSTR = 202;
Expand Down
1 change: 1 addition & 0 deletions src/expr/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ async-trait = "0.1"
byteorder = "1"
bytes = "1"
chrono = { version = "0.4", default-features = false, features = ["clock", "std"] }
chrono-tz = { version = "0.7", features = ["case-insensitive"] }
crc32fast = "1"
dyn-clone = "1"
either = "1"
Expand Down
30 changes: 30 additions & 0 deletions src/expr/src/expr/expr_binary_nonnull.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ use crate::vector_op::extract::{
use crate::vector_op::like::like_default;
use crate::vector_op::position::position;
use crate::vector_op::round::round_digits;
use crate::vector_op::timestampz::{timestamp_at_time_zone, timestampz_at_time_zone};
use crate::vector_op::tumble::{
tumble_start_date, tumble_start_date_time, tumble_start_timestampz,
};
Expand Down Expand Up @@ -354,6 +355,34 @@ fn build_extract_expr(
Ok(expr)
}

/// Builds the binary expression backing `AT TIME ZONE`.
///
/// The conversion direction is chosen from the return type of the left operand `l`:
/// * `Timestampz` input is paired with `timestampz_at_time_zone` and yields naive datetimes.
/// * `Timestamp` input is paired with `timestamp_at_time_zone` and yields `i64` values.
///
/// The right operand `r` is always read as a UTF-8 string (the time zone name).
///
/// # Errors
///
/// Returns `ExprError::UnsupportedFunction` when the left operand has any other type.
fn build_at_time_zone_expr(
    ret: DataType,
    l: BoxedExpression,
    r: BoxedExpression,
) -> Result<BoxedExpression> {
    let boxed: BoxedExpression = match l.return_type() {
        DataType::Timestampz => {
            let instant_to_local =
                BinaryExpression::<I64Array, Utf8Array, NaiveDateTimeArray, _>::new(
                    l,
                    r,
                    ret,
                    timestampz_at_time_zone,
                );
            Box::new(instant_to_local)
        }
        DataType::Timestamp => {
            let local_to_instant =
                BinaryExpression::<NaiveDateTimeArray, Utf8Array, I64Array, _>::new(
                    l,
                    r,
                    ret,
                    timestamp_at_time_zone,
                );
            Box::new(local_to_instant)
        }
        unsupported => {
            return Err(ExprError::UnsupportedFunction(format!(
                "{:?} AT TIME ZONE is not supported yet!",
                unsupported
            )))
        }
    };
    Ok(boxed)
}

pub fn new_binary_expr(
expr_type: Type,
ret: DataType,
Expand Down Expand Up @@ -515,6 +544,7 @@ pub fn new_binary_expr(
}
}
Type::Extract => build_extract_expr(ret, l, r)?,
Type::AtTimeZone => build_at_time_zone_expr(ret, l, r)?,
Type::RoundDigit => Box::new(
BinaryExpression::<DecimalArray, I32Array, DecimalArray, _>::new(
l,
Expand Down
2 changes: 1 addition & 1 deletion src/expr/src/expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ pub fn build_from_prost(prost: &ExprNode) -> Result<BoxedExpression> {
Equal | NotEqual | LessThan | LessThanOrEqual | GreaterThan | GreaterThanOrEqual | Add
| Subtract | Multiply | Divide | Modulus | Extract | RoundDigit | TumbleStart
| Position | BitwiseShiftLeft | BitwiseShiftRight | BitwiseAnd | BitwiseOr | BitwiseXor
| ConcatOp => build_binary_expr_prost(prost),
| ConcatOp | AtTimeZone => build_binary_expr_prost(prost),
And | Or | IsDistinctFrom | IsNotDistinctFrom | ArrayAccess => {
build_nullable_binary_expr_prost(prost)
}
Expand Down
135 changes: 134 additions & 1 deletion src/expr/src/vector_op/timestampz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,22 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use chrono::{TimeZone, Utc};
use chrono_tz::Tz;
use num_traits::ToPrimitive;
use risingwave_common::types::OrderedF64;
use risingwave_common::types::{NaiveDateTimeWrapper, OrderedF64};

use crate::{ExprError, Result};

/// Resolves a time zone name to a [`Tz`] via `Tz::from_str_insensitive`.
///
/// Exists so that every caller shares the same error mapping.
///
/// # Errors
///
/// Returns `ExprError::InvalidParam` with `name` set to `"time_zone"` and the lookup
/// failure as `reason` when the name cannot be resolved.
#[inline(always)]
fn lookup_time_zone(time_zone: &str) -> Result<Tz> {
    match Tz::from_str_insensitive(time_zone) {
        Ok(tz) => Ok(tz),
        Err(reason) => Err(ExprError::InvalidParam {
            name: "time_zone",
            reason,
        }),
    }
}

#[inline(always)]
pub fn f64_sec_to_timestampz(elem: OrderedF64) -> Result<i64> {
// TODO(#4515): handle +/- infinity
Expand All @@ -25,3 +36,125 @@ pub fn f64_sec_to_timestampz(elem: OrderedF64) -> Result<i64> {
.to_i64()
.ok_or(ExprError::NumericOutOfRange)
}

/// Interprets the naive local timestamp `input` in the named `time_zone` and returns the
/// resulting instant as microseconds (as produced by `timestamp_micros`).
///
/// Edge cases around daylight saving transitions
/// (see <https://www.postgresql.org/docs/current/datetime-invalid-input.html>):
/// * Local time skipped when clocks move forward: PostgreSQL uses the UTC offset from
///   before the transition, whereas this function reports an error (FIXME).
/// * Local time repeated when clocks move backward: like PostgreSQL, the UTC offset
///   from after the transition is used (the latest candidate instant).
///
/// # Errors
///
/// * Propagates the error from `lookup_time_zone` when the zone name cannot be resolved.
/// * Returns `ExprError::InvalidParam` when the local timestamp has no valid
///   interpretation in the zone.
#[inline(always)]
pub fn timestamp_at_time_zone(input: NaiveDateTimeWrapper, time_zone: &str) -> Result<i64> {
    let tz = lookup_time_zone(time_zone)?;
    // `latest()` resolves an ambiguous local time to the later instant and yields `None`
    // when the local time does not exist in this zone.
    match input.0.and_local_timezone(tz).latest() {
        Some(instant) => Ok(instant.timestamp_micros()),
        None => Err(ExprError::InvalidParam {
            name: "local timestamp",
            reason: format!(
                "fail to interpret local timestamp \"{}\" in time zone \"{}\"",
                input, tz
            ),
        }),
    }
}

#[inline(always)]
pub fn timestampz_at_time_zone(input: i64, time_zone: &str) -> Result<NaiveDateTimeWrapper> {
let time_zone = lookup_time_zone(time_zone)?;
let instant_utc = Utc.timestamp(input / 1_000_000, (input % 1_000_000 * 1000) as u32);
let instant_local = instant_utc.with_timezone(&time_zone);
let naive = instant_local.naive_local();
Ok(NaiveDateTimeWrapper(naive))
}

#[cfg(test)]
mod tests {
    use std::assert_matches::assert_matches;

    use itertools::Itertools;

    use super::*;
    use crate::vector_op::cast::{str_to_timestamp, str_to_timestampz};

    /// Checks both conversion directions for unambiguous instants across three zones.
    ///
    /// Each row holds a UTC instant followed by the expected local time in each entry of
    /// `zones`, in the same order.
    #[test]
    fn test_time_zone_conversion() {
        let zones = ["US/Pacific", "ASIA/SINGAPORE", "europe/zurich"];
        #[rustfmt::skip]
        let test_cases = [
            // winter
            ["2022-01-01 00:00:00Z", "2021-12-31 16:00:00", "2022-01-01 08:00:00", "2022-01-01 01:00:00"],
            // summer
            ["2022-07-01 00:00:00Z", "2022-06-30 17:00:00", "2022-07-01 08:00:00", "2022-07-01 02:00:00"],
            // before and after PST -> PDT, where [02:00, 03:00) are invalid
            ["2022-03-13 09:59:00Z", "2022-03-13 01:59:00", "2022-03-13 17:59:00", "2022-03-13 10:59:00"],
            ["2022-03-13 10:00:00Z", "2022-03-13 03:00:00", "2022-03-13 18:00:00", "2022-03-13 11:00:00"],
            // before and after CET -> CEST, where [02:00, 03:00) are invalid
            ["2022-03-27 00:59:00Z", "2022-03-26 17:59:00", "2022-03-27 08:59:00", "2022-03-27 01:59:00"],
            ["2022-03-27 01:00:00Z", "2022-03-26 18:00:00", "2022-03-27 09:00:00", "2022-03-27 03:00:00"],
            // before and after CEST -> CET, where [02:00, 03:00) are ambiguous
            ["2022-10-29 23:59:00Z", "2022-10-29 16:59:00", "2022-10-30 07:59:00", "2022-10-30 01:59:00"],
            ["2022-10-30 02:00:00Z", "2022-10-29 19:00:00", "2022-10-30 10:00:00", "2022-10-30 03:00:00"],
            // before and after PDT -> PST, where [01:00, 02:00) are ambiguous
            ["2022-11-06 07:59:00Z", "2022-11-06 00:59:00", "2022-11-06 15:59:00", "2022-11-06 08:59:00"],
            ["2022-11-06 10:00:00Z", "2022-11-06 02:00:00", "2022-11-06 18:00:00", "2022-11-06 11:00:00"],
        ];
        for case in test_cases {
            let usecs = str_to_timestampz(case[0]).unwrap();
            // `zip_eq` panics if the number of local times differs from the number of zones.
            for (local, zone) in case.iter().skip(1).zip_eq(zones) {
                let local = str_to_timestamp(local).unwrap();

                // instant -> local
                let to_local = timestampz_at_time_zone(usecs, zone).unwrap();
                assert_eq!(local, to_local);

                // local -> instant
                let to_instant = timestamp_at_time_zone(local, zone).unwrap();
                assert_eq!(usecs, to_instant);
            }
        }
    }

    /// Local times skipped by a forward daylight saving transition must be rejected.
    #[test]
    fn test_time_zone_conversion_daylight_forward() {
        let skipped_locals = [
            ("2022-03-13 02:00:00", "US/Pacific"),
            ("2022-03-13 02:59:00", "US/Pacific"),
            ("2022-03-27 02:00:00", "europe/zurich"),
            ("2022-03-27 02:59:00", "europe/zurich"),
        ];
        for (local, zone) in skipped_locals {
            let local = str_to_timestamp(local).unwrap();

            let outcome = timestamp_at_time_zone(local, zone);
            assert_matches!(outcome, Err(_));
        }
    }

    /// Local times repeated by a backward daylight saving transition.
    ///
    /// Every instant maps to its local time; only rows flagged `true` (the instant after
    /// the transition) are expected to round-trip from local time back to the instant.
    #[test]
    fn test_time_zone_conversion_daylight_backward() {
        #[rustfmt::skip]
        let test_cases = [
            ("2022-10-30 00:00:00Z", "2022-10-30 02:00:00", "europe/zurich", false),
            ("2022-10-30 00:59:00Z", "2022-10-30 02:59:00", "europe/zurich", false),
            ("2022-10-30 01:00:00Z", "2022-10-30 02:00:00", "europe/zurich", true),
            ("2022-10-30 01:59:00Z", "2022-10-30 02:59:00", "europe/zurich", true),
            ("2022-11-06 08:00:00Z", "2022-11-06 01:00:00", "US/Pacific", false),
            ("2022-11-06 08:59:00Z", "2022-11-06 01:59:00", "US/Pacific", false),
            ("2022-11-06 09:00:00Z", "2022-11-06 01:00:00", "US/Pacific", true),
            ("2022-11-06 09:59:00Z", "2022-11-06 01:59:00", "US/Pacific", true),
        ];
        for (instant, local, zone, preferred) in test_cases {
            let usecs = str_to_timestampz(instant).unwrap();
            let local = str_to_timestamp(local).unwrap();

            let to_local = timestampz_at_time_zone(usecs, zone).unwrap();
            assert_eq!(local, to_local);

            if !preferred {
                continue;
            }
            let to_instant = timestamp_at_time_zone(local, zone).unwrap();
            assert_eq!(usecs, to_instant);
        }
    }
}
10 changes: 10 additions & 0 deletions src/frontend/src/binder/expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,10 @@ impl Binder {
} => self.bind_in_list(*expr, list, negated),
// special syntax for date/time
Expr::Extract { field, expr } => self.bind_extract(field, *expr),
Expr::AtTimeZone {
timestamp,
time_zone,
} => self.bind_at_time_zone(*timestamp, time_zone),
// special syntax for string
Expr::Trim { expr, trim_where } => self.bind_trim(*expr, trim_where),
Expr::Substring {
Expand Down Expand Up @@ -144,6 +148,12 @@ impl Binder {
.into())
}

/// Binds `<input> AT TIME ZONE <time_zone>` into an `ExprType::AtTimeZone` function call.
///
/// `input` is bound as a regular expression and `time_zone` as a string; the two become
/// the call's arguments in that order.
///
/// # Errors
///
/// Propagates any error from binding either operand or from constructing the call.
pub(super) fn bind_at_time_zone(&mut self, input: Expr, time_zone: String) -> Result<ExprImpl> {
    let timestamp_arg = self.bind_expr(input)?;
    let zone_arg: ExprImpl = self.bind_string(time_zone)?.into();
    let call = FunctionCall::new(ExprType::AtTimeZone, vec![timestamp_arg, zone_arg])?;
    Ok(call.into())
}

pub(super) fn bind_in_list(
&mut self,
expr: Expr,
Expand Down
Loading

0 comments on commit 759b374

Please sign in to comment.