Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(expr): AT TIME ZONE to convert between timestamp and timestamptz #5968

Merged
merged 8 commits into from
Oct 25, 2022
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 67 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 21 additions & 0 deletions e2e_test/batch/functions/at_time_zone.slt.part
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# zone-independent instant -> local naive time
query T
select '2022-01-01 00:00:00Z'::timestamp with time zone AT TIME ZONE 'US/Pacific';
----
2021-12-31 16:00:00

# local naive time -> zone-independent instant
query T
select '2021-12-31 16:00:00'::timestamp AT TIME ZONE 'us/pacific';
----
2022-01-01 00:00:00+00:00

# Unlike PostgreSQL, we do not support invalid local time during daylight saving forward yet.
statement error
select '2022-03-13 02:00:00'::timestamp AT TIME ZONE 'us/pacific';

# Like PostgreSQL, an ambiguous local time during daylight saving backward is interpreted as after the transition.
query T
select '2022-11-06 01:00:00'::timestamp AT TIME ZONE 'us/pacific';
----
2022-11-06 09:00:00+00:00
1 change: 1 addition & 0 deletions proto/expr.proto
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ message ExprNode {
EXTRACT = 101;
TUMBLE_START = 103;
TO_TIMESTAMP = 104;
AT_TIME_ZONE = 105;
// other functions
CAST = 201;
SUBSTR = 202;
Expand Down
1 change: 1 addition & 0 deletions src/expr/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ async-trait = "0.1"
byteorder = "1"
bytes = "1"
chrono = { version = "0.4", default-features = false, features = ["clock", "std"] }
chrono-tz = { version = "0.7", features = ["case-insensitive"] }
crc32fast = "1"
dyn-clone = "1"
either = "1"
Expand Down
30 changes: 30 additions & 0 deletions src/expr/src/expr/expr_binary_nonnull.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ use crate::vector_op::extract::{
use crate::vector_op::like::like_default;
use crate::vector_op::position::position;
use crate::vector_op::round::round_digits;
use crate::vector_op::timestampz::{timestamp_at_time_zone, timestampz_at_time_zone};
use crate::vector_op::tumble::{
tumble_start_date, tumble_start_date_time, tumble_start_timestampz,
};
Expand Down Expand Up @@ -354,6 +355,34 @@ fn build_extract_expr(
Ok(expr)
}

/// Builds the executor expression for `<input> AT TIME ZONE <zone>`.
///
/// The direction of the conversion is selected by the return type of the left operand `l`:
/// * `Timestamp`: evaluated with `timestamp_at_time_zone` into an `I64Array`
/// * `Timestampz`: evaluated with `timestampz_at_time_zone` into a `NaiveDateTimeArray`
///
/// Any other input type yields `ExprError::UnsupportedFunction`.
fn build_at_time_zone_expr(
    ret: DataType,
    l: BoxedExpression,
    r: BoxedExpression,
) -> Result<BoxedExpression> {
    // Read the input type once so that `l` can be moved into the chosen expression below.
    let input_type = l.return_type();
    match input_type {
        DataType::Timestamp => Ok(Box::new(BinaryExpression::<
            NaiveDateTimeArray,
            Utf8Array,
            I64Array,
            _,
        >::new(l, r, ret, timestamp_at_time_zone))),
        DataType::Timestampz => Ok(Box::new(BinaryExpression::<
            I64Array,
            Utf8Array,
            NaiveDateTimeArray,
            _,
        >::new(l, r, ret, timestampz_at_time_zone))),
        unsupported => Err(ExprError::UnsupportedFunction(format!(
            "{:?} AT TIME ZONE is not supported yet!",
            unsupported
        ))),
    }
}

pub fn new_binary_expr(
expr_type: Type,
ret: DataType,
Expand Down Expand Up @@ -515,6 +544,7 @@ pub fn new_binary_expr(
}
}
Type::Extract => build_extract_expr(ret, l, r)?,
Type::AtTimeZone => build_at_time_zone_expr(ret, l, r)?,
Type::RoundDigit => Box::new(
BinaryExpression::<DecimalArray, I32Array, DecimalArray, _>::new(
l,
Expand Down
2 changes: 1 addition & 1 deletion src/expr/src/expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ pub fn build_from_prost(prost: &ExprNode) -> Result<BoxedExpression> {
Equal | NotEqual | LessThan | LessThanOrEqual | GreaterThan | GreaterThanOrEqual | Add
| Subtract | Multiply | Divide | Modulus | Extract | RoundDigit | TumbleStart
| Position | BitwiseShiftLeft | BitwiseShiftRight | BitwiseAnd | BitwiseOr | BitwiseXor
| ConcatOp => build_binary_expr_prost(prost),
| ConcatOp | AtTimeZone => build_binary_expr_prost(prost),
And | Or | IsDistinctFrom | IsNotDistinctFrom | ArrayAccess => {
build_nullable_binary_expr_prost(prost)
}
Expand Down
135 changes: 134 additions & 1 deletion src/expr/src/vector_op/timestampz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,22 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use chrono::{TimeZone, Utc};
use chrono_tz::Tz;
use num_traits::ToPrimitive;
use risingwave_common::types::OrderedF64;
use risingwave_common::types::{NaiveDateTimeWrapper, OrderedF64};

use crate::{ExprError, Result};

/// Resolves a time zone name to a [`Tz`] using the case-insensitive lookup, so that every caller
/// reports an unknown zone as the same `ExprError::InvalidParam`.
#[inline(always)]
fn parse_time_zone(time_zone: &str) -> Result<Tz> {
    match Tz::from_str_insensitive(time_zone) {
        Ok(tz) => Ok(tz),
        Err(reason) => Err(ExprError::InvalidParam {
            name: "time_zone",
            reason,
        }),
    }
}

#[inline(always)]
pub fn f64_sec_to_timestampz(elem: OrderedF64) -> Result<i64> {
// TODO(#4515): handle +/- infinity
Expand All @@ -25,3 +36,125 @@ pub fn f64_sec_to_timestampz(elem: OrderedF64) -> Result<i64> {
.to_i64()
.ok_or(ExprError::NumericOutOfRange)
}

/// Interprets the naive local timestamp `input` as wall-clock time in `time_zone` and returns the
/// corresponding instant as microseconds (the value of `timestamp_micros()`).
///
/// # Errors
///
/// * `ExprError::InvalidParam` (via `parse_time_zone`) when `time_zone` is not a known zone name.
/// * `ExprError::InvalidParam` named "local timestamp" when `input` cannot be mapped to an
///   instant in that zone, e.g. a local time skipped by a daylight saving forward transition.
#[inline(always)]
pub fn timestamp_at_time_zone(input: NaiveDateTimeWrapper, time_zone: &str) -> Result<i64> {
let time_zone = parse_time_zone(time_zone)?;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems we need to parse the timezone every row, not sure whether it can be heavy. 🤔

Some possible optimizations:

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The parsing is a lookup of a static and perfect hash table (crate phf), where hash is done in a case-insensitive manner. So memoization / cache has no advantage here.

It is still true that most cases this is a constant, though.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cool. Then there should be no problem.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated the function name to lookup_time_zone.

// https://www.postgresql.org/docs/current/datetime-invalid-input.html
// Special cases:
// * invalid time during daylight forward
// * PostgreSQL uses UTC offset before the transition
// * We report an error (FIXME)
// * ambiguous time during daylight backward
// * We follow PostgreSQL to use UTC offset after the transition
let instant_local = input
.0
.and_local_timezone(time_zone)
// Of the candidate instants, take the latest one; no candidate at all becomes the error below.
.latest()
.ok_or_else(|| ExprError::InvalidParam {
name: "local timestamp",
reason: format!(
"fail to interpret local timestamp \"{}\" in time zone \"{}\"",
input, time_zone
),
})?;
let usec = instant_local.timestamp_micros();
Ok(usec)
}

#[inline(always)]
pub fn timestampz_at_time_zone(input: i64, time_zone: &str) -> Result<NaiveDateTimeWrapper> {
let time_zone = parse_time_zone(time_zone)?;
let instant_utc = Utc.timestamp(input / 1_000_000, (input % 1_000_000 * 1000) as u32);
let instant_local = instant_utc.with_timezone(&time_zone);
let naive = instant_local.naive_local();
Ok(NaiveDateTimeWrapper(naive))
}

// Unit tests for the `AT TIME ZONE` conversions in both directions, including the daylight saving
// transitions of the zones under test.
#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;

use itertools::Itertools;

use super::*;
use crate::vector_op::cast::{str_to_timestamp, str_to_timestampz};

/// Checks unambiguous conversions in both directions.
///
/// Each row of `test_cases` holds an instant (column 0) followed by the expected local time in
/// each entry of `zones`, in the same order.
#[test]
fn test_time_zone_conversion() {
let zones = ["US/Pacific", "ASIA/SINGAPORE", "europe/zurich"];
#[rustfmt::skip]
let test_cases = [
// winter
["2022-01-01 00:00:00Z", "2021-12-31 16:00:00", "2022-01-01 08:00:00", "2022-01-01 01:00:00"],
// summer
["2022-07-01 00:00:00Z", "2022-06-30 17:00:00", "2022-07-01 08:00:00", "2022-07-01 02:00:00"],
// before and after PST -> PDT, where [02:00, 03:00) are invalid
["2022-03-13 09:59:00Z", "2022-03-13 01:59:00", "2022-03-13 17:59:00", "2022-03-13 10:59:00"],
["2022-03-13 10:00:00Z", "2022-03-13 03:00:00", "2022-03-13 18:00:00", "2022-03-13 11:00:00"],
// before and after CET -> CEST, where [02:00, 03:00) are invalid
["2022-03-27 00:59:00Z", "2022-03-26 17:59:00", "2022-03-27 08:59:00", "2022-03-27 01:59:00"],
["2022-03-27 01:00:00Z", "2022-03-26 18:00:00", "2022-03-27 09:00:00", "2022-03-27 03:00:00"],
// before and after CEST -> CET, where [02:00, 03:00) are ambiguous
["2022-10-29 23:59:00Z", "2022-10-29 16:59:00", "2022-10-30 07:59:00", "2022-10-30 01:59:00"],
["2022-10-30 02:00:00Z", "2022-10-29 19:00:00", "2022-10-30 10:00:00", "2022-10-30 03:00:00"],
// before and after PDT -> PST, where [01:00, 02:00) are ambiguous
["2022-11-06 07:59:00Z", "2022-11-06 00:59:00", "2022-11-06 15:59:00", "2022-11-06 08:59:00"],
["2022-11-06 10:00:00Z", "2022-11-06 02:00:00", "2022-11-06 18:00:00", "2022-11-06 11:00:00"],
];
for case in test_cases {
let usecs = str_to_timestampz(case[0]).unwrap();
// Pair every expected local time (columns 1..) with its zone.
case.iter().skip(1).zip_eq(zones).for_each(|(local, zone)| {
let local = str_to_timestamp(local).unwrap();

// instant -> local
let actual = timestampz_at_time_zone(usecs, zone).unwrap();
assert_eq!(local, actual);

// local -> instant
let actual = timestamp_at_time_zone(local, zone).unwrap();
assert_eq!(usecs, actual);
});
}
}

/// Local times inside the hour skipped by a daylight saving forward transition are expected to
/// be rejected with an error.
#[test]
fn test_time_zone_conversion_daylight_forward() {
for (local, zone) in [
("2022-03-13 02:00:00", "US/Pacific"),
("2022-03-13 02:59:00", "US/Pacific"),
("2022-03-27 02:00:00", "europe/zurich"),
("2022-03-27 02:59:00", "europe/zurich"),
] {
let local = str_to_timestamp(local).unwrap();

let actual = timestamp_at_time_zone(local, zone);
assert_matches!(actual, Err(_));
}
}

/// Covers the hour repeated by a daylight saving backward transition.
///
/// Each tuple is `(instant, local, zone, preferred)`. The instant -> local direction is checked
/// for every row; the local -> instant direction is checked only for rows flagged `preferred`,
/// i.e. the instant that the ambiguous local time is expected to resolve to.
#[test]
fn test_time_zone_conversion_daylight_backward() {
#[rustfmt::skip]
let test_cases = [
("2022-10-30 00:00:00Z", "2022-10-30 02:00:00", "europe/zurich", false),
("2022-10-30 00:59:00Z", "2022-10-30 02:59:00", "europe/zurich", false),
("2022-10-30 01:00:00Z", "2022-10-30 02:00:00", "europe/zurich", true),
("2022-10-30 01:59:00Z", "2022-10-30 02:59:00", "europe/zurich", true),
("2022-11-06 08:00:00Z", "2022-11-06 01:00:00", "US/Pacific", false),
("2022-11-06 08:59:00Z", "2022-11-06 01:59:00", "US/Pacific", false),
("2022-11-06 09:00:00Z", "2022-11-06 01:00:00", "US/Pacific", true),
("2022-11-06 09:59:00Z", "2022-11-06 01:59:00", "US/Pacific", true),
];
for (instant, local, zone, preferred) in test_cases {
let usecs = str_to_timestampz(instant).unwrap();
let local = str_to_timestamp(local).unwrap();

let actual = timestampz_at_time_zone(usecs, zone).unwrap();
assert_eq!(local, actual);

if preferred {
let actual = timestamp_at_time_zone(local, zone).unwrap();
assert_eq!(usecs, actual)
}
}
}
}
10 changes: 10 additions & 0 deletions src/frontend/src/binder/expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,10 @@ impl Binder {
} => self.bind_in_list(*expr, list, negated),
// special syntax for date/time
Expr::Extract { field, expr } => self.bind_extract(field, *expr),
Expr::AtTimeZone {
timestamp,
time_zone,
} => self.bind_at_time_zone(*timestamp, time_zone),
// special syntax for string
Expr::Trim { expr, trim_where } => self.bind_trim(*expr, trim_where),
Expr::Substring {
Expand Down Expand Up @@ -144,6 +148,12 @@ impl Binder {
.into())
}

/// Binds `<input> AT TIME ZONE <time_zone>` as a two-argument `ExprType::AtTimeZone` function
/// call: the bound `input` expression followed by the zone name bound as a string.
pub(super) fn bind_at_time_zone(&mut self, input: Expr, time_zone: String) -> Result<ExprImpl> {
    let timestamp = self.bind_expr(input)?;
    let zone = self.bind_string(time_zone)?.into();
    let call = FunctionCall::new(ExprType::AtTimeZone, vec![timestamp, zone])?;
    Ok(call.into())
}

pub(super) fn bind_in_list(
&mut self,
expr: Expr,
Expand Down
Loading