-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix: Timestamp with timezone not considered join on
#8150
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -140,6 +140,17 @@ SELECT | |||||
FROM | ||||||
test_timestamps_table_source; | ||||||
|
||||||
# create a table of timestamps with time zone | ||||||
statement ok | ||||||
CREATE TABLE test_timestamps_tz_table as | ||||||
SELECT | ||||||
arrow_cast(ts::timestamp::bigint, 'Timestamp(Nanosecond, Some("UTC"))') as nanos, | ||||||
arrow_cast(ts::timestamp::bigint / 1000, 'Timestamp(Microsecond, Some("UTC"))') as micros, | ||||||
arrow_cast(ts::timestamp::bigint / 1000000, 'Timestamp(Millisecond, Some("UTC"))') as millis, | ||||||
arrow_cast(ts::timestamp::bigint / 1000000000, 'Timestamp(Second, Some("UTC"))') as secs, | ||||||
names | ||||||
FROM | ||||||
test_timestamps_table_source; | ||||||
|
||||||
|
||||||
statement ok | ||||||
|
@@ -2460,6 +2471,16 @@ test_timestamps_table | |||||
NULL NULL NULL NULL Row 2 | ||||||
2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10 Row 3 | ||||||
|
||||||
# show the contents of the timestamp with timezone table | ||||||
query PPPPT | ||||||
select * from | ||||||
test_timestamps_tz_table | ||||||
---- | ||||||
2018-11-13T17:11:10.011375885Z 2018-11-13T17:11:10.011375Z 2018-11-13T17:11:10.011Z 2018-11-13T17:11:10Z Row 0 | ||||||
2011-12-13T11:13:10.123450Z 2011-12-13T11:13:10.123450Z 2011-12-13T11:13:10.123Z 2011-12-13T11:13:10Z Row 1 | ||||||
NULL NULL NULL NULL Row 2 | ||||||
2021-01-01T05:11:10.432Z 2021-01-01T05:11:10.432Z 2021-01-01T05:11:10.432Z 2021-01-01T05:11:10Z Row 3 | ||||||
|
||||||
# test timestamp join on nanos datatype | ||||||
query PPPPTPPPPT rowsort | ||||||
SELECT * FROM test_timestamps_table as t1 JOIN (SELECT * FROM test_timestamps_table ) as t2 ON t1.nanos = t2.nanos; | ||||||
|
@@ -2468,6 +2489,14 @@ SELECT * FROM test_timestamps_table as t1 JOIN (SELECT * FROM test_timestamps_ta | |||||
2018-11-13T17:11:10.011375885 2018-11-13T17:11:10.011375 2018-11-13T17:11:10.011 2018-11-13T17:11:10 Row 0 2018-11-13T17:11:10.011375885 2018-11-13T17:11:10.011375 2018-11-13T17:11:10.011 2018-11-13T17:11:10 Row 0 | ||||||
2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10 Row 3 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10 Row 3 | ||||||
|
||||||
# test timestamp with timezone join on nanos datatype | ||||||
query PPPPTPPPPT rowsort | ||||||
SELECT * FROM test_timestamps_tz_table as t1 JOIN (SELECT * FROM test_timestamps_tz_table ) as t2 ON t1.nanos = t2.nanos; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The original test on timestamp used |
||||||
---- | ||||||
2011-12-13T11:13:10.123450Z 2011-12-13T11:13:10.123450Z 2011-12-13T11:13:10.123Z 2011-12-13T11:13:10Z Row 1 2011-12-13T11:13:10.123450Z 2011-12-13T11:13:10.123450Z 2011-12-13T11:13:10.123Z 2011-12-13T11:13:10Z Row 1 | ||||||
2018-11-13T17:11:10.011375885Z 2018-11-13T17:11:10.011375Z 2018-11-13T17:11:10.011Z 2018-11-13T17:11:10Z Row 0 2018-11-13T17:11:10.011375885Z 2018-11-13T17:11:10.011375Z 2018-11-13T17:11:10.011Z 2018-11-13T17:11:10Z Row 0 | ||||||
2021-01-01T05:11:10.432Z 2021-01-01T05:11:10.432Z 2021-01-01T05:11:10.432Z 2021-01-01T05:11:10Z Row 3 2021-01-01T05:11:10.432Z 2021-01-01T05:11:10.432Z 2021-01-01T05:11:10.432Z 2021-01-01T05:11:10Z Row 3 | ||||||
|
||||||
# test timestamp join on micros datatype | ||||||
query PPPPTPPPPT rowsort | ||||||
SELECT * FROM test_timestamps_table as t1 JOIN (SELECT * FROM test_timestamps_table ) as t2 ON t1.micros = t2.micros | ||||||
|
@@ -2476,6 +2505,14 @@ SELECT * FROM test_timestamps_table as t1 JOIN (SELECT * FROM test_timestamps_ta | |||||
2018-11-13T17:11:10.011375885 2018-11-13T17:11:10.011375 2018-11-13T17:11:10.011 2018-11-13T17:11:10 Row 0 2018-11-13T17:11:10.011375885 2018-11-13T17:11:10.011375 2018-11-13T17:11:10.011 2018-11-13T17:11:10 Row 0 | ||||||
2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10 Row 3 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10 Row 3 | ||||||
|
||||||
# test timestamp with timezone join on micros datatype | ||||||
query PPPPTPPPPT rowsort | ||||||
SELECT * FROM test_timestamps_tz_table as t1 JOIN (SELECT * FROM test_timestamps_tz_table ) as t2 ON t1.micros = t2.micros | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
---- | ||||||
2011-12-13T11:13:10.123450Z 2011-12-13T11:13:10.123450Z 2011-12-13T11:13:10.123Z 2011-12-13T11:13:10Z Row 1 2011-12-13T11:13:10.123450Z 2011-12-13T11:13:10.123450Z 2011-12-13T11:13:10.123Z 2011-12-13T11:13:10Z Row 1 | ||||||
2018-11-13T17:11:10.011375885Z 2018-11-13T17:11:10.011375Z 2018-11-13T17:11:10.011Z 2018-11-13T17:11:10Z Row 0 2018-11-13T17:11:10.011375885Z 2018-11-13T17:11:10.011375Z 2018-11-13T17:11:10.011Z 2018-11-13T17:11:10Z Row 0 | ||||||
2021-01-01T05:11:10.432Z 2021-01-01T05:11:10.432Z 2021-01-01T05:11:10.432Z 2021-01-01T05:11:10Z Row 3 2021-01-01T05:11:10.432Z 2021-01-01T05:11:10.432Z 2021-01-01T05:11:10.432Z 2021-01-01T05:11:10Z Row 3 | ||||||
|
||||||
# test timestamp join on millis datatype | ||||||
query PPPPTPPPPT rowsort | ||||||
SELECT * FROM test_timestamps_table as t1 JOIN (SELECT * FROM test_timestamps_table ) as t2 ON t1.millis = t2.millis | ||||||
|
@@ -2484,6 +2521,46 @@ SELECT * FROM test_timestamps_table as t1 JOIN (SELECT * FROM test_timestamps_ta | |||||
2018-11-13T17:11:10.011375885 2018-11-13T17:11:10.011375 2018-11-13T17:11:10.011 2018-11-13T17:11:10 Row 0 2018-11-13T17:11:10.011375885 2018-11-13T17:11:10.011375 2018-11-13T17:11:10.011 2018-11-13T17:11:10 Row 0 | ||||||
2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10 Row 3 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10 Row 3 | ||||||
|
||||||
# test timestamp with timezone join on millis datatype | ||||||
query PPPPTPPPPT rowsort | ||||||
SELECT * FROM test_timestamps_tz_table as t1 JOIN (SELECT * FROM test_timestamps_tz_table ) as t2 ON t1.millis = t2.millis | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
---- | ||||||
2011-12-13T11:13:10.123450Z 2011-12-13T11:13:10.123450Z 2011-12-13T11:13:10.123Z 2011-12-13T11:13:10Z Row 1 2011-12-13T11:13:10.123450Z 2011-12-13T11:13:10.123450Z 2011-12-13T11:13:10.123Z 2011-12-13T11:13:10Z Row 1 | ||||||
2018-11-13T17:11:10.011375885Z 2018-11-13T17:11:10.011375Z 2018-11-13T17:11:10.011Z 2018-11-13T17:11:10Z Row 0 2018-11-13T17:11:10.011375885Z 2018-11-13T17:11:10.011375Z 2018-11-13T17:11:10.011Z 2018-11-13T17:11:10Z Row 0 | ||||||
2021-01-01T05:11:10.432Z 2021-01-01T05:11:10.432Z 2021-01-01T05:11:10.432Z 2021-01-01T05:11:10Z Row 3 2021-01-01T05:11:10.432Z 2021-01-01T05:11:10.432Z 2021-01-01T05:11:10.432Z 2021-01-01T05:11:10Z Row 3 | ||||||
|
||||||
#### | ||||||
# Config setup | ||||||
#### | ||||||
|
||||||
statement ok | ||||||
set datafusion.explain.logical_plan_only = false; | ||||||
|
||||||
statement ok | ||||||
set datafusion.optimizer.prefer_hash_join = true; | ||||||
|
||||||
# explain hash join on timestamp with timezone type | ||||||
query TT | ||||||
EXPLAIN SELECT * FROM test_timestamps_tz_table as t1 JOIN test_timestamps_tz_table as t2 ON t1.millis = t2.millis | ||||||
---- | ||||||
logical_plan | ||||||
Inner Join: t1.millis = t2.millis | ||||||
--SubqueryAlias: t1 | ||||||
----TableScan: test_timestamps_tz_table projection=[nanos, micros, millis, secs, names] | ||||||
--SubqueryAlias: t2 | ||||||
----TableScan: test_timestamps_tz_table projection=[nanos, micros, millis, secs, names] | ||||||
physical_plan | ||||||
CoalesceBatchesExec: target_batch_size=2 | ||||||
--HashJoinExec: mode=Partitioned, join_type=Inner, on=[(millis@2, millis@2)] | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I verified that this test covers the code, and that without the changes in this PR it results in
|
||||||
----CoalesceBatchesExec: target_batch_size=2 | ||||||
------RepartitionExec: partitioning=Hash([millis@2], 2), input_partitions=2 | ||||||
--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 | ||||||
----------MemoryExec: partitions=1, partition_sizes=[1] | ||||||
----CoalesceBatchesExec: target_batch_size=2 | ||||||
------RepartitionExec: partitioning=Hash([millis@2], 2), input_partitions=2 | ||||||
--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 | ||||||
----------MemoryExec: partitions=1, partition_sizes=[1] | ||||||
|
||||||
# left_join_using_2 | ||||||
query II | ||||||
SELECT t1.c1, t2.c2 FROM test_partition_table t1 JOIN test_partition_table t2 USING (c2) ORDER BY t2.c2; | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems there is no reason to check the time unit either (as all branches are true).
However, that is not something introduced in this PR.