Skip to content

Commit

Permalink
Merge #43514
Browse files Browse the repository at this point in the history
43514: colexec: support TIMESTAMPTZ type r=yuzefovich a=yuzefovich

**colexec: support TIMESTAMPTZ type**

This commit adds support for the TimestampTZ data type, which is
represented in the same way as Timestamp (as 'time.Time'). We already
had everything in place, so only the type conversion was needed.

Addresses: #42043.

Release note (sql change): the vectorized engine now supports the
TIMESTAMPTZ data type.

**sqlsmith: add several types to vecSeedTable**

This commit adds the previously supported INT2 and INT4 types to
vecSeedTable, as well as the newly supported TIMESTAMPTZ type.

Release note: None

Co-authored-by: Yahor Yuzefovich <[email protected]>
  • Loading branch information
craig[bot] and yuzefovich committed Dec 31, 2019
2 parents f1f1525 + b3faa6b commit 0d1ae9c
Show file tree
Hide file tree
Showing 9 changed files with 73 additions and 32 deletions.
39 changes: 26 additions & 13 deletions pkg/col/colserde/arrowbatchconverter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ func randomBatch(allocator *colexec.Allocator) ([]coltypes.T, coldata.Batch) {

availableTyps := make([]coltypes.T, 0, len(coltypes.AllTypes))
for _, typ := range coltypes.AllTypes {
// TODO(asubiotto,jordan): We do not support decimal, timestamp conversion yet.
if typ == coltypes.Decimal || typ == coltypes.Timestamp {
// TODO(asubiotto,jordan): We do not support decimal conversion yet.
if typ == coltypes.Decimal {
continue
}
availableTyps = append(availableTyps, typ)
Expand Down Expand Up @@ -107,6 +107,17 @@ func assertEqualBatches(t *testing.T, expected, actual coldata.Batch) {
t.Fatalf("bytes mismatch at index %d:\nexpected:\n%sactual:\n%s", i, expectedBytes, resultBytes)
}
}
} else if typ == coltypes.Timestamp {
// Cannot use require.Equal for this type.
// TODO(yuzefovich): Again, why not?
expectedTimestamp := expectedVec.Timestamp()[0:expected.Length()]
resultTimestamp := actualVec.Timestamp()[0:actual.Length()]
require.Equal(t, len(expectedTimestamp), len(resultTimestamp))
for i := range expectedTimestamp {
if !expectedTimestamp[i].Equal(resultTimestamp[i]) {
t.Fatalf("Timestamp mismatch at index %d:\nexpected:\n%sactual:\n%s", i, expectedTimestamp[i], resultTimestamp[i])
}
}
} else {
require.Equal(
t,
Expand Down Expand Up @@ -177,19 +188,21 @@ func roundTripBatch(
func TestRecordBatchRoundtripThroughBytes(t *testing.T) {
defer leaktest.AfterTest(t)()

typs, b := randomBatch(testAllocator)
c, err := colserde.NewArrowBatchConverter(typs)
require.NoError(t, err)
r, err := colserde.NewRecordBatchSerializer(typs)
require.NoError(t, err)
for run := 0; run < 10; run++ {
typs, b := randomBatch(testAllocator)
c, err := colserde.NewArrowBatchConverter(typs)
require.NoError(t, err)
r, err := colserde.NewRecordBatchSerializer(typs)
require.NoError(t, err)

// Make a copy of the original batch because the converter modifies and casts
// data without copying for performance reasons.
expected := copyBatch(b)
actual, err := roundTripBatch(b, c, r)
require.NoError(t, err)
// Make a copy of the original batch because the converter modifies and
// casts data without copying for performance reasons.
expected := copyBatch(b)
actual, err := roundTripBatch(b, c, r)
require.NoError(t, err)

assertEqualBatches(t, expected, actual)
assertEqualBatches(t, expected, actual)
}
}

func BenchmarkArrowBatchConverter(b *testing.B) {
Expand Down
7 changes: 7 additions & 0 deletions pkg/col/colserde/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,11 @@ func schema(fb *flatbuffers.Builder, typs []coltypes.T) flatbuffers.UOffsetT {
arrowserde.FloatingPointAddPrecision(fb, arrowserde.PrecisionDOUBLE)
fbTypOffset = arrowserde.FloatingPointEnd(fb)
fbTyp = arrowserde.TypeFloatingPoint
case coltypes.Timestamp:
// Timestamps are marshaled into bytes, so we use binary headers.
arrowserde.BinaryStart(fb)
fbTypOffset = arrowserde.BinaryEnd(fb)
fbTyp = arrowserde.TypeTimestamp
default:
panic(errors.Errorf(`don't know how to map %s`, typ))
}
Expand Down Expand Up @@ -451,6 +456,8 @@ func typeFromField(field *arrowserde.Field) (coltypes.T, error) {
default:
return coltypes.Unhandled, errors.Errorf(`unhandled float precision %d`, floatType.Precision())
}
case arrowserde.TypeTimestamp:
return coltypes.Timestamp, nil
}
// It'd be nice if this error could include more details, but flatbuffers
// doesn't make a String method or anything like that.
Expand Down
3 changes: 3 additions & 0 deletions pkg/internal/sqlsmith/setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,10 +110,13 @@ CREATE INVERTED INDEX on seed (_jsonb);
vecSeedTable = `
CREATE TABLE IF NOT EXISTS seed_vec AS
SELECT
g::INT2 AS _int2,
g::INT4 AS _int4,
g::INT8 AS _int8,
g::FLOAT8 AS _float8,
'2001-01-01'::DATE + g AS _date,
'2001-01-01'::TIMESTAMP + g * '1 day'::INTERVAL AS _timestamp,
'2001-01-01'::TIMESTAMPTZ + g * '1 day'::INTERVAL AS _timestamptz,
g % 2 = 1 AS _bool,
g::DECIMAL AS _decimal,
g::STRING AS _string,
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/colencoding/key_encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ func decodeTableKeyToCol(
rkey, t, err = encoding.DecodeVarintDescending(key)
}
vec.Int64()[idx] = t
case types.TimestampFamily:
case types.TimestampFamily, types.TimestampTZFamily:
var t time.Time
if dir == sqlbase.IndexDescriptor_ASC {
rkey, t, err = encoding.DecodeTimeAscending(key)
Expand Down Expand Up @@ -297,7 +297,7 @@ func UnmarshalColumnValueToCol(
var v int64
v, err = value.GetInt()
vec.Int64()[idx] = v
case types.TimestampFamily:
case types.TimestampFamily, types.TimestampTZFamily:
var v time.Time
v, err = value.GetTime()
vec.Timestamp()[idx] = v
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/colencoding/value_encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ func decodeUntaggedDatumToCol(vec coldata.Vec, idx uint16, t *types.T, buf []byt
if err == nil {
vec.Bytes().Set(int(idx), data.GetBytes())
}
case types.TimestampFamily:
case types.TimestampFamily, types.TimestampTZFamily:
var t time.Time
buf, t, err = encoding.DecodeUntaggedTimeValue(buf)
vec.Timestamp()[idx] = t
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/colexec/supported_sql_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,5 @@ var allSupportedSQLTypes = []types.T{
*types.String,
*types.Uuid,
*types.Timestamp,
*types.TimestampTZ,
}
12 changes: 12 additions & 0 deletions pkg/sql/colexec/typeconv/typeconv.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ func FromColumnType(ct *types.T) coltypes.T {
return coltypes.Float64
case types.TimestampFamily:
return coltypes.Timestamp
case types.TimestampTZFamily:
return coltypes.Timestamp
}
return coltypes.Unhandled
}
Expand Down Expand Up @@ -86,6 +88,8 @@ func ToColumnType(t coltypes.T) *types.T {
return types.Int
case coltypes.Float64:
return types.Float
case coltypes.Timestamp:
return types.Timestamp
}
execerror.VectorizedInternalPanic(fmt.Sprintf("unexpected coltype %s", t.String()))
return nil
Expand Down Expand Up @@ -220,6 +224,14 @@ func GetDatumToPhysicalFn(ct *types.T) func(tree.Datum) (interface{}, error) {
}
return d.Time, nil
}
case types.TimestampTZFamily:
return func(datum tree.Datum) (interface{}, error) {
d, ok := datum.(*tree.DTimestampTZ)
if !ok {
return nil, errors.Errorf("expected *tree.DTimestampTZ, found %s", reflect.TypeOf(datum))
}
return d.Time, nil
}
}
// It would probably be more correct to return an error here, rather than a
// function which always returns an error. But since the function tends to be
Expand Down
2 changes: 2 additions & 0 deletions pkg/sql/colexec/vec_elem_to_datum.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ func PhysicalTypeColElemToDatum(
return da.NewDUuid(tree.DUuid{UUID: id})
case types.TimestampFamily:
return da.NewDTimestamp(tree.DTimestamp{Time: col.Timestamp()[rowIdx]})
case types.TimestampTZFamily:
return da.NewDTimestampTZ(tree.DTimestampTZ{Time: col.Timestamp()[rowIdx]})
default:
execerror.VectorizedInternalPanic(fmt.Sprintf("Unsupported column type %s", ct.String()))
// This code is unreachable, but the compiler cannot infer that.
Expand Down
35 changes: 19 additions & 16 deletions pkg/sql/logictest/testdata/logic_test/vectorize_types
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,19 @@
# Check that all types supported by the vectorized engine can be read correctly.
statement ok
CREATE TABLE all_types (
_bool BOOL,
_bytes BYTES,
_date DATE,
_decimal DECIMAL,
_int2 INT2,
_int4 INT4,
_int INT8,
_oid OID,
_float FLOAT8,
_string STRING,
_uuid UUID,
_timestamp TIMESTAMP
_bool BOOL,
_bytes BYTES,
_date DATE,
_decimal DECIMAL,
_int2 INT2,
_int4 INT4,
_int INT8,
_oid OID,
_float FLOAT8,
_string STRING,
_uuid UUID,
_timestamp TIMESTAMP,
_timestamptz TIMESTAMPTZ
)

statement ok
Expand All @@ -32,6 +33,7 @@ VALUES (
NULL,
NULL,
NULL,
NULL,
NULL
),
(
Expand All @@ -46,14 +48,15 @@ VALUES (
1.23,
'123',
'63616665-6630-3064-6465-616462656562',
'1-1-18 1:00:00.001'
'1-1-18 1:00:00.001',
'1-1-18 1:00:00.001-8'
)

query BTTRIIIORTTT
query BTTRIIIORTTTT
SELECT * FROM all_types ORDER BY 1
----
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
false 123 2019-10-22 00:00:00 +0000 +0000 1.23 123 123 123 123 1.23 123 63616665-6630-3064-6465-616462656562 2001-01-18 01:00:00.001 +0000 +0000
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
false 123 2019-10-22 00:00:00 +0000 +0000 1.23 123 123 123 123 1.23 123 63616665-6630-3064-6465-616462656562 2001-01-18 01:00:00.001 +0000 +0000 2001-01-18 09:00:00.001 +0000 UTC

statement ok
CREATE TABLE skip_unneeded_cols (
Expand Down

0 comments on commit 0d1ae9c

Please sign in to comment.