Merge #43514

43514: colexec: support TIMESTAMPTZ type r=yuzefovich a=yuzefovich

**colexec: support TIMESTAMPTZ type**

This commit adds support for the TimestampTZ data type, which is represented in the same way as Timestamp (as 'time.Time'). We already had everything else in place, so only the type conversion was needed.

Addresses: #42043.

Release note (sql change): The vectorized engine now supports the TIMESTAMPTZ data type.

**sqlsmith: add several types to vecSeedTable**

This commit adds the previously supported INT2 and INT4 types to vecSeedTable, as well as the newly supported TIMESTAMPTZ type.

Release note: None

Co-authored-by: Yahor Yuzefovich <[email protected]>
cockroachdb · Dec 31, 2019 · 0d1ae9c · 0d1ae9c
2 parents f1f1525 + b3faa6b
commit 0d1ae9c
Show file tree

Hide file tree

Showing 9 changed files with 73 additions and 32 deletions.
diff --git a/pkg/col/colserde/arrowbatchconverter_test.go b/pkg/col/colserde/arrowbatchconverter_test.go
@@ -31,8 +31,8 @@ func randomBatch(allocator *colexec.Allocator) ([]coltypes.T, coldata.Batch) {
 
 	availableTyps := make([]coltypes.T, 0, len(coltypes.AllTypes))
 	for _, typ := range coltypes.AllTypes {
-		// TODO(asubiotto,jordan): We do not support decimal, timestamp conversion yet.
-		if typ == coltypes.Decimal || typ == coltypes.Timestamp {
+		// TODO(asubiotto,jordan): We do not support decimal conversion yet.
+		if typ == coltypes.Decimal {
 			continue
 		}
 		availableTyps = append(availableTyps, typ)
@@ -107,6 +107,17 @@ func assertEqualBatches(t *testing.T, expected, actual coldata.Batch) {
 					t.Fatalf("bytes mismatch at index %d:\nexpected:\n%sactual:\n%s", i, expectedBytes, resultBytes)
 				}
 			}
+		} else if typ == coltypes.Timestamp {
+			// Cannot use require.Equal for this type.
+			// TODO(yuzefovich): Again, why not?
+			expectedTimestamp := expectedVec.Timestamp()[0:expected.Length()]
+			resultTimestamp := actualVec.Timestamp()[0:actual.Length()]
+			require.Equal(t, len(expectedTimestamp), len(resultTimestamp))
+			for i := range expectedTimestamp {
+				if !expectedTimestamp[i].Equal(resultTimestamp[i]) {
+					t.Fatalf("Timestamp mismatch at index %d:\nexpected:\n%sactual:\n%s", i, expectedTimestamp[i], resultTimestamp[i])
+				}
+			}
 		} else {
 			require.Equal(
 				t,
@@ -177,19 +188,21 @@ func roundTripBatch(
 func TestRecordBatchRoundtripThroughBytes(t *testing.T) {
 	defer leaktest.AfterTest(t)()
 
-	typs, b := randomBatch(testAllocator)
-	c, err := colserde.NewArrowBatchConverter(typs)
-	require.NoError(t, err)
-	r, err := colserde.NewRecordBatchSerializer(typs)
-	require.NoError(t, err)
+	for run := 0; run < 10; run++ {
+		typs, b := randomBatch(testAllocator)
+		c, err := colserde.NewArrowBatchConverter(typs)
+		require.NoError(t, err)
+		r, err := colserde.NewRecordBatchSerializer(typs)
+		require.NoError(t, err)
 
-	// Make a copy of the original batch because the converter modifies and casts
-	// data without copying for performance reasons.
-	expected := copyBatch(b)
-	actual, err := roundTripBatch(b, c, r)
-	require.NoError(t, err)
+		// Make a copy of the original batch because the converter modifies and
+		// casts data without copying for performance reasons.
+		expected := copyBatch(b)
+		actual, err := roundTripBatch(b, c, r)
+		require.NoError(t, err)
 
-	assertEqualBatches(t, expected, actual)
+		assertEqualBatches(t, expected, actual)
+	}
 }
 
 func BenchmarkArrowBatchConverter(b *testing.B) {

diff --git a/pkg/col/colserde/file.go b/pkg/col/colserde/file.go
@@ -368,6 +368,11 @@ func schema(fb *flatbuffers.Builder, typs []coltypes.T) flatbuffers.UOffsetT {
 			arrowserde.FloatingPointAddPrecision(fb, arrowserde.PrecisionDOUBLE)
 			fbTypOffset = arrowserde.FloatingPointEnd(fb)
 			fbTyp = arrowserde.TypeFloatingPoint
+		case coltypes.Timestamp:
+			// Timestamps are marshaled into bytes, so we use binary headers.
+			arrowserde.BinaryStart(fb)
+			fbTypOffset = arrowserde.BinaryEnd(fb)
+			fbTyp = arrowserde.TypeTimestamp
 		default:
 			panic(errors.Errorf(`don't know how to map %s`, typ))
 		}
@@ -451,6 +456,8 @@ func typeFromField(field *arrowserde.Field) (coltypes.T, error) {
 		default:
 			return coltypes.Unhandled, errors.Errorf(`unhandled float precision %d`, floatType.Precision())
 		}
+	case arrowserde.TypeTimestamp:
+		return coltypes.Timestamp, nil
 	}
 	// It'd be nice if this error could include more details, but flatbuffers
 	// doesn't make a String method or anything like that.

diff --git a/pkg/internal/sqlsmith/setup.go b/pkg/internal/sqlsmith/setup.go
@@ -110,10 +110,13 @@ CREATE INVERTED INDEX on seed (_jsonb);
 	vecSeedTable = `
 CREATE TABLE IF NOT EXISTS seed_vec AS
 	SELECT
+		g::INT2 AS _int2,
+		g::INT4 AS _int4,
 		g::INT8 AS _int8,
 		g::FLOAT8 AS _float8,
 		'2001-01-01'::DATE + g AS _date,
 		'2001-01-01'::TIMESTAMP + g * '1 day'::INTERVAL AS _timestamp,
+		'2001-01-01'::TIMESTAMPTZ + g * '1 day'::INTERVAL AS _timestamptz,
 		g % 2 = 1 AS _bool,
 		g::DECIMAL AS _decimal,
 		g::STRING AS _string,

diff --git a/pkg/sql/colencoding/key_encoding.go b/pkg/sql/colencoding/key_encoding.go
@@ -238,7 +238,7 @@ func decodeTableKeyToCol(
 			rkey, t, err = encoding.DecodeVarintDescending(key)
 		}
 		vec.Int64()[idx] = t
-	case types.TimestampFamily:
+	case types.TimestampFamily, types.TimestampTZFamily:
 		var t time.Time
 		if dir == sqlbase.IndexDescriptor_ASC {
 			rkey, t, err = encoding.DecodeTimeAscending(key)
@@ -297,7 +297,7 @@ func UnmarshalColumnValueToCol(
 		var v int64
 		v, err = value.GetInt()
 		vec.Int64()[idx] = v
-	case types.TimestampFamily:
+	case types.TimestampFamily, types.TimestampTZFamily:
 		var v time.Time
 		v, err = value.GetTime()
 		vec.Timestamp()[idx] = v

diff --git a/pkg/sql/colencoding/value_encoding.go b/pkg/sql/colencoding/value_encoding.go
@@ -93,7 +93,7 @@ func decodeUntaggedDatumToCol(vec coldata.Vec, idx uint16, t *types.T, buf []byt
 		if err == nil {
 			vec.Bytes().Set(int(idx), data.GetBytes())
 		}
-	case types.TimestampFamily:
+	case types.TimestampFamily, types.TimestampTZFamily:
 		var t time.Time
 		buf, t, err = encoding.DecodeUntaggedTimeValue(buf)
 		vec.Timestamp()[idx] = t

diff --git a/pkg/sql/colexec/supported_sql_types.go b/pkg/sql/colexec/supported_sql_types.go
@@ -28,4 +28,5 @@ var allSupportedSQLTypes = []types.T{
 	*types.String,
 	*types.Uuid,
 	*types.Timestamp,
+	*types.TimestampTZ,
 }
diff --git a/pkg/sql/colexec/typeconv/typeconv.go b/pkg/sql/colexec/typeconv/typeconv.go
@@ -49,6 +49,8 @@ func FromColumnType(ct *types.T) coltypes.T {
 		return coltypes.Float64
 	case types.TimestampFamily:
 		return coltypes.Timestamp
+	case types.TimestampTZFamily:
+		return coltypes.Timestamp
 	}
 	return coltypes.Unhandled
 }
@@ -86,6 +88,8 @@ func ToColumnType(t coltypes.T) *types.T {
 		return types.Int
 	case coltypes.Float64:
 		return types.Float
+	case coltypes.Timestamp:
+		return types.Timestamp
 	}
 	execerror.VectorizedInternalPanic(fmt.Sprintf("unexpected coltype %s", t.String()))
 	return nil
@@ -220,6 +224,14 @@ func GetDatumToPhysicalFn(ct *types.T) func(tree.Datum) (interface{}, error) {
 			}
 			return d.Time, nil
 		}
+	case types.TimestampTZFamily:
+		return func(datum tree.Datum) (interface{}, error) {
+			d, ok := datum.(*tree.DTimestampTZ)
+			if !ok {
+				return nil, errors.Errorf("expected *tree.DTimestampTZ, found %s", reflect.TypeOf(datum))
+			}
+			return d.Time, nil
+		}
 	}
 	// It would probably be more correct to return an error here, rather than a
 	// function which always returns an error. But since the function tends to be

diff --git a/pkg/sql/colexec/vec_elem_to_datum.go b/pkg/sql/colexec/vec_elem_to_datum.go
@@ -73,6 +73,8 @@ func PhysicalTypeColElemToDatum(
 		return da.NewDUuid(tree.DUuid{UUID: id})
 	case types.TimestampFamily:
 		return da.NewDTimestamp(tree.DTimestamp{Time: col.Timestamp()[rowIdx]})
+	case types.TimestampTZFamily:
+		return da.NewDTimestampTZ(tree.DTimestampTZ{Time: col.Timestamp()[rowIdx]})
 	default:
 		execerror.VectorizedInternalPanic(fmt.Sprintf("Unsupported column type %s", ct.String()))
 		// This code is unreachable, but the compiler cannot infer that.

diff --git a/pkg/sql/logictest/testdata/logic_test/vectorize_types b/pkg/sql/logictest/testdata/logic_test/vectorize_types
@@ -3,18 +3,19 @@
 # Check that all types supported by the vectorized engine can be read correctly.
 statement ok
 CREATE TABLE all_types (
-    _bool      BOOL,
-    _bytes     BYTES,
-    _date      DATE,
-    _decimal   DECIMAL,
-    _int2      INT2,
-    _int4      INT4,
-    _int       INT8,
-    _oid       OID,
-    _float     FLOAT8,
-    _string    STRING,
-    _uuid      UUID,
-    _timestamp TIMESTAMP
+    _bool        BOOL,
+    _bytes       BYTES,
+    _date        DATE,
+    _decimal     DECIMAL,
+    _int2        INT2,
+    _int4        INT4,
+    _int         INT8,
+    _oid         OID,
+    _float       FLOAT8,
+    _string      STRING,
+    _uuid        UUID,
+    _timestamp   TIMESTAMP,
+    _timestamptz TIMESTAMPTZ
 )
 
 statement ok
@@ -32,6 +33,7 @@ VALUES (
         NULL,
         NULL,
         NULL,
+        NULL,
         NULL
        ),
        (
@@ -46,14 +48,15 @@ VALUES (
        1.23,
        '123',
        '63616665-6630-3064-6465-616462656562',
-       '1-1-18 1:00:00.001'
+       '1-1-18 1:00:00.001',
+       '1-1-18 1:00:00.001-8'
        )
 
-query BTTRIIIORTTT
+query BTTRIIIORTTTT
 SELECT * FROM all_types ORDER BY 1
 ----
-NULL   NULL  NULL                             NULL  NULL  NULL  NULL  NULL  NULL  NULL  NULL                                  NULL
-false  123   2019-10-22 00:00:00 +0000 +0000  1.23  123   123   123   123   1.23  123   63616665-6630-3064-6465-616462656562  2001-01-18 01:00:00.001 +0000 +0000
+NULL   NULL  NULL                             NULL  NULL  NULL  NULL  NULL  NULL  NULL  NULL                                  NULL                                 NULL
+false  123   2019-10-22 00:00:00 +0000 +0000  1.23  123   123   123   123   1.23  123   63616665-6630-3064-6465-616462656562  2001-01-18 01:00:00.001 +0000 +0000  2001-01-18 09:00:00.001 +0000 UTC
 
 statement ok
 CREATE TABLE skip_unneeded_cols (