Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

colexecbase: add all casts of native types to strings #85681

Merged
merged 2 commits into from
Aug 8, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
colexecbase: add all casts of native types to strings
This commit adds all of the casts from the types natively supported by
the vectorized engine to strings. This has an additional benefit (apart
from better performance on a lot of data): the concat binary projection
with a string on one side is now supported natively (we recently fixed
an issue to plan a cast of a non-string type to a string for a concat
operation, but since we didn't have the cast support, we'd be falling
back to the row-by-row engine in most cases).

Release note: None
  • Loading branch information
yuzefovich committed Aug 5, 2022
commit e20e1637f6b4cd7baa146dab4df545a0ec7ff9fe
5,951 changes: 4,524 additions & 1,427 deletions pkg/sql/colexec/colexecbase/cast.eg.go

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pkg/sql/colexec/colexecbase/cast_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ func TestRandomizedCast(t *testing.T) {
}
}
}
const numTypePairs = 5
var numTypePairs = rng.Intn(10) + 1
numRows := 1 + rng.Intn(coldata.BatchSize()) + rng.Intn(3)*coldata.BatchSize()
log.Infof(ctx, "num rows = %d", numRows)
for run := 0; run < numTypePairs; run++ {
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/colexec/execgen/cmd/execgen/avg_agg_gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ func genAvgAgg(inputFileContents string, wr io.Writer) error {
// canonical representatives, so we can operate with their type family
// directly.
for _, inputTypeFamily := range []types.Family{types.IntFamily, types.DecimalFamily, types.FloatFamily, types.IntervalFamily} {
tmplInfo := avgAggTypeTmplInfo{TypeFamily: toString(inputTypeFamily)}
tmplInfo := avgAggTypeTmplInfo{TypeFamily: familyToString(inputTypeFamily)}
for _, inputTypeWidth := range supportedWidthsByCanonicalTypeFamily[inputTypeFamily] {
// Note that we don't use execinfrapb.GetAggregateInfo because we don't
// want to bring in a dependency on that package to reduce the burden
Expand Down
174 changes: 145 additions & 29 deletions pkg/sql/colexec/execgen/cmd/execgen/cast_gen_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@ var nativeCastInfos = []supportedNativeCastInfo{
{types.Bool, types.Int2, boolToIntOrFloat},
{types.Bool, types.Int4, boolToIntOrFloat},
{types.Bool, types.Int, boolToIntOrFloat},
{types.Bool, types.String, boolToString},

{types.Bytes, types.String, bytesToString},
{types.Bytes, types.Uuid, bytesToUUID},

{types.Date, types.Decimal, intToDecimal},
Expand All @@ -53,35 +55,43 @@ var nativeCastInfos = []supportedNativeCastInfo{
{types.Date, types.Int2, getIntToIntCastFunc(64 /* fromWidth */, 16 /* toWidth */)},
{types.Date, types.Int4, getIntToIntCastFunc(64 /* fromWidth */, 32 /* toWidth */)},
{types.Date, types.Int, getIntToIntCastFunc(64 /* fromWidth */, anyWidth)},
{types.Date, types.String, dateToString},

{types.Decimal, types.Bool, decimalToBool},
{types.Decimal, types.Decimal, decimalToDecimal},
{types.Decimal, types.Float, decimalToFloat},
{types.Decimal, types.Int2, getDecimalToIntCastFunc(16)},
{types.Decimal, types.Int4, getDecimalToIntCastFunc(32)},
{types.Decimal, types.Int, getDecimalToIntCastFunc(anyWidth)},
{types.Decimal, types.String, decimalToString},

{types.Float, types.Bool, numToBool},
{types.Float, types.Decimal, floatToDecimal},
{types.Float, types.Int2, floatToInt(16, 64 /* floatWidth */)},
{types.Float, types.Int4, floatToInt(32, 64 /* floatWidth */)},
{types.Float, types.Int, floatToInt(anyWidth, 64 /* floatWidth */)},
{types.Float, types.String, floatToString},

{types.Int2, types.Bool, numToBool},
{types.Int2, types.Decimal, intToDecimal},
{types.Int2, types.Float, intToFloat},
{types.Int2, types.Int4, getIntToIntCastFunc(16, 32)},
{types.Int2, types.Int, getIntToIntCastFunc(16, anyWidth)},
{types.Int2, types.String, intToString},
{types.Int4, types.Bool, numToBool},
{types.Int4, types.Decimal, intToDecimal},
{types.Int4, types.Float, intToFloat},
{types.Int4, types.Int2, getIntToIntCastFunc(32, 16)},
{types.Int4, types.Int, getIntToIntCastFunc(32, anyWidth)},
{types.Int4, types.String, intToString},
{types.Int, types.Bool, numToBool},
{types.Int, types.Decimal, intToDecimal},
{types.Int, types.Float, intToFloat},
{types.Int, types.Int2, getIntToIntCastFunc(anyWidth, 16)},
{types.Int, types.Int4, getIntToIntCastFunc(anyWidth, 32)},
{types.Int, types.String, intToString},

{types.Interval, types.String, intervalToString},

{types.Jsonb, types.String, jsonToString},

Expand All @@ -99,6 +109,12 @@ var nativeCastInfos = []supportedNativeCastInfo{
{types.String, types.Timestamp, getStringToTimestampCastFunc(true /* withoutTimezone */)},
{types.String, types.TimestampTZ, getStringToTimestampCastFunc(false /* withoutTimezone */)},
{types.String, types.Uuid, stringToUUID},

{types.Timestamp, types.String, timestampToString},

{types.TimestampTZ, types.String, timestampTZToString},

{types.Uuid, types.String, uuidToString},
}

type supportedNativeCastInfo struct {
Expand All @@ -117,6 +133,18 @@ func boolToIntOrFloat(to, from, _, _, _ string) string {
return fmt.Sprintf(convStr, to, from)
}

// boolToString returns the templated code that casts the bool 'from' to a
// string: the value is formatted with strconv.FormatBool and its bytes are
// assigned to 'to'. The resulting snippet is handed to toString together with
// 'to' and 'toType'.
func boolToString(to, from, _, toType, _ string) string {
	conv := fmt.Sprintf("%s = []byte(strconv.FormatBool(%s))", to, from)
	return toString(conv, to, toType)
}

// bytesToString returns the templated code that casts the bytes value 'from'
// to a string. The generated code reads BytesEncodeFormat from the session
// data reachable through 'evalCtx' and encodes the value with
// lex.EncodeByteArrayToRawBytes, assigning the result to 'to'. The snippet is
// then handed to toString together with 'to' and 'toType'.
func bytesToString(to, from, evalCtx, toType, _ string) string {
	const tmpl = `
_format := %[3]s.SessionData().DataConversionConfig.BytesEncodeFormat
%[1]s = []byte(lex.EncodeByteArrayToRawBytes(string(%[2]s), _format, false /* skipHexPrefix */))
`
	conv := fmt.Sprintf(tmpl, to, from, evalCtx)
	return toString(conv, to, toType)
}

func bytesToUUID(to, from, _, _, _ string) string {
convStr := `
_uuid, err := uuid.FromBytes(%[2]s)
Expand All @@ -128,6 +156,16 @@ func bytesToUUID(to, from, _, _, _ string) string {
return fmt.Sprintf(convStr, to, from)
}

// dateToString returns the templated code that casts the date 'from' to a
// string. The generated code builds a date via
// pgdate.MakeCompatibleDateFromDisk, resets the scratch buffer named by 'buf',
// formats the date into it, and assigns the buffer's contents to 'to'. The
// snippet is then handed to toString together with 'to' and 'toType'.
func dateToString(to, from, _, toType, buf string) string {
	const tmpl = `
_date := pgdate.MakeCompatibleDateFromDisk(%[2]s)
%[3]s.Reset()
_date.Format(%[3]s)
%[1]s = []byte(%[3]s.String())
`
	conv := fmt.Sprintf(tmpl, to, from, buf)
	return toString(conv, to, toType)
}

// decimalToBool returns the templated code that casts the decimal 'from' to a
// bool: 'to' is assigned true exactly when the decimal's Sign() is non-zero.
func decimalToBool(to, from, _, _, _ string) string {
	const tmpl = "%[1]s = %[2]s.Sign() != 0"
	return fmt.Sprintf(tmpl, to, from)
}
Expand Down Expand Up @@ -191,6 +229,10 @@ func getDecimalToIntCastFunc(toIntWidth int32) castFunc {
}
}

// decimalToString returns the templated code that casts the decimal 'from' to
// a string by assigning the bytes of its String() representation to 'to'. The
// snippet is then handed to toString together with 'to' and 'toType'.
func decimalToString(to, from, _, toType, _ string) string {
	conv := fmt.Sprintf("%s = []byte(%s.String())", to, from)
	return toString(conv, to, toType)
}

// toDecimal returns the templated code that performs the cast to a decimal. It
// first will execute whatever is passed in 'conv' (the main conversion) and
// then will perform the rounding of 'to' variable according to 'toType'.
Expand Down Expand Up @@ -235,6 +277,14 @@ func floatToInt(intWidth, floatWidth int32) castFunc {
}
}

// floatToString returns the templated code that casts the float 'from' to a
// string. The generated code fetches DataConversionConfig from the session
// data reachable through 'evalCtx' and formats the value with
// tree.PgwireFormatFloat, assigning the result to 'to'. The snippet is then
// handed to toString together with 'to' and 'toType'.
func floatToString(to, from, evalCtx, toType, _ string) string {
	const tmpl = `
dcc := %[3]s.SessionData().DataConversionConfig
%[1]s = tree.PgwireFormatFloat(nil /* buf */, %[2]s, dcc, types.Float)
`
	conv := fmt.Sprintf(tmpl, to, from, evalCtx)
	return toString(conv, to, toType)
}

func intToDecimal(to, from, _, toType, _ string) string {
conv := `
%[1]s.SetInt64(int64(%[2]s))
Expand Down Expand Up @@ -286,20 +336,37 @@ func getIntToIntCastFunc(fromWidth, toWidth int32) castFunc {
}
}

func jsonToString(to, from, _, toType, _ string) string {
func intToString(to, from, _, toType, _ string) string {
convStr := `
_string := %[2]s.String()
switch %[3]s.Oid() {
case oid.T_char:
// "char" is supposed to truncate long values.
_string = util.TruncateString(_string, 1)
case oid.T_bpchar:
// bpchar types truncate trailing whitespace.
_string = strings.TrimRight(_string, " ")
if %[3]s.Oid() == oid.T_char {
// int to "char" casts just return the corresponding ASCII byte.
if %[2]s > math.MaxInt8 || %[2]s < math.MinInt8 {
colexecerror.ExpectedError(tree.ErrCharOutOfRange)
}
if %[2]s == 0 {
%[1]s = []byte{}
} else {
%[1]s = []byte{byte(%[2]s)}
}
} else {
%[1]s = []byte(strconv.FormatInt(int64(%[2]s), 10))
}
%[1]s = []byte(_string)
`
return fmt.Sprintf(convStr, to, from, toType)
`
return toString(fmt.Sprintf(convStr, to, from, toType), to, toType)
}

// intervalToString returns the templated code that casts the interval 'from'
// to a string. The generated code reads DataConversionConfig from the session
// data reachable through 'evalCtx', resets the scratch buffer named by 'buf',
// formats the interval into it using dcc.IntervalStyle, and assigns the
// buffer's contents to 'to'. The snippet is then handed to toString together
// with 'to' and 'toType'.
func intervalToString(to, from, evalCtx, toType, buf string) string {
	const tmpl = `
dcc := %[3]s.SessionData().DataConversionConfig
%[4]s.Reset()
%[2]s.FormatWithStyle(%[4]s, dcc.IntervalStyle)
%[1]s = []byte(%[4]s.String())
`
	conv := fmt.Sprintf(tmpl, to, from, evalCtx, buf)
	return toString(conv, to, toType)
}

// jsonToString returns the templated code that casts the JSON value 'from' to
// a string by assigning the bytes of its String() representation to 'to'. The
// snippet is then handed to toString together with 'to' and 'toType'.
func jsonToString(to, from, _, toType, _ string) string {
	conv := fmt.Sprintf("%s = []byte(%s.String())", to, from)
	return toString(conv, to, toType)
}

func stringToBool(to, from, _, _, _ string) string {
Expand Down Expand Up @@ -427,13 +494,19 @@ func stringToJSON(to, from, _, _, _ string) string {
return fmt.Sprintf(convStr, to, from)
}

func stringToString(to, from, _, toType, _ string) string {
// TODO(yuzefovich): figure out whether we can avoid converting to a string in
// the template below.

// toString returns the templated code that performs the cast to a string. It
// first will execute whatever is passed in 'conv' (the main conversion) and
// then will perform the truncation of 'to' variable according to 'toType'.
func toString(conv, to, toType string) string {
// The logic here is a combination of the relevant pieces from
// eval.performCastWithoutPrecisionTruncation as well as from
// tree.AdjustValueToType.
convStr := `
if %[3]s.Oid() == oid.T_name {
// For Names we don't perform the truncation, and there is no need
// to do anything about the Oids since those are stored in the type.
%[1]s = %[2]s
} else {
%[1]s
if %[3]s.Oid() != oid.T_name {
// bpchar types truncate trailing whitespace.
if %[3]s.Oid() == oid.T_bpchar {
%[2]s = bytes.TrimRight(%[2]s, " ")
Expand All @@ -442,20 +515,37 @@ func stringToString(to, from, _, toType, _ string) string {
// 'hello'::CHAR(2) -> 'he'
// This is true of all the string type variants.
if %[3]s.Width() > 0 {
// TODO(yuzefovich): figure out whether we can avoid converting
// to a string.
%[2]s = []byte(util.TruncateString(string(%[2]s), int(%[3]s.Width())))
}
if %[3]s.Oid() == oid.T_char {
// "char" is supposed to truncate long values.
// TODO(yuzefovich): figure out whether we can avoid converting
// to a string.
%[2]s = []byte(util.TruncateString(string(%[2]s), 1))
}
%[1]s = %[2]s
}
if %[3]s.Width() > 0 && utf8.RuneCountInString(string(%[2]s)) > int(%[3]s.Width()) {
_typeString := %[3]s.SQLString()
colexecerror.ExpectedError(
pgerror.Newf(pgcode.StringDataRightTruncation, "value too long for type " + _typeString,
))
}
`
return fmt.Sprintf(convStr, to, from, toType)
return fmt.Sprintf(convStr, conv, to, toType)
}

// stringToString returns the templated code that casts one string to another:
// the main conversion is a plain assignment of 'from' to 'to', and the snippet
// is then handed to toString together with 'to' and 'toType'.
func stringToString(to, from, _, toType, _ string) string {
	assign := fmt.Sprintf("%s = %s", to, from)
	return toString(assign, to, toType)
}

// roundTimestamp returns the templated code that rounds the timestamp named
// by 'from' to 'precision' and stores the result in the timestamp named by
// 'to'. The generated code raises an expected error (via
// colexecerror.ExpectedError) when the rounded value falls after
// tree.MaxSupportedTime or before tree.MinSupportedTime.
func roundTimestamp(to, from, precision string) string {
	const tmpl = `
%[1]s = %[2]s.Round(%[3]s)
if %[1]s.After(tree.MaxSupportedTime) || %[1]s.Before(tree.MinSupportedTime) {
colexecerror.ExpectedError(tree.NewTimestampExceedsBoundsError(%[1]s))
}
`
	return fmt.Sprintf(tmpl, to, from, precision)
}

func getStringToTimestampCastFunc(withoutTimezone bool) castFunc {
Expand All @@ -475,12 +565,10 @@ func getStringToTimestampCastFunc(withoutTimezone bool) castFunc {
if err != nil {
colexecerror.ExpectedError(err)
}
%[1]s = _t.Round(_roundTo)
if %[1]s.After(tree.MaxSupportedTime) || %[1]s.Before(tree.MinSupportedTime) {
colexecerror.ExpectedError(tree.NewTimestampExceedsBoundsError(%[1]s))
}
%[1]s
`
return fmt.Sprintf(convStr, to, from, evalCtx, toType, parseTimestampKind)
roundAndAssign := roundTimestamp(to, "_t", "_roundTo")
return fmt.Sprintf(convStr, roundAndAssign, from, evalCtx, toType, parseTimestampKind)
}
}

Expand All @@ -495,6 +583,34 @@ func stringToUUID(to, from, _, _, _ string) string {
return fmt.Sprintf(convStr, to, from)
}

// timestampToString returns the templated code that casts the timestamp
// 'from' to a string by assigning the bytes of tree.FormatTimestamp's result
// to 'to'. The snippet is then handed to toString together with 'to' and
// 'toType'.
func timestampToString(to, from, _, toType, _ string) string {
	conv := fmt.Sprintf("%s = []byte(tree.FormatTimestamp(%s))", to, from)
	return toString(conv, to, toType)
}

// timestampTZToString returns the templated code that casts the timestamptz
// 'from' to a string. The generated code moves the value into the location
// returned by 'evalCtx'.GetLocation(), rounds it to time.Microsecond using the
// roundTimestamp template, resets the scratch buffer named by 'buf', formats
// the timestamp into it with tree.FormatTimestampTZ, and assigns the buffer's
// contents to 'to'. The snippet is then handed to toString together with 'to'
// and 'toType'.
func timestampTZToString(to, from, evalCtx, toType, buf string) string {
	// The rounding snippet operates in place on the local '_t' that the
	// template below declares.
	roundInPlace := roundTimestamp("_t", "_t", "time.Microsecond")
	const tmpl = `
// Convert to context timezone for correct display.
_t := %[2]s.In(%[3]s.GetLocation())
%[5]s
%[4]s.Reset()
tree.FormatTimestampTZ(_t, %[4]s)
%[1]s = []byte(%[4]s.String())
`
	conv := fmt.Sprintf(tmpl, to, from, evalCtx, buf, roundInPlace)
	return toString(conv, to, toType)
}

// uuidToString returns the templated code that casts the UUID bytes 'from' to
// a string. The generated code parses the bytes with uuid.FromBytes, reports
// a parse failure via colexecerror.ExpectedError, and assigns the bytes of
// the UUID's String() representation to 'to'. The snippet is then handed to
// toString together with 'to' and 'toType'.
func uuidToString(to, from, _, toType, _ string) string {
	const tmpl = `
_uuid, err := uuid.FromBytes(%[2]s)
if err != nil {
colexecerror.ExpectedError(err)
}
%[1]s = []byte(_uuid.String())
`
	conv := fmt.Sprintf(tmpl, to, from)
	return toString(conv, to, toType)
}

// getDatumToNativeCastFunc returns a castFunc for casting datum-backed value
// to a value of the specified physical representation (i.e. to natively
// supported type). The returned castFunc assumes that there is a converter
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/colexec/execgen/cmd/execgen/overloads_base.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ func (b *overloadBase) String() string {
return fmt.Sprintf("%s: %s", b.Name, b.OpStr)
}

func toString(family types.Family) string {
func familyToString(family types.Family) string {
switch family {
case typeconv.DatumVecCanonicalTypeFamily:
return "typeconv.DatumVecCanonicalTypeFamily"
Expand All @@ -140,7 +140,7 @@ type argTypeOverloadBase struct {
func newArgTypeOverloadBase(canonicalTypeFamily types.Family) *argTypeOverloadBase {
return &argTypeOverloadBase{
CanonicalTypeFamily: canonicalTypeFamily,
CanonicalTypeFamilyStr: toString(canonicalTypeFamily),
CanonicalTypeFamilyStr: familyToString(canonicalTypeFamily),
}
}

Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/colexec/execgen/cmd/execgen/overloads_gen_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,13 @@ func populateTwoArgsOverloads(
if !found {
colexecerror.InternalError(errors.AssertionFailedf("didn't find supported widths for %s", leftFamily))
}
leftFamilyStr := toString(leftFamily)
leftFamilyStr := familyToString(leftFamily)
for _, rightFamily := range combinableCanonicalTypeFamilies[leftFamily] {
rightWidths, found := supportedWidthsByCanonicalTypeFamily[rightFamily]
if !found {
colexecerror.InternalError(errors.AssertionFailedf("didn't find supported widths for %s", rightFamily))
}
rightFamilyStr := toString(rightFamily)
rightFamilyStr := familyToString(rightFamily)
for _, leftWidth := range leftWidths {
for _, rightWidth := range rightWidths {
customizer, ok := customizers[typePair{leftFamily, leftWidth, rightFamily, rightWidth}]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ func rangeOffsetHandlerGenerator(inputFileContents string, wr io.Writer) error {
ordColDirInfo := windowFrameOrdDirInfo{IsOrdColAsc: isOrdColAsc}
for _, typeFamily := range rangeOrderColTypeFamilies {
canonicalTypeFamily := typeconv.TypeFamilyToCanonicalTypeFamily(typeFamily)
typeFamilyStr := toString(typeFamily)
typeFamilyStr := familyToString(typeFamily)
typeFamilyInfo := windowFrameOrderTypeFamilyInfo{
TypeFamily: typeFamilyStr,
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/colexec/execgen/cmd/execgen/span_encoder_gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ func genSpanEncoder(inputFileContents string, wr io.Writer) error {
// We are currently unable to encode JSON as a table key.
continue
}
familyInfo := spanEncoderTypeFamilyInfo{TypeFamily: toString(family)}
familyInfo := spanEncoderTypeFamilyInfo{TypeFamily: familyToString(family)}
for _, width := range supportedWidthsByCanonicalTypeFamily[family] {
overload := spanEncoderTmplInfo{
Asc: asc,
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/colexec/execgen/cmd/execgen/sum_agg_gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ func genSumAgg(inputFileContents string, wr io.Writer, isSumInt bool) error {
var tmplInfos []sumAggTypeTmplInfo
for _, inputTypeFamily := range supportedTypeFamilies {
tmplInfo := sumAggTypeTmplInfo{
TypeFamily: toString(inputTypeFamily),
TypeFamily: familyToString(inputTypeFamily),
}
for _, inputTypeWidth := range supportedWidthsByCanonicalTypeFamily[inputTypeFamily] {
// Note that we don't use execinfrapb.GetAggregateInfo because we don't
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/sem/eval/cast.go
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,7 @@ func performCastWithoutPrecisionTruncation(
s = string(b)
case *tree.DInt:
if typ.Oid() == oid.T_char {
// int to "char" casts just return the correspondong ASCII byte.
// int to "char" casts just return the corresponding ASCII byte.
if *t > math.MaxInt8 || *t < math.MinInt8 {
return nil, tree.ErrCharOutOfRange
} else if *t == 0 {
Expand Down
Loading