Skip to content

Commit

Permalink
opt: synthesize check constraints on enum columns
Browse files Browse the repository at this point in the history
This PR teaches the optimizer how to synthesize check constraints on
columns of an ENUM type, allowing queries like:

```
CREATE TYPE t AS ENUM ('howdy', 'hello');
CREATE TABLE tt (x t, y INT, PRIMARY KEY (x, y));
SELECT x, y FROM tt WHERE y = 2
```

to be planned using constrained spans on the enum values, rather than a
full table scan.

Release note (performance improvement): Allow the optimizer to use enum
information to generate better query plans.
  • Loading branch information
rohany committed May 21, 2020
1 parent 436a82a commit 5177a94
Show file tree
Hide file tree
Showing 7 changed files with 139 additions and 58 deletions.
21 changes: 18 additions & 3 deletions pkg/sql/opt/exec/execbuilder/testdata/enums
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,20 @@ query T
EXPLAIN (OPT) SELECT * FROM t WHERE x > 'hello'
----
scan t
└── constraint: /1: [/'howdy' - ]
└── constraint: /1: [/'howdy' - /'hi']

# Test that we can perform constrained scans using secondary indexes too.
query T
EXPLAIN (OPT) SELECT * FROM t WHERE y = 'hello'
----
scan t@i
└── constraint: /2/1: [/'hello' - /'hello']
└── constraint: /2/1: [/'hello'/'hello' - /'hello'/'hi']

query T
EXPLAIN (OPT) SELECT * FROM t WHERE y > 'hello' AND y < 'hi'
----
scan t@i
└── constraint: /2/1: [/'howdy' - /'howdy']
└── constraint: /2/1: [/'howdy'/'hello' - /'howdy'/'hi']

query T
EXPLAIN (opt) SELECT * FROM t WHERE x IN ('hello', 'hi')
Expand All @@ -51,3 +51,18 @@ scan t
├── [/'hello' - /'hello']
└── [/'hi' - /'hi']

# TODO (rohany): Why is the NOT NULL important here? I couldn't get this
# plan otherwise. If x is NULL, why can't the optimizer also issue a scan
# for /Null/2? Instead, a plan that just filters on y=2 is generated.
statement ok
CREATE TABLE checks (x greeting NOT NULL, y int, INDEX (x, y))

# Check that inferred check constraints from enum columns are used in plans.
query T
EXPLAIN (OPT) SELECT x, y FROM checks WHERE y = 2
----
scan checks@checks_x_y_idx
└── constraint: /1/2/3
├── [/'hello'/2 - /'hello'/2]
├── [/'howdy'/2 - /'howdy'/2]
└── [/'hi'/2 - /'hi'/2]
53 changes: 47 additions & 6 deletions pkg/sql/opt_catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,11 @@ type optTable struct {
outboundFKs []optForeignKeyConstraint
inboundFKs []optForeignKeyConstraint

// checkConstraints is the set of check constraints for this table. It
// can be different from desc's constraints because of synthesized
// constraints for user defined types.
checkConstraints []cat.CheckConstraint

// colMap is a mapping from unique ColumnID to column ordinal within the
// table. This is a common lookup that needs to be fast.
colMap map[sqlbase.ColumnID]int
Expand Down Expand Up @@ -625,6 +630,46 @@ func newOptTable(
ot.families[i].init(ot, &desc.Families[i+1])
}

// Synthesize any check constraints for user defined types.
var synthesizedChecks []cat.CheckConstraint
for i := 0; i < ot.WritableColumnCount(); i++ {
col := ot.Column(i)
colType := col.DatumType()
if colType.UserDefined() {
switch colType.Family() {
case types.EnumFamily:
// We synthesize an (x IN (v1, v2, v3...)) check for enum types.
expr := &tree.ComparisonExpr{
Operator: tree.In,
Left: &tree.ColumnItem{ColumnName: col.ColName()},
Right: tree.NewDTuple(colType, tree.MakeAllDEnumsInType(colType)...),
}
synthesizedChecks = append(synthesizedChecks, cat.CheckConstraint{
// TODO (rohany): I'm not sure if we should change the default
// tree.Serialize to just dump expressions with UDTs as IDs.
// That saves name resolution in a lot of cases when we serialize
// expressions, as well as allowing expressions in computed and
// default columns to not need renames when a type changes.
// I'm currently doing this because when we resolve a type by ID
// we don't resolve the parent DB and schema into a resolved name
// for the type, so the standard formatting would be ambiguous.
Constraint: tree.AsStringWithFlags(expr, tree.FmtParsable|tree.FmtFormatUserDefinedTypesAsIDs),
Validated: true,
})
}
}
}
// Move all existing and synthesized checks into the opt table.
activeChecks := desc.ActiveChecks()
ot.checkConstraints = make([]cat.CheckConstraint, 0, len(activeChecks)+len(synthesizedChecks))
for i := range activeChecks {
ot.checkConstraints = append(ot.checkConstraints, cat.CheckConstraint{
Constraint: activeChecks[i].Expr,
Validated: activeChecks[i].Validity == sqlbase.ConstraintValidity_Validated,
})
}
ot.checkConstraints = append(ot.checkConstraints, synthesizedChecks...)

// Add stats last, now that other metadata is initialized.
if stats != nil {
ot.stats = make([]optTableStat, len(stats))
Expand Down Expand Up @@ -780,16 +825,12 @@ func (ot *optTable) Statistic(i int) cat.TableStatistic {

// CheckCount is part of the cat.Table interface. It reports the length of
// ot.checkConstraints rather than of ot.desc.ActiveChecks(), so that check
// constraints synthesized for user defined types are counted as well.
func (ot *optTable) CheckCount() int {
	return len(ot.checkConstraints)
}

// Check is part of the cat.Table interface. It returns the i-th entry of
// ot.checkConstraints, the same slice whose length CheckCount reports, so
// CheckCount and Check always agree on which indexes are valid.
func (ot *optTable) Check(i int) cat.CheckConstraint {
	return ot.checkConstraints[i]
}

// FamilyCount is part of the cat.Table interface.
Expand Down
86 changes: 41 additions & 45 deletions pkg/sql/resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,21 +153,13 @@ func (p *planner) ResolveType(name *tree.UnresolvedObjectName) (*types.T, error)
return nil, err
}
tn := tree.MakeTypeNameFromPrefix(prefix, tree.Name(name.Object()))
tdesc := desc.(*sqlbase.TypeDescriptor)
// Hydrate the types.T from the resolved descriptor. Once we cache
// descriptors, this hydration should install pointers to cached data.
switch t := tdesc.Kind; t {
case sqlbase.TypeDescriptor_ENUM:
typ := types.MakeEnum(uint32(tdesc.ID))
if err := tdesc.HydrateTypeInfo(typ); err != nil {
return nil, err
}
// Override the hydrated name with the fully resolved type name.
typ.TypeMeta.Name = &tn
return typ, nil
default:
return nil, errors.AssertionFailedf("unknown type kind %s", t.String())
typ, err := sqlbase.MakeTypeFromTypeDesc(desc.(*TypeDescriptor))
if err != nil {
return nil, err
}
// Override the hydrated name with the fully resolved type name.
typ.TypeMeta.Name = &tn
return typ, nil
}

// TODO (rohany): Once we start to cache type descriptors, this needs to
Expand All @@ -183,15 +175,43 @@ func (p *planner) getTypeDescByID(ctx context.Context, id sqlbase.ID) (*TypeDesc
if !ok {
return nil, errors.AssertionFailedf("%s was not a type descriptor", rawDesc)
}
// TODO (rohany): Should we perform lookups on the parent database and schema
// in order to provide this type with a fully resolved name?
return typDesc, nil
}

// ResolveTypeByID implements the tree.TypeResolver interface. It looks up the
// type descriptor with the given ID and builds a types.T from it. Any error
// from the descriptor lookup or from building the type is returned as is.
//
// Unlike ResolveType, the returned type's name is not overridden with a fully
// resolved type name.
func (p *planner) ResolveTypeByID(id uint32) (*types.T, error) {
	// TODO (rohany): This should take in a context. See #49262.
	desc, err := p.getTypeDescByID(p.EvalContext().Context, sqlbase.ID(id))
	if err != nil {
		return nil, err
	}
	return sqlbase.MakeTypeFromTypeDesc(desc)
}

// hydrateTableDescriptor hydrates the type of every column in desc whose type
// is user defined, using the type descriptor found via the column type's
// stable type ID. Columns with other types are left untouched. The first
// lookup or hydration error aborts the walk and is returned.
func (p *planner) hydrateTableDescriptor(ctx context.Context, desc *TableDescriptor) error {
	for idx := range desc.Columns {
		colTyp := desc.Columns[idx].Type
		if !colTyp.UserDefined() {
			continue
		}
		// Fetch the descriptor backing this column's user defined type.
		typDesc, err := p.getTypeDescByID(ctx, sqlbase.ID(colTyp.StableTypeID()))
		if err != nil {
			return err
		}
		// TODO (rohany): This should be a noop if the hydrated type
		// information present in the descriptor has the same version as
		// the resolved type descriptor we found here.
		// TODO (rohany): Once types are leased we need to create a new
		// ImmutableTableDescriptor when a type lease expires rather than
		// overwriting the types information in the shared descriptor.
		if err := typDesc.HydrateTypeInfo(colTyp); err != nil {
			return err
		}
	}
	return nil
}

// maybeHydrateTypesInDescriptor hydrates any types.T's in the input descriptor.
Expand All @@ -200,39 +220,15 @@ func (p *planner) ResolveTypeByID(id uint32) (*types.T, error) {
func (p *planner) maybeHydrateTypesInDescriptor(
ctx context.Context, objDesc tree.NameResolutionResult,
) error {
// Helper method to hydrate the types within a TableDescriptor.
hydrateDesc := func(desc *TableDescriptor) error {
for i := range desc.Columns {
col := &desc.Columns[i]
if col.Type.UserDefined() {
// Look up its type descriptor.
typDesc, err := p.getTypeDescByID(ctx, sqlbase.ID(col.Type.StableTypeID()))
if err != nil {
return err
}
// TODO (rohany): This should be a noop if the hydrated type
// information present in the descriptor has the same version as
// the resolved type descriptor we found here.
// TODO (rohany): Once types are leased we need to create a new
// ImmutableTableDescriptor when a type lease expires rather than
// overwriting the types information in the shared descriptor.
if err := typDesc.HydrateTypeInfo(col.Type); err != nil {
return err
}
}
}
return nil
}

// As of now, only {Mutable,Immutable}TableDescriptor have types.T that
// need to be hydrated.
switch desc := objDesc.(type) {
case *sqlbase.MutableTableDescriptor:
if err := hydrateDesc(desc.TableDesc()); err != nil {
if err := p.hydrateTableDescriptor(ctx, desc.TableDesc()); err != nil {
return err
}
case *sqlbase.ImmutableTableDescriptor:
if err := hydrateDesc(desc.TableDesc()); err != nil {
if err := p.hydrateTableDescriptor(ctx, desc.TableDesc()); err != nil {
return err
}
}
Expand Down
15 changes: 15 additions & 0 deletions pkg/sql/sem/tree/datum.go
Original file line number Diff line number Diff line change
Expand Up @@ -3815,6 +3815,21 @@ func MakeDEnumFromLogicalRepresentation(typ *types.T, rep string) (*DEnum, error
}, nil
}

// MakeAllDEnumsInType returns one *DEnum per logical representation recorded
// in typ's enum metadata, in the order the metadata stores them. Each datum
// pairs the logical representation with the physical representation at the
// same index.
// TODO (rohany): In the future, take an option of whether to include
// non-writeable enum values or not.
func MakeAllDEnumsInType(typ *types.T) []Datum {
	enumData := typ.TypeMeta.EnumData
	result := make([]Datum, len(enumData.LogicalRepresentations))
	for i, logical := range enumData.LogicalRepresentations {
		result[i] = &DEnum{
			EnumTyp:     typ,
			PhysicalRep: enumData.PhysicalRepresentations[i],
			LogicalRep:  logical,
		}
	}
	return result
}

// Format implements the NodeFormatter interface.
func (d *DEnum) Format(ctx *FmtCtx) {
s := DString(d.LogicalRep)
Expand Down
6 changes: 3 additions & 3 deletions pkg/sql/sem/tree/format.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ const (
// If set, user defined types will be printed as '@id', where id is the
// stable type ID for the user defined type. This is used in DistSQL flows
// where we don't want to perform name resolution of types again.
fmtFormatUserDefinedTypesAsIDs
FmtFormatUserDefinedTypesAsIDs
)

// Composite/derived flag definitions follow.
Expand All @@ -157,9 +157,9 @@ const (

// FmtDistSQLSerialization is just like FmtCheckEquivalence, but it can be
// used to serialize expressions for query distribution. In particular, it
// includes the flag fmtFormatUserDefinedTypesAsIDs which serializes user
// includes the flag FmtFormatUserDefinedTypesAsIDs which serializes user
// defined types in a way that avoids name resolution for DistSQL evaluation.
FmtDistSQLSerialization FmtFlags = FmtCheckEquivalence | fmtFormatUserDefinedTypesAsIDs
FmtDistSQLSerialization FmtFlags = FmtCheckEquivalence | FmtFormatUserDefinedTypesAsIDs

// FmtArrayToString is a special composite flag suitable
// for the output of array_to_string(). This de-quotes
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/sem/tree/type_name.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ func ResolveType(ref ResolvableTypeReference, resolver TypeReferenceResolver) (*

// FormatTypeReference formats a ResolvableTypeReference.
func (ctx *FmtCtx) FormatTypeReference(ref ResolvableTypeReference) {
if ctx.HasFlags(fmtFormatUserDefinedTypesAsIDs) {
if ctx.HasFlags(FmtFormatUserDefinedTypesAsIDs) {
switch t := ref.(type) {
case *types.T:
if t.UserDefined() {
Expand Down
14 changes: 14 additions & 0 deletions pkg/sql/sqlbase/structured.go
Original file line number Diff line number Diff line change
Expand Up @@ -4174,6 +4174,20 @@ func (desc *TypeDescriptor) HydrateTypeInfo(typ *types.T) error {
}
}

// MakeTypeFromTypeDesc builds a types.T from the input type descriptor. Only
// descriptors of kind TypeDescriptor_ENUM are handled; any other kind yields
// an assertion failure error. For enums, the type is created from the
// descriptor's ID and then hydrated from the descriptor.
func MakeTypeFromTypeDesc(desc *TypeDescriptor) (*types.T, error) {
	if kind := desc.Kind; kind != TypeDescriptor_ENUM {
		return nil, errors.AssertionFailedf("unknown type kind %s", kind.String())
	}
	enumTyp := types.MakeEnum(uint32(desc.ID))
	if err := desc.HydrateTypeInfo(enumTyp); err != nil {
		return nil, err
	}
	return enumTyp, nil
}

// NameResolutionResult implements the NameResolutionResult interface. The
// method body is intentionally empty.
func (desc *TypeDescriptor) NameResolutionResult() {}

Expand Down

0 comments on commit 5177a94

Please sign in to comment.