Skip to content

Commit

Permalink
opt: make placeholder fast path conditional on the estimated row count
Browse files Browse the repository at this point in the history
This change is best explained by this comment:

```
// We are dealing with a memo that still contains placeholders. The statistics
// for such a memo can be wildly overestimated. Even if our plan is correct,
// the estimated row count for a scan is passed to the execution engine which
// uses it to make sizing decisions. Passing a very high count can affect
// performance significantly (see #64214). So we only use the fast path if the
// estimated row count is small; typically this will happen when we constrain
// columns that form a key and we know there will be at most one row.
```

Fixes #64214.

Release note (bug fix): fixed a performance regression for very simple
queries.
  • Loading branch information
RaduBerinde committed Apr 26, 2021
1 parent 44c12c2 commit 976118b
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 14 deletions.
25 changes: 19 additions & 6 deletions pkg/sql/opt/xform/placeholder_fast_path.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ import (
"github.com/cockroachdb/errors"
)

// maxRowCountForPlaceholderFastPath is the largest estimated row count of the
// root expression for which TryPlaceholderFastPath still attempts the fast
// path; when the estimate exceeds this value, the fast path is not used.
// Statistics for a memo that still contains placeholders can be wildly
// overestimated, and the estimated row count of a scan is passed to the
// execution engine for sizing decisions, so the fast path is restricted to
// plans with a small estimate.
const maxRowCountForPlaceholderFastPath = 10

// TryPlaceholderFastPath attempts to produce a fully optimized memo with
// placeholders. This is only possible in very simple cases and involves special
// operators (PlaceholderScan) which use placeholders and resolve them at
Expand Down Expand Up @@ -47,13 +49,24 @@ func (o *Optimizer) TryPlaceholderFastPath() (_ opt.Expr, ok bool, err error) {
}()

root := o.mem.RootExpr().(memo.RelExpr)
- rootProps := o.mem.RootProps()

- if !rootProps.Ordering.Any() {
+ rootRelProps := root.Relational()
+ // We are dealing with a memo that still contains placeholders. The statistics
+ // for such a memo can be wildly overestimated. Even if our plan is correct,
+ // the estimated row count for a scan is passed to the execution engine which
+ // uses it to make sizing decisions. Passing a very high count can affect
+ // performance significantly (see #64214). So we only use the fast path if the
+ // estimated row count is small; typically this will happen when we constrain
+ // columns that form a key and we know there will be at most one row.
+ if rootRelProps.Stats.RowCount > maxRowCountForPlaceholderFastPath {
return nil, false, nil
}

- rootColumns := root.Relational().OutputCols
+ rootPhysicalProps := o.mem.RootProps()
+
+ if !rootPhysicalProps.Ordering.Any() {
+ return nil, false, nil
+ }

// TODO(radu): if we want to support more cases, we should use optgen to write
// the rules.
Expand Down Expand Up @@ -175,7 +188,7 @@ func (o *Optimizer) TryPlaceholderFastPath() (_ opt.Expr, ok bool, err error) {

// Success!
newPrivate := scan.ScanPrivate
- newPrivate.Cols = rootColumns
+ newPrivate.Cols = rootRelProps.OutputCols
newPrivate.Index = foundIndex.Ordinal()

span := make(memo.ScalarListExpr, numConstrained)
Expand All @@ -198,8 +211,8 @@ func (o *Optimizer) TryPlaceholderFastPath() (_ opt.Expr, ok bool, err error) {
ScanPrivate: newPrivate,
}
placeholderScan = o.mem.AddPlaceholderScanToGroup(placeholderScan, root)
- o.mem.SetBestProps(placeholderScan, rootProps, &physical.Provided{}, 1.0 /* cost */)
- o.mem.SetRoot(placeholderScan, rootProps)
+ o.mem.SetBestProps(placeholderScan, rootPhysicalProps, &physical.Provided{}, 1.0 /* cost */)
+ o.mem.SetRoot(placeholderScan, rootPhysicalProps)

if util.CrdbTestBuild && !o.mem.IsOptimized() {
return nil, false, errors.AssertionFailedf("IsOptimized() should be true")
Expand Down
48 changes: 40 additions & 8 deletions pkg/sql/opt/xform/testdata/placeholder-fast-path/scan
Original file line number Diff line number Diff line change
Expand Up @@ -87,13 +87,32 @@ SELECT v+1 FROM kv WHERE k = $1
----
no fast path

# The fast path should not kick in because the estimated row count is too high.
placeholder-fast-path
SELECT a, b, c FROM abcd WHERE a=$1 AND b=$2
----
no fast path

# Now inject statistics so that the estimated row count is small.
exec-ddl
ALTER TABLE abcd INJECT STATISTICS '[
{
"columns": ["a"],
"created_at": "2018-05-01 1:00:00.00000+00:00",
"row_count": 10,
"distinct_count": 5
}
]'
----

# The fast path should now kick in.
placeholder-fast-path
SELECT a, b, c FROM abcd WHERE a=$1 AND b=$2
----
placeholder-scan abcd@secondary
├── columns: a:1!null b:2!null c:3
├── has-placeholder
- ├── stats: [rows=109.89, distinct(1)=100, null(1)=0, distinct(2)=100, null(2)=0, distinct(1,2)=109.89, null(1,2)=0]
+ ├── stats: [rows=1.1, distinct(1)=1.1, null(1)=0, distinct(2)=1, null(2)=0]
├── fd: ()-->(1,2)
└── span
├── $1
Expand All @@ -105,7 +124,7 @@ SELECT a, b, c FROM abcd WHERE b=$1 AND a=$2
placeholder-scan abcd@secondary
├── columns: a:1!null b:2!null c:3
├── has-placeholder
- ├── stats: [rows=109.89, distinct(1)=100, null(1)=0, distinct(2)=100, null(2)=0, distinct(1,2)=109.89, null(1,2)=0]
+ ├── stats: [rows=1.1, distinct(1)=1.1, null(1)=0, distinct(2)=1, null(2)=0]
├── fd: ()-->(1,2)
└── span
├── $2
Expand All @@ -118,7 +137,7 @@ SELECT a, b, c FROM abcd WHERE a=0 AND b=$1
placeholder-scan abcd@secondary
├── columns: a:1!null b:2!null c:3
├── has-placeholder
- ├── stats: [rows=3.33, distinct(1)=1, null(1)=0, distinct(2)=3.33, null(2)=0, distinct(1,2)=3.33, null(1,2)=0]
+ ├── stats: [rows=0.666, distinct(1)=0.666, null(1)=0, distinct(2)=0.666, null(2)=0, distinct(1,2)=0.666, null(1,2)=0]
├── fd: ()-->(1,2)
└── span
├── 0
Expand All @@ -130,7 +149,7 @@ SELECT a, b, c FROM abcd WHERE a=$1 AND b=0
placeholder-scan abcd@secondary
├── columns: a:1!null b:2!null c:3
├── has-placeholder
- ├── stats: [rows=3.33, distinct(1)=3.33, null(1)=0, distinct(2)=1, null(2)=0, distinct(1,2)=3.33, null(1,2)=0]
+ ├── stats: [rows=3.3, distinct(1)=3.3, null(1)=0, distinct(2)=1, null(2)=0]
├── fd: ()-->(1,2)
└── span
├── $1
Expand All @@ -143,7 +162,7 @@ SELECT a, b, c FROM abcd WHERE a=1+2 AND b=$1
placeholder-scan abcd@secondary
├── columns: a:1!null b:2!null c:3
├── has-placeholder
- ├── stats: [rows=3.33, distinct(1)=1, null(1)=0, distinct(2)=3.33, null(2)=0, distinct(1,2)=3.33, null(1,2)=0]
+ ├── stats: [rows=0.666, distinct(1)=0.666, null(1)=0, distinct(2)=0.666, null(2)=0, distinct(1,2)=0.666, null(1,2)=0]
├── fd: ()-->(1,2)
└── span
├── 3
Expand All @@ -155,7 +174,7 @@ SELECT a, b, c FROM abcd WHERE a=fnv32a('foo') AND b=$1
placeholder-scan abcd@secondary
├── columns: a:1!null b:2!null c:3
├── has-placeholder
- ├── stats: [rows=3.33, distinct(1)=1, null(1)=0, distinct(2)=3.33, null(2)=0, distinct(1,2)=3.33, null(1,2)=0]
+ ├── stats: [rows=0.666, distinct(1)=0.666, null(1)=0, distinct(2)=0.666, null(2)=0, distinct(1,2)=0.666, null(1,2)=0]
├── fd: ()-->(1,2)
└── span
├── 2851307223
Expand Down Expand Up @@ -203,7 +222,7 @@ SELECT a, d FROM abcd WHERE d=$1 AND c=$2
placeholder-scan abcd@secondary
├── columns: a:1 d:4!null
├── has-placeholder
- ├── stats: [rows=109.89]
+ ├── stats: [rows=1.0989]
├── fd: ()-->(4)
└── span
├── $1
Expand Down Expand Up @@ -235,6 +254,19 @@ CREATE TABLE partial1 (
)
----

# The fast path is conditional on having a small estimated row count. Inject
# statistics so that we don't have to worry about this aspect in tests.
exec-ddl
ALTER TABLE partial1 INJECT STATISTICS '[
{
"columns": ["k"],
"created_at": "2018-05-01 1:00:00.00000+00:00",
"row_count": 10,
"distinct_count": 10
}
]'
----

# Make sure the fast path doesn't choose the cab index, getting in the way of
# using partial_ab (which might be the better index when the placeholder is 0).
placeholder-fast-path
Expand All @@ -250,7 +282,7 @@ SELECT a, b FROM partial1 WHERE a = $1
placeholder-scan partial1@pseudo_ab,partial
├── columns: a:2!null b:3
├── has-placeholder
- ├── stats: [rows=330, distinct(2)=100, null(2)=0]
+ ├── stats: [rows=3.3, distinct(2)=1, null(2)=0]
├── fd: ()-->(2)
└── span
└── $1

0 comments on commit 976118b

Please sign in to comment.