Skip to content

Commit

Permalink
opt: add rule to normalize Like to Range
Browse files Browse the repository at this point in the history
It is easier to calculate stats for a `Range` expression than for
`Like` expressions. This PR adds a rule that converts a `Like` operator
with tight constraints to a `Range` operator.

Fixes cockroachdb#52153

Release note: None

Release justification: This change is small and is unlikely to break
anything.
  • Loading branch information
Isaac Pugh committed Aug 27, 2020
1 parent 772a79f commit d08e47b
Show file tree
Hide file tree
Showing 10 changed files with 142 additions and 13 deletions.
22 changes: 16 additions & 6 deletions pkg/sql/opt/memo/testdata/logprops/constraints
Original file line number Diff line number Diff line change
Expand Up @@ -1027,9 +1027,14 @@ select
│ ├── prune: (1-3)
│ └── interesting orderings: (+1)
└── filters
└── like [type=bool, outer=(3), constraints=(/3: [/'ABC' - /'ABD'); tight)]
├── variable: v:3 [type=string]
└── const: 'ABC%' [type=string]
└── range [type=bool, outer=(3), constraints=(/3: [/'ABC' - /'ABD'); tight)]
└── and [type=bool]
├── ge [type=bool]
│ ├── variable: v:3 [type=string]
│ └── const: 'ABC' [type=string]
└── lt [type=bool]
├── variable: v:3 [type=string]
└── const: 'ABD' [type=string]

opt
SELECT * FROM kuv WHERE v LIKE 'ABC_'
Expand Down Expand Up @@ -1087,9 +1092,14 @@ select
│ ├── prune: (1-3)
│ └── interesting orderings: (+1)
└── filters
└── like [type=bool, outer=(3), constraints=(/3: [/'ABC' - /'ABC']; tight), fd=()-->(3)]
├── variable: v:3 [type=string]
└── const: 'ABC' [type=string]
└── range [type=bool, outer=(3), constraints=(/3: [/'ABC' - /'ABC']; tight), fd=()-->(3)]
└── and [type=bool]
├── ge [type=bool]
│ ├── variable: v:3 [type=string]
│ └── const: 'ABC' [type=string]
└── le [type=bool]
├── variable: v:3 [type=string]
└── const: 'ABC' [type=string]

opt
SELECT * FROM kuv WHERE v LIKE '%'
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/opt/memo/testdata/stats/select
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ select
│ └── fd: (1,2)-->(3)
└── filters
├── d_id:1 = 1 [type=bool, outer=(1), constraints=(/1: [/1 - /1]; tight), fd=()-->(1)]
└── d_name:3 LIKE 'bob' [type=bool, outer=(3), constraints=(/3: [/'bob' - /'bob']; tight), fd=()-->(3)]
└── (d_name:3 >= 'bob') AND (d_name:3 <= 'bob') [type=bool, outer=(3), constraints=(/3: [/'bob' - /'bob']; tight), fd=()-->(3)]

# This tests selectivityFromReducedCols.
# Since (1,2)-->(3) in order to use selectivityFromReducedCols,
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/opt/memo/testdata/stats_quality/tpch/q20
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ sort
│ │ │ └── filters
│ │ │ └── ps_availqty:16 > (sum:47 * 0.5) [type=bool, outer=(16,47), immutable, constraints=(/16: (/NULL - ])]
│ │ └── filters
│ │ └── p_name:21 LIKE 'forest%' [type=bool, outer=(21), constraints=(/21: [/'forest' - /'foresu'); tight)]
│ │ └── (p_name:21 >= 'forest') AND (p_name:21 < 'foresu') [type=bool, outer=(21), constraints=(/21: [/'forest' - /'foresu'); tight)]
│ └── filters (true)
├── select
│ ├── save-table-name: q20_select_15
Expand Down
20 changes: 20 additions & 0 deletions pkg/sql/opt/norm/rules/select.opt
Original file line number Diff line number Diff line change
Expand Up @@ -391,3 +391,23 @@ $input
)
(RemoveFiltersItem $filter $item)
)

# NormalizeSelectLiketoRange converts a Like operator to a Range operator.
# It's easier to calculate stats for Range expressions than for Like
# expressions.
[NormalizeSelectLikeToRange, Normalize]
(Select
$input:*
$filters:[
...
$item:(FiltersItem
(Like
$left:(Variable)
$right:(Const)
)
) & (Succeeded $rng:(ConvertLikeToRange $item))
...
]
)
=>
(Select $input (ReplaceFiltersItem $filters $item $rng))
36 changes: 36 additions & 0 deletions pkg/sql/opt/norm/scalar_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
package norm

import (
"github.com/cockroachdb/cockroach/pkg/sql/opt/constraint"
"sort"

"github.com/cockroachdb/cockroach/pkg/sql/opt"
Expand Down Expand Up @@ -327,3 +328,38 @@ func (c *CustomFuncs) VarsAreSame(left, right opt.ScalarExpr) bool {
rv := right.(*memo.VariableExpr)
return lv.Col == rv.Col
}

// ConvertLikeToRange returns a Range that is equivalent to the given Like expression.
func (c *CustomFuncs) ConvertLikeToRange(item *memo.FiltersItem) opt.ScalarExpr {
if !item.ScalarProps().TightConstraints {
return nil
}
consSet := item.ScalarProps().Constraints
if consSet == nil || consSet.Length() != 1 {
return nil
}
spans := consSet.Constraint(0).Spans
if spans.Count() != 1 {
return nil
}
span := spans.Get(0)
like := item.Condition.(*memo.LikeExpr)
typ := like.Right.(*memo.ConstExpr).Typ
leftBound := c.f.ConstructConst(span.StartKey().Value(0), typ)
rightBound := c.f.ConstructConst(span.EndKey().Value(0), typ)
var left, right opt.ScalarExpr

if span.StartBoundary() == constraint.IncludeBoundary {
left = c.f.ConstructGe(like.Left, leftBound)
} else {
left = c.f.ConstructGt(like.Left, leftBound)
}

if span.EndBoundary() == constraint.IncludeBoundary {
right = c.f.ConstructLe(like.Left, rightBound)
} else {
right = c.f.ConstructLt(like.Left, rightBound)
}

return c.f.ConstructRange(c.f.ConstructAnd(left, right))
}
2 changes: 1 addition & 1 deletion pkg/sql/opt/norm/testdata/rules/bool
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ select
│ └── fd: (1)-->(2-5)
└── filters
├── s:4 NOT LIKE 'foo' [outer=(4), constraints=(/4: (/NULL - ])]
├── s:4 LIKE 'foo' [outer=(4), constraints=(/4: [/'foo' - /'foo']; tight), fd=()-->(4)]
├── (s:4 >= 'foo') AND (s:4 <= 'foo') [outer=(4), constraints=(/4: [/'foo' - /'foo']; tight), fd=()-->(4)]
├── s:4 NOT ILIKE 'foo' [outer=(4), constraints=(/4: (/NULL - ])]
└── s:4 ILIKE 'foo' [outer=(4), constraints=(/4: (/NULL - ])]

Expand Down
65 changes: 64 additions & 1 deletion pkg/sql/opt/norm/testdata/rules/select
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ select
│ ├── key: (1)
│ └── fd: (1)-->(2-5)
└── filters
├── (s:4 LIKE 'a%') AND (s:4 = 'aa') [outer=(4), constraints=(/4: [/'aa' - /'aa']; tight), fd=()-->(4)]
├── ((s:4 = 'aa') AND (s:4 >= 'a')) AND (s:4 < 'b') [outer=(4), constraints=(/4: [/'aa' - /'aa']; tight), fd=()-->(4)]
└── s:4 SIMILAR TO 'a_' [outer=(4), constraints=(/4: [/'a' - /'b'))]

# One of the constraints is not tight, so it should not be consolidated.
Expand Down Expand Up @@ -1905,3 +1905,66 @@ select
│ └── (1.00,)
└── filters
└── column1:1::STRING != '1.00' [outer=(1), immutable]

# --------------------------------------------------
# NormalizeSelectLikeToRange
# --------------------------------------------------

norm expect=NormalizeSelectLikeToRange
SELECT * FROM a WHERE s LIKE 'FOREST%'
----
select
├── columns: k:1!null i:2 f:3 s:4!null j:5
├── key: (1)
├── fd: (1)-->(2-5)
├── scan a
│ ├── columns: k:1!null i:2 f:3 s:4 j:5
│ ├── key: (1)
│ └── fd: (1)-->(2-5)
└── filters
└── (s:4 >= 'FOREST') AND (s:4 < 'FORESU') [outer=(4), constraints=(/4: [/'FOREST' - /'FORESU'); tight)]

# Case without pattern matching selectors.
norm expect=NormalizeSelectLikeToRange
SELECT * FROM a WHERE s LIKE 'foo'
----
select
├── columns: k:1!null i:2 f:3 s:4!null j:5
├── key: (1)
├── fd: ()-->(4), (1)-->(2,3,5)
├── scan a
│ ├── columns: k:1!null i:2 f:3 s:4 j:5
│ ├── key: (1)
│ └── fd: (1)-->(2-5)
└── filters
└── (s:4 >= 'foo') AND (s:4 <= 'foo') [outer=(4), constraints=(/4: [/'foo' - /'foo']; tight), fd=()-->(4)]

# No-op case because the constraints are not tight.
norm expect-not=NormalizeSelectLikeToRange
SELECT * FROM a WHERE s LIKE 'foo_'
----
select
├── columns: k:1!null i:2 f:3 s:4!null j:5
├── key: (1)
├── fd: (1)-->(2-5)
├── scan a
│ ├── columns: k:1!null i:2 f:3 s:4 j:5
│ ├── key: (1)
│ └── fd: (1)-->(2-5)
└── filters
└── s:4 LIKE 'foo_' [outer=(4), constraints=(/4: [/'foo' - /'fop'))]

# No-op case because the Like expression does not have a tight constraint.
norm expect-not=NormalizeSelectLikeToRange
SELECT * FROM a WHERE s LIKE '%FOREST'
----
select
├── columns: k:1!null i:2 f:3 s:4!null j:5
├── key: (1)
├── fd: (1)-->(2-5)
├── scan a
│ ├── columns: k:1!null i:2 f:3 s:4 j:5
│ ├── key: (1)
│ └── fd: (1)-->(2-5)
└── filters
└── s:4 LIKE '%FOREST' [outer=(4), constraints=(/4: (/NULL - ])]
2 changes: 1 addition & 1 deletion pkg/sql/opt/xform/testdata/external/tpch
Original file line number Diff line number Diff line change
Expand Up @@ -2245,7 +2245,7 @@ sort
│ │ │ └── filters
│ │ │ └── ps_availqty:16 > (sum:47 * 0.5) [outer=(16,47), immutable, constraints=(/16: (/NULL - ])]
│ │ └── filters
│ │ └── p_name:21 LIKE 'forest%' [outer=(21), constraints=(/21: [/'forest' - /'foresu'); tight)]
│ │ └── (p_name:21 >= 'forest') AND (p_name:21 < 'foresu') [outer=(21), constraints=(/21: [/'forest' - /'foresu'); tight)]
│ └── filters (true)
├── select
│ ├── columns: n_nationkey:9!null n_name:10!null
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/opt/xform/testdata/external/tpch-no-stats
Original file line number Diff line number Diff line change
Expand Up @@ -2178,7 +2178,7 @@ sort
│ │ │ └── filters
│ │ │ └── ps_availqty:16 > (sum:47 * 0.5) [outer=(16,47), immutable, constraints=(/16: (/NULL - ])]
│ │ └── filters
│ │ └── p_name:21 LIKE 'forest%' [outer=(21), constraints=(/21: [/'forest' - /'foresu'); tight)]
│ │ └── (p_name:21 >= 'forest') AND (p_name:21 < 'foresu') [outer=(21), constraints=(/21: [/'forest' - /'foresu'); tight)]
│ └── filters (true)
└── filters
└── n_name:10 = 'CANADA' [outer=(10), constraints=(/10: [/'CANADA' - /'CANADA']; tight), fd=()-->(10)]
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/opt/xform/testdata/rules/join
Original file line number Diff line number Diff line change
Expand Up @@ -3489,7 +3489,7 @@ project
│ └── filters (true)
└── filters
├── st_covers(c.geom:10, n.geom:16) [outer=(10,16), immutable]
└── name:15 LIKE 'Upper%' [outer=(15), constraints=(/15: [/'Upper' - /'Uppes'); tight)]
└── (name:15 >= 'Upper') AND (name:15 < 'Uppes') [outer=(15), constraints=(/15: [/'Upper' - /'Uppes'); tight)]

# It's not possible to generate an inverted join when there is an OR with a
# non-geospatial function.
Expand Down

0 comments on commit d08e47b

Please sign in to comment.