Skip to content

Commit

Permalink
colexec: add support for ILIKE and NOT ILIKE
Browse files Browse the repository at this point in the history
This commit adds native vectorized support for ILIKE and NOT ILIKE
comparisons. The idea is simple: convert both the argument and the
pattern to upper case. This required minor changes to the templates
to add a "prelude" step for that conversion, as well as converting the
pattern to upper case during planning.

Initially, I generated separate operators for the case-insensitive cases,
but the benchmarks showed that the performance impact of a single
conditional inside the `for` loop is barely noticeable, given that the
branch prediction will always be right, so I refactored the existing
operators to support case insensitivity.

```
name                                     old time/op    new time/op    delta
LikeOps/selPrefixBytesBytesConstOp-24      16.8µs ± 0%    17.7µs ± 0%  +5.30%  (p=0.000 n=10+10)
LikeOps/selSuffixBytesBytesConstOp-24      18.7µs ± 0%    19.2µs ± 0%  +2.99%  (p=0.000 n=10+10)
LikeOps/selContainsBytesBytesConstOp-24    28.0µs ± 0%    27.8µs ± 0%  -0.73%  (p=0.000 n=10+10)
LikeOps/selRegexpBytesBytesConstOp-24       479µs ± 0%     480µs ± 0%  +0.33%  (p=0.008 n=9+10)
LikeOps/selSkeletonBytesBytesConstOp-24    40.2µs ± 0%    41.4µs ± 0%  +3.20%  (p=0.000 n=9+10)
LikeOps/selRegexpSkeleton-24                860µs ± 0%     857µs ± 0%  -0.36%  (p=0.023 n=10+10)
```

Release note (performance improvement): ILIKE and NOT ILIKE filters can
now be evaluated more efficiently in some cases.
  • Loading branch information
yuzefovich committed Aug 6, 2022
1 parent 1f9b640 commit 40c08c3
Show file tree
Hide file tree
Showing 11 changed files with 415 additions and 185 deletions.
22 changes: 15 additions & 7 deletions pkg/sql/colexec/colbuilder/execplan.go
Original file line number Diff line number Diff line change
Expand Up @@ -1795,6 +1795,12 @@ func addProjection(
return colexecbase.NewSimpleProjectOp(op, len(typs), projection), newTypes
}

// examineLikeOp inspects the symbol of a LIKE-family comparison operator and
// reports two properties of it:
//   - negate is true for the NOT LIKE and NOT ILIKE symbols;
//   - caseInsensitive is true for the ILIKE and NOT ILIKE symbols.
//
// Any other symbol yields (false, false).
func examineLikeOp(op treecmp.ComparisonOperator) (negate bool, caseInsensitive bool) {
	sym := op.Symbol
	// NOT ILIKE is the only symbol that is both negated and case-insensitive,
	// so evaluate it once and reuse it for both results.
	isNotILike := sym == treecmp.NotILike
	return sym == treecmp.NotLike || isNotILike, sym == treecmp.ILike || isNotILike
}

func planSelectionOperators(
ctx context.Context,
evalCtx *eval.Context,
Expand Down Expand Up @@ -1842,10 +1848,11 @@ func planSelectionOperators(
lTyp := ct[leftIdx]
if constArg, ok := t.Right.(tree.Datum); ok {
switch cmpOp.Symbol {
case treecmp.Like, treecmp.NotLike:
negate := cmpOp.Symbol == treecmp.NotLike
case treecmp.Like, treecmp.NotLike, treecmp.ILike, treecmp.NotILike:
negate, caseInsensitive := examineLikeOp(cmpOp)
op, err = colexecsel.GetLikeOperator(
evalCtx, leftOp, leftIdx, string(tree.MustBeDString(constArg)), negate,
evalCtx, leftOp, leftIdx, string(tree.MustBeDString(constArg)),
negate, caseInsensitive,
)
case treecmp.In, treecmp.NotIn:
negate := cmpOp.Symbol == treecmp.NotIn
Expand Down Expand Up @@ -2370,7 +2377,8 @@ func planProjectionExpr(
var hasOptimizedOp bool
if isCmpProjOp {
switch cmpProjOp.Symbol {
case treecmp.Like, treecmp.NotLike, treecmp.In, treecmp.NotIn, treecmp.IsDistinctFrom, treecmp.IsNotDistinctFrom:
case treecmp.Like, treecmp.NotLike, treecmp.ILike, treecmp.NotILike,
treecmp.In, treecmp.NotIn, treecmp.IsDistinctFrom, treecmp.IsNotDistinctFrom:
hasOptimizedOp = true
}
}
Expand Down Expand Up @@ -2437,11 +2445,11 @@ func planProjectionExpr(
resultIdx = len(typs)
if isCmpProjOp {
switch cmpProjOp.Symbol {
case treecmp.Like, treecmp.NotLike:
negate := cmpProjOp.Symbol == treecmp.NotLike
case treecmp.Like, treecmp.NotLike, treecmp.ILike, treecmp.NotILike:
negate, caseInsensitive := examineLikeOp(cmpProjOp)
op, err = colexecprojconst.GetLikeProjectionOperator(
allocator, evalCtx, input, leftIdx, resultIdx,
string(tree.MustBeDString(rConstArg)), negate,
string(tree.MustBeDString(rConstArg)), negate, caseInsensitive,
)
case treecmp.In, treecmp.NotIn:
negate := cmpProjOp.Symbol == treecmp.NotIn
Expand Down
5 changes: 4 additions & 1 deletion pkg/sql/colexec/colexeccmp/like_ops.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,16 @@ const (
//
// The second return parameter always contains a single []byte, unless
// "skeleton" LikeOpType is returned.
func GetLikeOperatorType(pattern string) (LikeOpType, [][]byte, error) {
func GetLikeOperatorType(pattern string, caseInsensitive bool) (LikeOpType, [][]byte, error) {
if pattern == "" {
return LikeConstant, [][]byte{{}}, nil
}
if pattern == "%" {
return LikeAlwaysMatch, [][]byte{{}}, nil
}
if caseInsensitive {
pattern = strings.ToUpper(pattern)
}
hasEscape := strings.Contains(pattern, `\`)
if !hasEscape && len(pattern) > 1 && !strings.ContainsAny(pattern[1:len(pattern)-1], "_%") {
// There are no wildcards in the middle of the string as well as no
Expand Down
19 changes: 16 additions & 3 deletions pkg/sql/colexec/colexecprojconst/like_ops.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,9 @@ func GetLikeProjectionOperator(
resultIdx int,
pattern string,
negate bool,
caseInsensitive bool,
) (colexecop.Operator, error) {
likeOpType, patterns, err := colexeccmp.GetLikeOperatorType(pattern)
likeOpType, patterns, err := colexeccmp.GetLikeOperatorType(pattern, caseInsensitive)
if err != nil {
return nil, err
}
Expand All @@ -46,13 +47,21 @@ func GetLikeProjectionOperator(
}
switch likeOpType {
case colexeccmp.LikeAlwaysMatch:
// Use an empty prefix operator to get correct NULL behavior.
// Use an empty prefix operator to get correct NULL behavior. We don't
// need to pay attention to the case sensitivity here since the pattern
// will always match anyway.
return &projPrefixBytesBytesConstOp{
projConstOpBase: base,
constArg: []byte{},
negate: negate,
}, nil
case colexeccmp.LikeConstant:
if caseInsensitive {
// We don't have an equivalent projection operator that would
// convert the argument to capital letters, so for now we fall back
// to the default comparison operator.
return nil, errors.New("ILIKE and NOT ILIKE aren't supported with a constant pattern")
}
if negate {
return &projNEBytesBytesConstOp{
projConstOpBase: base,
Expand All @@ -68,15 +77,17 @@ func GetLikeProjectionOperator(
projConstOpBase: base,
constArg: pat,
negate: negate,
caseInsensitive: caseInsensitive,
}, nil
case colexeccmp.LikePrefix:
return &projPrefixBytesBytesConstOp{
projConstOpBase: base,
constArg: pat,
negate: negate,
caseInsensitive: caseInsensitive,
}, nil
case colexeccmp.LikeRegexp:
re, err := eval.ConvertLikeToRegexp(ctx, string(patterns[0]), false, '\\')
re, err := eval.ConvertLikeToRegexp(ctx, string(patterns[0]), caseInsensitive, '\\')
if err != nil {
return nil, err
}
Expand All @@ -90,12 +101,14 @@ func GetLikeProjectionOperator(
projConstOpBase: base,
constArg: patterns,
negate: negate,
caseInsensitive: caseInsensitive,
}, nil
case colexeccmp.LikeSuffix:
return &projSuffixBytesBytesConstOp{
projConstOpBase: base,
constArg: pat,
negate: negate,
caseInsensitive: caseInsensitive,
}, nil
default:
return nil, errors.AssertionFailedf("unsupported like op type %d", likeOpType)
Expand Down
11 changes: 11 additions & 0 deletions pkg/sql/colexec/colexecprojconst/proj_const_ops_tmpl.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ type _OP_CONST_NAME struct {
// {{if .Negatable}}
negate bool
// {{end}}
// {{if .CaseInsensitive}}
caseInsensitive bool
// {{end}}
}

func (p _OP_CONST_NAME) Next() coldata.Batch {
Expand All @@ -114,6 +117,14 @@ func (p _OP_CONST_NAME) Next() coldata.Batch {
// should be negated.
// */}}
_negate := p.negate
// {{ end }}
// {{if .CaseInsensitive}}
// {{/*
// In order to inline the templated code of the LIKE overloads, we need
// to have a `_caseInsensitive` local variable indicating whether the
// operator is case insensitive.
// */}}
_caseInsensitive := p.caseInsensitive
// {{end}}
batch := p.Input.Next()
n := batch.Length()
Expand Down
72 changes: 64 additions & 8 deletions pkg/sql/colexec/colexecprojconst/proj_like_ops.eg.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 40c08c3

Please sign in to comment.