Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

colexec: add support for ILIKE and NOT ILIKE #85695

Merged
merged 2 commits into from
Aug 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 15 additions & 7 deletions pkg/sql/colexec/colbuilder/execplan.go
Original file line number Diff line number Diff line change
Expand Up @@ -1795,6 +1795,12 @@ func addProjection(
return colexecbase.NewSimpleProjectOp(op, len(typs), projection), newTypes
}

// examineLikeOp classifies a LIKE-family comparison operator by its symbol.
//
// negate is true for the NOT LIKE and NOT ILIKE symbols; caseInsensitive is
// true for the ILIKE and NOT ILIKE symbols. Any other symbol (including plain
// LIKE) yields false for both results.
func examineLikeOp(op treecmp.ComparisonOperator) (negate bool, caseInsensitive bool) {
	switch op.Symbol {
	case treecmp.NotLike:
		negate = true
	case treecmp.ILike:
		caseInsensitive = true
	case treecmp.NotILike:
		negate, caseInsensitive = true, true
	}
	return negate, caseInsensitive
}

func planSelectionOperators(
ctx context.Context,
evalCtx *eval.Context,
Expand Down Expand Up @@ -1842,10 +1848,11 @@ func planSelectionOperators(
lTyp := ct[leftIdx]
if constArg, ok := t.Right.(tree.Datum); ok {
switch cmpOp.Symbol {
case treecmp.Like, treecmp.NotLike:
negate := cmpOp.Symbol == treecmp.NotLike
case treecmp.Like, treecmp.NotLike, treecmp.ILike, treecmp.NotILike:
negate, caseInsensitive := examineLikeOp(cmpOp)
op, err = colexecsel.GetLikeOperator(
evalCtx, leftOp, leftIdx, string(tree.MustBeDString(constArg)), negate,
evalCtx, leftOp, leftIdx, string(tree.MustBeDString(constArg)),
negate, caseInsensitive,
)
case treecmp.In, treecmp.NotIn:
negate := cmpOp.Symbol == treecmp.NotIn
Expand Down Expand Up @@ -2370,7 +2377,8 @@ func planProjectionExpr(
var hasOptimizedOp bool
if isCmpProjOp {
switch cmpProjOp.Symbol {
case treecmp.Like, treecmp.NotLike, treecmp.In, treecmp.NotIn, treecmp.IsDistinctFrom, treecmp.IsNotDistinctFrom:
case treecmp.Like, treecmp.NotLike, treecmp.ILike, treecmp.NotILike,
treecmp.In, treecmp.NotIn, treecmp.IsDistinctFrom, treecmp.IsNotDistinctFrom:
hasOptimizedOp = true
}
}
Expand Down Expand Up @@ -2437,11 +2445,11 @@ func planProjectionExpr(
resultIdx = len(typs)
if isCmpProjOp {
switch cmpProjOp.Symbol {
case treecmp.Like, treecmp.NotLike:
negate := cmpProjOp.Symbol == treecmp.NotLike
case treecmp.Like, treecmp.NotLike, treecmp.ILike, treecmp.NotILike:
negate, caseInsensitive := examineLikeOp(cmpProjOp)
op, err = colexecprojconst.GetLikeProjectionOperator(
allocator, evalCtx, input, leftIdx, resultIdx,
string(tree.MustBeDString(rConstArg)), negate,
string(tree.MustBeDString(rConstArg)), negate, caseInsensitive,
)
case treecmp.In, treecmp.NotIn:
negate := cmpProjOp.Symbol == treecmp.NotIn
Expand Down
50 changes: 4 additions & 46 deletions pkg/sql/colexec/colexeccmp/like_ops.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,58 +25,34 @@ const (
// LikeConstant is used when comparing against a constant string with no
// wildcards.
LikeConstant
// LikeConstantNegate is used when comparing against a constant string with
// no wildcards, and the result is negated.
LikeConstantNegate
// LikeContains is used when comparing against a constant substring.
LikeContains
// LikeContainsNegate is used when comparing against a constant substring,
// and the result is negated.
LikeContainsNegate
// LikeNeverMatch doesn't match anything.
LikeNeverMatch
// LikePrefix is used when comparing against a constant prefix.
LikePrefix
// LikePrefixNegate is used when comparing against a constant prefix, and
// the result is negated.
LikePrefixNegate
// LikeRegexp is the default slow case when we need to fallback to RegExp
// matching.
LikeRegexp
// LikeRegexpNegate is the default slow case when we need to fallback to
// RegExp matching, but the result is negated.
LikeRegexpNegate
// LikeSkeleton is used when comparing against a "skeleton" string (of the
// form '%foo%bar%' with any number of "skeleton words").
LikeSkeleton
// LikeSkeletonNegate is used when comparing against a "skeleton" string (of
// the form '%foo%bar%' with any number of "skeleton words"), and the result
// is negated.
LikeSkeletonNegate
// LikeSuffix is used when comparing against a constant suffix.
LikeSuffix
// LikeSuffixNegate is used when comparing against a constant suffix, and
// the result is negated.
LikeSuffixNegate
)

// GetLikeOperatorType returns LikeOpType corresponding to the inputs.
//
// The second return parameter always contains a single []byte, unless
// "skeleton" LikeOpType is returned.
func GetLikeOperatorType(pattern string, negate bool) (LikeOpType, [][]byte, error) {
func GetLikeOperatorType(pattern string, caseInsensitive bool) (LikeOpType, [][]byte, error) {
if pattern == "" {
if negate {
return LikeConstantNegate, [][]byte{{}}, nil
}
return LikeConstant, [][]byte{{}}, nil
}
if pattern == "%" {
if negate {
return LikeNeverMatch, [][]byte{{}}, nil
}
return LikeAlwaysMatch, [][]byte{{}}, nil
}
if caseInsensitive {
pattern = strings.ToUpper(pattern)
}
hasEscape := strings.Contains(pattern, `\`)
if !hasEscape && len(pattern) > 1 && !strings.ContainsAny(pattern[1:len(pattern)-1], "_%") {
// There are no wildcards in the middle of the string as well as no
Expand All @@ -94,30 +70,18 @@ func GetLikeOperatorType(pattern string, negate bool) (LikeOpType, [][]byte, err
lastChar := pattern[len(pattern)-1]
if !isWildcard(firstChar) && !isWildcard(lastChar) {
// No wildcards, so this is just an exact string match.
if negate {
return LikeConstantNegate, [][]byte{[]byte(pattern)}, nil
}
return LikeConstant, [][]byte{[]byte(pattern)}, nil
}
if firstChar == '%' && !isWildcard(lastChar) {
suffix := pattern[1:]
if negate {
return LikeSuffixNegate, [][]byte{[]byte(suffix)}, nil
}
return LikeSuffix, [][]byte{[]byte(suffix)}, nil
}
if lastChar == '%' && !isWildcard(firstChar) {
prefix := pattern[:len(pattern)-1]
if negate {
return LikePrefixNegate, [][]byte{[]byte(prefix)}, nil
}
return LikePrefix, [][]byte{[]byte(prefix)}, nil
}
if firstChar == '%' && lastChar == '%' {
contains := pattern[1 : len(pattern)-1]
if negate {
return LikeContainsNegate, [][]byte{[]byte(contains)}, nil
}
return LikeContains, [][]byte{[]byte(contains)}, nil
}
}
Expand All @@ -136,15 +100,9 @@ func GetLikeOperatorType(pattern string, negate bool) (LikeOpType, [][]byte, err
skeleton = append(skeleton, pat[:idx])
pat = pat[idx+1:]
}
if negate {
return LikeSkeletonNegate, skeleton, nil
}
return LikeSkeleton, skeleton, nil
}
// Default (slow) case: execute as a regular expression match.
if negate {
return LikeRegexpNegate, [][]byte{[]byte(pattern)}, nil
}
return LikeRegexp, [][]byte{[]byte(pattern)}, nil
}

Expand Down
95 changes: 40 additions & 55 deletions pkg/sql/colexec/colexecprojconst/like_ops.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,9 @@ func GetLikeProjectionOperator(
resultIdx int,
pattern string,
negate bool,
caseInsensitive bool,
) (colexecop.Operator, error) {
likeOpType, patterns, err := colexeccmp.GetLikeOperatorType(pattern, negate)
likeOpType, patterns, err := colexeccmp.GetLikeOperatorType(pattern, caseInsensitive)
if err != nil {
return nil, err
}
Expand All @@ -45,85 +46,69 @@ func GetLikeProjectionOperator(
outputIdx: resultIdx,
}
switch likeOpType {
case colexeccmp.LikeConstant:
return &projEQBytesBytesConstOp{
projConstOpBase: base,
constArg: pat,
}, nil
case colexeccmp.LikeConstantNegate:
return &projNEBytesBytesConstOp{
projConstOpBase: base,
constArg: pat,
}, nil
case colexeccmp.LikeNeverMatch:
// Use an empty not-prefix operator to get correct NULL behavior.
return &projNotPrefixBytesBytesConstOp{
projConstOpBase: base,
constArg: []byte{},
}, nil
case colexeccmp.LikeAlwaysMatch:
// Use an empty prefix operator to get correct NULL behavior.
// Use an empty prefix operator to get correct NULL behavior. We don't
// need to pay attention to the case sensitivity here since the pattern
// will always match anyway.
return &projPrefixBytesBytesConstOp{
projConstOpBase: base,
constArg: []byte{},
negate: negate,
}, nil
case colexeccmp.LikeSuffix:
return &projSuffixBytesBytesConstOp{
projConstOpBase: base,
constArg: pat,
}, nil
case colexeccmp.LikeSuffixNegate:
return &projNotSuffixBytesBytesConstOp{
projConstOpBase: base,
constArg: pat,
}, nil
case colexeccmp.LikePrefix:
return &projPrefixBytesBytesConstOp{
projConstOpBase: base,
constArg: pat,
}, nil
case colexeccmp.LikePrefixNegate:
return &projNotPrefixBytesBytesConstOp{
case colexeccmp.LikeConstant:
if caseInsensitive {
// We don't have an equivalent projection operator that would
// convert the argument to capital letters, so for now we fall back
// to the default comparison operator.
return nil, errors.New("ILIKE and NOT ILIKE aren't supported with a constant pattern")
}
if negate {
return &projNEBytesBytesConstOp{
projConstOpBase: base,
constArg: pat,
}, nil
}
return &projEQBytesBytesConstOp{
projConstOpBase: base,
constArg: pat,
}, nil
case colexeccmp.LikeContains:
return &projContainsBytesBytesConstOp{
projConstOpBase: base,
constArg: pat,
negate: negate,
caseInsensitive: caseInsensitive,
}, nil
case colexeccmp.LikeContainsNegate:
return &projNotContainsBytesBytesConstOp{
case colexeccmp.LikePrefix:
return &projPrefixBytesBytesConstOp{
projConstOpBase: base,
constArg: pat,
}, nil
case colexeccmp.LikeSkeleton:
return &projSkeletonBytesBytesConstOp{
projConstOpBase: base,
constArg: patterns,
}, nil
case colexeccmp.LikeSkeletonNegate:
return &projNotSkeletonBytesBytesConstOp{
projConstOpBase: base,
constArg: patterns,
negate: negate,
caseInsensitive: caseInsensitive,
}, nil
case colexeccmp.LikeRegexp:
re, err := eval.ConvertLikeToRegexp(ctx, string(patterns[0]), false, '\\')
re, err := eval.ConvertLikeToRegexp(ctx, string(patterns[0]), caseInsensitive, '\\')
if err != nil {
return nil, err
}
return &projRegexpBytesBytesConstOp{
projConstOpBase: base,
constArg: re,
negate: negate,
}, nil
case colexeccmp.LikeRegexpNegate:
re, err := eval.ConvertLikeToRegexp(ctx, string(patterns[0]), false, '\\')
if err != nil {
return nil, err
}
return &projNotRegexpBytesBytesConstOp{
case colexeccmp.LikeSkeleton:
return &projSkeletonBytesBytesConstOp{
projConstOpBase: base,
constArg: re,
constArg: patterns,
negate: negate,
caseInsensitive: caseInsensitive,
}, nil
case colexeccmp.LikeSuffix:
return &projSuffixBytesBytesConstOp{
projConstOpBase: base,
constArg: pat,
negate: negate,
caseInsensitive: caseInsensitive,
}, nil
default:
return nil, errors.AssertionFailedf("unsupported like op type %d", likeOpType)
Expand Down
22 changes: 22 additions & 0 deletions pkg/sql/colexec/colexecprojconst/proj_const_ops_tmpl.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,12 @@ type _OP_CONST_NAME struct {
// {{else}}
constArg _R_GO_TYPE
// {{end}}
// {{if .Negatable}}
negate bool
// {{end}}
// {{if .CaseInsensitive}}
caseInsensitive bool
// {{end}}
}

func (p _OP_CONST_NAME) Next() coldata.Batch {
Expand All @@ -104,6 +110,22 @@ func (p _OP_CONST_NAME) Next() coldata.Batch {
// */}}
_overloadHelper := p.BinaryOverloadHelper
// {{end}}
// {{if .Negatable}}
// {{/*
// In order to inline the templated code of the LIKE overloads, we need
// to have a `_negate` local variable indicating whether the assignment
// should be negated.
// */}}
_negate := p.negate
// {{ end }}
// {{if .CaseInsensitive}}
// {{/*
// In order to inline the templated code of the LIKE overloads, we need
// to have a `_caseInsensitive` local variable indicating whether the
// operator is case insensitive.
// */}}
_caseInsensitive := p.caseInsensitive
// {{end}}
batch := p.Input.Next()
n := batch.Length()
if n == 0 {
Expand Down
Loading