From b5c54c2a66999c16319bbf75b7f8a19ae98a586c Mon Sep 17 00:00:00 2001 From: Kousik Mitra Date: Sun, 30 Apr 2023 02:10:58 +0530 Subject: [PATCH 1/9] Enable support for not regex operation --- pkg/traceql/ast_validate.go | 3 +- pkg/traceql/test_examples.yaml | 385 ++++++++++++++++----------------- 2 files changed, 193 insertions(+), 195 deletions(-) diff --git a/pkg/traceql/ast_validate.go b/pkg/traceql/ast_validate.go index cf5d9d3cc47..30dff1c4880 100644 --- a/pkg/traceql/ast_validate.go +++ b/pkg/traceql/ast_validate.go @@ -178,8 +178,7 @@ func (o BinaryOperation) validate() error { } switch o.Op { - case OpNotRegex, - OpSpansetChild, + case OpSpansetChild, OpSpansetDescendant, OpSpansetSibling: return newUnsupportedError(fmt.Sprintf("binary operation (%v)", o.Op)) diff --git a/pkg/traceql/test_examples.yaml b/pkg/traceql/test_examples.yaml index 6385b8d3e93..03022f728d6 100644 --- a/pkg/traceql/test_examples.yaml +++ b/pkg/traceql/test_examples.yaml @@ -1,258 +1,257 @@ # valid queries parse successfully and return nil when calling .validate() valid: # spanset filters - - '{ true }' - - '{ !true }' - - '{ true && false }' - - '{ true || false }' - - '{ 1 = 2 }' - - '{ 1 != 2 }' - - '{ 1 > 2 }' - - '{ 1 >= 2 }' - - '{ 1 < 2 }' - - '{ 1 <= 2 }' - - '{ -1 = 2 }' + - "{ true }" + - "{ !true }" + - "{ true && false }" + - "{ true || false }" + - "{ 1 = 2 }" + - "{ 1 != 2 }" + - "{ 1 > 2 }" + - "{ 1 >= 2 }" + - "{ 1 < 2 }" + - "{ 1 <= 2 }" + - "{ -1 = 2 }" - '{ "test" =~ "test" }' + - '{ "test" !~ "test" }' - '{ "test" = "test" }' - '{ "test" != "test" }' - - '{ .a }' - - '{ !.a }' - - '{ .a && false }' - - '{ .a || true }' - - '{ .a = 2 }' - - '{ .a != 2 }' - - '{ .a > 2 }' - - '{ .a >= 2 }' - - '{ .a < 2 }' - - '{ .a <= 2 }' - - '{ -.a = 2 }' + - "{ .a }" + - "{ !.a }" + - "{ .a && false }" + - "{ .a || true }" + - "{ .a = 2 }" + - "{ .a != 2 }" + - "{ .a > 2 }" + - "{ .a >= 2 }" + - "{ .a < 2 }" + - "{ .a <= 2 }" + - "{ -.a = 2 }" - '{ .a =~ "test" }' + - '{ .a !~ 
"test" }' - '{ .a = "test" }' - '{ .a != "test" }' - - '{ resource.a != 3 }' - - '{ span.a != 3 }' + - "{ resource.a != 3 }" + - "{ span.a != 3 }" - '{ !("test" != .c || ((true && .b) || 3 < .a)) }' - - '{ status = ok }' - - '{ status = unset }' - - '{ status = error }' - - '{ status != error }' - - '{ kind = internal }' - - '{ kind = client }' - - '{ kind = consumer }' - - '{ duration > 1s }' - - '{ 1 < 1h }' - - '{ 1 <= 1.1 }' + - "{ status = ok }" + - "{ status = unset }" + - "{ status = error }" + - "{ status != error }" + - "{ kind = internal }" + - "{ kind = client }" + - "{ kind = consumer }" + - "{ duration > 1s }" + - "{ 1 < 1h }" + - "{ 1 <= 1.1 }" # binary operations - - '{ 1 + 1 = 2 }' - - '{ 1 - 1 = 2 }' - - '{ 1 * 1 = 2 }' - - '{ 1 / 1 = 2 }' - - '{ 1 ^ 1 = 2 }' - - '{ .a + 1 = 2 }' - - '{ .a - 1 = 2 }' - - '{ .a * 1 = 2 }' - - '{ .a / 1 = 2 }' - - '{ .a ^ 1 = 2 }' - - '{ duration > 1s * 2s }' - - '{ 1 * 1h = 1 }' # combining float, int and duration can make sense, but can also be weird. we just accept it all - - '{ 1 / 1.1 = 1 }' + - "{ 1 + 1 = 2 }" + - "{ 1 - 1 = 2 }" + - "{ 1 * 1 = 2 }" + - "{ 1 / 1 = 2 }" + - "{ 1 ^ 1 = 2 }" + - "{ .a + 1 = 2 }" + - "{ .a - 1 = 2 }" + - "{ .a * 1 = 2 }" + - "{ .a / 1 = 2 }" + - "{ .a ^ 1 = 2 }" + - "{ duration > 1s * 2s }" + - "{ 1 * 1h = 1 }" # combining float, int and duration can make sense, but can also be weird. 
we just accept it all + - "{ 1 / 1.1 = 1 }" - '{ .http.status >= "200" }' # spanset expressions - - '{ true } && { true }' - - '{ true } || { true }' + - "{ true } && { true }" + - "{ true } || { true }" # scalar filters - - 'avg(.field) > 1' - - 'max(duration) >= 1s' - - 'max(duration) > 1' # same note as above for int, float and duration - - '{ true } | max(duration) = 1h' - - '{ true } | min(duration) = 1h' - - '{ true } | sum(duration) = 1h' - - '{ true } | max(.a) = 1' - - '{ true } | max(span.a) = 1' - - '{ true } | max(resource.a) = 1' - - '{ true } | max(1 + .a) = 1' - - '{ true } | max((1 + .a) * 2) = 1' - - 'max(duration) > 3s | { status = error || .http.status = 500 }' + - "avg(.field) > 1" + - "max(duration) >= 1s" + - "max(duration) > 1" # same note as above for int, float and duration + - "{ true } | max(duration) = 1h" + - "{ true } | min(duration) = 1h" + - "{ true } | sum(duration) = 1h" + - "{ true } | max(.a) = 1" + - "{ true } | max(span.a) = 1" + - "{ true } | max(resource.a) = 1" + - "{ true } | max(1 + .a) = 1" + - "{ true } | max((1 + .a) * 2) = 1" + - "max(duration) > 3s | { status = error || .http.status = 500 }" # pipelines - - '{ true } | { .a }' - - '{ true } | count() = 1' - - '{ true } | avg(duration) = 1h' - - 'count() = 1 | { true }' - - '{ true } | count() = 1 | { true }' + - "{ true } | { .a }" + - "{ true } | count() = 1" + - "{ true } | avg(duration) = 1h" + - "count() = 1 | { true }" + - "{ true } | count() = 1 | { true }" # pipeline expressions - - '({ true } | count() > 1 | { false }) && ({ true } | count() > 1 | { false })' - - '({ true } | count() > 1 | { false }) || ({ true } | count() > 1 | { false })' - + - "({ true } | count() > 1 | { false }) && ({ true } | count() > 1 | { false })" + - "({ true } | count() > 1 | { false }) || ({ true } | count() > 1 | { false })" + # parse_fails throw an error when parsing parse_fails: - - 'true' - - '[ true ]' - - '( true )' + - "true" + - "[ true ]" + - "( true )" # spanset filters 
- - '{ . }' - - '{ < }' - - '{ .a < }' - - '{ .a < 3' - - '{ (.a < 3 }' - - '{ attribute = 4 }' # custom attribute not prefixed with ., span., resource. or parent. - - '{ .attribute == 4 }' # invalid operator - - '{ span. }' + - "{ . }" + - "{ < }" + - "{ .a < }" + - "{ .a < 3" + - "{ (.a < 3 }" + - "{ attribute = 4 }" # custom attribute not prefixed with ., span., resource. or parent. + - "{ .attribute == 4 }" # invalid operator + - "{ span. }" # spanset expressions - - '{ true } + { true }' - - '{ true } - { true }' - - '{ true } * { true }' - - '{ true } / { true }' - - '{ true } ^ { true }' - - '{ true } = { true }' # an interesting operator. possible future addition - - '{ true } <= { true }' - - '{ true } >= { true }' - - '{ true } < { true }' + - "{ true } + { true }" + - "{ true } - { true }" + - "{ true } * { true }" + - "{ true } / { true }" + - "{ true } ^ { true }" + - "{ true } = { true }" # an interesting operator. possible future addition + - "{ true } <= { true }" + - "{ true } >= { true }" + - "{ true } < { true }" # scalar expressions must evaluate to a number - 'max(name) = "foo"' - 'avg("foo") = "bar"' - - 'max(status) = ok' - - 'max(kind) = consumer' - - 'max(duration) < ok' + - "max(status) = ok" + - "max(kind) = consumer" + - "max(duration) < ok" - 'min(1) = "foo"' - - 'min(parent) = nil' + - "min(parent) = nil" - 'avg(childCount) > "foo"' # scalar filters - - 'avg(.field) + 1' # scalar filters must resolve to boolean - - 'sum(3) - 2' - - 'min(childCount) && 2' + - "avg(.field) + 1" # scalar filters must resolve to boolean + - "sum(3) - 2" + - "min(childCount) && 2" # pipelines - - 'coalesce() | { true }' # pipelines can't start with coalesce - - 'count() > 3 && { true }' # scalar filters have to be in pipeline - - '{ true } | count()' # naked scalar pipelines not allowed - - '{ true } | notAnAggregate() = 1' - - '{ true } | count = 1' - - '{ true } | max() = 1' - - '{ true } | by()' + - "coalesce() | { true }" # pipelines can't start with 
coalesce + - "count() > 3 && { true }" # scalar filters have to be in pipeline + - "{ true } | count()" # naked scalar pipelines not allowed + - "{ true } | notAnAggregate() = 1" + - "{ true } | count = 1" + - "{ true } | max() = 1" + - "{ true } | by()" # pipeline expressions - - '({ true }) + (count()) = 1' - - '({ true }) && (count())' - - '({ true } | count()) && ({ true } | count()) = 1' - - '({ true }) + ({ true }) = 1' - - '({ true } | count()) + ({ true } | count())' + - "({ true }) + (count()) = 1" + - "({ true }) && (count())" + - "({ true } | count()) && ({ true } | count()) = 1" + - "({ true }) + ({ true }) = 1" + - "({ true } | count()) + ({ true } | count())" # todo: improve the following - - '(by(namespace) | count()) > 2 * 2' # scalar expressions are currently not allowed in scalar pipelines - - '(by(namespace) | count()) * 2 > 2' - - '2 < (by(namespace) | count())' # static value needs to be on the RHS to remove conflicts with scalar expressions + - "(by(namespace) | count()) > 2 * 2" # scalar expressions are currently not allowed in scalar pipelines + - "(by(namespace) | count()) * 2 > 2" + - "2 < (by(namespace) | count())" # static value needs to be on the RHS to remove conflicts with scalar expressions # validate_fails parse correctly and return an error **besides unsupported** when calling .validate() validate_fails: # span expressions must evaluate to a boolean - - '{ status }' - - '{ kind }' - - '{ ok }' - - '{ 1.1 }' - - '{ 1h }' + - "{ status }" + - "{ kind }" + - "{ ok }" + - "{ 1.1 }" + - "{ 1h }" - '{ "foo" }' - - '{ 1 + 1 }' + - "{ 1 + 1 }" # binary operators - incorrect types - '{ 1 + "foo" = 1 }' - - '{ 1 - true = 1 }' - - '{ 1 / ok = 1 }' - - '{ 1 ^ name = 1 }' + - "{ 1 - true = 1 }" + - "{ 1 / ok = 1 }" + - "{ 1 ^ name = 1 }" - '{ 1 = "foo" }' - - '{ 1 != true }' - - '{ 1 > ok }' - - '{ 1 = name }' - - '{ 1 =~ 2}' + - "{ 1 != true }" + - "{ 1 > ok }" + - "{ 1 = name }" + - "{ 1 =~ 2}" - '{ 1 && "foo" }' - - '{ 1 || ok }' - - '{ true 
|| 1.1 }' - - '{ status > ok }' - - '{ kind < consumer }' + - "{ 1 || ok }" + - "{ true || 1.1 }" + - "{ status > ok }" + - "{ kind < consumer }" # unary operators - incorrect types - - '{ -true }' + - "{ -true }" - '{ -"foo" = "bar" }' - - '{ -ok = status }' + - "{ -ok = status }" - '{ -name = "foo" }' - '{ !"foo" = "bar" }' - - '{ !ok = status }' - - '{ !consumer = kind }' + - "{ !ok = status }" + - "{ !consumer = kind }" - '{ !name = "foo" }' - - '{ !1 = 1 }' - - '{ !1h = 1 }' - - '{ !1.1 = 1.1 }' + - "{ !1 = 1 }" + - "{ !1h = 1 }" + - "{ !1.1 = 1.1 }" # scalar expressions must evaluate to a number - - 'min(1 = 3) = 1' + - "min(1 = 3) = 1" # scalar expressions must reference the span - - 'sum(3) = 2' - - 'sum(3) = min(14)' - - 'min(2h) < max(duration)' - - 'min(3) = max(duration)' - - 'min(1) = max(2) + 3' - - 'min(1.1 - 3) > 1' - - 'max(1h + 2h) > 1' + - "sum(3) = 2" + - "sum(3) = min(14)" + - "min(2h) < max(duration)" + - "min(3) = max(duration)" + - "min(1) = max(2) + 3" + - "min(1.1 - 3) > 1" + - "max(1h + 2h) > 1" # unsupported parse correctly and return an unsupported error when calling .validate() unsupported: # coalesce - will be valid when supported - - '{ true } | coalesce()' - - '{ true } | by(1 + .a) | coalesce()' + - "{ true } | coalesce()" + - "{ true } | by(1 + .a) | coalesce()" # by - will be valid when supported - - '{ true } | by(.a)' - - '{ true } | by(1 + .a)' - - 'by(.a) | { true }' - - '{ true } | by(name) | count() > 2' - - '{ true } | by(.field) | avg(.b) = 2' + - "{ true } | by(.a)" + - "{ true } | by(1 + .a)" + - "by(.a) | { true }" + - "{ true } | by(name) | count() > 2" + - "{ true } | by(.field) | avg(.b) = 2" # by - will *not* be valid when supported - group expressions must reference the span - - '{ true } | by(1)' + - "{ true } | by(1)" - '{ true } | by("foo")' # complex scalar filters - will be valid when supported - - 'min(.field) < max(duration)' - - 'sum(.field) = min(.field)' - - 'min(.field) + max(.field) > 1' - - 
'min(.field) + max(childCount) > max(duration) - min(.field)' - - 'min(childCount) < 2 / 6' - - 'max(1 - (2 + .field)) < avg(3 * duration ^ 2)' + - "min(.field) < max(duration)" + - "sum(.field) = min(.field)" + - "min(.field) + max(.field) > 1" + - "min(.field) + max(childCount) > max(duration) - min(.field)" + - "min(childCount) < 2 / 6" + - "max(1 - (2 + .field)) < avg(3 * duration ^ 2)" # aggregates - will be valid when supported - - 'min(childCount) < 2' - - '{ true } | max(parent.a) = 1' - - '{ true } | by(3 * .field - 2) | max(duration) < 1s' - - '{ .http.status = 200 } | max(.field) - min(.field) > 3' + - "min(childCount) < 2" + - "{ true } | max(parent.a) = 1" + - "{ true } | by(3 * .field - 2) | max(duration) < 1s" + - "{ .http.status = 200 } | max(.field) - min(.field) > 3" # parent - will be valid when supported - - '{ parent.a != 3 }' - - '{ parent.resource.a && true }' - - '{ parent.span.a > 3 }' - - '{ parent.duration = 1h }' - - '{ parent = nil }' - - '{ (-(3 / 2) * .test - parent.blerg + .other)^3 = 2 }' + - "{ parent.a != 3 }" + - "{ parent.resource.a && true }" + - "{ parent.span.a > 3 }" + - "{ parent.duration = 1h }" + - "{ parent = nil }" + - "{ (-(3 / 2) * .test - parent.blerg + .other)^3 = 2 }" # parent - will not be valid when supported - - '{ parent }' - - '{ 1 % parent = 1 }' - - '{ 1 >= parent }' - - '{ -parent = nil }' - - '{ !parent = nil }' + - "{ parent }" + - "{ 1 % parent = 1 }" + - "{ 1 >= parent }" + - "{ -parent = nil }" + - "{ !parent = nil }" # nil - will be valid when supported - - '{ .foo = nil }' - # binary operations - will be valid when supported - - '{ "test" !~ "test" }' - - '{ .a !~ "test" }' + - "{ .foo = nil }" # childCount - will be valid when supported - - '{ 1 = childCount }' + - "{ 1 = childCount }" # childCount - will be invalid when supported - '{ "foo" = childCount }' # spanset operations - will be valid when supported - - '{ true } >> { true }' - - '{ true } > { true }' - - '{ true } ~ { true }' - - '({ true 
} | count() > 1 | { false }) >> ({ true } | count() > 1 | { false })' - - '({ true } | count() > 1 | { false }) > ({ true } | count() > 1 | { false })' - - '({ true } | count() > 1 | { false }) ~ ({ true } | count() > 1 | { false })' + - "{ true } >> { true }" + - "{ true } > { true }" + - "{ true } ~ { true }" + - "({ true } | count() > 1 | { false }) >> ({ true } | count() > 1 | { false })" + - "({ true } | count() > 1 | { false }) > ({ true } | count() > 1 | { false })" + - "({ true } | count() > 1 | { false }) ~ ({ true } | count() > 1 | { false })" # spanset pipelines + scalar filters - will be valid when supported - - '{ true } | count() + count() = 1' - - '({ true } | count()) + ({ true } | count()) = 1' - - '({ true } | count()) - ({ true } | count()) <= 1' - - '({ true } | count()) / ({ true } | count()) > ({ true } | count()) / ({ true } | count())' - - '({ true } | count()) * ({ true } | count()) < ({ true } | count()) / ({ true } | count())' - - '({ .http.status = 200 } | count()) + ({ name = `foo` } | avg(duration)) = 2' - - '({ .a } | count()) > ({ .b } | count())' + - "{ true } | count() + count() = 1" + - "({ true } | count()) + ({ true } | count()) = 1" + - "({ true } | count()) - ({ true } | count()) <= 1" + - "({ true } | count()) / ({ true } | count()) > ({ true } | count()) / ({ true } | count())" + - "({ true } | count()) * ({ true } | count()) < ({ true } | count()) / ({ true } | count())" + - "({ .http.status = 200 } | count()) + ({ name = `foo` } | avg(duration)) = 2" + - "({ .a } | count()) > ({ .b } | count())" # other scalar filters. no idea if these should be supported - - '3 = 2' # naked scalar filter, technically allowed - - 'avg(.field) > 1 - 3' # scalar expressions in scalar filters are currently not allowed. possible future addition + - "3 = 2" # naked scalar filter, technically allowed + - "avg(.field) > 1 - 3" # scalar expressions in scalar filters are currently not allowed. 
possible future addition # parsed and the ast is dumped to stdout. this is a debugging tool -dump: \ No newline at end of file +dump: From 54239cb45877261f5c1394d8d5c43128e26c075c Mon Sep 17 00:00:00 2001 From: Kousik Mitra Date: Sun, 30 Apr 2023 02:11:22 +0530 Subject: [PATCH 2/9] Add predicate to match not regex operations --- pkg/parquetquery/predicate_test.go | 188 ++++++++++++++++++++++------- pkg/parquetquery/predicates.go | 45 ++++--- 2 files changed, 175 insertions(+), 58 deletions(-) diff --git a/pkg/parquetquery/predicate_test.go b/pkg/parquetquery/predicate_test.go index c93e75e3765..bbafc578539 100644 --- a/pkg/parquetquery/predicate_test.go +++ b/pkg/parquetquery/predicate_test.go @@ -7,6 +7,7 @@ import ( "github.com/google/uuid" "github.com/segmentio/parquet-go" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -32,41 +33,153 @@ func (p *mockPredicate) KeepValue(parquet.Value) bool { p.valCalled func (p *mockPredicate) KeepPage(parquet.Page) bool { p.pageCalled = true; return p.ret } func (p *mockPredicate) KeepColumnChunk(parquet.ColumnChunk) bool { p.chunkCalled = true; return p.ret } +type predicateTestCase struct { + testName string + writeData func(w *parquet.Writer) //nolint:all + keptChunks int + keptPages int + keptValues int + predicate Predicate +} + func TestSubstringPredicate(t *testing.T) { + testCases := []predicateTestCase{ + { + testName: "all chunks/pages/values inspected", + predicate: NewSubstringPredicate("b"), + keptChunks: 1, + keptPages: 1, + keptValues: 2, + writeData: func(w *parquet.Writer) { //nolint:all + type String struct { + S string `parquet:",dict"` + } + require.NoError(t, w.Write(&String{"abc"})) // kept + require.NoError(t, w.Write(&String{"bcd"})) // kept + require.NoError(t, w.Write(&String{"cde"})) // skipped + }, + }, + { + testName: "dictionary in the page header allows for skipping a page", + predicate: NewSubstringPredicate("x"), // Not present in any values + keptChunks: 1, + 
keptPages: 0, + keptValues: 0, + writeData: func(w *parquet.Writer) { //nolint:all + type dictString struct { + S string `parquet:",dict"` + } + require.NoError(t, w.Write(&dictString{"abc"})) + require.NoError(t, w.Write(&dictString{"abc"})) + require.NoError(t, w.Write(&dictString{"abc"})) + require.NoError(t, w.Write(&dictString{"abc"})) + require.NoError(t, w.Write(&dictString{"abc"})) + }, + }, + } + + for _, tC := range testCases { + t.Run(tC.testName, func(t *testing.T) { + testPredicate(t, tC) + }) + } +} + +func TestNewRegexInPredicate(t *testing.T) { + testCases := []predicateTestCase{ + { + testName: "all chunks/pages/values inspected", + predicate: func() Predicate { + pred, err := NewRegexInPredicate([]string{"a.*"}) + assert.NoError(t, err) + return pred + }(), + keptChunks: 1, + keptPages: 1, + keptValues: 2, + writeData: func(w *parquet.Writer) { //nolint:all + type String struct { + S string `parquet:",dict"` + } + require.NoError(t, w.Write(&String{"abc"})) // kept + require.NoError(t, w.Write(&String{"acd"})) // kept + require.NoError(t, w.Write(&String{"cde"})) // skipped + }, + }, + { + testName: "dictionary in the page header allows for skipping a page", + predicate: func() Predicate { + pred, err := NewRegexInPredicate([]string{"x.*"}) + assert.NoError(t, err) + return pred + }(), // Not present in any values + keptChunks: 1, + keptPages: 0, + keptValues: 0, + writeData: func(w *parquet.Writer) { //nolint:all + type dictString struct { + S string `parquet:",dict"` + } + require.NoError(t, w.Write(&dictString{"abc"})) + require.NoError(t, w.Write(&dictString{"abc"})) + }, + }, + } - // Normal case - all chunks/pages/values inspected - testPredicate(t, predicateTestCase{ - predicate: NewSubstringPredicate("b"), - keptChunks: 1, - keptPages: 1, - keptValues: 2, - writeData: func(w *parquet.Writer) { //nolint:all - type String struct { - S string `parquet:",dict"` - } - require.NoError(t, w.Write(&String{"abc"})) // kept - require.NoError(t, 
w.Write(&String{"bcd"})) // kept - require.NoError(t, w.Write(&String{"cde"})) // skipped + for _, tC := range testCases { + t.Run(tC.testName, func(t *testing.T) { + testPredicate(t, tC) + }) + } +} + +func TestNewRegexNotInPredicate(t *testing.T) { + testCases := []predicateTestCase{ + { + testName: "all chunks/pages/values inspected", + predicate: func() Predicate { + pred, err := NewRegexNotInPredicate([]string{"a.*"}) + assert.NoError(t, err) + return pred + }(), + keptChunks: 1, + keptPages: 1, + keptValues: 2, + writeData: func(w *parquet.Writer) { //nolint:all + type String struct { + S string `parquet:",dict"` + } + require.NoError(t, w.Write(&String{"abc"})) // skipped + require.NoError(t, w.Write(&String{"acd"})) // skipped + require.NoError(t, w.Write(&String{"cde"})) // kept + require.NoError(t, w.Write(&String{"xde"})) // kept + }, }, - }) - - // Dictionary in the page header allows for skipping a page - testPredicate(t, predicateTestCase{ - predicate: NewSubstringPredicate("x"), // Not present in any values - keptChunks: 1, - keptPages: 0, - keptValues: 0, - writeData: func(w *parquet.Writer) { //nolint:all - type dictString struct { - S string `parquet:",dict"` - } - require.NoError(t, w.Write(&dictString{"abc"})) - require.NoError(t, w.Write(&dictString{"abc"})) - require.NoError(t, w.Write(&dictString{"abc"})) - require.NoError(t, w.Write(&dictString{"abc"})) - require.NoError(t, w.Write(&dictString{"abc"})) + { + testName: "dictionary in the page header allows for skipping a page", + predicate: func() Predicate { + pred, err := NewRegexNotInPredicate([]string{"x.*"}) + assert.NoError(t, err) + return pred + }(), // Not present in any values + keptChunks: 1, + keptPages: 0, + keptValues: 0, + writeData: func(w *parquet.Writer) { //nolint:all + type dictString struct { + S string `parquet:",dict"` + } + require.NoError(t, w.Write(&dictString{"xyz"})) + require.NoError(t, w.Write(&dictString{"xyz"})) + }, }, - }) + } + + for _, tC := range testCases 
{ + t.Run(tC.testName, func(t *testing.T) { + testPredicate(t, tC) + }) + } } // TestOrPredicateCallsKeepColumnChunk ensures that the OrPredicate calls @@ -120,17 +233,10 @@ func TestOrPredicateCallsKeepColumnChunk(t *testing.T) { } } -type predicateTestCase struct { - writeData func(w *parquet.Writer) //nolint:all - keptChunks int - keptPages int - keptValues int - predicate Predicate -} - -// testPredicate by writing data and then iterating the column. The data model -// must contain a single column. +// testPredicate by writing data and then iterating the column. +// The data model must contain a single column. func testPredicate(t *testing.T, tc predicateTestCase) { + t.Helper() buf := new(bytes.Buffer) w := parquet.NewWriter(buf) tc.writeData(w) diff --git a/pkg/parquetquery/predicates.go b/pkg/parquetquery/predicates.go index 0acd8d403c3..f5b78edd556 100644 --- a/pkg/parquetquery/predicates.go +++ b/pkg/parquetquery/predicates.go @@ -82,21 +82,33 @@ func (p *StringInPredicate) KeepPage(page pq.Page) bool { return p.helper.keepPage(page, p.KeepValue) } -// RegexInPredicate checks for match against any of the given regexs. -// Memoized and resets on each row group. -type RegexInPredicate struct { - regs []*regexp.Regexp - matches map[string]bool +type regexPredicate struct { + regs []*regexp.Regexp + matches map[string]bool + shouldMatch bool helper DictionaryPredicateHelper } -var _ Predicate = (*RegexInPredicate)(nil) +var _ Predicate = (*regexPredicate)(nil) + +// NewRegexInPredicate checks for match against any of the given regexs. +// Memoized and resets on each row group. +func NewRegexInPredicate(regs []string) (Predicate, error) { + return newRegexPredicate(regs, true) +} + +// NewRegexNotInPredicate checks for values that do not match against any of the given regexs. +// Memoized and resets on each row group.
+func NewRegexNotInPredicate(regs []string) (Predicate, error) { + return newRegexPredicate(regs, false) +} -func NewRegexInPredicate(regs []string) (*RegexInPredicate, error) { - p := &RegexInPredicate{ - regs: make([]*regexp.Regexp, 0, len(regs)), - matches: make(map[string]bool), +func newRegexPredicate(regs []string, shouldMatch bool) (Predicate, error) { + p := &regexPredicate{ + regs: make([]*regexp.Regexp, 0, len(regs)), + matches: make(map[string]bool), + shouldMatch: shouldMatch, } for _, reg := range regs { r, err := regexp.Compile(reg) @@ -108,7 +120,7 @@ func NewRegexInPredicate(regs []string) (*RegexInPredicate, error) { return p, nil } -func (p *RegexInPredicate) String() string { +func (p *regexPredicate) String() string { var strings string for _, s := range p.regs { strings += fmt.Sprintf("%s, ", s.String()) @@ -116,9 +128,8 @@ func (p *RegexInPredicate) String() string { return fmt.Sprintf("RegexInPredicate{%s}", strings) } -func (p *RegexInPredicate) keep(v *pq.Value) bool { +func (p *regexPredicate) keep(v *pq.Value) bool { if v.IsNull() { - // Null return false } @@ -129,7 +140,7 @@ func (p *RegexInPredicate) keep(v *pq.Value) bool { matched := false for _, r := range p.regs { - if r.MatchString(s) { + if r.MatchString(s) == p.shouldMatch { matched = true break } @@ -139,7 +150,7 @@ func (p *RegexInPredicate) keep(v *pq.Value) bool { return matched } -func (p *RegexInPredicate) KeepColumnChunk(cc pq.ColumnChunk) bool { +func (p *regexPredicate) KeepColumnChunk(cc pq.ColumnChunk) bool { p.helper.setNewRowGroup() // Reset match cache on each row group change @@ -149,11 +160,11 @@ func (p *RegexInPredicate) KeepColumnChunk(cc pq.ColumnChunk) bool { return true } -func (p *RegexInPredicate) KeepValue(v pq.Value) bool { +func (p *regexPredicate) KeepValue(v pq.Value) bool { return p.keep(&v) } -func (p *RegexInPredicate) KeepPage(page pq.Page) bool { +func (p *regexPredicate) KeepPage(page pq.Page) bool { return p.helper.keepPage(page, p.KeepValue) }
From 57c3b27e24471f952d5e96c55090c6d64fc426e5 Mon Sep 17 00:00:00 2001 From: Kousik Mitra Date: Sun, 30 Apr 2023 02:11:57 +0530 Subject: [PATCH 3/9] Support for searching pattern with not regex op --- tempodb/encoding/vparquet/block_traceql.go | 4 +++- tempodb/encoding/vparquet/block_traceql_test.go | 2 ++ tempodb/encoding/vparquet2/block_traceql.go | 4 +++- tempodb/encoding/vparquet2/block_traceql_test.go | 2 ++ 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/tempodb/encoding/vparquet/block_traceql.go b/tempodb/encoding/vparquet/block_traceql.go index 255ab26393b..ce620dbc090 100644 --- a/tempodb/encoding/vparquet/block_traceql.go +++ b/tempodb/encoding/vparquet/block_traceql.go @@ -195,7 +195,7 @@ func checkConditions(conditions []traceql.Condition) error { case traceql.OpEqual, traceql.OpNotEqual, traceql.OpGreater, traceql.OpGreaterEqual, traceql.OpLess, traceql.OpLessEqual, - traceql.OpRegex: + traceql.OpRegex, traceql.OpNotRegex: if opCount != 1 { return fmt.Errorf("operation %v must have exactly 1 argument. 
condition: %+v", cond.Op, cond) } @@ -837,6 +837,8 @@ func createStringPredicate(op traceql.Operator, operands traceql.Operands) (parq case traceql.OpRegex: return parquetquery.NewRegexInPredicate([]string{s}) + case traceql.OpNotRegex: + return parquetquery.NewRegexNotInPredicate([]string{s}) case traceql.OpEqual: return parquetquery.NewStringInPredicate([]string{s}), nil case traceql.OpGreater: diff --git a/tempodb/encoding/vparquet/block_traceql_test.go b/tempodb/encoding/vparquet/block_traceql_test.go index 399d7e35900..ff6cf89b9f1 100644 --- a/tempodb/encoding/vparquet/block_traceql_test.go +++ b/tempodb/encoding/vparquet/block_traceql_test.go @@ -138,6 +138,7 @@ func TestBackendBlockSearchTraceQL(t *testing.T) { traceql.MustExtractFetchSpansRequest(`{.foo = "def"}`), // String == traceql.MustExtractFetchSpansRequest(`{.foo != "deg"}`), // String != traceql.MustExtractFetchSpansRequest(`{.foo =~ "d.*"}`), // String Regex + traceql.MustExtractFetchSpansRequest(`{.foo !~ "x.*"}`), // String Not Regex traceql.MustExtractFetchSpansRequest(`{resource.foo = "abc"}`), // Resource-level only traceql.MustExtractFetchSpansRequest(`{span.foo = "def"}`), // Span-level only traceql.MustExtractFetchSpansRequest(`{.foo}`), // Projection only @@ -222,6 +223,7 @@ func TestBackendBlockSearchTraceQL(t *testing.T) { // TODO - Should the below query return data or not? It does match the resource // makeReq(parse(t, `{.foo = "abc"}`)), // This should not return results because the span has overridden this attribute to "def". 
traceql.MustExtractFetchSpansRequest(`{.foo =~ "xyz.*"}`), // Regex IN + traceql.MustExtractFetchSpansRequest(`{.foo !~ ".*"}`), // String Not Regex traceql.MustExtractFetchSpansRequest(`{span.bool = true}`), // Bool not match traceql.MustExtractFetchSpansRequest(`{` + LabelDuration + ` > 100s}`), // Intrinsic: duration traceql.MustExtractFetchSpansRequest(`{` + LabelStatus + ` = ok}`), // Intrinsic: status diff --git a/tempodb/encoding/vparquet2/block_traceql.go b/tempodb/encoding/vparquet2/block_traceql.go index 5b4d63fab1a..0172ef7750f 100644 --- a/tempodb/encoding/vparquet2/block_traceql.go +++ b/tempodb/encoding/vparquet2/block_traceql.go @@ -196,7 +196,7 @@ func checkConditions(conditions []traceql.Condition) error { case traceql.OpEqual, traceql.OpNotEqual, traceql.OpGreater, traceql.OpGreaterEqual, traceql.OpLess, traceql.OpLessEqual, - traceql.OpRegex: + traceql.OpRegex, traceql.OpNotRegex: if opCount != 1 { return fmt.Errorf("operation %v must have exactly 1 argument. condition: %+v", cond.Op, cond) } @@ -825,6 +825,8 @@ func createStringPredicate(op traceql.Operator, operands traceql.Operands) (parq case traceql.OpRegex: return parquetquery.NewRegexInPredicate([]string{s}) + case traceql.OpNotRegex: + return parquetquery.NewRegexNotInPredicate([]string{s}) case traceql.OpEqual: return parquetquery.NewStringInPredicate([]string{s}), nil diff --git a/tempodb/encoding/vparquet2/block_traceql_test.go b/tempodb/encoding/vparquet2/block_traceql_test.go index b4e9707928d..0fbb58b5d10 100644 --- a/tempodb/encoding/vparquet2/block_traceql_test.go +++ b/tempodb/encoding/vparquet2/block_traceql_test.go @@ -139,6 +139,7 @@ func TestBackendBlockSearchTraceQL(t *testing.T) { traceql.MustExtractFetchSpansRequest(`{.foo = "def"}`), // String == traceql.MustExtractFetchSpansRequest(`{.foo != "deg"}`), // String != traceql.MustExtractFetchSpansRequest(`{.foo =~ "d.*"}`), // String Regex + traceql.MustExtractFetchSpansRequest(`{.foo !~ "x.*"}`), // String Not Regex
traceql.MustExtractFetchSpansRequest(`{resource.foo = "abc"}`), // Resource-level only traceql.MustExtractFetchSpansRequest(`{span.foo = "def"}`), // Span-level only traceql.MustExtractFetchSpansRequest(`{.foo}`), // Projection only @@ -223,6 +224,7 @@ func TestBackendBlockSearchTraceQL(t *testing.T) { // TODO - Should the below query return data or not? It does match the resource // makeReq(parse(t, `{.foo = "abc"}`)), // This should not return results because the span has overridden this attribute to "def". traceql.MustExtractFetchSpansRequest(`{.foo =~ "xyz.*"}`), // Regex IN + traceql.MustExtractFetchSpansRequest(`{.foo !~ ".*"}`), // String Not Regex traceql.MustExtractFetchSpansRequest(`{span.bool = true}`), // Bool not match traceql.MustExtractFetchSpansRequest(`{` + LabelDuration + ` > 100s}`), // Intrinsic: duration traceql.MustExtractFetchSpansRequest(`{` + LabelStatus + ` = ok}`), // Intrinsic: status From 8261728b2bbc53f635ebc9913537ac2d621c30ee Mon Sep 17 00:00:00 2001 From: Kousik Mitra Date: Sun, 30 Apr 2023 02:46:08 +0530 Subject: [PATCH 4/9] Add doc for negated regex --- docs/sources/tempo/traceql/_index.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/sources/tempo/traceql/_index.md b/docs/sources/tempo/traceql/_index.md index eb5592e54ba..782c2fb14f0 100644 --- a/docs/sources/tempo/traceql/_index.md +++ b/docs/sources/tempo/traceql/_index.md @@ -120,6 +120,7 @@ The implemented comparison operators are: - `<` (less than) - `<=` (less than or equal to) - `=~` (regular expression) +- `!~` (negated regular expression) TraceQL uses Golang regular expressions. Online regular expression testing sites like https://regex101.com/ are convenient to validate regular expressions used in TraceQL queries. 
From b012030b133d0f88e968bf3548ba98903b2c857e Mon Sep 17 00:00:00 2001 From: Kousik Mitra Date: Sun, 30 Apr 2023 02:51:33 +0530 Subject: [PATCH 5/9] Update changelog --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b6725572300..a3704987c09 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ ## main / unreleased -* [ENHANCEMENT] Add `prefix` configuration option to `storage.trace.azure` and `storage.trace.gcs` [#2362](https://github.com/grafana/tempo/pull/2386) (@kousikmitra) +* [ENHANCEMENT] Add support to filter using negated regex operator `!~` [#2410](https://github.com/grafana/tempo/pull/2410) (@kousikmitra) +* [ENHANCEMENT] Add `prefix` configuration option to `storage.trace.azure` and `storage.trace.gcs` [#2386](https://github.com/grafana/tempo/pull/2386) (@kousikmitra) * [ENHANCEMENT] Add `prefix` configuration option to `storage.trace.s3` [#2362](https://github.com/grafana/tempo/pull/2362) (@kousikmitra) * [FEATURE] Add support for `q` query param in `/api/v2/search//values` to filter results based on a TraceQL query [#2253](https://github.com/grafana/tempo/pull/2253) (@mapno) * [ENHANCEMENT] Add `scope` parameter to `/api/search/tags` [#2282](https://github.com/grafana/tempo/pull/2282) (@joe-elliott) From 47587a1bfa4c2fdb89bf654aedcb76fd159a1555 Mon Sep 17 00:00:00 2001 From: Kousik Mitra Date: Mon, 1 May 2023 17:51:43 +0530 Subject: [PATCH 6/9] Replace asserts with require --- pkg/parquetquery/predicate_test.go | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/pkg/parquetquery/predicate_test.go b/pkg/parquetquery/predicate_test.go index bbafc578539..b8f907b1f32 100644 --- a/pkg/parquetquery/predicate_test.go +++ b/pkg/parquetquery/predicate_test.go @@ -7,7 +7,6 @@ import ( "github.com/google/uuid" "github.com/segmentio/parquet-go" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -91,7 +90,8 @@ func 
TestNewRegexInPredicate(t *testing.T) { testName: "all chunks/pages/values inspected", predicate: func() Predicate { pred, err := NewRegexInPredicate([]string{"a.*"}) - assert.NoError(t, err) + require.NoError(t, err) + return pred }(), keptChunks: 1, @@ -110,7 +110,8 @@ func TestNewRegexInPredicate(t *testing.T) { testName: "dictionary in the page header allows for skipping a page", predicate: func() Predicate { pred, err := NewRegexInPredicate([]string{"x.*"}) - assert.NoError(t, err) + require.NoError(t, err) + return pred }(), // Not present in any values keptChunks: 1, @@ -139,7 +140,8 @@ func TestNewRegexNotInPredicate(t *testing.T) { testName: "all chunks/pages/values inspected", predicate: func() Predicate { pred, err := NewRegexNotInPredicate([]string{"a.*"}) - assert.NoError(t, err) + require.NoError(t, err) + return pred }(), keptChunks: 1, @@ -159,7 +161,8 @@ func TestNewRegexNotInPredicate(t *testing.T) { testName: "dictionary in the page header allows for skipping a page", predicate: func() Predicate { pred, err := NewRegexNotInPredicate([]string{"x.*"}) - assert.NoError(t, err) + require.NoError(t, err) + return pred }(), // Not present in any values keptChunks: 1, From f81560f67df6fdab6a943b4f9dcf1d4552f75e91 Mon Sep 17 00:00:00 2001 From: Kousik Mitra Date: Mon, 1 May 2023 18:04:08 +0530 Subject: [PATCH 7/9] move dictString def to pkg level as testDictString --- pkg/parquetquery/predicate_test.go | 65 ++++++++++++------------------ 1 file changed, 26 insertions(+), 39 deletions(-) diff --git a/pkg/parquetquery/predicate_test.go b/pkg/parquetquery/predicate_test.go index b8f907b1f32..b70c1e2aa6e 100644 --- a/pkg/parquetquery/predicate_test.go +++ b/pkg/parquetquery/predicate_test.go @@ -10,8 +10,6 @@ import ( "github.com/stretchr/testify/require" ) -var _ Predicate = (*mockPredicate)(nil) - type mockPredicate struct { ret bool valCalled bool @@ -19,6 +17,12 @@ type mockPredicate struct { chunkCalled bool } +type testDictString struct { + S string 
`parquet:",dict"` +} + +var _ Predicate = (*mockPredicate)(nil) + func newAlwaysTruePredicate() *mockPredicate { return &mockPredicate{ret: true} } @@ -50,12 +54,10 @@ func TestSubstringPredicate(t *testing.T) { keptPages: 1, keptValues: 2, writeData: func(w *parquet.Writer) { //nolint:all - type String struct { - S string `parquet:",dict"` - } - require.NoError(t, w.Write(&String{"abc"})) // kept - require.NoError(t, w.Write(&String{"bcd"})) // kept - require.NoError(t, w.Write(&String{"cde"})) // skipped + + require.NoError(t, w.Write(&testDictString{"abc"})) // kept + require.NoError(t, w.Write(&testDictString{"bcd"})) // kept + require.NoError(t, w.Write(&testDictString{"cde"})) // skipped }, }, { @@ -65,14 +67,11 @@ func TestSubstringPredicate(t *testing.T) { keptPages: 0, keptValues: 0, writeData: func(w *parquet.Writer) { //nolint:all - type dictString struct { - S string `parquet:",dict"` - } - require.NoError(t, w.Write(&dictString{"abc"})) - require.NoError(t, w.Write(&dictString{"abc"})) - require.NoError(t, w.Write(&dictString{"abc"})) - require.NoError(t, w.Write(&dictString{"abc"})) - require.NoError(t, w.Write(&dictString{"abc"})) + require.NoError(t, w.Write(&testDictString{"abc"})) + require.NoError(t, w.Write(&testDictString{"abc"})) + require.NoError(t, w.Write(&testDictString{"abc"})) + require.NoError(t, w.Write(&testDictString{"abc"})) + require.NoError(t, w.Write(&testDictString{"abc"})) }, }, } @@ -98,12 +97,9 @@ func TestNewRegexInPredicate(t *testing.T) { keptPages: 1, keptValues: 2, writeData: func(w *parquet.Writer) { //nolint:all - type String struct { - S string `parquet:",dict"` - } - require.NoError(t, w.Write(&String{"abc"})) // kept - require.NoError(t, w.Write(&String{"acd"})) // kept - require.NoError(t, w.Write(&String{"cde"})) // skipped + require.NoError(t, w.Write(&testDictString{"abc"})) // kept + require.NoError(t, w.Write(&testDictString{"acd"})) // kept + require.NoError(t, w.Write(&testDictString{"cde"})) // skipped }, 
}, { @@ -118,11 +114,8 @@ func TestNewRegexInPredicate(t *testing.T) { keptPages: 0, keptValues: 0, writeData: func(w *parquet.Writer) { //nolint:all - type dictString struct { - S string `parquet:",dict"` - } - require.NoError(t, w.Write(&dictString{"abc"})) - require.NoError(t, w.Write(&dictString{"abc"})) + require.NoError(t, w.Write(&testDictString{"abc"})) + require.NoError(t, w.Write(&testDictString{"abc"})) }, }, } @@ -148,13 +141,10 @@ func TestNewRegexNotInPredicate(t *testing.T) { keptPages: 1, keptValues: 2, writeData: func(w *parquet.Writer) { //nolint:all - type String struct { - S string `parquet:",dict"` - } - require.NoError(t, w.Write(&String{"abc"})) // skipped - require.NoError(t, w.Write(&String{"acd"})) // skipped - require.NoError(t, w.Write(&String{"cde"})) // kept - require.NoError(t, w.Write(&String{"xde"})) // kept + require.NoError(t, w.Write(&testDictString{"abc"})) // skipped + require.NoError(t, w.Write(&testDictString{"acd"})) // skipped + require.NoError(t, w.Write(&testDictString{"cde"})) // kept + require.NoError(t, w.Write(&testDictString{"xde"})) // kept }, }, { @@ -169,11 +159,8 @@ func TestNewRegexNotInPredicate(t *testing.T) { keptPages: 0, keptValues: 0, writeData: func(w *parquet.Writer) { //nolint:all - type dictString struct { - S string `parquet:",dict"` - } - require.NoError(t, w.Write(&dictString{"xyz"})) - require.NoError(t, w.Write(&dictString{"xyz"})) + require.NoError(t, w.Write(&testDictString{"xyz"})) + require.NoError(t, w.Write(&testDictString{"xyz"})) }, }, } From f2ea0d55536aed1bddbde0c8296dac5805984cff Mon Sep 17 00:00:00 2001 From: Kousik Mitra Date: Mon, 1 May 2023 18:38:44 +0530 Subject: [PATCH 8/9] Add benchmark for regex predicate --- pkg/parquetquery/predicate_test.go | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pkg/parquetquery/predicate_test.go b/pkg/parquetquery/predicate_test.go index b70c1e2aa6e..cf36d9da349 100644 --- a/pkg/parquetquery/predicate_test.go +++ 
b/pkg/parquetquery/predicate_test.go @@ -286,3 +286,21 @@ func BenchmarkStringInPredicate(b *testing.B) { } } } + +func BenchmarkRegexInPredicate(b *testing.B) { + p, err := NewRegexInPredicate([]string{"abc"}) + require.NoError(b, err) + + s := make([]parquet.Value, 1000) + for i := 0; i < 1000; i++ { + s[i] = parquet.ValueOf(uuid.New().String()) + } + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + for _, ss := range s { + p.KeepValue(ss) + } + } +} From 3b1c8232c474a8c0cdeb55c2b9f16b1d82728906 Mon Sep 17 00:00:00 2001 From: Kousik Mitra Date: Mon, 1 May 2023 19:26:17 +0530 Subject: [PATCH 9/9] Revert auto format changes in yaml file --- pkg/traceql/test_examples.yaml | 381 +++++++++++++++++---------------- 1 file changed, 191 insertions(+), 190 deletions(-) diff --git a/pkg/traceql/test_examples.yaml b/pkg/traceql/test_examples.yaml index 03022f728d6..cf428e70435 100644 --- a/pkg/traceql/test_examples.yaml +++ b/pkg/traceql/test_examples.yaml @@ -1,257 +1,258 @@ # valid queries parse successfully and return nil when calling .validate() valid: # spanset filters - - "{ true }" - - "{ !true }" - - "{ true && false }" - - "{ true || false }" - - "{ 1 = 2 }" - - "{ 1 != 2 }" - - "{ 1 > 2 }" - - "{ 1 >= 2 }" - - "{ 1 < 2 }" - - "{ 1 <= 2 }" - - "{ -1 = 2 }" + - '{ true }' + - '{ !true }' + - '{ true && false }' + - '{ true || false }' + - '{ 1 = 2 }' + - '{ 1 != 2 }' + - '{ 1 > 2 }' + - '{ 1 >= 2 }' + - '{ 1 < 2 }' + - '{ 1 <= 2 }' + - '{ -1 = 2 }' - '{ "test" =~ "test" }' - '{ "test" !~ "test" }' - '{ "test" = "test" }' - '{ "test" != "test" }' - - "{ .a }" - - "{ !.a }" - - "{ .a && false }" - - "{ .a || true }" - - "{ .a = 2 }" - - "{ .a != 2 }" - - "{ .a > 2 }" - - "{ .a >= 2 }" - - "{ .a < 2 }" - - "{ .a <= 2 }" - - "{ -.a = 2 }" + - '{ .a }' + - '{ !.a }' + - '{ .a && false }' + - '{ .a || true }' + - '{ .a = 2 }' + - '{ .a != 2 }' + - '{ .a > 2 }' + - '{ .a >= 2 }' + - '{ .a < 2 }' + - '{ .a <= 2 }' + - '{ -.a = 2 }' - '{ .a =~ "test" }' - '{ .a !~ "test" }' 
- '{ .a = "test" }' - '{ .a != "test" }' - - "{ resource.a != 3 }" - - "{ span.a != 3 }" + - '{ resource.a != 3 }' + - '{ span.a != 3 }' - '{ !("test" != .c || ((true && .b) || 3 < .a)) }' - - "{ status = ok }" - - "{ status = unset }" - - "{ status = error }" - - "{ status != error }" - - "{ kind = internal }" - - "{ kind = client }" - - "{ kind = consumer }" - - "{ duration > 1s }" - - "{ 1 < 1h }" - - "{ 1 <= 1.1 }" + - '{ status = ok }' + - '{ status = unset }' + - '{ status = error }' + - '{ status != error }' + - '{ kind = internal }' + - '{ kind = client }' + - '{ kind = consumer }' + - '{ duration > 1s }' + - '{ 1 < 1h }' + - '{ 1 <= 1.1 }' # binary operations - - "{ 1 + 1 = 2 }" - - "{ 1 - 1 = 2 }" - - "{ 1 * 1 = 2 }" - - "{ 1 / 1 = 2 }" - - "{ 1 ^ 1 = 2 }" - - "{ .a + 1 = 2 }" - - "{ .a - 1 = 2 }" - - "{ .a * 1 = 2 }" - - "{ .a / 1 = 2 }" - - "{ .a ^ 1 = 2 }" - - "{ duration > 1s * 2s }" - - "{ 1 * 1h = 1 }" # combining float, int and duration can make sense, but can also be weird. we just accept it all - - "{ 1 / 1.1 = 1 }" + - '{ 1 + 1 = 2 }' + - '{ 1 - 1 = 2 }' + - '{ 1 * 1 = 2 }' + - '{ 1 / 1 = 2 }' + - '{ 1 ^ 1 = 2 }' + - '{ .a + 1 = 2 }' + - '{ .a - 1 = 2 }' + - '{ .a * 1 = 2 }' + - '{ .a / 1 = 2 }' + - '{ .a ^ 1 = 2 }' + - '{ duration > 1s * 2s }' + - '{ 1 * 1h = 1 }' # combining float, int and duration can make sense, but can also be weird. 
we just accept it all + - '{ 1 / 1.1 = 1 }' - '{ .http.status >= "200" }' # spanset expressions - - "{ true } && { true }" - - "{ true } || { true }" + - '{ true } && { true }' + - '{ true } || { true }' # scalar filters - - "avg(.field) > 1" - - "max(duration) >= 1s" - - "max(duration) > 1" # same note as above for int, float and duration - - "{ true } | max(duration) = 1h" - - "{ true } | min(duration) = 1h" - - "{ true } | sum(duration) = 1h" - - "{ true } | max(.a) = 1" - - "{ true } | max(span.a) = 1" - - "{ true } | max(resource.a) = 1" - - "{ true } | max(1 + .a) = 1" - - "{ true } | max((1 + .a) * 2) = 1" - - "max(duration) > 3s | { status = error || .http.status = 500 }" + - 'avg(.field) > 1' + - 'max(duration) >= 1s' + - 'max(duration) > 1' # same note as above for int, float and duration + - '{ true } | max(duration) = 1h' + - '{ true } | min(duration) = 1h' + - '{ true } | sum(duration) = 1h' + - '{ true } | max(.a) = 1' + - '{ true } | max(span.a) = 1' + - '{ true } | max(resource.a) = 1' + - '{ true } | max(1 + .a) = 1' + - '{ true } | max((1 + .a) * 2) = 1' + - 'max(duration) > 3s | { status = error || .http.status = 500 }' # pipelines - - "{ true } | { .a }" - - "{ true } | count() = 1" - - "{ true } | avg(duration) = 1h" - - "count() = 1 | { true }" - - "{ true } | count() = 1 | { true }" + - '{ true } | { .a }' + - '{ true } | count() = 1' + - '{ true } | avg(duration) = 1h' + - 'count() = 1 | { true }' + - '{ true } | count() = 1 | { true }' # pipeline expressions - - "({ true } | count() > 1 | { false }) && ({ true } | count() > 1 | { false })" - - "({ true } | count() > 1 | { false }) || ({ true } | count() > 1 | { false })" - + - '({ true } | count() > 1 | { false }) && ({ true } | count() > 1 | { false })' + - '({ true } | count() > 1 | { false }) || ({ true } | count() > 1 | { false })' + # parse_fails throw an error when parsing parse_fails: - - "true" - - "[ true ]" - - "( true )" + - 'true' + - '[ true ]' + - '( true )' # spanset filters 
- - "{ . }" - - "{ < }" - - "{ .a < }" - - "{ .a < 3" - - "{ (.a < 3 }" - - "{ attribute = 4 }" # custom attribute not prefixed with ., span., resource. or parent. - - "{ .attribute == 4 }" # invalid operator - - "{ span. }" + - '{ . }' + - '{ < }' + - '{ .a < }' + - '{ .a < 3' + - '{ (.a < 3 }' + - '{ attribute = 4 }' # custom attribute not prefixed with ., span., resource. or parent. + - '{ .attribute == 4 }' # invalid operator + - '{ span. }' # spanset expressions - - "{ true } + { true }" - - "{ true } - { true }" - - "{ true } * { true }" - - "{ true } / { true }" - - "{ true } ^ { true }" - - "{ true } = { true }" # an interesting operator. possible future addition - - "{ true } <= { true }" - - "{ true } >= { true }" - - "{ true } < { true }" + - '{ true } + { true }' + - '{ true } - { true }' + - '{ true } * { true }' + - '{ true } / { true }' + - '{ true } ^ { true }' + - '{ true } = { true }' # an interesting operator. possible future addition + - '{ true } <= { true }' + - '{ true } >= { true }' + - '{ true } < { true }' # scalar expressions must evaluate to a number - 'max(name) = "foo"' - 'avg("foo") = "bar"' - - "max(status) = ok" - - "max(kind) = consumer" - - "max(duration) < ok" + - 'max(status) = ok' + - 'max(kind) = consumer' + - 'max(duration) < ok' - 'min(1) = "foo"' - - "min(parent) = nil" + - 'min(parent) = nil' - 'avg(childCount) > "foo"' # scalar filters - - "avg(.field) + 1" # scalar filters must resolve to boolean - - "sum(3) - 2" - - "min(childCount) && 2" + - 'avg(.field) + 1' # scalar filters must resolve to boolean + - 'sum(3) - 2' + - 'min(childCount) && 2' # pipelines - - "coalesce() | { true }" # pipelines can't start with coalesce - - "count() > 3 && { true }" # scalar filters have to be in pipeline - - "{ true } | count()" # naked scalar pipelines not allowed - - "{ true } | notAnAggregate() = 1" - - "{ true } | count = 1" - - "{ true } | max() = 1" - - "{ true } | by()" + - 'coalesce() | { true }' # pipelines can't start with 
coalesce + - 'count() > 3 && { true }' # scalar filters have to be in pipeline + - '{ true } | count()' # naked scalar pipelines not allowed + - '{ true } | notAnAggregate() = 1' + - '{ true } | count = 1' + - '{ true } | max() = 1' + - '{ true } | by()' # pipeline expressions - - "({ true }) + (count()) = 1" - - "({ true }) && (count())" - - "({ true } | count()) && ({ true } | count()) = 1" - - "({ true }) + ({ true }) = 1" - - "({ true } | count()) + ({ true } | count())" + - '({ true }) + (count()) = 1' + - '({ true }) && (count())' + - '({ true } | count()) && ({ true } | count()) = 1' + - '({ true }) + ({ true }) = 1' + - '({ true } | count()) + ({ true } | count())' # todo: improve the following - - "(by(namespace) | count()) > 2 * 2" # scalar expressions are currently not allowed in scalar pipelines - - "(by(namespace) | count()) * 2 > 2" - - "2 < (by(namespace) | count())" # static value needs to be on the RHS to remove conflicts with scalar expressions + - '(by(namespace) | count()) > 2 * 2' # scalar expressions are currently not allowed in scalar pipelines + - '(by(namespace) | count()) * 2 > 2' + - '2 < (by(namespace) | count())' # static value needs to be on the RHS to remove conflicts with scalar expressions # validate_fails parse correctly and return an error **besides unsupported** when calling .validate() validate_fails: # span expressions must evaluate to a boolean - - "{ status }" - - "{ kind }" - - "{ ok }" - - "{ 1.1 }" - - "{ 1h }" + - '{ status }' + - '{ kind }' + - '{ ok }' + - '{ 1.1 }' + - '{ 1h }' - '{ "foo" }' - - "{ 1 + 1 }" + - '{ 1 + 1 }' # binary operators - incorrect types - '{ 1 + "foo" = 1 }' - - "{ 1 - true = 1 }" - - "{ 1 / ok = 1 }" - - "{ 1 ^ name = 1 }" + - '{ 1 - true = 1 }' + - '{ 1 / ok = 1 }' + - '{ 1 ^ name = 1 }' - '{ 1 = "foo" }' - - "{ 1 != true }" - - "{ 1 > ok }" - - "{ 1 = name }" - - "{ 1 =~ 2}" + - '{ 1 != true }' + - '{ 1 > ok }' + - '{ 1 = name }' + - '{ 1 =~ 2}' + - '{ 1 !~ 2}' - '{ 1 && "foo" }' - - "{ 1 || 
ok }" - - "{ true || 1.1 }" - - "{ status > ok }" - - "{ kind < consumer }" + - '{ 1 || ok }' + - '{ true || 1.1 }' + - '{ status > ok }' + - '{ kind < consumer }' # unary operators - incorrect types - - "{ -true }" + - '{ -true }' - '{ -"foo" = "bar" }' - - "{ -ok = status }" + - '{ -ok = status }' - '{ -name = "foo" }' - '{ !"foo" = "bar" }' - - "{ !ok = status }" - - "{ !consumer = kind }" + - '{ !ok = status }' + - '{ !consumer = kind }' - '{ !name = "foo" }' - - "{ !1 = 1 }" - - "{ !1h = 1 }" - - "{ !1.1 = 1.1 }" + - '{ !1 = 1 }' + - '{ !1h = 1 }' + - '{ !1.1 = 1.1 }' # scalar expressions must evaluate to a number - - "min(1 = 3) = 1" + - 'min(1 = 3) = 1' # scalar expressions must reference the span - - "sum(3) = 2" - - "sum(3) = min(14)" - - "min(2h) < max(duration)" - - "min(3) = max(duration)" - - "min(1) = max(2) + 3" - - "min(1.1 - 3) > 1" - - "max(1h + 2h) > 1" + - 'sum(3) = 2' + - 'sum(3) = min(14)' + - 'min(2h) < max(duration)' + - 'min(3) = max(duration)' + - 'min(1) = max(2) + 3' + - 'min(1.1 - 3) > 1' + - 'max(1h + 2h) > 1' # unsupported parse correctly and return an unsupported error when calling .validate() unsupported: # coalesce - will be valid when supported - - "{ true } | coalesce()" - - "{ true } | by(1 + .a) | coalesce()" + - '{ true } | coalesce()' + - '{ true } | by(1 + .a) | coalesce()' # by - will be valid when supported - - "{ true } | by(.a)" - - "{ true } | by(1 + .a)" - - "by(.a) | { true }" - - "{ true } | by(name) | count() > 2" - - "{ true } | by(.field) | avg(.b) = 2" + - '{ true } | by(.a)' + - '{ true } | by(1 + .a)' + - 'by(.a) | { true }' + - '{ true } | by(name) | count() > 2' + - '{ true } | by(.field) | avg(.b) = 2' # by - will *not* be valid when supported - group expressions must reference the span - - "{ true } | by(1)" + - '{ true } | by(1)' - '{ true } | by("foo")' # complex scalar filters - will be valid when supported - - "min(.field) < max(duration)" - - "sum(.field) = min(.field)" - - "min(.field) + max(.field) > 
1" - - "min(.field) + max(childCount) > max(duration) - min(.field)" - - "min(childCount) < 2 / 6" - - "max(1 - (2 + .field)) < avg(3 * duration ^ 2)" + - 'min(.field) < max(duration)' + - 'sum(.field) = min(.field)' + - 'min(.field) + max(.field) > 1' + - 'min(.field) + max(childCount) > max(duration) - min(.field)' + - 'min(childCount) < 2 / 6' + - 'max(1 - (2 + .field)) < avg(3 * duration ^ 2)' # aggregates - will be valid when supported - - "min(childCount) < 2" - - "{ true } | max(parent.a) = 1" - - "{ true } | by(3 * .field - 2) | max(duration) < 1s" - - "{ .http.status = 200 } | max(.field) - min(.field) > 3" + - 'min(childCount) < 2' + - '{ true } | max(parent.a) = 1' + - '{ true } | by(3 * .field - 2) | max(duration) < 1s' + - '{ .http.status = 200 } | max(.field) - min(.field) > 3' # parent - will be valid when supported - - "{ parent.a != 3 }" - - "{ parent.resource.a && true }" - - "{ parent.span.a > 3 }" - - "{ parent.duration = 1h }" - - "{ parent = nil }" - - "{ (-(3 / 2) * .test - parent.blerg + .other)^3 = 2 }" + - '{ parent.a != 3 }' + - '{ parent.resource.a && true }' + - '{ parent.span.a > 3 }' + - '{ parent.duration = 1h }' + - '{ parent = nil }' + - '{ (-(3 / 2) * .test - parent.blerg + .other)^3 = 2 }' # parent - will not be valid when supported - - "{ parent }" - - "{ 1 % parent = 1 }" - - "{ 1 >= parent }" - - "{ -parent = nil }" - - "{ !parent = nil }" + - '{ parent }' + - '{ 1 % parent = 1 }' + - '{ 1 >= parent }' + - '{ -parent = nil }' + - '{ !parent = nil }' # nil - will be valid when supported - - "{ .foo = nil }" + - '{ .foo = nil }' # childCount - will be valid when supported - - "{ 1 = childCount }" + - '{ 1 = childCount }' # childCount - will be invalid when supported - '{ "foo" = childCount }' # spanset operations - will be valid when supported - - "{ true } >> { true }" - - "{ true } > { true }" - - "{ true } ~ { true }" - - "({ true } | count() > 1 | { false }) >> ({ true } | count() > 1 | { false })" - - "({ true } | count() > 
1 | { false }) > ({ true } | count() > 1 | { false })" - - "({ true } | count() > 1 | { false }) ~ ({ true } | count() > 1 | { false })" + - '{ true } >> { true }' + - '{ true } > { true }' + - '{ true } ~ { true }' + - '({ true } | count() > 1 | { false }) >> ({ true } | count() > 1 | { false })' + - '({ true } | count() > 1 | { false }) > ({ true } | count() > 1 | { false })' + - '({ true } | count() > 1 | { false }) ~ ({ true } | count() > 1 | { false })' # spanset pipelines + scalar filters - will be valid when supported - - "{ true } | count() + count() = 1" - - "({ true } | count()) + ({ true } | count()) = 1" - - "({ true } | count()) - ({ true } | count()) <= 1" - - "({ true } | count()) / ({ true } | count()) > ({ true } | count()) / ({ true } | count())" - - "({ true } | count()) * ({ true } | count()) < ({ true } | count()) / ({ true } | count())" - - "({ .http.status = 200 } | count()) + ({ name = `foo` } | avg(duration)) = 2" - - "({ .a } | count()) > ({ .b } | count())" + - '{ true } | count() + count() = 1' + - '({ true } | count()) + ({ true } | count()) = 1' + - '({ true } | count()) - ({ true } | count()) <= 1' + - '({ true } | count()) / ({ true } | count()) > ({ true } | count()) / ({ true } | count())' + - '({ true } | count()) * ({ true } | count()) < ({ true } | count()) / ({ true } | count())' + - '({ .http.status = 200 } | count()) + ({ name = `foo` } | avg(duration)) = 2' + - '({ .a } | count()) > ({ .b } | count())' # other scalar filters. no idea if these should be supported - - "3 = 2" # naked scalar filter, technically allowed - - "avg(.field) > 1 - 3" # scalar expressions in scalar filters are currently not allowed. possible future addition + - '3 = 2' # naked scalar filter, technically allowed + - 'avg(.field) > 1 - 3' # scalar expressions in scalar filters are currently not allowed. possible future addition # parsed and the ast is dumped to stdout. this is a debugging tool -dump: +dump: \ No newline at end of file