From f8449c8c60430e6668e5bbda45c372d7b6ab69ef Mon Sep 17 00:00:00 2001 From: Andrew Wilkins Date: Wed, 26 Jun 2024 18:32:41 +0800 Subject: [PATCH] [exporter/elasticsearch] remove dedup config Remove the dedup configuration setting, and always deduplicate. Elasticsearch does not permit duplicate keys in JSON objects, and this configuration is adding more complexity to the code than it's worth. --- .chloggen/elasticsearch-always-dedup.yaml | 29 ++++ cmd/otelcontribcol/go.mod | 2 +- cmd/otelcontribcol/go.sum | 4 +- exporter/elasticsearchexporter/README.md | 4 - exporter/elasticsearchexporter/config.go | 10 -- exporter/elasticsearchexporter/config_test.go | 3 - exporter/elasticsearchexporter/exporter.go | 1 - exporter/elasticsearchexporter/factory.go | 1 - .../elasticsearchexporter/factory_test.go | 31 ---- exporter/elasticsearchexporter/go.mod | 3 + exporter/elasticsearchexporter/go.sum | 6 + .../integrationtest/go.sum | 6 + .../internal/objmodel/objmodel.go | 16 +- .../internal/objmodel/objmodel_test.go | 36 ----- exporter/elasticsearchexporter/model.go | 20 +-- exporter/elasticsearchexporter/model_test.go | 137 ++++++++++-------- 16 files changed, 135 insertions(+), 174 deletions(-) create mode 100644 .chloggen/elasticsearch-always-dedup.yaml diff --git a/.chloggen/elasticsearch-always-dedup.yaml b/.chloggen/elasticsearch-always-dedup.yaml new file mode 100644 index 000000000000..bee8a0bf6107 --- /dev/null +++ b/.chloggen/elasticsearch-always-dedup.yaml @@ -0,0 +1,29 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: breaking + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: exporter/elasticsearch + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Remove "dedup" configuration, always de-duplicate. + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [33773] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + Elasticsearch does not permit duplicate keys in JSON objects, + so there is no value in being able to disable deduplication. + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [user] diff --git a/cmd/otelcontribcol/go.mod b/cmd/otelcontribcol/go.mod index cd3a3c24359f..c4c2c9c65a68 100644 --- a/cmd/otelcontribcol/go.mod +++ b/cmd/otelcontribcol/go.mod @@ -723,7 +723,7 @@ require ( github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 // indirect github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common v1.0.949 // indirect github.com/tg123/go-htpasswd v1.2.2 // indirect - github.com/tidwall/gjson v1.14.2 // indirect + github.com/tidwall/gjson v1.17.1 // indirect github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/pretty v1.2.0 // indirect github.com/tidwall/tinylru v1.1.0 // indirect diff --git a/cmd/otelcontribcol/go.sum b/cmd/otelcontribcol/go.sum index da4f22eb0c82..87bd2db94d60 100644 --- a/cmd/otelcontribcol/go.sum +++ b/cmd/otelcontribcol/go.sum @@ -2220,8 +2220,8 @@ github.com/testcontainers/testcontainers-go v0.31.0/go.mod h1:D2lAoA0zUFiSY+eAfl github.com/tg123/go-htpasswd v1.2.2 h1:tmNccDsQ+wYsoRfiONzIhDm5OkVHQzN3w4FOBAlN6BY= github.com/tg123/go-htpasswd v1.2.2/go.mod h1:FcIrK0J+6zptgVwK1JDlqyajW/1B4PtuJ/FLWl7nx8A= github.com/tidwall/gjson v1.10.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= -github.com/tidwall/gjson v1.14.2 h1:6BBkirS0rAHjumnjHF6qgy5d2YAJ1TLIaFE2lzfOLqo= -github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/gjson v1.17.1 h1:wlYEnwqAHgzmhNUFfw7Xalt2JzQvsMx2Se4PcoFCT/U= +github.com/tidwall/gjson v1.17.1/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= diff --git a/exporter/elasticsearchexporter/README.md b/exporter/elasticsearchexporter/README.md index 6c2db78b52da..970362bd4f44 100644 --- a/exporter/elasticsearchexporter/README.md +++ b/exporter/elasticsearchexporter/README.md @@ -131,10 +131,6 @@ behaviours, which may be configured through the following settings: field names for span events. - `fields` (optional): Configure additional fields mappings. - `file` (optional): Read additional field mappings from the provided YAML file. - - `dedup` (default=true; DEPRECATED, in future deduplication will always be enabled): - Try to find and remove duplicate fields/attributes from events before publishing - to Elasticsearch. Some structured logging libraries can produce duplicate fields - (for example zap). Elasticsearch will reject documents that have duplicate fields. - `dedot` (default=true; DEPRECATED, in future dedotting will always be enabled for ECS mode, and never for other modes): When enabled attributes with `.` will be split into proper json objects. diff --git a/exporter/elasticsearchexporter/config.go b/exporter/elasticsearchexporter/config.go index 1bcabb99a325..8fff91d7a5fe 100644 --- a/exporter/elasticsearchexporter/config.go +++ b/exporter/elasticsearchexporter/config.go @@ -157,13 +157,6 @@ type MappingsSettings struct { // File to read additional fields mappings from. File string `mapstructure:"file"` - // Try to find and remove duplicate fields - // - // Deprecated: [v0.104.0] deduplication will always be applied in future, - // with no option to disable. Disabling deduplication is not meaningful, - // as Elasticsearch will reject documents with duplicate JSON object keys. - Dedup bool `mapstructure:"dedup"` - // Deprecated: [v0.104.0] dedotting will always be applied for ECS mode // in future, and never for other modes. Elasticsearch's "dot_expander" // Ingest processor may be used as an alternative for non-ECS modes. @@ -319,9 +312,6 @@ func (cfg *Config) MappingMode() MappingMode { } func logConfigDeprecationWarnings(cfg *Config, logger *zap.Logger) { - if !cfg.Mapping.Dedup { - logger.Warn("dedup has been deprecated, and will always be enabled in future") - } if cfg.Mapping.Dedot && cfg.MappingMode() != MappingECS || !cfg.Mapping.Dedot && cfg.MappingMode() == MappingECS { logger.Warn("dedot has been deprecated: in the future, dedotting will always be performed in ECS mode only") } diff --git a/exporter/elasticsearchexporter/config_test.go b/exporter/elasticsearchexporter/config_test.go index c409f175497e..b6268b154545 100644 --- a/exporter/elasticsearchexporter/config_test.go +++ b/exporter/elasticsearchexporter/config_test.go @@ -100,7 +100,6 @@ func TestConfig(t *testing.T) { }, Mapping: MappingsSettings{ Mode: "none", - Dedup: true, Dedot: true, }, LogstashFormat: LogstashFormatSettings{ @@ -162,7 +161,6 @@ func TestConfig(t *testing.T) { }, Mapping: MappingsSettings{ Mode: "none", - Dedup: true, Dedot: true, }, LogstashFormat: LogstashFormatSettings{ @@ -224,7 +222,6 @@ func TestConfig(t *testing.T) { }, Mapping: MappingsSettings{ Mode: "none", - Dedup: true, Dedot: true, }, LogstashFormat: LogstashFormatSettings{ diff --git a/exporter/elasticsearchexporter/exporter.go b/exporter/elasticsearchexporter/exporter.go index 6cb64da0983d..d092fb6545ed 100644 --- a/exporter/elasticsearchexporter/exporter.go +++ b/exporter/elasticsearchexporter/exporter.go @@ -44,7 +44,6 @@ func newExporter( } model := &encodeModel{ - dedup: cfg.Mapping.Dedup, dedot: cfg.Mapping.Dedot, mode: cfg.MappingMode(), } diff --git a/exporter/elasticsearchexporter/factory.go b/exporter/elasticsearchexporter/factory.go index 7826fb59a47e..d0fc449352f3 100644 --- a/exporter/elasticsearchexporter/factory.go +++ b/exporter/elasticsearchexporter/factory.go @@ -76,7 +76,6 @@ func createDefaultConfig() component.Config { }, Mapping: MappingsSettings{ Mode: "none", - Dedup: true, Dedot: true, }, LogstashFormat: LogstashFormatSettings{ diff --git a/exporter/elasticsearchexporter/factory_test.go b/exporter/elasticsearchexporter/factory_test.go index 9425ec2723d7..5d407de25c9b 100644 --- a/exporter/elasticsearchexporter/factory_test.go +++ b/exporter/elasticsearchexporter/factory_test.go @@ -97,37 +97,6 @@ func TestFactory_CreateLogsAndTracesExporterWithDeprecatedIndexOption(t *testing require.NoError(t, tracesExporter.Shutdown(context.Background())) } -func TestFactory_DedupDeprecated(t *testing.T) { - factory := NewFactory() - cfg := withDefaultConfig(func(cfg *Config) { - cfg.Endpoint = "http://testing.invalid:9200" - cfg.Mapping.Dedup = false - cfg.Mapping.Dedot = false // avoid dedot warnings - }) - - loggerCore, logObserver := observer.New(zap.WarnLevel) - set := exportertest.NewNopSettings() - set.Logger = zap.New(loggerCore) - - logsExporter, err := factory.CreateLogsExporter(context.Background(), set, cfg) - require.NoError(t, err) - require.NoError(t, logsExporter.Shutdown(context.Background())) - - tracesExporter, err := factory.CreateTracesExporter(context.Background(), set, cfg) - require.NoError(t, err) - require.NoError(t, tracesExporter.Shutdown(context.Background())) - - metricsExporter, err := factory.CreateMetricsExporter(context.Background(), set, cfg) - require.NoError(t, err) - require.NoError(t, metricsExporter.Shutdown(context.Background())) - - records := logObserver.AllUntimed() - assert.Len(t, records, 3) - assert.Equal(t, "dedup has been deprecated, and will always be enabled in future", records[0].Message) - assert.Equal(t, "dedup has been deprecated, and will always be enabled in future", records[1].Message) - assert.Equal(t, "dedup has been deprecated, and will always be enabled in future", records[2].Message) -} - func TestFactory_DedotDeprecated(t *testing.T) { loggerCore, logObserver := observer.New(zap.WarnLevel) set := exportertest.NewNopSettings() diff --git a/exporter/elasticsearchexporter/go.mod b/exporter/elasticsearchexporter/go.mod index 9395eeb59866..370bdd3cf2a9 100644 --- a/exporter/elasticsearchexporter/go.mod +++ b/exporter/elasticsearchexporter/go.mod @@ -11,6 +11,7 @@ require ( github.com/open-telemetry/opentelemetry-collector-contrib/internal/common v0.104.0 github.com/open-telemetry/opentelemetry-collector-contrib/internal/coreinternal v0.104.0 github.com/stretchr/testify v1.9.0 + github.com/tidwall/gjson v1.17.1 go.opentelemetry.io/collector/component v0.104.0 go.opentelemetry.io/collector/config/configauth v0.104.0 go.opentelemetry.io/collector/config/configcompression v1.11.0 @@ -63,6 +64,8 @@ require ( github.com/prometheus/common v0.54.0 // indirect github.com/prometheus/procfs v0.15.0 // indirect github.com/rs/cors v1.10.1 // indirect + github.com/tidwall/match v1.1.1 // indirect + github.com/tidwall/pretty v1.2.0 // indirect go.elastic.co/apm/module/apmzap/v2 v2.6.0 // indirect go.elastic.co/apm/v2 v2.6.0 // indirect go.elastic.co/fastjson v1.3.0 // indirect diff --git a/exporter/elasticsearchexporter/go.sum b/exporter/elasticsearchexporter/go.sum index 49f99f1f8356..4ca1a81c8123 100644 --- a/exporter/elasticsearchexporter/go.sum +++ b/exporter/elasticsearchexporter/go.sum @@ -104,6 +104,12 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/tidwall/gjson v1.17.1 h1:wlYEnwqAHgzmhNUFfw7Xalt2JzQvsMx2Se4PcoFCT/U= +github.com/tidwall/gjson v1.17.1/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= +github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= +github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs= +github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= go.elastic.co/apm/module/apmelasticsearch/v2 v2.6.0 h1:ukMcwyMaDXsS1dRK2qRYXT2AsfwaUy74TOOYCqkWJow= diff --git a/exporter/elasticsearchexporter/integrationtest/go.sum b/exporter/elasticsearchexporter/integrationtest/go.sum index 86989a9f2eaf..aff06da6dff2 100644 --- a/exporter/elasticsearchexporter/integrationtest/go.sum +++ b/exporter/elasticsearchexporter/integrationtest/go.sum @@ -234,6 +234,12 @@ github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsT github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= +github.com/tidwall/gjson v1.17.1 h1:wlYEnwqAHgzmhNUFfw7Xalt2JzQvsMx2Se4PcoFCT/U= +github.com/tidwall/gjson v1.17.1/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= +github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= +github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs= +github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= github.com/tilinna/clock v1.1.0 h1:6IQQQCo6KoBxVudv6gwtY8o4eDfhHo8ojA5dP0MfhSs= github.com/tilinna/clock v1.1.0/go.mod h1:ZsP7BcY7sEEz7ktc0IVy8Us6boDrK8VradlKRUGfOao= github.com/tklauser/go-sysconf v0.3.13 h1:GBUpcahXSpR2xN01jhkNAbTLRk2Yzgggk8IM08lq3r4= diff --git a/exporter/elasticsearchexporter/internal/objmodel/objmodel.go b/exporter/elasticsearchexporter/internal/objmodel/objmodel.go index ef80136ed395..120e78608558 100644 --- a/exporter/elasticsearchexporter/internal/objmodel/objmodel.go +++ b/exporter/elasticsearchexporter/internal/objmodel/objmodel.go @@ -180,15 +180,14 @@ func (doc *Document) AddEvents(key string, events ptrace.SpanEventSlice) { } } -// Sort sorts all fields in the document by key name. -func (doc *Document) Sort() { +func (doc *Document) sort() { sort.SliceStable(doc.fields, func(i, j int) bool { return doc.fields[i].key < doc.fields[j].key }) for i := range doc.fields { fld := &doc.fields[i] - fld.value.Sort() + fld.value.sort() } } @@ -199,7 +198,7 @@ func (doc *Document) Sort() { func (doc *Document) Dedup() { // 1. Always ensure the fields are sorted, Dedup support requires // Fields to be sorted. - doc.Sort() + doc.sort() // 2. rename fields if a primitive value is overwritten by an object. // For example the pair (path.x=1, path.x.a="test") becomes: @@ -223,7 +222,7 @@ func (doc *Document) Dedup() { } } if renamed { - doc.Sort() + doc.sort() } // 3. mark duplicates as 'ignore' @@ -423,14 +422,13 @@ func ValueFromAttribute(attr pcommon.Value) Value { } } -// Sort recursively sorts all keys in docuemts held by the value. -func (v *Value) Sort() { +func (v *Value) sort() { switch v.kind { case KindObject: - v.doc.Sort() + v.doc.sort() case KindArr: for i := range v.arr { - v.arr[i].Sort() + v.arr[i].sort() } } } diff --git a/exporter/elasticsearchexporter/internal/objmodel/objmodel_test.go b/exporter/elasticsearchexporter/internal/objmodel/objmodel_test.go index 320a769f6419..ab22115cf30f 100644 --- a/exporter/elasticsearchexporter/internal/objmodel/objmodel_test.go +++ b/exporter/elasticsearchexporter/internal/objmodel/objmodel_test.go @@ -80,46 +80,11 @@ func TestObjectModel_CreateMap(t *testing.T) { for name, test := range tests { t.Run(name, func(t *testing.T) { doc := test.build() - doc.Sort() assert.Equal(t, test.want, doc) }) } } -func TestDocument_Sort(t *testing.T) { - tests := map[string]struct { - build func() Document - want Document - }{ - "keys are sorted": { - build: func() (doc Document) { - doc.AddInt("z", 26) - doc.AddInt("a", 1) - return doc - }, - want: Document{[]field{{"a", IntValue(1)}, {"z", IntValue(26)}}}, - }, - "sorting is stable": { - build: func() (doc Document) { - doc.AddInt("a", 1) - doc.AddInt("c", 3) - doc.AddInt("a", 2) - return doc - }, - want: Document{[]field{{"a", IntValue(1)}, {"a", IntValue(2)}, {"c", IntValue(3)}}}, - }, - } - - for name, test := range tests { - t.Run(name, func(t *testing.T) { - doc := test.build() - doc.Sort() - assert.Equal(t, test.want, doc) - }) - } - -} - func TestObjectModel_Dedup(t *testing.T) { tests := map[string]struct { build func() Document @@ -200,7 +165,6 @@ func TestObjectModel_Dedup(t *testing.T) { for name, test := range tests { t.Run(name, func(t *testing.T) { doc := test.build() - doc.Sort() doc.Dedup() assert.Equal(t, test.want, doc) }) diff --git a/exporter/elasticsearchexporter/model.go b/exporter/elasticsearchexporter/model.go index ccf76b5afdf6..737eec44903f 100644 --- a/exporter/elasticsearchexporter/model.go +++ b/exporter/elasticsearchexporter/model.go @@ -76,7 +76,6 @@ type mappingModel interface { // // See: https://github.com/open-telemetry/oteps/blob/master/text/logs/0097-log-data-model.md type encodeModel struct { - dedup bool dedot bool mode MappingMode } @@ -95,13 +94,9 @@ func (m *encodeModel) encodeLog(resource pcommon.Resource, record plog.LogRecord default: document = m.encodeLogDefaultMode(resource, record, scope) } + document.Dedup() var buf bytes.Buffer - if m.dedup { - document.Dedup() - } else if m.dedot { - document.Sort() - } err := document.Serialize(&buf, m.dedot) return buf.Bytes(), err } @@ -171,11 +166,7 @@ func (m *encodeModel) encodeLogECSMode(resource pcommon.Resource, record plog.Lo } func (m *encodeModel) encodeDocument(document objmodel.Document) ([]byte, error) { - if m.dedup { - document.Dedup() - } else if m.dedot { - document.Sort() - } + document.Dedup() var buf bytes.Buffer err := document.Serialize(&buf, m.dedot) @@ -225,12 +216,7 @@ func (m *encodeModel) encodeSpan(resource pcommon.Resource, span ptrace.Span, sc m.encodeEvents(&document, span.Events()) document.AddInt("Duration", durationAsMicroseconds(span.StartTimestamp().AsTime(), span.EndTimestamp().AsTime())) // unit is microseconds document.AddAttributes("Scope", scopeToAttributes(scope)) - - if m.dedup { - document.Dedup() - } else if m.dedot { - document.Sort() - } + document.Dedup() var buf bytes.Buffer err := document.Serialize(&buf, m.dedot) diff --git a/exporter/elasticsearchexporter/model_test.go b/exporter/elasticsearchexporter/model_test.go index d3784e5081f1..2a9be043fef3 100644 --- a/exporter/elasticsearchexporter/model_test.go +++ b/exporter/elasticsearchexporter/model_test.go @@ -4,15 +4,18 @@ package elasticsearchexporter import ( + "bytes" "fmt" "os" "sort" + "strconv" "strings" "testing" "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "github.com/tidwall/gjson" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/plog" "go.opentelemetry.io/collector/pdata/pmetric" @@ -47,7 +50,7 @@ var expectedLogBodyWithEmptyTimestamp = `{"@timestamp":"1970-01-01T00:00:00.0000 var expectedLogBodyDeDottedWithEmptyTimestamp = `{"@timestamp":"1970-01-01T00:00:00.000000000Z","Attributes":{"log-attr1":"value1"},"Body":"log-body","Resource":{"foo":{"bar":"baz"},"key1":"value1"},"Scope":{"name":"","version":""},"SeverityNumber":0,"TraceFlags":0}` func TestEncodeSpan(t *testing.T) { - model := &encodeModel{dedup: true, dedot: false} + model := &encodeModel{dedot: false} td := mockResourceSpans() spanByte, err := model.encodeSpan(td.ResourceSpans().At(0).Resource(), td.ResourceSpans().At(0).ScopeSpans().At(0).Spans().At(0), td.ResourceSpans().At(0).ScopeSpans().At(0).Scope()) assert.NoError(t, err) @@ -56,7 +59,7 @@ func TestEncodeSpan(t *testing.T) { func TestEncodeLog(t *testing.T) { t.Run("empty timestamp with observedTimestamp override", func(t *testing.T) { - model := &encodeModel{dedup: true, dedot: false} + model := &encodeModel{dedot: false} td := mockResourceLogs() td.ScopeLogs().At(0).LogRecords().At(0).SetObservedTimestamp(pcommon.NewTimestampFromTime(time.Date(2023, 4, 19, 3, 4, 5, 6, time.UTC))) logByte, err := model.encodeLog(td.Resource(), td.ScopeLogs().At(0).LogRecords().At(0), td.ScopeLogs().At(0).Scope()) @@ -65,7 +68,7 @@ func TestEncodeLog(t *testing.T) { }) t.Run("both timestamp and observedTimestamp empty", func(t *testing.T) { - model := &encodeModel{dedup: true, dedot: false} + model := &encodeModel{dedot: false} td := mockResourceLogs() logByte, err := model.encodeLog(td.Resource(), td.ScopeLogs().At(0).LogRecords().At(0), td.ScopeLogs().At(0).Scope()) assert.NoError(t, err) @@ -73,7 +76,7 @@ func TestEncodeLog(t *testing.T) { }) t.Run("dedot true", func(t *testing.T) { - model := &encodeModel{dedup: true, dedot: true} + model := &encodeModel{dedot: true} td := mockResourceLogs() td.Resource().Attributes().PutStr("foo.bar", "baz") logByte, err := model.encodeLog(td.Resource(), td.ScopeLogs().At(0).LogRecords().At(0), td.ScopeLogs().At(0).Scope()) @@ -89,7 +92,6 @@ func TestEncodeMetric(t *testing.T) { // Encode the metrics. model := &encodeModel{ dedot: true, - dedup: true, mode: MappingECS, } @@ -325,7 +327,6 @@ func TestEncodeLogECSModeDuplication(t *testing.T) { m := encodeModel{ mode: MappingECS, dedot: true, - dedup: true, } doc, err := m.encodeLog(resource, record, scope) require.NoError(t, err) @@ -391,11 +392,13 @@ func TestEncodeLogECSMode(t *testing.T) { observedTimestamp := pcommon.Timestamp(1710273641123456789) record.SetObservedTimestamp(observedTimestamp) + var buf bytes.Buffer m := encodeModel{} doc := m.encodeLogECSMode(resource, record, scope) + require.NoError(t, doc.Serialize(&buf, false)) - expectedDocFields := pcommon.NewMap() - err = expectedDocFields.FromRaw(map[string]any{ + require.JSONEq(t, `{ + "@timestamp": "2024-03-12T20:00:41.123456789Z", "service.name": "foo.bar", "service.version": "1.1.0", "service.node.name": "i-103de39e0a", @@ -409,6 +412,7 @@ func TestEncodeLogECSMode(t *testing.T) { "container.name": "happy-seger", "container.id": "e69cc5d3dda", "container.image.name": "my-app", + "container.image.tag": ["v3.4.0"], "container.runtime": "docker", "host.hostname": "i-103de39e0a.gke.us-west-1b.cloud.google.com", "host.name": "i-103de39e0a.gke.us-west-1b.cloud.google.com", @@ -434,18 +438,8 @@ func TestEncodeLogECSMode(t *testing.T) { "kubernetes.node.name": "node-1", "kubernetes.pod.name": "opentelemetry-pod-autoconf", "kubernetes.pod.uid": "275ecb36-5aa8-4c2a-9c47-d8bb681b9aff", - "kubernetes.deployment.name": "coredns", - }) - require.NoError(t, err) - - expectedDoc := objmodel.Document{} - expectedDoc.AddAttributes("", expectedDocFields) - expectedDoc.AddTimestamp("@timestamp", observedTimestamp) - expectedDoc.Add("container.image.tag", objmodel.ArrValue(objmodel.StringValue("v3.4.0"))) - - doc.Sort() - expectedDoc.Sort() - require.Equal(t, expectedDoc, doc) + "kubernetes.deployment.name": "coredns" + }`, buf.String()) } func TestEncodeLogECSModeAgentName(t *testing.T) { @@ -522,16 +516,14 @@ func TestEncodeLogECSModeAgentName(t *testing.T) { timestamp := pcommon.Timestamp(1710373859123456789) record.SetTimestamp(timestamp) + var buf bytes.Buffer m := encodeModel{} doc := m.encodeLogECSMode(resource, record, scope) - - expectedDoc := objmodel.Document{} - expectedDoc.AddTimestamp("@timestamp", timestamp) - expectedDoc.AddString("agent.name", test.expectedAgentName) - - doc.Sort() - expectedDoc.Sort() - require.Equal(t, expectedDoc, doc) + require.NoError(t, doc.Serialize(&buf, false)) + require.JSONEq(t, fmt.Sprintf(`{ + "@timestamp": "2024-03-13T23:50:59.123456789Z", + "agent.name": %q + }`, test.expectedAgentName), buf.String()) }) } } @@ -576,17 +568,23 @@ func TestEncodeLogECSModeAgentVersion(t *testing.T) { timestamp := pcommon.Timestamp(1710373859123456789) record.SetTimestamp(timestamp) + var buf bytes.Buffer m := encodeModel{} doc := m.encodeLogECSMode(resource, record, scope) - - expectedDoc := objmodel.Document{} - expectedDoc.AddTimestamp("@timestamp", timestamp) - expectedDoc.AddString("agent.name", "otlp") - expectedDoc.AddString("agent.version", test.expectedAgentVersion) - - doc.Sort() - expectedDoc.Sort() - require.Equal(t, expectedDoc, doc) + require.NoError(t, doc.Serialize(&buf, false)) + + if test.expectedAgentVersion == "" { + require.JSONEq(t, `{ + "@timestamp": "2024-03-13T23:50:59.123456789Z", + "agent.name": "otlp" + }`, buf.String()) + } else { + require.JSONEq(t, fmt.Sprintf(`{ + "@timestamp": "2024-03-13T23:50:59.123456789Z", + "agent.name": "otlp", + "agent.version": %q + }`, test.expectedAgentVersion), buf.String()) + } }) } } @@ -677,25 +675,23 @@ func TestEncodeLogECSModeHostOSType(t *testing.T) { timestamp := pcommon.Timestamp(1710373859123456789) record.SetTimestamp(timestamp) + var buf bytes.Buffer m := encodeModel{} doc := m.encodeLogECSMode(resource, record, scope) + require.NoError(t, doc.Serialize(&buf, false)) - expectedDoc := objmodel.Document{} - expectedDoc.AddTimestamp("@timestamp", timestamp) - expectedDoc.AddString("agent.name", "otlp") + expectedJSON := `{"@timestamp":"2024-03-13T23:50:59.123456789Z", "agent.name":"otlp"` if test.expectedHostOsName != "" { - expectedDoc.AddString("host.os.name", test.expectedHostOsName) + expectedJSON += `, "host.os.name":` + strconv.Quote(test.expectedHostOsName) } if test.expectedHostOsType != "" { - expectedDoc.AddString("host.os.type", test.expectedHostOsType) + expectedJSON += `, "host.os.type":` + strconv.Quote(test.expectedHostOsType) } if test.expectedHostOsPlatform != "" { - expectedDoc.AddString("host.os.platform", test.expectedHostOsPlatform) + expectedJSON += `, "host.os.platform":` + strconv.Quote(test.expectedHostOsPlatform) } - - doc.Sort() - expectedDoc.Sort() - require.Equal(t, expectedDoc, doc) + expectedJSON += "}" + require.JSONEq(t, expectedJSON, buf.String()) }) } } @@ -704,16 +700,16 @@ func TestEncodeLogECSModeTimestamps(t *testing.T) { tests := map[string]struct { timeUnixNano int64 observedTimeUnixNano int64 - expectedTimestamp time.Time + expectedTimestamp string }{ "only_observed_set": { observedTimeUnixNano: 1710273641123456789, - expectedTimestamp: time.Unix(0, 1710273641123456789), + expectedTimestamp: "2024-03-12T20:00:41.123456789Z", }, "both_set": { timeUnixNano: 1710273639345678901, observedTimeUnixNano: 1710273641123456789, - expectedTimestamp: time.Unix(0, 1710273639345678901), + expectedTimestamp: "2024-03-12T20:00:39.345678901Z", }, } @@ -730,16 +726,14 @@ func TestEncodeLogECSModeTimestamps(t *testing.T) { record.SetObservedTimestamp(pcommon.Timestamp(test.observedTimeUnixNano)) } + var buf bytes.Buffer m := encodeModel{} doc := m.encodeLogECSMode(resource, record, scope) + require.NoError(t, doc.Serialize(&buf, false)) - expectedDoc := objmodel.Document{} - expectedDoc.AddTimestamp("@timestamp", pcommon.NewTimestampFromTime(test.expectedTimestamp)) - expectedDoc.AddString("agent.name", "otlp") - - doc.Sort() - expectedDoc.Sort() - require.Equal(t, expectedDoc, doc) + require.JSONEq(t, fmt.Sprintf( + `{"@timestamp":%q,"agent.name":"otlp"}`, test.expectedTimestamp, + ), buf.String()) }) } } @@ -879,10 +873,35 @@ func TestMapLogAttributesToECS(t *testing.T) { var doc objmodel.Document encodeAttributesECSMode(&doc, test.attrs(), test.conversionMap, test.preserveMap) - doc.Sort() expectedDoc := test.expectedDoc() - expectedDoc.Sort() require.Equal(t, expectedDoc, doc) }) } } + +func TestEncodeLogScalarObjectConflict(t *testing.T) { + // If there is an attribute named "foo", and another called "foo.bar", + // then "foo" will be renamed to "foo.value". + model := &encodeModel{} + td := mockResourceLogs() + td.ScopeLogs().At(0).LogRecords().At(0).Attributes().PutStr("foo", "scalar") + td.ScopeLogs().At(0).LogRecords().At(0).Attributes().PutStr("foo.bar", "baz") + encoded, err := model.encodeLog(td.Resource(), td.ScopeLogs().At(0).LogRecords().At(0), td.ScopeLogs().At(0).Scope()) + assert.NoError(t, err) + + assert.True(t, gjson.ValidBytes(encoded)) + assert.False(t, gjson.GetBytes(encoded, "Attributes\\.foo").Exists()) + fooValue := gjson.GetBytes(encoded, "Attributes\\.foo\\.value") + fooBar := gjson.GetBytes(encoded, "Attributes\\.foo\\.bar") + assert.Equal(t, "scalar", fooValue.Str) + assert.Equal(t, "baz", fooBar.Str) + + // If there is an attribute named "foo.value", then "foo" would be omitted rather than renamed. + td.ScopeLogs().At(0).LogRecords().At(0).Attributes().PutStr("foo.value", "foovalue") + encoded, err = model.encodeLog(td.Resource(), td.ScopeLogs().At(0).LogRecords().At(0), td.ScopeLogs().At(0).Scope()) + assert.NoError(t, err) + + assert.False(t, gjson.GetBytes(encoded, "Attributes\\.foo").Exists()) + fooValue = gjson.GetBytes(encoded, "Attributes\\.foo\\.value") + assert.Equal(t, "foovalue", fooValue.Str) +}