diff --git a/CHANGELOG.md b/CHANGELOG.md index 40dd0aff37..85c4d3c296 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - feat: add option to specify additionalEndpoints for metrics [#2788] - chore: upgrade kubernetes-setup to v3.5.0 [#2785] - feat(logs): parse JSON logs [#2773] +- feat(logs): add format setting [#2794] ### Fixed @@ -50,6 +51,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 [#2791]: https://github.com/SumoLogic/sumologic-kubernetes-collection/pull/2791 [#2773]: https://github.com/SumoLogic/sumologic-kubernetes-collection/pull/2773 [#2790]: https://github.com/SumoLogic/sumologic-kubernetes-collection/pull/2790 +[#2794]: https://github.com/SumoLogic/sumologic-kubernetes-collection/pull/2794 [v1.15.3-sumo-0]: https://github.com/SumoLogic/sumologic-kubernetes-fluentd/releases/tag/v1.15.3-sumo-0 [Unreleased]: https://github.com/SumoLogic/sumologic-kubernetes-collection/compare/v3.0.0-beta.0...main diff --git a/deploy/helm/sumologic/README.md b/deploy/helm/sumologic/README.md index a6874f84ee..81abd24778 100644 --- a/deploy/helm/sumologic/README.md +++ b/deploy/helm/sumologic/README.md @@ -35,6 +35,7 @@ The following table lists the configurable parameters of the Sumo Logic chart an | `sumologic.logs.collector.allowSideBySide` | Allow running otel and Fluent Bit side by side. This will result in duplicated logs being ingested. Only enable this if you're **certain** it's what you want. | `false` | | `sumologic.logs.collector.otelcol.enabled` | Enable OpenTelemtry logs collector. | `true` | | `sumologic.logs.container.enabled` | Enable collecting logs from Kubernetes containers. | `true` | +| `sumologic.logs.container.format` | Format for container logs: `fields`, `json`, `json_merge` or `text`. | `fields` | | `sumologic.logs.multiline.enabled` | Enable multiline detection for Kubernetes container logs. 
| `true` | | `sumologic.logs.multiline.first_line_regex` | Regular expression to match first line of multiline logs. | `^\[?\d{4}-\d{1,2}-\d{1,2}.\d{2}:\d{2}:\d{2}` | | `sumologic.logs.systemd.enabled` | Enable collecting systemd logs from Kubernets nodes. | `true` | @@ -195,7 +196,6 @@ The following table lists the configurable parameters of the Sumo Logic chart an | `fluentd.logs.autoscaling.targetCPUUtilizationPercentage` | The desired target CPU utilization for autoscaling. | `100` | | `fluentd.logs.autoscaling.targetMemoryUtilizationPercentage` | The desired target memory utilization for autoscaling. | `Nil` | | `fluentd.logs.rawConfig` | Default log configuration. | See [values.yaml] | -| `fluentd.logs.output.logFormat` | Format to post logs into Sumo: fields, json, json_merge, or text. | `fields` | | `fluentd.logs.output.addTimestamp` | Option to control adding timestamp to logs. | `true` | | `fluentd.logs.output.timestampKey` | Field name when add_timestamp is on. | `timestamp` | | `fluentd.logs.output.pluginLogLevel` | Option to give plugin specific log level. 
| `error` | diff --git a/deploy/helm/sumologic/conf/logs/fluentd/logs.kubernetes.sumologic.filter.conf b/deploy/helm/sumologic/conf/logs/fluentd/logs.kubernetes.sumologic.filter.conf index 20fc9c9a01..7a44a680b2 100644 --- a/deploy/helm/sumologic/conf/logs/fluentd/logs.kubernetes.sumologic.filter.conf +++ b/deploy/helm/sumologic/conf/logs/fluentd/logs.kubernetes.sumologic.filter.conf @@ -1,6 +1,6 @@ source_name {{ .Values.sumologic.logs.container.sourceName | quote }} source_host {{ .Values.sumologic.logs.container.sourceHost | quote }} -log_format {{ .Values.fluentd.logs.output.logFormat | quote }} +log_format {{ .Values.sumologic.logs.container.format | quote }} source_category {{ .Values.sumologic.logs.container.sourceCategory | quote }} source_category_prefix {{ .Values.sumologic.logs.container.sourceCategoryPrefix | quote }} source_category_replace_dash {{ .Values.sumologic.logs.container.sourceCategoryReplaceDash | quote }} diff --git a/deploy/helm/sumologic/conf/logs/fluentd/logs.output.conf b/deploy/helm/sumologic/conf/logs/fluentd/logs.output.conf index 06ceb466e3..dd8d4f0060 100644 --- a/deploy/helm/sumologic/conf/logs/fluentd/logs.output.conf +++ b/deploy/helm/sumologic/conf/logs/fluentd/logs.output.conf @@ -2,7 +2,7 @@ data_type logs log_key log endpoint "#{ENV['SUMO_ENDPOINT_DEFAULT_LOGS_SOURCE']}" verify_ssl {{ .Values.fluentd.verifySsl | quote }} -log_format {{ .Values.fluentd.logs.output.logFormat | quote }} +log_format {{ .Values.sumologic.logs.container.format | quote }} add_timestamp {{ .Values.fluentd.logs.output.addTimestamp | quote }} timestamp_key {{ .Values.fluentd.logs.output.timestampKey | quote }} proxy_uri {{ .Values.fluentd.proxyUri | quote }} diff --git a/deploy/helm/sumologic/conf/logs/otelcol/config.yaml b/deploy/helm/sumologic/conf/logs/otelcol/config.yaml index 3b43cdd6b9..5b6d49c39e 100644 --- a/deploy/helm/sumologic/conf/logs/otelcol/config.yaml +++ b/deploy/helm/sumologic/conf/logs/otelcol/config.yaml @@ -27,10 +27,11 @@ 
extensions: exporters: {{ if .Values.sumologic.logs.container.enabled }} sumologic/containers: - log_format: json + log_format: {{ include "logs.otelcol.container.exporter.format" . }} json_logs: add_timestamp: true timestamp_key: timestamp + flatten_body: {{ eq .Values.sumologic.logs.container.format "json_merge" }} endpoint: ${SUMO_ENDPOINT_DEFAULT_LOGS_SOURCE} ## Configuration for sending queue ## ref: https://github.com/open-telemetry/opentelemetry-collector/tree/release/v0.37.x/exporter/exporterhelper#configuration diff --git a/deploy/helm/sumologic/templates/_helpers.tpl b/deploy/helm/sumologic/templates/_helpers.tpl index b074f7636c..acbea59efd 100644 --- a/deploy/helm/sumologic/templates/_helpers.tpl +++ b/deploy/helm/sumologic/templates/_helpers.tpl @@ -1576,3 +1576,19 @@ Generate list of remoteWrite endpoints for telegraf configuration {{- $endpoints := sortAlpha $endpoints -}} {{ $endpoints | join ",\n" }} {{- end -}} + +{{/* +Return the log format for the Sumologic exporter for container logs. + +'{{ include "logs.otelcol.container.exporter.format" . }}' +*/}} +{{- define "logs.otelcol.container.exporter.format" -}} +{{- $jsonFormats := list "json" "fields" "json_merge" -}} +{{- if has .Values.sumologic.logs.container.format $jsonFormats -}} +{{- "json" -}} +{{- else if eq .Values.sumologic.logs.container.format "text" -}} +{{- "text" -}} +{{- else -}} +{{- fail "`sumologic.logs.container.format` can only be `json`, `text`, `json_merge` or `fields`" -}} +{{- end -}} +{{- end -}} diff --git a/deploy/helm/sumologic/values.yaml b/deploy/helm/sumologic/values.yaml index 6d22b8d49a..b6aa33586e 100644 --- a/deploy/helm/sumologic/values.yaml +++ b/deploy/helm/sumologic/values.yaml @@ -252,6 +252,11 @@ sumologic: container: enabled: true + ## Format to post logs into Sumo: fields, json, json_merge, or text. + ## NOTE: json is an alias for fields + ## NOTE: Multiline log detection works differently for `text` format. 
See below link for full reference: + ## https://github.com/SumoLogic/sumologic-kubernetes-collection/blob/main/docs/troubleshoot-collection.md#using-text-format + format: fields otelcol: ## Extra processors for container logs. See [/docs/collecting-container-logs.md](/docs/collecting-container-logs.md) for details. @@ -806,11 +811,6 @@ fluentd: ## https://github.com/SumoLogic/fluentd-output-sumologic ## https://github.com/SumoLogic/sumologic-kubernetes-collection/blob/main/deploy/helm/sumologic/conf/logs/fluentd/logs.output.conf output: - ## Format to post logs into Sumo: fields, json, json_merge, or text. - ## NOTE: for logs metadata, fields is required. - ## NOTE: Multiline log detection works differently for `text` format. See below link for full reference: - ## https://github.com/SumoLogic/sumologic-kubernetes-collection/blob/main/docs/troubleshoot-collection.md#using-text-format - logFormat: fields ## Option to control adding timestamp to logs. addTimestamp: true ## Field name when add_timestamp is on. diff --git a/docs/collecting-container-logs.md b/docs/collecting-container-logs.md index 347fab6200..e0d83cba68 100644 --- a/docs/collecting-container-logs.md +++ b/docs/collecting-container-logs.md @@ -59,6 +59,111 @@ This feature is enabled by default and the default regex will catch logs startin This feature can rarely cause problems by merging together lines which are supposed to be separate. In that case, feel free to disable it. +### Log format + +There are three log formats available: `fields`, `json_merge` and `text`. `fields` is the default. `json` is also accepted as an alias for `fields`. + +You can change it by setting: + +```yaml +sumologic: + logs: + container: + format: fields +``` + +We're going to demonstrate the differences between them on two example log lines: + +1. A plain text log + + ```text + 2007-03-01T13:00:00Z I am a log line + ``` + +1. 
A JSON log + + ```json + {"log_property": "value","text": "I am a json log"} + ``` + +#### `fields` log format + +Logs formatted as `fields` are wrapped in a JSON object with additional properties, with the log body residing under the `log` key. + +For example, log line 1 will show up in Sumo Logic as: + +```javascript +{ + log: "2007-03-01T13:00:00Z I am a log line", + stream: "stdout", + timestamp: 1673627100045 +} +``` + +If the log line contains JSON, as log line 2 does, it will be displayed as a nested object inside the `log` key: + +```javascript +{ + log: { + log_property: "value", + text: "I am a json log" + }, + stream: "stdout", + timestamp: 1673627100045 +} +``` + +#### `json_merge` log format + +`json_merge` is identical to `fields` for non-JSON logs, but behaves differently for JSON logs. If the log is JSON, it +gets merged into the top-level object. + +Log line 1 will show up the same way as it did for `fields`: + +```javascript +{ + log: "2007-03-01T13:00:00Z I am a log line", + stream: "stdout", + timestamp: 1673627100045 +} +``` + +However, the attributes from log line 2 will show up at the top level: + +```javascript +{ + log: { + log_property: "value", + text: "I am a json log" + }, + stream: "stdout", + timestamp: 1673627100045, + log_property: "value", + text: "I am a json log" +} +``` + +#### `text` log format + +The `text` log format sends the log line as-is without any additional wrappers. + +Log line 1 will therefore show up as plain text: + +```text +2007-03-01T13:00:00Z I am a log line +``` + +Whereas log line 2 will be displayed as JSON: + +```javascript +{ + log_property: "value", + text: "I am a json log" +} +``` + +__warning__: Setting the format to `text` has certain consequences for multiline detection. See [here][troubleshooting_text_format] for more details. 
+ ### Setting source name and other built-in metadata It's possible to customize the built-in Sumo Logic metadata (like [source name][source_name] for example) for container logs: @@ -309,3 +414,4 @@ sumologic: [transform_processor_docs]: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/v0.69.0/processor/transformprocessor/README.md [sumo_fields]: https://help.sumologic.com/docs/manage/fields/ [sumo_add_fields]: https://help.sumologic.com/docs/manage/fields/#add-field +[troubleshooting_text_format]: fluent/troubleshoot-collection.md#using-text-format diff --git a/docs/fluent/fluentd-otc-comparison.md b/docs/fluent/fluentd-otc-comparison.md index 22839a5e55..0a035afb35 100644 --- a/docs/fluent/fluentd-otc-comparison.md +++ b/docs/fluent/fluentd-otc-comparison.md @@ -320,7 +320,6 @@ Events are not supported by `Opentelemetry Collector` | [fluentd.logs.podDisruptionBudget][readme] | [metadata.logs.podDisruptionBudget][readme] | | [fluentd.logs.rawConfig][readme] | [metadata.logs.config.merge][readme]; mind that configuration is going to be merged unless you use `null` | | [fluentd.logs.input.forwardExtraConf][readme] | [metadata.logs.config.merge][readme]; mind that configuration is going to be merged unless you use `null` | -| [fluentd.logs.output.logFormat][readme] | [metadata.logs.config.merge.exporters.sumologic/containers.log_format](#sumologic-output-plugin), [metadata.logs.config.merge.exporters.sumologic/systemd.log_format](#sumologic-output-plugin) | | [fluentd.logs.output.addTimestamp][readme] | [metadata.logs.config.merge.exporters.sumologic/containers.json_logs.add_timestamp](#sumologic-output-plugin), [metadata.logs.config.merge.exporters.sumologic/systemd.json_logs.add_timestamp](#sumologic-output-plugin) | | [fluentd.logs.output.timestampKey][readme] | [metadata.logs.config.merge.exporters.sumologic/containers.json_logs.timestamp_key](#sumologic-output-plugin), 
[metadata.logs.config.merge.exporters.sumologic/systemd.json_logs.timestamp_key](#sumologic-output-plugin) | | [fluentd.logs.output.pluginLogLevel][readme] | Not supported | diff --git a/docs/v3-migration-doc.md b/docs/v3-migration-doc.md index 33f6e18816..9c4da24491 100644 --- a/docs/v3-migration-doc.md +++ b/docs/v3-migration-doc.md @@ -453,3 +453,5 @@ metadata: `kube-prometheus-stack.prometheus.additionalServiceMonitors` configuration - Adding `sumologic.metrics.otelcol.extraProcessors` to make metrics modification easy + +- Moved `fluentd.logs.output.logFormat` to `sumologic.logs.container.format` diff --git a/tests/helm/logs_test.go b/tests/helm/logs_test.go index 91b2951f4e..7a3e16f362 100644 --- a/tests/helm/logs_test.go +++ b/tests/helm/logs_test.go @@ -1,6 +1,7 @@ package helm import ( + "fmt" "testing" "github.com/stretchr/testify/require" @@ -199,6 +200,69 @@ fluent-bit: require.Contains(t, containersPipeline.Processors, "filter/include-host") } +func TestMetadataLogFormat(t *testing.T) { + t.Parallel() + templatePath := "templates/logs/otelcol/configmap.yaml" + + type OtelConfig struct { + Exporters struct { + Containers struct { + LogFormat string `yaml:"log_format"` + JsonLogs struct { + FlattenBody bool `yaml:"flatten_body"` + } `yaml:"json_logs"` + } `yaml:"sumologic/containers"` + } + } + + testCases := []struct { + logFormat string + expectedExporterLogFormat string + expectedExporterFlattenBody bool + }{ + { + logFormat: "json", + expectedExporterLogFormat: "json", + expectedExporterFlattenBody: false, + }, + { + logFormat: "fields", + expectedExporterLogFormat: "json", + expectedExporterFlattenBody: false, + }, + { + logFormat: "json_merge", + expectedExporterLogFormat: "json", + expectedExporterFlattenBody: true, + }, + { + logFormat: "text", + expectedExporterLogFormat: "text", + expectedExporterFlattenBody: false, + }, + } + + for _, testCase := range testCases { + testCase := testCase + t.Run(testCase.logFormat, func(t *testing.T) { + 
t.Parallel() + var otelConfig OtelConfig + valuesYamlTemplate := ` +sumologic: + logs: + container: + format: %s +` + valuesYaml := fmt.Sprintf(valuesYamlTemplate, testCase.logFormat) + otelConfigYaml := GetOtelConfigYaml(t, valuesYaml, templatePath) + err := yaml.Unmarshal([]byte(otelConfigYaml), &otelConfig) + require.NoError(t, err) + require.Equal(t, testCase.expectedExporterLogFormat, otelConfig.Exporters.Containers.LogFormat) + require.Equal(t, testCase.expectedExporterFlattenBody, otelConfig.Exporters.Containers.JsonLogs.FlattenBody) + }) + } +} + func TestCollectorOtelConfigMerge(t *testing.T) { t.Parallel() templatePath := "templates/logs/collector/otelcol/configmap.yaml" diff --git a/tests/helm/metadata_logs_otc/static/otel.output.yaml b/tests/helm/metadata_logs_otc/static/otel.output.yaml index 207e2e28c1..e79464c0cc 100644 --- a/tests/helm/metadata_logs_otc/static/otel.output.yaml +++ b/tests/helm/metadata_logs_otc/static/otel.output.yaml @@ -16,6 +16,7 @@ data: endpoint: ${SUMO_ENDPOINT_DEFAULT_LOGS_SOURCE} json_logs: add_timestamp: true + flatten_body: false timestamp_key: timestamp log_format: json sending_queue: diff --git a/tests/helm/metadata_logs_otc/static/templates.output.yaml b/tests/helm/metadata_logs_otc/static/templates.output.yaml index 5a4ce34d41..56fddfad6e 100644 --- a/tests/helm/metadata_logs_otc/static/templates.output.yaml +++ b/tests/helm/metadata_logs_otc/static/templates.output.yaml @@ -16,6 +16,7 @@ data: endpoint: ${SUMO_ENDPOINT_DEFAULT_LOGS_SOURCE} json_logs: add_timestamp: true + flatten_body: false timestamp_key: timestamp log_format: json sending_queue: