From 44678750daa7fd837cd8ddc3d1fc9ff817f0c615 Mon Sep 17 00:00:00 2001
From: Jean Mertz
Date: Mon, 14 Sep 2020 10:32:49 +0200
Subject: [PATCH] Squashed commit of the following:

commit 2b08b6dea596b2efa7d740b018a2f2d798211bb6
Author: Nell Shamrell-Harrington
Date: Sun Sep 13 23:10:18 2020 -0700

    docs: Updates reference to gitter to point to the Discord server (#3858)

    Signed-off-by: Nell Shamrell

commit 27ba8b950b752dcb5316bacd40247dbf05437386
Author: James Turnbull
Date: Sat Sep 12 17:26:13 2020 -0400

    chore: Removing CI references to Nix (#3844)

    Signed-off-by: James Turnbull

commit 7c95aa97d68dbd4417c5ebe6ba5c4787e1e26030
Author: James Turnbull
Date: Sat Sep 12 17:25:54 2020 -0400

    chore: Fix make prepare-environment (#3841)

commit cc00cf0722c0c254a9cdd25ee08300526bcb429b
Author: James Turnbull
Date: Sat Sep 12 13:38:39 2020 -0400

    chore: Added workflow_dispatch for the env workflow (#3842)

commit b7f656ee6984daa94acdc8e4f0797812854340f6
Author: James Turnbull
Date: Sat Sep 12 12:14:21 2020 -0400

    chore: Fix Debian post-install process to reload systemd daemons (#3832)

commit c2078b7026dba766e2fe40abf1ff65354397fe23
Author: binarylogic
Date: Sat Sep 12 10:28:41 2020 -0400

    chore: Remove check-code.sh since it is no longer used

    Signed-off-by: binarylogic

commit 7887d4e21883d2524fae79cd6684ce3e172c13a4
Author: James Turnbull
Date: Fri Sep 11 23:50:56 2020 -0400

    chore: Added cross installation to fix nightly builds (#3831)

commit 648e82ca32acd1d3b08ff949983d55c3ad97f2e1
Author: Ana Hobden
Date: Fri Sep 11 17:33:29 2020 -0700

    chore(platforms): Migrate x86 gnu target to cross, add new aarch64 gnu target (no sasl) (#3657)

    * Add initial cross adoption
      Signed-off-by: Ana Hobden
    * Add archiving
      Signed-off-by: Ana Hobden
    * Clean up the makefile a bit and get tarballs working
      Signed-off-by: Ana Hobden
    * checkpoint
      Signed-off-by: Ana Hobden
    * wip
      Signed-off-by: Ana Hobden
    * Add to CI
      Signed-off-by: Ana Hobden
    * Add autoinstall to CI
      Signed-off-by: Ana Hobden
    * Remove cruft
      Signed-off-by: Ana Hobden
    * Can't test yet
      Signed-off-by: Ana Hobden
    * Autoinstall in e2e
      Signed-off-by: Ana Hobden
    * Readd docker image
      Signed-off-by: Ana Hobden
    * More comments
      Signed-off-by: Ana Hobden
    * Set right CI task
      Signed-off-by: Ana Hobden
    * Add note about TZ issue
      Signed-off-by: Ana Hobden

commit b28eb18d9e7374f415db2071718ad36b23a89e77
Author: Bruce Guenter
Date: Fri Sep 11 16:22:58 2020 -0600

    chore(networking): Rework internal auto concurrency tests (#3802)

    * Move auto concurrency test case data into TOML files
    * Use tokio manual time tracking to speed up tests
      By pausing and then manually advancing the clock, this provides for
      both very fast running tests and nearly perfect time measurements
      for highly repeatable test results.
    * Collect all the failures for each test
      This has the collateral benefit of being able to drop the large
      `assert_within` macro that was only used here.
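    A minimal, hypothetical illustration of the manual-time technique
    described in the bullets above (not Vector's actual test code). It
    assumes tokio with the `macros` and `test-util` features enabled,
    which provide `tokio::time::pause` and `tokio::time::advance`:

        // Freeze the runtime clock, then advance it explicitly so
        // timing-sensitive assertions run instantly and deterministically.
        use std::time::Duration;

        #[tokio::test]
        async fn elapses_five_seconds_without_real_waiting() {
            tokio::time::pause();
            let start = tokio::time::Instant::now();
            tokio::time::advance(Duration::from_secs(5)).await;
            assert_eq!(start.elapsed(), Duration::from_secs(5));
        }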
    * The tests should now work predictably on MacOS

    Signed-off-by: Bruce Guenter

commit b37dea7079e6a56606076e17339f77b603f80c7f
Author: Ana Hobden
Date: Fri Sep 11 14:53:14 2020 -0700

    chore: Add some additional Event impls (#3621)

    * Move log_event to new file
      Signed-off-by: Ana Hobden
    * Move value to new file
      Signed-off-by: Ana Hobden
    * Move log_schema to config
      Signed-off-by: Ana Hobden
    * chore: Add some event impls
      Signed-off-by: Ana Hobden
    * Do some namespace squashing
      Signed-off-by: Ana Hobden
    * Fixup lints
      Signed-off-by: Ana Hobden
    * Remove From for Vec
      Signed-off-by: Ana Hobden
    * Fixup leveldb
      Signed-off-by: Ana Hobden
    * fmt
      Signed-off-by: Ana Hobden
    * Jean's feedback
      Signed-off-by: Ana Hobden

commit f03898e1b6a9d78ce40c48b19675d8e7cb67f8c5
Author: Luke Steensen
Date: Fri Sep 11 15:53:51 2020 -0500

    enhancement(tokenizer transform): add internal events (#3807)

    Signed-off-by: Luke Steensen

commit ff555aaed1208742ffb71be5242b52ed258eaa06
Author: Luke Steensen
Date: Fri Sep 11 15:53:27 2020 -0500

    enhancement(dedupe transform): add internal events (#3809)

    Closes #3393

    Signed-off-by: Luke Steensen
    Co-authored-by: Jean Mertz

commit 54a4d8e30dff0149e8b719a1aff6f7608fe206be
Author: MOZGIII
Date: Fri Sep 11 17:09:57 2020 +0300

    chore: Generate Kubernetes distribution YAMLs from Helm Chart (#3614)

    * Add a script to manage the Kubernetes YAML configs
      Signed-off-by: MOZGIII
    * Update Helm Chart to generate config more like YAML files
      Signed-off-by: MOZGIII
    * Switch to generated YAML
      Signed-off-by: MOZGIII
    * Define check-all target in multiple lines
      Signed-off-by: MOZGIII
    * Add Kubernetes YAML tasks to Makefile
      Signed-off-by: MOZGIII
    * Add check-kubernetes-yaml to CI
      Signed-off-by: MOZGIII
    * Add kubernetes-yaml.sh to .github/CODEOWNERS
      Signed-off-by: MOZGIII
    * Add /scripts/kubernetes-yaml/ to .github/CODEOWNERS
      Signed-off-by: MOZGIII
    * Add a TODO for passing the app-version
      Signed-off-by: MOZGIII

commit 68a5c43d0b54be3a1473f57077f6c4405da7c05c
Author: Do Duy
Date: Fri Sep 11 10:23:03 2020 +0700

    docs: Add docs for retry_backoff_secs option (#3819)

    Signed-off-by: Duy Do

commit 1a1352b0bdea7bf5c937e9d0508dd4f31832e0cb
Author: James Turnbull
Date: Thu Sep 10 17:34:16 2020 -0400

    chore: Remove docker-compose from integration tests (#3622)

commit 94def0fb9ec971539984fdcf42dacdcfd0aa3200
Author: Luke Steensen
Date: Thu Sep 10 16:17:03 2020 -0500

    enhancement(reduce transform): add internal events (#3812)

    Closes #3401

    Signed-off-by: Luke Steensen

commit 74b43b7ff208ae9531729acdd5331eb89a49851b
Author: Bruce Guenter
Date: Thu Sep 10 14:05:52 2020 -0600

    fix(networking): Rework auto concurrency backpressure logic (#3804)

    Previously, all responses were marked as not being backpressure. This
    changes that logic to:

    1. Treat RetryAction::Retry responses as backpressure
    2.
Only use the RTT values from RetryAction::Successful responses Signed-off-by: Bruce Guenter commit eb09b92c3e88b40df2ba140d1e9c02fc4ef54d00 Author: Ana Hobden Date: Thu Sep 10 13:01:23 2020 -0700 chore: Split up event (#3619) * Move log_event to new file Signed-off-by: Ana Hobden * Move value to new file Signed-off-by: Ana Hobden * Move log_schema to config Signed-off-by: Ana Hobden * Do some namespace squashing Signed-off-by: Ana Hobden * Fixup lints Signed-off-by: Ana Hobden * Fixup leveldb Signed-off-by: Ana Hobden * fmt Signed-off-by: Ana Hobden commit 2fd9ccf11cc9c761c0930303bd91c1c2a10b8db4 Author: Jesse Szwedko Date: Thu Sep 10 15:34:52 2020 -0400 chore: Add expected review counts to CONTRIBUTING.md (#3666) * chore: Add expected review counts to CONTRIBUTING.md Signed-off-by: Jesse Szwedko --- .github/CODEOWNERS | 2 + .github/workflows/e2e.yml | 4 +- .github/workflows/environment.yml | 9 +- .github/workflows/install-sh.yml | 1 - .github/workflows/lints.yml | 2 + .github/workflows/nightly.yml | 7 +- .github/workflows/release.yml | 10 +- .github/workflows/tests.yml | 45 +- .meta/sources/docker.toml.erb | 8 + CONTRIBUTING.md | 20 +- Cargo.toml | 12 + Makefile | 485 +++++++++++++-- benches/bench.rs | 8 +- benches/event.rs | 5 +- distribution/debian/scripts/postinst | 7 + distribution/debian/scripts/preinst | 0 .../helm/vector/templates/configmap.yaml | 9 +- .../helm/vector/templates/daemonset.yaml | 23 +- distribution/helm/vector/templates/rbac.yaml | 2 + distribution/kubernetes/kustomization.yaml | 3 +- distribution/kubernetes/vector-global.yaml | 14 - .../kubernetes/vector-namespaced.yaml | 100 --- distribution/kubernetes/vector.yaml | 188 ++++++ distribution/rpm/vector.spec | 4 +- docker-compose.yml | 246 -------- kustomization.yaml | 5 +- scripts/Gemfile | 1 - scripts/Gemfile.lock | 15 - scripts/check-code.sh | 10 - scripts/deploy-kubernetes-test.sh | 18 +- scripts/docker-compose-run.sh | 12 +- scripts/docker-run.sh | 7 +- scripts/environment/bootstrap-ubuntu-20.04.sh | 3 + scripts/kubernetes-yaml.sh | 73 +++ scripts/kubernetes-yaml/values.yaml | 18 + scripts/run.sh | 59 -- scripts/test-e2e-kubernetes.sh | 2 +- scripts/test-integration-aws.sh | 11 - scripts/test-integration-clickhouse.sh | 11 - scripts/test-integration-docker.sh | 10 - scripts/test-integration-elasticsearch.sh | 11 - scripts/test-integration-gcp.sh | 11 - scripts/test-integration-humio.sh | 11 - scripts/test-integration-influxdb.sh | 11 - scripts/test-integration-kafka.sh | 11 - scripts/test-integration-loki.sh | 11 - scripts/test-integration-pulsar.sh | 11 - scripts/test-integration-splunk.sh | 11 - src/{event => config}/log_schema.rs | 54 +- src/config/mod.rs | 12 +- src/config/unit_test.rs | 2 +- src/event/log_event.rs | 179 ++++++ src/event/mod.rs | 390 ++---------- src/event/value.rs | 185 ++++++ src/internal_events/dedupe.rs | 36 ++ src/internal_events/mod.rs | 12 + src/internal_events/reduce.rs | 26 + src/internal_events/tokenizer.rs | 63 ++ src/main.rs | 2 +- src/sinks/aws_cloudwatch_logs/mod.rs | 27 +- src/sinks/aws_kinesis_firehose.rs | 11 +- src/sinks/aws_kinesis_streams.rs | 13 +- src/sinks/aws_s3.rs | 12 +- src/sinks/blackhole.rs | 4 +- src/sinks/clickhouse.rs | 11 +- src/sinks/console.rs | 4 +- src/sinks/datadog/logs.rs | 4 +- src/sinks/elasticsearch.rs | 3 +- src/sinks/file/mod.rs | 31 +- src/sinks/gcp/cloud_storage.rs | 8 +- src/sinks/honeycomb.rs | 4 +- src/sinks/http.rs | 4 +- src/sinks/influxdb/logs.rs | 4 +- src/sinks/kafka.rs | 13 +- src/sinks/logdna.rs | 6 +- src/sinks/loki.rs | 10 +- 
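    To make the backpressure rework described in the fix(networking) commit
    above concrete, here is a schematic sketch of that classification rule.
    The type and function names below are simplified stand-ins, not Vector's
    actual implementation: only successful responses contribute an RTT
    sample, and a retryable failure counts as backpressure.

        use std::time::Duration;

        // Simplified stand-in for the sink retry decision.
        enum RetryAction {
            Successful,
            Retry,
            DontRetry,
        }

        // What the concurrency controller records for one response.
        struct Observation {
            is_backpressure: bool,
            rtt_sample: Option<Duration>,
        }

        fn observe(action: &RetryAction, rtt: Duration) -> Observation {
            match action {
                // Only successful responses feed the RTT estimate.
                RetryAction::Successful => Observation {
                    is_backpressure: false,
                    rtt_sample: Some(rtt),
                },
                // A retryable failure means the downstream service is pushing back.
                RetryAction::Retry => Observation {
                    is_backpressure: true,
                    rtt_sample: None,
                },
                // Permanent failures are neither backpressure nor an RTT sample.
                RetryAction::DontRetry => Observation {
                    is_backpressure: false,
                    rtt_sample: None,
                },
            }
        }

        fn main() {
            let o = observe(&RetryAction::Retry, Duration::from_millis(120));
            assert!(o.is_backpressure && o.rtt_sample.is_none());
        }

    Splitting the decision this way keeps transient errors from polluting the
    latency estimate while still letting them slow the sender down.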
src/sinks/papertrail.rs | 3 +- src/sinks/pulsar.rs | 8 +- src/sinks/sematext_logs.rs | 4 +- src/sinks/splunk_hec.rs | 14 +- src/sinks/util/auto_concurrency/controller.rs | 14 +- src/sinks/util/auto_concurrency/tests.rs | 586 +++++++----------- src/sinks/util/encoding/mod.rs | 7 +- src/sinks/util/mod.rs | 4 +- src/sources/docker.rs | 44 +- src/sources/file.rs | 61 +- src/sources/generator.rs | 6 +- src/sources/http.rs | 66 +- src/sources/journald.rs | 20 +- src/sources/kafka.rs | 22 +- src/sources/kubernetes_logs/mod.rs | 4 +- src/sources/kubernetes_logs/parser/cri.rs | 2 +- src/sources/kubernetes_logs/parser/docker.rs | 14 +- src/sources/kubernetes_logs/parser/picker.rs | 4 +- .../kubernetes_logs/partial_events_merger.rs | 2 +- src/sources/logplex.rs | 55 +- src/sources/socket/mod.rs | 75 +-- src/sources/socket/tcp.rs | 12 +- src/sources/socket/udp.rs | 4 +- src/sources/socket/unix.rs | 9 +- src/sources/splunk_hec.rs | 143 ++--- src/sources/stdin.rs | 21 +- src/sources/syslog.rs | 36 +- src/template.rs | 17 +- src/test_util/mod.rs | 62 -- src/transforms/ansi_stripper.rs | 8 +- src/transforms/dedupe.rs | 17 +- src/transforms/grok_parser.rs | 19 +- src/transforms/json_parser.rs | 28 +- src/transforms/log_to_metric.rs | 15 +- src/transforms/logfmt_parser.rs | 4 +- src/transforms/merge.rs | 2 +- src/transforms/reduce/mod.rs | 4 + src/transforms/regex_parser.rs | 4 +- src/transforms/sampler.rs | 14 +- src/transforms/split.rs | 4 +- src/transforms/tokenizer.rs | 23 +- src/validate.rs | 3 +- tests/data/auto-concurrency-template.toml | 35 ++ .../data/auto-concurrency/constant-link.toml | 29 + .../defers-at-high-concurrency.toml | 36 ++ .../drops-at-high-concurrency.toml | 33 + .../auto-concurrency/fixed-concurrency.toml | 22 + .../auto-concurrency/jittery-link-small.toml | 25 + tests/data/auto-concurrency/medium-send.toml | 29 + tests/data/auto-concurrency/slow-link.toml | 30 + tests/data/auto-concurrency/slow-send-1.toml | 29 + tests/data/auto-concurrency/slow-send-2.toml | 29 + tests/support/mod.rs | 9 +- tests/topology.rs | 11 +- 130 files changed, 2446 insertions(+), 2022 deletions(-) create mode 100755 distribution/debian/scripts/postinst mode change 100644 => 100755 distribution/debian/scripts/preinst delete mode 100644 distribution/kubernetes/vector-global.yaml delete mode 100644 distribution/kubernetes/vector-namespaced.yaml create mode 100644 distribution/kubernetes/vector.yaml delete mode 100755 scripts/check-code.sh create mode 100755 scripts/kubernetes-yaml.sh create mode 100644 scripts/kubernetes-yaml/values.yaml delete mode 100755 scripts/run.sh delete mode 100755 scripts/test-integration-aws.sh delete mode 100755 scripts/test-integration-clickhouse.sh delete mode 100755 scripts/test-integration-docker.sh delete mode 100755 scripts/test-integration-elasticsearch.sh delete mode 100755 scripts/test-integration-gcp.sh delete mode 100755 scripts/test-integration-humio.sh delete mode 100755 scripts/test-integration-influxdb.sh delete mode 100755 scripts/test-integration-kafka.sh delete mode 100755 scripts/test-integration-loki.sh delete mode 100755 scripts/test-integration-pulsar.sh delete mode 100755 scripts/test-integration-splunk.sh rename src/{event => config}/log_schema.rs (70%) create mode 100644 src/event/log_event.rs create mode 100644 src/event/value.rs create mode 100644 src/internal_events/dedupe.rs create mode 100644 src/internal_events/reduce.rs create mode 100644 src/internal_events/tokenizer.rs create mode 100644 tests/data/auto-concurrency-template.toml create mode 100644 
tests/data/auto-concurrency/constant-link.toml create mode 100644 tests/data/auto-concurrency/defers-at-high-concurrency.toml create mode 100644 tests/data/auto-concurrency/drops-at-high-concurrency.toml create mode 100644 tests/data/auto-concurrency/fixed-concurrency.toml create mode 100644 tests/data/auto-concurrency/jittery-link-small.toml create mode 100644 tests/data/auto-concurrency/medium-send.toml create mode 100644 tests/data/auto-concurrency/slow-link.toml create mode 100644 tests/data/auto-concurrency/slow-send-1.toml create mode 100644 tests/data/auto-concurrency/slow-send-2.toml diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 6d7addcdbda18b..f2e2be11e28774 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -38,6 +38,8 @@ /scripts/release-helm.sh @MOZGIII /scripts/deploy-kubernetes-test.sh @MOZGIII @JeanMertz /scripts/test-e2e-kubernetes.sh @MOZGIII @JeanMertz +/scripts/kubernetes-yaml.sh @MOZGIII +/scripts/kubernetes-yaml/ @MOZGIII /src/metrics.rs @lukesteensen /src/main.rs @lukesteensen diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index 7d794efc1b60eb..34cdf18967dd6c 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -40,11 +40,13 @@ on: env: VERBOSE: true + AUTOINSTALL: true RUST_TEST_THREADS: 1 TEST_LOG: vector=debug RUST_BACKTRACE: full AWS_ACCESS_KEY_ID: "dummy" AWS_SECRET_ACCESS_KEY: "dummy" + CONTAINER_TOOL: "docker" jobs: cancel-previous: @@ -152,7 +154,7 @@ jobs: KUBERNETES_VERSION: ${{ matrix.kubernetes_version.version }} MINIKUBE_VERSION: ${{ matrix.minikube_version }} CONTAINER_RUNTIME: ${{ matrix.container_runtime }} - - run: USE_CONTAINER=none make slim-builds + - run: make slim-builds - run: make test-e2e-kubernetes env: USE_MINIKUBE_CACHE: "true" diff --git a/.github/workflows/environment.yml b/.github/workflows/environment.yml index 3bf0b1d7bd3a35..8651562f75646c 100644 --- a/.github/workflows/environment.yml +++ b/.github/workflows/environment.yml @@ -4,6 +4,7 @@ on: push: branches: - master + workflow_dispatch: env: VERBOSE: true @@ -17,5 +18,11 @@ jobs: steps: - run: docker login https://docker.pkg.github.com -u ${{ github.actor }} -p ${{ secrets.GITHUB_TOKEN }} - uses: actions/checkout@v2 + - name: free disk space + run: | + sudo swapoff -a + sudo rm -f /swapfile + sudo apt clean + docker rmi $(docker image ls -aq) + df -h - run: make environment-prepare - - run: docker push docker.pkg.github.com/timberio/vector/environment:${{ github.sha }} diff --git a/.github/workflows/install-sh.yml b/.github/workflows/install-sh.yml index b5f95b15474a8c..56f98b0a37c187 100644 --- a/.github/workflows/install-sh.yml +++ b/.github/workflows/install-sh.yml @@ -14,5 +14,4 @@ jobs: - env: AWS_ACCESS_KEY_ID: "${{ secrets.CI_AWS_ACCESS_KEY_ID }}" AWS_SECRET_ACCESS_KEY: "${{ secrets.CI_AWS_SECRET_ACCESS_KEY }}" - USE_CONTAINER: none run: make sync-install diff --git a/.github/workflows/lints.yml b/.github/workflows/lints.yml index d03f6d92864084..6c9be40a8a7de2 100644 --- a/.github/workflows/lints.yml +++ b/.github/workflows/lints.yml @@ -13,6 +13,7 @@ env: RUST_BACKTRACE: full AWS_ACCESS_KEY_ID: "dummy" AWS_SECRET_ACCESS_KEY: "dummy" + CONTAINER_TOOL: "docker" jobs: cancel-previous: @@ -52,6 +53,7 @@ jobs: - run: make check-version - run: make check-scripts - run: make check-helm + - run: make check-kubernetes-yaml - uses: EmbarkStudios/cargo-deny-action@v1 with: command: check advisories diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index dbd6c5dd584f39..62c961eee771d8 100644 --- 
a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -6,7 +6,9 @@ on: - cron: "0 4 * * *" env: + AUTOINSTALL: true CHANNEL: nightly + VERBOSE: true jobs: build-x86_64-unknown-linux-musl-packages: @@ -73,7 +75,6 @@ jobs: - name: "Build archive" env: TARGET: "x86_64-apple-darwin" - USE_CONTAINER: none NATIVE_BUILD: true run: | export PATH="$HOME/.cargo/bin:$PATH" @@ -130,7 +131,6 @@ jobs: export RUST_LTO="" export TARGET="x86_64-pc-windows-msvc" export NATIVE_BUILD="true" - export USE_CONTAINER="none" make package-archive - name: "Build package" shell: bash @@ -173,7 +173,6 @@ jobs: DOCKER_USERNAME: "${{ secrets.CI_DOCKER_USERNAME }}" DOCKER_PASSWORD: "${{ secrets.CI_DOCKER_PASSWORD }}" PLATFORM: "linux/amd64,linux/arm64" - USE_CONTAINER: none run: | ./scripts/upgrade-docker.sh export VERSION=$(make version) @@ -232,7 +231,6 @@ jobs: - env: AWS_ACCESS_KEY_ID: "${{ secrets.CI_AWS_ACCESS_KEY_ID }}" AWS_SECRET_ACCESS_KEY: "${{ secrets.CI_AWS_SECRET_ACCESS_KEY }}" - USE_CONTAINER: none run: make release-s3 release-helm: @@ -249,5 +247,4 @@ jobs: env: AWS_ACCESS_KEY_ID: "${{ secrets.CI_AWS_ACCESS_KEY_ID }}" AWS_SECRET_ACCESS_KEY: "${{ secrets.CI_AWS_SECRET_ACCESS_KEY }}" - USE_CONTAINER: none run: make release-helm diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 0d88d25e137af9..93e4756ea14f55 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -6,6 +6,11 @@ on: - v0.* - v1.* + +env: + AUTOINSTALL: true + VERBOSE: true + jobs: build-x86_64-unknown-linux-musl-packages: runs-on: ubuntu-latest @@ -71,7 +76,6 @@ jobs: - name: "Build archive" env: TARGET: "x86_64-apple-darwin" - USE_CONTAINER: none NATIVE_BUILD: true run: | export PATH="$HOME/.cargo/bin:$PATH" @@ -95,7 +99,6 @@ jobs: export RUST_LTO="" export TARGET="x86_64-pc-windows-msvc" export NATIVE_BUILD="true" - export USE_CONTAINER="none" make package-archive - name: "Build package" shell: bash @@ -138,7 +141,6 @@ jobs: DOCKER_USERNAME: "${{ secrets.CI_DOCKER_USERNAME }}" DOCKER_PASSWORD: "${{ secrets.CI_DOCKER_PASSWORD }}" PLATFORM: "linux/amd64,linux/arm64,linux/arm" - USE_CONTAINER: none run: | ./scripts/upgrade-docker.sh export VERSION=$(make version) @@ -197,7 +199,6 @@ jobs: - env: AWS_ACCESS_KEY_ID: "${{ secrets.CI_AWS_ACCESS_KEY_ID }}" AWS_SECRET_ACCESS_KEY: "${{ secrets.CI_AWS_SECRET_ACCESS_KEY }}" - USE_CONTAINER: none run: make release-s3 release-github: @@ -329,5 +330,4 @@ jobs: env: AWS_ACCESS_KEY_ID: "${{ secrets.CI_AWS_ACCESS_KEY_ID }}" AWS_SECRET_ACCESS_KEY: "${{ secrets.CI_AWS_SECRET_ACCESS_KEY }}" - USE_CONTAINER: none run: make release-helm diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 640711b40edd21..acb7e620414ebb 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -36,11 +36,13 @@ on: env: VERBOSE: true + AUTOINSTALL: true RUST_TEST_THREADS: 1 TEST_LOG: vector=debug RUST_BACKTRACE: full AWS_ACCESS_KEY_ID: "dummy" AWS_SECRET_ACCESS_KEY: "dummy" + CONTAINER_TOOL: "docker" jobs: cancel-previous: @@ -54,7 +56,7 @@ jobs: access_token: ${{ secrets.GITHUB_TOKEN }} test-linux: - name: Unit - Linux + name: Unit - x86_64-unknown-linux-gnu runs-on: ubuntu-20.04 if: | !contains(github.event.pull_request.labels.*.name, 'ci-condition: skip') @@ -71,6 +73,46 @@ jobs: - run: make slim-builds - run: make test + # TODO: Upgrade to test once the tz issues are solved. 
+ # https://github.com/timberio/vector/issues/3771 + build-x86_64-unknown-linux-gnu: + name: Unit - x86_64-unknown-linux-gnu + runs-on: ubuntu-20.04 + if: | + !contains(github.event.pull_request.labels.*.name, 'ci-condition: skip') + steps: + - uses: actions/checkout@v2 + - uses: actions/cache@v2 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + - run: sudo bash scripts/environment/bootstrap-ubuntu-20.04.sh + - run: bash scripts/environment/prepare.sh + - run: make slim-builds + - run: make build-x86_64-unknown-linux-gnu + + # TODO: Upgrade to test once the tz issues are solved. + # https://github.com/timberio/vector/issues/3771 + build-aarch64-unknown-linux-gnu: + name: Unit - aarch64-unknown-linux-gnu + runs-on: ubuntu-20.04 + if: | + !contains(github.event.pull_request.labels.*.name, 'ci-condition: skip') + steps: + - uses: actions/checkout@v2 + - uses: actions/cache@v2 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + - run: sudo bash scripts/environment/bootstrap-ubuntu-20.04.sh + - run: bash scripts/environment/prepare.sh + - run: make slim-builds + - run: make build-aarch64-unknown-linux-gnu + test-mac: name: Unit - Mac # Full CI suites for this platform were only recently introduced. @@ -103,7 +145,6 @@ jobs: steps: - uses: actions/checkout@v1 - env: - USE_CONTAINER: none RUSTFLAGS: "-D warnings" run: cargo test --no-default-features --features default-msvc diff --git a/.meta/sources/docker.toml.erb b/.meta/sources/docker.toml.erb index 401b19a8f00fc2..b5dbc87fba3730 100644 --- a/.meta/sources/docker.toml.erb +++ b/.meta/sources/docker.toml.erb @@ -81,6 +81,14 @@ incomplete message (i.e. partial events). If set to `""`, no field will be \ added to partial event. This allows to opt-out of partial event detection.\ """ +[sources.docker.options.retry_backoff_secs] +type = "uint" +common = false +default = 1 +description = """\ +The amount of time to wait before retrying after an error.\ +""" + [sources.docker.fields.log.fields.container_created_at] type = "timestamp" examples = ["2019-11-01T21:15:47.443232Z"] diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5076fd14c16024..e6658e4b09e363 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -165,14 +165,24 @@ docs: fix typos #### Reviews & Approvals -All pull requests must be reviewed and approved by at least one Vector team -member. The review process is outlined in the [Review guide](REVIEWING.md). +All pull requests should be reviewed by: + +- No review required for cosmetic changes like whitespace, typos, and spelling + by a maintainer +- One Vector team member for minor changes or trivial changes from contributors +- Two Vector team members for major changes +- Three Vector team members for RFCs + +If there are any CODEOWNERs automatically assigned, you should also wait for +their review. + +The review process is outlined in the [Review guide](REVIEWING.md). #### Merge Style All pull requests are squashed and merged. We generally discourage large pull requests that are over 300-500 lines of diff. If you would like to propose a -change that is larger we suggest coming onto our gitter channel and discuss it +change that is larger we suggest coming onto our [Discord server](https://chat.vector.dev/) and discuss it with one of our engineers. This way we can talk through the solution and discuss if a change that large is even needed! 
This will produce a quicker response to the change and likely produce code that aligns better with our @@ -209,13 +219,13 @@ $ curl -u "$GITHUB_USERNAME:$GITHUB_TOKEN" \ We're super excited to have you interested in working on Vector! Before you start you should pick how you want to develop. -For small or first-time contributions, we recommend the Docker method. If you do a lot of contributing, try adopting the Nix method! It'll be way faster and feel more smooth. Prefer to do it yourself? That's fine too! +For small or first-time contributions, we recommend the Docker method. Prefer to do it yourself? That's fine too! #### Using a Docker or Podman environment > **Targets:** You can use this method to produce AARCH64, Arm6/7, as well as x86/64 Linux builds. -Since not everyone has a full working native environment, or can use Nix, we took our Nix environment and stuffed it into a Docker (or Podman) container! +Since not everyone has a full working native environment, we took our environment and stuffed it into a Docker (or Podman) container! This is ideal for users who want it to "Just work" and just want to start contributing. It's also what we use for our CI, so you know if it breaks we can't do anything else until we fix it. 😉 diff --git a/Cargo.toml b/Cargo.toml index a406ba1658383f..991c8585a3c4f3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,12 @@ publish = false [profile.bench] debug = true +[profile.release] +opt-level = 3 +debug = false +lto = "fat" +codegen-units = 1 + [package.metadata.deb] maintainer-scripts = "distribution/debian/scripts/" conf-files = ["/etc/vector/vector.toml"] @@ -211,6 +217,12 @@ default-cmake = ["sources", "transforms", "sinks", "vendor-all", "unix", "leveld # TODO: Enable SASL https://github.com/timberio/vector/pull/3081#issuecomment-659298042 default-msvc = ["sources", "transforms", "sinks", "vendor-openssl", "vendor-libz", "leveldb-cmake", "rdkafka-cmake"] +# Target specific release features. +# The `make` tasks will select this according to the appropriate triple. +# Use this section to turn off or on specific features for specific triples. +target-x86_64-unknown-linux-gnu = ["sources", "transforms", "sinks", "vendor-all", "unix", "leveldb-cmake", "rdkafka-cmake"] +target-aarch64-unknown-linux-gnu = ["sources", "transforms", "sinks", "vendor-openssl", "vendor-libz", "unix", "leveldb-cmake", "rdkafka-cmake"] + # Enables features that work only on systems providing `cfg(unix)` unix = ["jemallocator"] # These are **very** useful on Cross compilations! diff --git a/Makefile b/Makefile index 166a39d1841c6c..88c3d15b6d8cd8 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ -.PHONY: $(MAKECMDGOALS) all +# .PHONY: $(MAKECMDGOALS) all .DEFAULT_GOAL := help -RUN := $(shell realpath $(shell dirname $(firstword $(MAKEFILE_LIST)))/scripts/run.sh) +RUN := $(shell realpath $(shell dirname $(firstword $(MAKEFILE_LIST)))/scripts/docker-compose-run.sh) # Begin OS detection ifeq ($(OS),Windows_NT) # is Windows_NT on XP, 2000, 7, Vista, 10... @@ -19,13 +19,24 @@ export SCOPE ?= "" export AUTOSPAWN ?= true # Override to control if services are turned off after integration tests. export AUTODESPAWN ?= ${AUTOSPAWN} +# Override autoinstalling of tools. (Eg `cargo install`) +export AUTOINSTALL ?= false # Override to true for a bit more log output in your environment building (more coming!) export VERBOSE ?= false # Override to set a different Rust toolchain export RUST_TOOLCHAIN ?= $(shell cat rust-toolchain) -# Override the container tool. 
-# TODO: We're working on first class `podman` support for integration tests! We need to move away from compose though: https://github.com/containers/podman-compose/issues/125 -export CONTAINER_TOOL ?= docker +# Override the container tool. Tries docker first and then tries podman. +export CONTAINER_TOOL ?= auto +ifeq ($(CONTAINER_TOOL),auto) + override CONTAINER_TOOL = $(shell docker version >/dev/null 2>&1 && echo docker || echo podman) +endif +# If we're using podman create pods else if we're using docker create networks. +ifeq ($(CONTAINER_TOOL),podman) + export CONTAINER_ENCLOSURE = "pod" +else + export CONTAINER_ENCLOSURE = "network" +endif + # Override this to automatically enter a container containing the correct, full, official build environment for Vector, ready for development export ENVIRONMENT ?= false # The upstream container we publish artifacts to on a successful master build. @@ -33,20 +44,25 @@ export ENVIRONMENT_UPSTREAM ?= docker.pkg.github.com/timberio/vector/environment # Override to disable building the container, having it pull from the Github packages repo instead # TODO: Disable this by default. Blocked by `docker pull` from Github Packages requiring authenticated login export ENVIRONMENT_AUTOBUILD ?= true -# Override this when appropriate to disable a TTY being available in commands with `ENVIRONMENT=true` (Useful for CI, but CI uses Nix!) +# Override this when appropriate to disable a TTY being available in commands with `ENVIRONMENT=true` export ENVIRONMENT_TTY ?= true # A list of WASM modules by name export WASM_MODULES = $(patsubst tests/data/wasm/%/,%,$(wildcard tests/data/wasm/*/)) # The same WASM modules, by output path. export WASM_MODULE_OUTPUTS = $(patsubst %,/target/wasm32-wasi/%,$(WASM_MODULES)) - # Deprecated. -export USE_CONTAINER ?= $(CONTAINER_TOOL) +# Set dummy AWS credentials if not present - used for AWS and ES integration tests +export AWS_ACCESS_KEY_ID ?= "dummy" +export AWS_SECRET_ACCESS_KEY ?= "dummy" FORMATTING_BEGIN_YELLOW = \033[0;33m FORMATTING_BEGIN_BLUE = \033[36m FORMATTING_END = \033[0m +# "One weird trick!" https://www.gnu.org/software/make/manual/make.html#Syntax-of-Functions +EMPTY:= +SPACE:= ${EMPTY} ${EMPTY} + help: @printf -- "${FORMATTING_BEGIN_BLUE} __ __ __${FORMATTING_END}\n" @printf -- "${FORMATTING_BEGIN_BLUE} \ \ / / / /${FORMATTING_END}\n" @@ -56,7 +72,6 @@ help: @printf -- " V E C T O R\n" @printf -- "\n" @printf -- "---------------------------------------------------------------------------------------\n" - @printf -- "Nix user? You can use ${FORMATTING_BEGIN_YELLOW}\`direnv allow .\`${FORMATTING_END} or ${FORMATTING_BEGIN_YELLOW}\`nix-shell --pure\`${FORMATTING_END}\n" @printf -- "Want to use ${FORMATTING_BEGIN_YELLOW}\`docker\`${FORMATTING_END} or ${FORMATTING_BEGIN_YELLOW}\`podman\`${FORMATTING_END}? See ${FORMATTING_BEGIN_YELLOW}\`ENVIRONMENT=true\`${FORMATTING_END} commands. (Default ${FORMATTING_BEGIN_YELLOW}\`CONTAINER_TOOL=docker\`${FORMATTING_END})\n" @printf -- "\n" @awk 'BEGIN {FS = ":.*##"; printf "Usage: make ${FORMATTING_BEGIN_BLUE}${FORMATTING_END}\n"} /^[a-zA-Z0-9_-]+:.*?##/ { printf " ${FORMATTING_BEGIN_BLUE}%-46s${FORMATTING_END} %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) @@ -137,6 +152,10 @@ define ENVIRONMENT_PREPARE endef endif +check-container-tool: ## Checks what container tool is installed + @echo -n "Checking if $(CONTAINER_TOOL) is available..." 
&& \ + $(CONTAINER_TOOL) version 1>/dev/null && echo "yes" + environment: export ENVIRONMENT_TTY = true ## Enter a full Vector dev shell in $CONTAINER_TOOL, binding this folder to the container. environment: ${ENVIRONMENT_EXEC} @@ -161,8 +180,11 @@ build-dev: ## Build the project in development mode (Supports `ENVIRONMENT=true` build-all: build-x86_64-unknown-linux-musl build-aarch64-unknown-linux-musl ## Build the project in release mode for all supported platforms -build-x86_64-unknown-linux-gnu: ## Build dynamically linked binary in release mode for the x86_64 architecture - $(RUN) build-x86_64-unknown-linux-gnu +build-x86_64-unknown-linux-gnu: target/x86_64-unknown-linux-gnu/release/vector ## Build a release binary for the x86_64-unknown-linux-gnu triple. + @echo "Output to ${<}" + +build-aarch64-unknown-linux-gnu: target/aarch64-unknown-linux-gnu/release/vector ## Build a release binary for the aarch64-unknown-linux-gnu triple. + @echo "Output to ${<}" build-x86_64-unknown-linux-musl: ## Build static binary in release mode for the x86_64 architecture $(RUN) build-x86_64-unknown-linux-musl @@ -170,12 +192,70 @@ build-x86_64-unknown-linux-musl: ## Build static binary in release mode for the build-aarch64-unknown-linux-musl: load-qemu-binfmt ## Build static binary in release mode for the aarch64 architecture $(RUN) build-aarch64-unknown-linux-musl +##@ Cross Compiling +.PHONY: cross-enable +cross-enable: cargo-install-cross + +.PHONY: CARGO_HANDLES_FRESHNESS +CARGO_HANDLES_FRESHNESS: + ${EMPTY} + +# This is basically a shorthand for folks. +# `cross-anything-triple` will call `cross anything --target triple` with the right features. +.PHONY: cross-% +cross-%: export PAIR =$(subst -, ,$($(strip @):cross-%=%)) +cross-%: export COMMAND ?=$(word 1,${PAIR}) +cross-%: export TRIPLE ?=$(subst ${SPACE},-,$(wordlist 2,99,${PAIR})) +cross-%: export PROFILE ?= release +cross-%: export RUSTFLAGS += -C link-arg=-s +cross-%: cargo-install-cross + cross ${COMMAND} \ + $(if $(findstring release,$(PROFILE)),--release,) \ + --target ${TRIPLE} \ + --no-default-features \ + --features target-${TRIPLE} + +target/%/vector: export PAIR =$(subst /, ,$(@:target/%/vector=%)) +target/%/vector: export TRIPLE ?=$(word 1,${PAIR}) +target/%/vector: export PROFILE ?=$(word 2,${PAIR}) +target/%/vector: export RUSTFLAGS += -C link-arg=-s +target/%/vector: cargo-install-cross CARGO_HANDLES_FRESHNESS + cross build \ + $(if $(findstring release,$(PROFILE)),--release,) \ + --target ${TRIPLE} \ + --no-default-features \ + --features target-${TRIPLE} + +target/%/vector.tar.gz: export PAIR =$(subst /, ,$(@:target/%/vector.tar.gz=%)) +target/%/vector.tar.gz: export TRIPLE ?=$(word 1,${PAIR}) +target/%/vector.tar.gz: export PROFILE ?=$(word 2,${PAIR}) +target/%/vector.tar.gz: target/%/vector CARGO_HANDLES_FRESHNESS + tar --create \ + --gzip \ + --verbose \ + --file target/${TRIPLE}/${PROFILE}/vector.tar.gz \ + --transform='s|target/${TRIPLE}/${PROFILE}/|bin/|' \ + --transform='s|distribution/|etc/|' \ + --transform 's|^|vector-${TRIPLE}/|' \ + target/${TRIPLE}/${PROFILE}/vector \ + README.md \ + LICENSE \ + config \ + distribution/init.d \ + distribution/systemd + ##@ Testing (Supports `ENVIRONMENT=true`) -test: ## Run the test suite +test: ## Run the unit test suite ${MAYBE_ENVIRONMENT_EXEC} cargo test --no-default-features --features ${DEFAULT_FEATURES} ${SCOPE} -- --nocapture -test-all: test-behavior test-integration test-unit ## Runs all tests, unit, behaviorial, and integration. 
+test-all: test test-behavior test-integration ## Runs all tests, unit, behaviorial, and integration. + +test-x86_64-unknown-linux-gnu: cross-test-x86_64-unknown-linux-gnu ## Runs unit tests on the x86_64-unknown-linux-gnu triple + ${EMPTY} + +test-aarch64-unknown-linux-gnu: cross-test-aarch64-unknown-linux-gnu ## Runs unit tests on the aarch64-unknown-linux-gnu triple + ${EMPTY} test-behavior: ## Runs behaviorial test ${MAYBE_ENVIRONMENT_EXEC} cargo run -- test tests/behavior/**/*.toml @@ -185,121 +265,387 @@ test-integration: test-integration-aws test-integration-clickhouse test-integrat test-integration: test-integration-gcp test-integration-influxdb test-integration-kafka test-integration-loki test-integration: test-integration-pulsar test-integration-splunk +start-test-integration: ## Starts all integration test infrastructure +start-test-integration: start-integration-aws start-integration-clickhouse start-integration-elasticsearch +start-test-integration: start-integration-gcp start-integration-influxdb start-integration-kafka start-integration-loki +start-test-integration: start-integration-pulsar start-integration-splunk + +stop-test-integration: ## Stops all integration test infrastructure +stop-test-integration: stop-integration-aws stop-integration-clickhouse stop-integration-elasticsearch +stop-test-integration: stop-integration-gcp stop-integration-influxdb stop-integration-kafka stop-integration-loki +stop-test-integration: stop-integration-pulsar stop-integration-splunk + +start-integration-aws: +ifeq ($(CONTAINER_TOOL),podman) + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) create --replace --name vector-test-integration-aws -p 8111:8111 -p 4568:4568 -p 4572:4572 -p 4582:4582 -p 4571:4571 -p 4573:4573 -p 6000:6000 + $(CONTAINER_TOOL) run -d --$(CONTAINER_ENCLOSURE)=vector-test-integration-aws --name vector_ec2_metadata \ + timberiodev/mock-ec2-metadata:latest + $(CONTAINER_TOOL) run -d --$(CONTAINER_ENCLOSURE)=vector-test-integration-aws --name vector_localstack_aws \ + -e SERVICES=kinesis:4568,s3:4572,cloudwatch:4582,elasticsearch:4571,firehose:4573 \ + localstack/localstack@sha256:f21f1fc770ee4bfd5012afdc902154c56b7fb18c14cf672de151b65569c8251e + $(CONTAINER_TOOL) run -d --$(CONTAINER_ENCLOSURE)=vector-test-integration-aws --name vector_mockwatchlogs \ + -e RUST_LOG=trace luciofranco/mockwatchlogs:latest +else + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) create vector-test-integration-aws + $(CONTAINER_TOOL) run -d --$(CONTAINER_ENCLOSURE)=vector-test-integration-aws -p 8111:8111 --name vector_ec2_metadata \ + timberiodev/mock-ec2-metadata:latest + $(CONTAINER_TOOL) run -d --$(CONTAINER_ENCLOSURE)=vector-test-integration-aws -p 4568:4568 -p 4572:4572 \ + -p 4582:4582 -p 4571:4571 -p 4573:4573 --name vector_localstack_aws \ + -e SERVICES=kinesis:4568,s3:4572,cloudwatch:4582,elasticsearch:4571,firehose:4573 \ + localstack/localstack@sha256:f21f1fc770ee4bfd5012afdc902154c56b7fb18c14cf672de151b65569c8251e + $(CONTAINER_TOOL) run -d --$(CONTAINER_ENCLOSURE)=vector-test-integration-aws -p 6000:6000 --name vector_mockwatchlogs \ + -e RUST_LOG=trace luciofranco/mockwatchlogs:latest +endif + +stop-integration-aws: + $(CONTAINER_TOOL) rm --force vector_ec2_metadata vector_mockwatchlogs vector_localstack_aws 2>/dev/null; true +ifeq ($(CONTAINER_TOOL),podman) + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) stop --name=vector-test-integration-aws 2>/dev/null; true + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) rm --force --name=vector-test-integration-aws 2>/dev/null; true +else + $(CONTAINER_TOOL) 
$(CONTAINER_ENCLOSURE) rm vector-test-integration-aws 2>/dev/null; true +endif + test-integration-aws: ## Runs AWS integration tests ifeq ($(AUTOSPAWN), true) - ${MAYBE_ENVIRONMENT_EXEC} $(CONTAINER_TOOL)-compose up -d dependencies-aws - sleep 5 # Many services are very lazy... Give them a sec... + -$(MAKE) -k stop-integration-aws + $(MAKE) start-integration-aws + sleep 5 # Many services are very slow... Give them a sec... endif ${MAYBE_ENVIRONMENT_EXEC} cargo test --no-default-features --features aws-integration-tests --lib ::aws_ -- --nocapture ifeq ($(AUTODESPAWN), true) - ${MAYBE_ENVIRONMENT_EXEC} $(CONTAINER_TOOL)-compose stop + $(MAKE) -k stop-integration-aws +endif + +start-integration-clickhouse: +ifeq ($(CONTAINER_TOOL),podman) + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) create --replace --name vector-test-integration-clickhouse -p 8123:8123 + $(CONTAINER_TOOL) run -d --$(CONTAINER_ENCLOSURE)=vector-test-integration-clickhouse --name vector_clickhouse yandex/clickhouse-server:19 +else + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) create vector-test-integration-clickhouse + $(CONTAINER_TOOL) run -d --$(CONTAINER_ENCLOSURE)=vector-test-integration-clickhouse -p 8123:8123 --name vector_clickhouse yandex/clickhouse-server:19 +endif + +stop-integration-clickhouse: + $(CONTAINER_TOOL) rm --force vector_clickhouse 2>/dev/null; true +ifeq ($(CONTAINER_TOOL),podman) + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) stop --name=vector-test-integration-clickhouse 2>/dev/null; true + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) rm --force --name vector-test-integration-clickhouse 2>/dev/null; true +else + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) rm vector-test-integration-clickhouse 2>/dev/null; true endif test-integration-clickhouse: ## Runs Clickhouse integration tests ifeq ($(AUTOSPAWN), true) - ${MAYBE_ENVIRONMENT_EXEC} $(CONTAINER_TOOL)-compose up -d dependencies-clickhouse - sleep 5 # Many services are very lazy... Give them a sec... + -$(MAKE) -k stop-integration-clickhouse + $(MAKE) start-integration-clickhouse + sleep 5 # Many services are very slow... Give them a sec... 
endif ${MAYBE_ENVIRONMENT_EXEC} cargo test --no-default-features --features clickhouse-integration-tests --lib ::clickhouse:: -- --nocapture ifeq ($(AUTODESPAWN), true) - ${MAYBE_ENVIRONMENT_EXEC} $(CONTAINER_TOOL)-compose stop + $(MAKE) -k stop-integration-clickhouse endif test-integration-docker: ## Runs Docker integration tests ${MAYBE_ENVIRONMENT_EXEC} cargo test --no-default-features --features docker-integration-tests --lib ::docker:: -- --nocapture +start-integration-elasticsearch: +ifeq ($(CONTAINER_TOOL),podman) + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) create --replace --name vector-test-integration-elasticsearch -p 4571:4571 -p 9200:9200 -p 9300:9300 -p 9201:9200 -p 9301:9300 + $(CONTAINER_TOOL) run -d --$(CONTAINER_ENCLOSURE)=vector-test-integration-elasticsearch --name vector_localstack_es \ + -e SERVICES=elasticsearch:4571 localstack/localstack@sha256:f21f1fc770ee4bfd5012afdc902154c56b7fb18c14cf672de151b65569c8251e + $(CONTAINER_TOOL) run -d --$(CONTAINER_ENCLOSURE)=vector-test-integration-elasticsearch \ + --name vector_elasticsearch -e discovery.type=single-node -e ES_JAVA_OPTS="-Xms400m -Xmx400m" elasticsearch:6.6.2 + $(CONTAINER_TOOL) run -d --$(CONTAINER_ENCLOSURE)=vector-test-integration-elasticsearch \ + --name vector_elasticsearch-tls -e discovery.type=single-node -e xpack.security.enabled=true \ + -e xpack.security.http.ssl.enabled=true -e xpack.security.transport.ssl.enabled=true \ + -e xpack.ssl.certificate=certs/localhost.crt -e xpack.ssl.key=certs/localhost.key \ + -e ES_JAVA_OPTS="-Xms400m -Xmx400m" \ + -v $(PWD)/tests/data:/usr/share/elasticsearch/config/certs:ro elasticsearch:6.6.2 +else + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) create vector-test-integration-elasticsearch + $(CONTAINER_TOOL) run -d --$(CONTAINER_ENCLOSURE)=vector-test-integration-elasticsearch -p 4571:4571 --name vector_localstack_es \ + -e SERVICES=elasticsearch:4571 localstack/localstack@sha256:f21f1fc770ee4bfd5012afdc902154c56b7fb18c14cf672de151b65569c8251e + $(CONTAINER_TOOL) run -d --$(CONTAINER_ENCLOSURE)=vector-test-integration-elasticsearch -p 9200:9200 -p 9300:9300 \ + --name vector_elasticsearch -e discovery.type=single-node -e ES_JAVA_OPTS="-Xms400m -Xmx400m" elasticsearch:6.6.2 + $(CONTAINER_TOOL) run -d --$(CONTAINER_ENCLOSURE)=vector-test-integration-elasticsearch -p 9201:9200 -p 9301:9300 \ + --name vector_elasticsearch-tls -e discovery.type=single-node -e xpack.security.enabled=true \ + -e xpack.security.http.ssl.enabled=true -e xpack.security.transport.ssl.enabled=true \ + -e xpack.ssl.certificate=certs/localhost.crt -e xpack.ssl.key=certs/localhost.key \ + -e ES_JAVA_OPTS="-Xms400m -Xmx400m" \ + -v $(PWD)/tests/data:/usr/share/elasticsearch/config/certs:ro elasticsearch:6.6.2 +endif + +stop-integration-elasticsearch: + $(CONTAINER_TOOL) rm --force vector_localstack_es vector_elasticsearch vector_elasticsearch-tls 2>/dev/null; true +ifeq ($(CONTAINER_TOOL),podman) + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) stop --name=vector-test-integration-elasticsearch 2>/dev/null; true + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) rm vector-test-integration-elasticsearch 2>/dev/null; true +else + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) rm vector-test-integration-elasticsearch 2>/dev/null; true +endif + test-integration-elasticsearch: ## Runs Elasticsearch integration tests ifeq ($(AUTOSPAWN), true) - ${MAYBE_ENVIRONMENT_EXEC} $(CONTAINER_TOOL)-compose up -d dependencies-elasticsearch - sleep 20 # Elasticsearch is incredibly slow to start up, be very generous... 
+ -$(MAKE) -k stop-integration-elasticsearch + $(MAKE) start-integration-elasticsearch + sleep 60 # Many services are very slow... Give them a sec... endif ${MAYBE_ENVIRONMENT_EXEC} cargo test --no-default-features --features es-integration-tests --lib ::elasticsearch:: -- --nocapture ifeq ($(AUTODESPAWN), true) - ${MAYBE_ENVIRONMENT_EXEC} $(CONTAINER_TOOL)-compose stop + $(MAKE) -k stop-integration-elasticsearch +endif + +start-integration-gcp: +ifeq ($(CONTAINER_TOOL),podman) + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) create --replace --name vector-test-integration-gcp -p 8681-8682:8681-8682 + $(CONTAINER_TOOL) run -d --$(CONTAINER_ENCLOSURE)=vector-test-integration-gcp --name vector_cloud-pubsub \ + -e PUBSUB_PROJECT1=testproject,topic1:subscription1 messagebird/gcloud-pubsub-emulator +else + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) create vector-test-integration-gcp + $(CONTAINER_TOOL) run -d --$(CONTAINER_ENCLOSURE)=vector-test-integration-gcp -p 8681-8682:8681-8682 --name vector_cloud-pubsub \ + -e PUBSUB_PROJECT1=testproject,topic1:subscription1 messagebird/gcloud-pubsub-emulator +endif + +stop-integration-gcp: + $(CONTAINER_TOOL) rm --force vector_cloud-pubsub 2>/dev/null; true +ifeq ($(CONTAINER_TOOL),podman) + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) stop --name=vector-test-integration-gcp 2>/dev/null; true + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) rm --force --name vector-test-integration-gcp 2>/dev/null; true +else + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) rm vector-test-integration-gcp 2>/dev/null; true endif test-integration-gcp: ## Runs GCP integration tests ifeq ($(AUTOSPAWN), true) - ${MAYBE_ENVIRONMENT_EXEC} $(CONTAINER_TOOL)-compose up -d dependencies-gcp - sleep 5 # Many services are very lazy... Give them a sec... + -$(MAKE) -k stop-integration-gcp + $(MAKE) start-integration-gcp + sleep 10 # Many services are very slow... Give them a sec.. endif ${MAYBE_ENVIRONMENT_EXEC} cargo test --no-default-features --features gcp-integration-tests --lib ::gcp:: -- --nocapture ifeq ($(AUTODESPAWN), true) - ${MAYBE_ENVIRONMENT_EXEC} $(CONTAINER_TOOL)-compose stop + $(MAKE) -k stop-integration-gcp +endif + +start-integration-humio: +ifeq ($(CONTAINER_TOOL),podman) + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) create --replace --name vector-test-integration-humio -p 8080:8080 + $(CONTAINER_TOOL) run -d --$(CONTAINER_ENCLOSURE)=vector-test-integration-humio --name vector_humio humio/humio:1.13.1 +else + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) create vector-test-integration-humio + $(CONTAINER_TOOL) run -d --$(CONTAINER_ENCLOSURE)=vector-test-integration-humio -p 8080:8080 --name vector_humio humio/humio:1.13.1 +endif + +stop-integration-humio: + $(CONTAINER_TOOL) rm --force vector_humio 2>/dev/null; true +ifeq ($(CONTAINER_TOOL),podman) + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) stop --name=vector-test-integration-humio 2>/dev/null; true + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) rm --force --name vector-test-integration-humio 2>/dev/null; true +else + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) rm vector-test-integration-humio 2>/dev/null; true endif test-integration-humio: ## Runs Humio integration tests ifeq ($(AUTOSPAWN), true) - ${MAYBE_ENVIRONMENT_EXEC} $(CONTAINER_TOOL)-compose up -d dependencies-humio + -$(MAKE) -k stop-integration-humio + $(MAKE) start-integration-humio + sleep 10 # Many services are very slow... Give them a sec.. 
endif ${MAYBE_ENVIRONMENT_EXEC} cargo test --no-default-features --features humio-integration-tests --lib ::humio:: -- --nocapture ifeq ($(AUTODESPAWN), true) - ${MAYBE_ENVIRONMENT_EXEC} $(CONTAINER_TOOL)-compose stop + $(MAKE) -k stop-integration-humio endif +start-integration-influxdb: +ifeq ($(CONTAINER_TOOL),podman) + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) create --replace --name vector-test-integration-influxdb -p 8086:8086 -p 9999:9999 + $(CONTAINER_TOOL) run -d --$(CONTAINER_ENCLOSURE)=vector-test-integration-influxdb --name vector_influxdb_v1 \ + -e INFLUXDB_REPORTING_DISABLED=true influxdb:1.7 + $(CONTAINER_TOOL) run -d --$(CONTAINER_ENCLOSURE)=vector-test-integration-influxdb --name vector_influxdb_v2 \ + -e INFLUXDB_REPORTING_DISABLED=true quay.io/influxdb/influxdb:2.0.0-beta influxd --reporting-disabled +else + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) create vector-test-integration-influxdb + $(CONTAINER_TOOL) run -d --$(CONTAINER_ENCLOSURE)=vector-test-integration-influxdb -p 8086:8086 --name vector_influxdb_v1 \ + -e INFLUXDB_REPORTING_DISABLED=true influxdb:1.7 + $(CONTAINER_TOOL) run -d --$(CONTAINER_ENCLOSURE)=vector-test-integration-influxdb -p 9999:9999 --name vector_influxdb_v2 \ + -e INFLUXDB_REPORTING_DISABLED=true quay.io/influxdb/influxdb:2.0.0-beta influxd --reporting-disabled +endif + +stop-integration-influxdb: + $(CONTAINER_TOOL) rm --force vector_influxdb_v1 vector_influxdb_v2 2>/dev/null; true +ifeq ($(CONTAINER_TOOL),podman) + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) stop --name=vector-test-integration-influxdb 2>/dev/null; true + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) rm --force --name vector-test-integration-influxdb 2>/dev/null; true +else + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) rm vector-test-integration-influxdb 2>/dev/null; true +endif test-integration-influxdb: ## Runs InfluxDB integration tests ifeq ($(AUTOSPAWN), true) - ${MAYBE_ENVIRONMENT_EXEC} $(CONTAINER_TOOL)-compose up -d dependencies-influxdb - sleep 5 # Many services are very lazy... Give them a sec... + -$(MAKE) -k stop-integration-influxdb + $(MAKE) start-integration-influxdb + sleep 10 # Many services are very slow... Give them a sec.. 
endif ${MAYBE_ENVIRONMENT_EXEC} cargo test --no-default-features --features influxdb-integration-tests --lib ::influxdb::integration_tests:: -- --nocapture ifeq ($(AUTODESPAWN), true) - ${MAYBE_ENVIRONMENT_EXEC} $(CONTAINER_TOOL)-compose stop + $(MAKE) -k stop-integration-influxdb +endif + +start-integration-kafka: +ifeq ($(CONTAINER_TOOL),podman) + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) create --replace --name vector-test-integration-kafka -p 2181:2181 -p 9091-9093:9091-9093 + $(CONTAINER_TOOL) run -d --$(CONTAINER_ENCLOSURE)=vector-test-integration-kafka --name vector_zookeeper wurstmeister/zookeeper + $(CONTAINER_TOOL) run -d --$(CONTAINER_ENCLOSURE)=vector-test-integration-kafka -e KAFKA_BROKER_ID=1 \ + -e KAFKA_ZOOKEEPER_CONNECT=vector_zookeeper:2181 -e KAFKA_LISTENERS=PLAINTEXT://:9091,SSL://:9092,SASL_PLAINTEXT://:9093 \ + -e KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://localhost:9091,SSL://localhost:9092,SASL_PLAINTEXT://localhost:9093 \ + -e KAFKA_SSL_KEYSTORE_LOCATION=/certs/localhost.p12 -e KAFKA_SSL_KEYSTORE_PASSWORD=NOPASS \ + -e KAFKA_SSL_TRUSTSTORE_LOCATION=/certs/localhost.p12 -e KAFKA_SSL_TRUSTSTORE_PASSWORD=NOPASS \ + -e KAFKA_SSL_KEY_PASSWORD=NOPASS -e KAFKA_SSL_ENDPOINT_IDENTIFICATION_ALGORITHM=none \ + -e KAFKA_OPTS="-Djava.security.auth.login.config=/etc/kafka/kafka_server_jaas.conf" \ + -e KAFKA_INTER_BROKER_LISTENER_NAME=SASL_PLAINTEXT -e KAFKA_SASL_ENABLED_MECHANISMS=PLAIN \ + -e KAFKA_SASL_MECHANISM_INTER_BROKER_PROTOCOL=PLAIN -v $(PWD)/tests/data/localhost.p12:/certs/localhost.p12:ro \ + -v $(PWD)/tests/data/kafka_server_jaas.conf:/etc/kafka/kafka_server_jaas.conf --name vector_kafka wurstmeister/kafka +else + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) create vector-test-integration-kafka + $(CONTAINER_TOOL) run -d --$(CONTAINER_ENCLOSURE)=vector-test-integration-kafka -p 2181:2181 --name vector_zookeeper wurstmeister/zookeeper + $(CONTAINER_TOOL) run -d --$(CONTAINER_ENCLOSURE)=vector-test-integration-kafka -p 9091-9093:9091-9093 -e KAFKA_BROKER_ID=1 \ + -e KAFKA_ZOOKEEPER_CONNECT=vector_zookeeper:2181 -e KAFKA_LISTENERS=PLAINTEXT://:9091,SSL://:9092,SASL_PLAINTEXT://:9093 \ + -e KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://localhost:9091,SSL://localhost:9092,SASL_PLAINTEXT://localhost:9093 \ + -e KAFKA_SSL_KEYSTORE_LOCATION=/certs/localhost.p12 -e KAFKA_SSL_KEYSTORE_PASSWORD=NOPASS \ + -e KAFKA_SSL_TRUSTSTORE_LOCATION=/certs/localhost.p12 -e KAFKA_SSL_TRUSTSTORE_PASSWORD=NOPASS \ + -e KAFKA_SSL_KEY_PASSWORD=NOPASS -e KAFKA_SSL_ENDPOINT_IDENTIFICATION_ALGORITHM=none \ + -e KAFKA_OPTS="-Djava.security.auth.login.config=/etc/kafka/kafka_server_jaas.conf" \ + -e KAFKA_INTER_BROKER_LISTENER_NAME=SASL_PLAINTEXT -e KAFKA_SASL_ENABLED_MECHANISMS=PLAIN \ + -e KAFKA_SASL_MECHANISM_INTER_BROKER_PROTOCOL=PLAIN -v $(PWD)/tests/data/localhost.p12:/certs/localhost.p12:ro \ + -v $(PWD)/tests/data/kafka_server_jaas.conf:/etc/kafka/kafka_server_jaas.conf --name vector_kafka wurstmeister/kafka +endif + +stop-integration-kafka: + $(CONTAINER_TOOL) rm --force vector_kafka vector_zookeeper 2>/dev/null; true +ifeq ($(CONTAINER_TOOL),podman) + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) stop --name=vector-test-integration-kafka 2>/dev/null; true + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) rm --force --name vector-test-integration-kafka 2>/dev/null; true +else + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) rm vector-test-integration-kafka 2>/dev/null; true endif test-integration-kafka: ## Runs Kafka integration tests ifeq ($(AUTOSPAWN), true) - ${MAYBE_ENVIRONMENT_EXEC} $(CONTAINER_TOOL)-compose up -d 
dependencies-kafka - sleep 5 # Many services are very lazy... Give them a sec... + -$(MAKE) -k stop-integration-kafka + $(MAKE) start-integration-kafka + sleep 10 # Many services are very slow... Give them a sec.. endif ${MAYBE_ENVIRONMENT_EXEC} cargo test --no-default-features --features "kafka-integration-tests rdkafka-plain" --lib ::kafka:: -- --nocapture ifeq ($(AUTODESPAWN), true) - ${MAYBE_ENVIRONMENT_EXEC} $(CONTAINER_TOOL)-compose stop + $(MAKE) -k stop-integration-kafka +endif + +start-integration-loki: +ifeq ($(CONTAINER_TOOL),podman) + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) create --replace --name vector-test-integration-loki -p 3100:3100 + $(CONTAINER_TOOL) run -d --$(CONTAINER_ENCLOSURE)=vector-test-integration-loki -v $(PWD)/tests/data:/etc/loki \ + --name vector_loki grafana/loki:master -config.file=/etc/loki/loki-config.yaml +else + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) create vector-test-integration-loki + $(CONTAINER_TOOL) run -d --$(CONTAINER_ENCLOSURE)=vector-test-integration-loki -p 3100:3100 -v $(PWD)/tests/data:/etc/loki \ + --name vector_loki grafana/loki:master -config.file=/etc/loki/loki-config.yaml +endif + +stop-integration-loki: + $(CONTAINER_TOOL) rm --force vector_loki 2>/dev/null; true +ifeq ($(CONTAINER_TOOL),podman) + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) stop --name=vector-test-integration-loki 2>/dev/null; true + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) rm --force --name vector-test-integration-loki 2>/dev/null; true +else + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) rm vector-test-integration-loki 2>/dev/null; true endif test-integration-loki: ## Runs Loki integration tests ifeq ($(AUTOSPAWN), true) - ${MAYBE_ENVIRONMENT_EXEC} $(CONTAINER_TOOL)-compose up -d dependencies-loki - sleep 5 # Many services are very lazy... Give them a sec... + -$(MAKE) -k stop-integration-loki + $(MAKE) start-integration-loki + sleep 10 # Many services are very slow... Give them a sec.. endif ${MAYBE_ENVIRONMENT_EXEC} cargo test --no-default-features --features loki-integration-tests --lib ::loki:: -- --nocapture ifeq ($(AUTODESPAWN), true) - ${MAYBE_ENVIRONMENT_EXEC} $(CONTAINER_TOOL)-compose stop + $(MAKE) -k stop-integration-loki +endif + +start-integration-pulsar: +ifeq ($(CONTAINER_TOOL),podman) + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) create --replace --name vector-test-integration-pulsar -p 6650:6650 + $(CONTAINER_TOOL) run -d --$(CONTAINER_ENCLOSURE)=vector-test-integration-pulsar --name vector_pulsar \ + apachepulsar/pulsar bin/pulsar standalone +else + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) create vector-test-integration-pulsar + $(CONTAINER_TOOL) run -d --$(CONTAINER_ENCLOSURE)=vector-test-integration-pulsar -p 6650:6650 --name vector_pulsar \ + apachepulsar/pulsar bin/pulsar standalone +endif + +stop-integration-pulsar: + $(CONTAINER_TOOL) rm --force vector_pulsar 2>/dev/null; true +ifeq ($(CONTAINER_TOOL),podman) + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) stop --name=vector-test-integration-pulsar 2>/dev/null; true + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) rm --force --name vector-test-integration-pulsar 2>/dev/null; true +else + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) rm vector-test-integration-pulsar 2>/dev/null; true endif test-integration-pulsar: ## Runs Pulsar integration tests ifeq ($(AUTOSPAWN), true) - ${MAYBE_ENVIRONMENT_EXEC} $(CONTAINER_TOOL)-compose up -d dependencies-pulsar - sleep 5 # Many services are very lazy... Give them a sec... 
+ -$(MAKE) -k stop-integration-pulsar + $(MAKE) start-integration-pulsar + sleep 10 # Many services are very slow... Give them a sec.. endif ${MAYBE_ENVIRONMENT_EXEC} cargo test --no-default-features --features pulsar-integration-tests --lib ::pulsar:: -- --nocapture ifeq ($(AUTODESPAWN), true) - ${MAYBE_ENVIRONMENT_EXEC} $(CONTAINER_TOOL)-compose stop + $(MAKE) -k stop-integration-pulsar +endif + +start-integration-splunk: +# TODO Replace timberio/splunk-hec-test:minus_compose image with production image once merged +ifeq ($(CONTAINER_TOOL),podman) + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) create --replace --name vector-test-integration-splunk -p 8088:8088 -p 8000:8000 -p 8089:8089 + $(CONTAINER_TOOL) run -d --$(CONTAINER_ENCLOSURE)=vector-test-integration-splunk \ + --name splunk timberio/splunk-hec-test:minus_compose +else + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) create vector-test-integration-splunk + $(CONTAINER_TOOL) run -d --$(CONTAINER_ENCLOSURE)=vector-test-integration-splunk -p 8088:8088 -p 8000:8000 -p 8089:8089 \ + --name splunk timberio/splunk-hec-test:minus_compose +endif + +stop-integration-splunk: + $(CONTAINER_TOOL) rm --force splunk 2>/dev/null; true +ifeq ($(CONTAINER_TOOL),podman) + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) stop --name=vector-test-integration-splunk 2>/dev/null; true + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) rm --force --name vector-test-integration-splunk 2>/dev/null; true +else + $(CONTAINER_TOOL) $(CONTAINER_ENCLOSURE) rm vector-test-integration-splunk 2>/dev/null; true endif test-integration-splunk: ## Runs Splunk integration tests ifeq ($(AUTOSPAWN), true) - ${MAYBE_ENVIRONMENT_EXEC} $(CONTAINER_TOOL)-compose up -d dependencies-splunk - sleep 5 # Many services are very lazy... Give them a sec... + -$(MAKE) -k stop-integration-splunk + $(MAKE) start-integration-splunk + sleep 10 # Many services are very slow... Give them a sec.. endif ${MAYBE_ENVIRONMENT_EXEC} cargo test --no-default-features --features splunk-integration-tests --lib ::splunk_hec:: -- --nocapture ifeq ($(AUTODESPAWN), true) - ${MAYBE_ENVIRONMENT_EXEC} $(CONTAINER_TOOL)-compose stop + $(MAKE) -k stop-integration-splunk endif -PACKAGE_DEB_USE_CONTAINER ?= $(USE_CONTAINER) test-e2e-kubernetes: ## Runs Kubernetes E2E tests (Sorry, no `ENVIRONMENT=true` support) - PACKAGE_DEB_USE_CONTAINER="$(PACKAGE_DEB_USE_CONTAINER)" scripts/test-e2e-kubernetes.sh + @scripts/test-e2e-kubernetes.sh test-shutdown: ## Runs shutdown tests ifeq ($(AUTOSPAWN), true) - ${MAYBE_ENVIRONMENT_EXEC} $(CONTAINER_TOOL)-compose up -d dependencies-kafka - sleep 5 # Many services are very lazy... Give them a sec... + -$(MAKE) -k stop-integration-kafka + $(MAKE) start-integration-kafka + sleep 30 # Many services are very slow... Give them a sec.. 
endif ${MAYBE_ENVIRONMENT_EXEC} cargo test --no-default-features --features shutdown-tests --test shutdown -- --test-threads 4 ifeq ($(AUTODESPAWN), true) - ${MAYBE_ENVIRONMENT_EXEC} $(CONTAINER_TOOL)-compose stop + $(MAKE) -k stop-integration-kafka endif test-cli: ## Runs cli tests @@ -339,7 +685,10 @@ bench-wasm: $(WASM_MODULE_OUTPUTS) ### Run WASM benches check: ## Run prerequisite code checks ${MAYBE_ENVIRONMENT_EXEC} cargo check --all --no-default-features --features ${DEFAULT_FEATURES} -check-all: check-fmt check-clippy check-style check-markdown check-meta check-version check-examples check-component-features check-scripts ## Check everything +check-all: ## Check everything +check-all: check-fmt check-clippy check-style check-markdown check-meta +check-all: check-version check-examples check-component-features +check-all: check-scripts check-kubernetes-yaml check-component-features: ## Check that all component features are setup properly ${MAYBE_ENVIRONMENT_EXEC} ./scripts/check-component-features.sh @@ -371,6 +720,9 @@ check-scripts: ## Check that scipts do not have common mistakes check-helm: ## Check that the Helm Chart passes helm lint ${MAYBE_ENVIRONMENT_EXEC} helm lint distribution/helm/vector +check-kubernetes-yaml: ## Check that the generated Kubernetes YAML config is up to date + ${MAYBE_ENVIRONMENT_EXEC} ./scripts/kubernetes-yaml.sh check + ##@ Packaging package-all: package-archive-all package-deb-all package-rpm-all ## Build all packages @@ -385,21 +737,39 @@ package-x86_64-unknown-linux-gnu-all: package-archive-x86_64-unknown-linux-gnu p package-aarch64-unknown-linux-musl-all: package-archive-aarch64-unknown-linux-musl package-deb-aarch64 package-rpm-aarch64 # Build all aarch64 MUSL packages # archives +.PHONY: package-archive + +target/artifacts/vector-%.tar.gz: export TRIPLE :=$(@:target/artifacts/vector-%.tar.gz=%) +target/artifacts/vector-%.tar.gz: target/%/release/vector.tar.gz + @echo "Built to ${<}, relocating to ${@}" + @mkdir -p target/artifacts/ + @cp -v \ + ${<} \ + ${@} package-archive: build ## Build the Vector archive - $(RUN) package-archive + ${MAYBE_ENVIRONMENT_EXEC} ./scripts/package-archive.sh +.PHONY: package-archive-all package-archive-all: package-archive-x86_64-unknown-linux-musl package-archive-x86_64-unknown-linux-gnu package-archive-aarch64-unknown-linux-musl ## Build all archives +.PHONY: package-archive-x86_64-unknown-linux-musl package-archive-x86_64-unknown-linux-musl: build-x86_64-unknown-linux-musl ## Build the x86_64 archive $(RUN) package-archive-x86_64-unknown-linux-musl -package-archive-x86_64-unknown-linux-gnu: build-x86_64-unknown-linux-gnu ## Build the x86_64 archive - $(RUN) package-archive-x86_64-unknown-linux-gnu +.PHONY: package-archive-x86_64-unknown-linux-gnu +package-archive-x86_64-unknown-linux-gnu: target/artifacts/vector-x86_64-unknown-linux-gnu.tar.gz ## Build an archive of the x86_64-unknown-linux-gnu triple. + @echo "Output to ${<}." -package-archive-aarch64-unknown-linux-musl: build-aarch64-unknown-linux-musl ## Build the aarch64 archive +.PHONY: package-archive-aarch64-unknown-linux-musl +package-archive-aarch64-unknown-linux-musl: build-aarch64-unknown-linux-musl ## Build an archive of the aarch64-unknown-linux-gnu triple. $(RUN) package-archive-aarch64-unknown-linux-musl +.PHONY: package-archive-aarch64-unknown-linux-gnu +package-archive-aarch64-unknown-linux-gnu: target/artifacts/vector-aarch64-unknown-linux-gnu.tar.gz ## Build the aarch64 archive + @echo "Output to ${<}." 
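Illustrative sketch (not part of the upstream diff): the `target/artifacts/vector-%.tar.gz` pattern rule above only relocates a tarball that some earlier rule has already produced at `target/<triple>/release/vector.tar.gz`; the gnu `package-archive-*` targets then depend on the relocated artifact rather than on a compose service. Assuming those rules behave as written, a local run of the new flow might look like:

# Build the x86_64-unknown-linux-gnu archive via the new pattern rule and
# inspect the relocated artifact (target names and paths are taken from the
# Makefile rules above; the rule producing the release tarball itself lives
# outside this excerpt).
make package-archive-x86_64-unknown-linux-gnu
ls -lh target/artifacts/vector-x86_64-unknown-linux-gnu.tar.gz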
+ + # debs package-deb: ## Build the deb package @@ -500,9 +870,6 @@ verify-deb-artifact-on-ubuntu-18-04: package-deb-x86_64 ## Verify the deb packag verify-deb-artifact-on-ubuntu-20-04: package-deb-x86_64 ## Verify the deb package on Ubuntu 20.04 $(RUN) verify-deb-artifact-on-ubuntu-20-04 -verify-nixos: ## Verify that Vector can be built on NixOS - $(RUN) verify-nixos - ##@ Utility build-ci-docker-images: ## Rebuilds all Docker images used in CI @@ -536,6 +903,13 @@ version: ## Get the current Vector version git-hooks: ## Add Vector-local git hooks for commit sign-off @scripts/install-git-hooks.sh +update-kubernetes-yaml: ## Regenerate the Kubernetes YAML config + ${MAYBE_ENVIRONMENT_EXEC} ./scripts/kubernetes-yaml.sh update + +cargo-install-%: override TOOL = $(@:cargo-install-%=%) +cargo-install-%: + $(if $(findstring true,$(AUTOINSTALL)),${MAYBE_ENVIRONMENT_EXEC} cargo install ${TOOL} --quiet,) + .PHONY: ensure-has-wasm-toolchain ### Configures a wasm toolchain for test artifact building, if required ensure-has-wasm-toolchain: target/wasm32-wasi/.obtained target/wasm32-wasi/.obtained: @@ -544,4 +918,3 @@ target/wasm32-wasi/.obtained: ${MAYBE_ENVIRONMENT_EXEC} rustup target add wasm32-wasi @mkdir -p target/wasm32-wasi @touch target/wasm32-wasi/.obtained - diff --git a/benches/bench.rs b/benches/bench.rs index 64dd4d059f02c8..0c1713ec1b102b 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -15,7 +15,7 @@ use vector::transforms::{ Transform, }; use vector::{ - config::{self, TransformConfig, TransformContext}, + config::{self, log_schema, TransformConfig, TransformContext}, event::Event, sinks, sources, test_util::{next_addr, runtime, send_lines, start_topology, wait_for_tcp, CountReceiver}, @@ -638,7 +638,7 @@ fn benchmark_complex(c: &mut Criterion) { fn bench_elasticsearch_index(c: &mut Criterion) { use chrono::Utc; - use vector::{event, template::Template}; + use vector::template::Template; c.bench( "elasticsearch_indexes", @@ -648,7 +648,7 @@ fn bench_elasticsearch_index(c: &mut Criterion) { let mut event = Event::from("hello world"); event .as_mut_log() - .insert(event::log_schema().timestamp_key().clone(), Utc::now()); + .insert(log_schema().timestamp_key().clone(), Utc::now()); (Template::try_from("index-%Y.%m.%d").unwrap(), event) }, @@ -665,7 +665,7 @@ fn bench_elasticsearch_index(c: &mut Criterion) { let mut event = Event::from("hello world"); event .as_mut_log() - .insert(event::log_schema().timestamp_key().clone(), Utc::now()); + .insert(log_schema().timestamp_key().clone(), Utc::now()); (Template::try_from("index").unwrap(), event) }, diff --git a/benches/event.rs b/benches/event.rs index 794f71be34de19..e7352db9e5b82d 100644 --- a/benches/event.rs +++ b/benches/event.rs @@ -2,7 +2,8 @@ use bytes::Bytes; use criterion::{criterion_group, Criterion}; use serde_json::{json, Value}; use vector::{ - event::{self, Event, LogEvent}, + config::log_schema, + event::{Event, LogEvent}, transforms::{ json_parser::{JsonParser, JsonParserConfig}, Transform, @@ -88,7 +89,7 @@ fn create_event(json: Value) -> LogEvent { let mut event = Event::new_empty_log(); event .as_mut_log() - .insert(event::log_schema().message_key().clone(), s); + .insert(log_schema().message_key().clone(), s); let mut parser = JsonParser::from(JsonParserConfig::default()); parser.transform(event).unwrap().into_log() diff --git a/distribution/debian/scripts/postinst b/distribution/debian/scripts/postinst new file mode 100755 index 00000000000000..173eb54147ea60 --- /dev/null +++ 
b/distribution/debian/scripts/postinst @@ -0,0 +1,7 @@ +#!/bin/sh +set -e + +if command -v systemctl >/dev/null 2>&1 +then + systemctl daemon-reload +fi diff --git a/distribution/debian/scripts/preinst b/distribution/debian/scripts/preinst old mode 100644 new mode 100755 diff --git a/distribution/helm/vector/templates/configmap.yaml b/distribution/helm/vector/templates/configmap.yaml index ecc145ee24e0fa..3e99a3fa2d3fdb 100644 --- a/distribution/helm/vector/templates/configmap.yaml +++ b/distribution/helm/vector/templates/configmap.yaml @@ -6,7 +6,13 @@ metadata: labels: {{- include "vector.labels" . | nindent 4 }} data: - vector.toml: | + # We leave `vector.toml` file name available to let externally managed config + # maps to provide it. + managed.toml: | + # Configuration for vector. + # Docs: https://vector.dev/docs/ + + # Data dir is location controlled at the `DaemonSet`. data_dir = "{{ .Values.globalOptions.dataDir }}" {{- with .Values.logSchema }} @@ -18,6 +24,7 @@ data: {{- end }} {{- if .Values.kubernetesLogsSource.enabled }} + # Ingest logs from Kubernetes. [sources.{{ .Values.kubernetesLogsSource.sourceId }}] type = "kubernetes_logs" diff --git a/distribution/helm/vector/templates/daemonset.yaml b/distribution/helm/vector/templates/daemonset.yaml index 98d270d63cbe96..a5c6415555d862 100644 --- a/distribution/helm/vector/templates/daemonset.yaml +++ b/distribution/helm/vector/templates/daemonset.yaml @@ -37,6 +37,9 @@ spec: {{- toYaml .Values.securityContext | nindent 12 }} image: "{{ .Values.image.repository }}:{{ include "vector.imageTag" . }}" imagePullPolicy: "{{ .Values.image.pullPolicy }}" + args: + - --config + - /etc/vector/*.toml env: - name: VECTOR_SELF_NODE_NAME valueFrom: @@ -71,8 +74,8 @@ spec: - name: config-dir mountPath: /etc/vector readOnly: true - # Extra volumes. {{- with .Values.extraVolumeMounts }} + # Extra volumes. {{- toYaml . | nindent 12 }} {{- end }} terminationGracePeriodSeconds: 60 @@ -103,9 +106,21 @@ spec: path: /var/lib/vector/ # Vector config dir with a managed config map. - name: config-dir - configMap: - name: {{ include "vector.configMapName" . }} - # Extra volumes. + projected: + sources: + # Managed `ConfigMap`. + - configMap: + name: {{ include "vector.configMapName" . }} + optional: true + # Custom `ConfigMap`. + - configMap: + name: vector-config + optional: true + # Custom `Secret`. + - secret: + name: vector-config + optional: true {{- with .Values.extraVolumes }} + # Extra volumes. {{- toYaml . | nindent 8 }} {{- end }} diff --git a/distribution/helm/vector/templates/rbac.yaml b/distribution/helm/vector/templates/rbac.yaml index 6524384767bc3b..9143b1ddedecf5 100644 --- a/distribution/helm/vector/templates/rbac.yaml +++ b/distribution/helm/vector/templates/rbac.yaml @@ -1,4 +1,6 @@ {{- if .Values.rbac.enabled -}} +# Permissions to use Kubernetes API. +# Requires that RBAC authorization is enabled. 
apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: diff --git a/distribution/kubernetes/kustomization.yaml b/distribution/kubernetes/kustomization.yaml index 429692c75e4247..8ac8670ade83d3 100644 --- a/distribution/kubernetes/kustomization.yaml +++ b/distribution/kubernetes/kustomization.yaml @@ -1,5 +1,4 @@ namespace: vector resources: - - vector-global.yaml - - vector-namespaced.yaml + - vector.yaml diff --git a/distribution/kubernetes/vector-global.yaml b/distribution/kubernetes/vector-global.yaml deleted file mode 100644 index e391bce43829e7..00000000000000 --- a/distribution/kubernetes/vector-global.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# Permissions to use Kubernetes API. -# Requires that RBAC authorization is enabled. -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: vector -subjects: - - kind: ServiceAccount - name: default - namespace: vector -roleRef: - kind: ClusterRole - name: view - apiGroup: rbac.authorization.k8s.io diff --git a/distribution/kubernetes/vector-namespaced.yaml b/distribution/kubernetes/vector-namespaced.yaml deleted file mode 100644 index c92e2bc33c5cb3..00000000000000 --- a/distribution/kubernetes/vector-namespaced.yaml +++ /dev/null @@ -1,100 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: vector-daemonset-managed-config -data: - managed.toml: | - # Configuration for vector. - # Docs: https://vector.dev/docs/ - - # Data dir is location controlled at the `DaemonSet`. - data_dir = "/vector-data-dir" - - # Ingest logs from Kubernetes. - [sources.kubernetes_logs] - type = "kubernetes_logs" ---- -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: vector - labels: - k8s-app: vector -spec: - selector: - matchLabels: - name: vector - template: - metadata: - labels: - name: vector - vector.dev/exclude: "true" - spec: - containers: - - name: vector - image: timberio/vector:latest-alpine - args: - - --config - - /etc/vector/*.toml - env: - - name: VECTOR_SELF_NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - - name: VECTOR_SELF_POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: VECTOR_SELF_POD_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - # Set a reasonable log level to avoid issues with internal logs - # overwriting console output at E2E tests. Feel free to change at - # a real deployment. - - name: LOG - value: info - volumeMounts: - - name: var-log - mountPath: /var/log/ - readOnly: true - - name: var-lib - mountPath: /var/lib/ - readOnly: true - - name: data-dir - mountPath: /vector-data-dir/ - - name: config-dir - mountPath: /etc/vector/ - readOnly: true - terminationGracePeriodSeconds: 60 - tolerations: - # This toleration is to have the daemonset runnable on master nodes. - # Remove it if your masters can't run pods. - - key: node-role.kubernetes.io/master - effect: NoSchedule - volumes: - # Log directory. - - name: var-log - hostPath: - path: /var/log/ - # Docker and containerd log files in Kubernetes are symlinks to this folder. - - name: var-lib - hostPath: - path: /var/lib/ - # Vector will store it's data here. - - name: data-dir - hostPath: - path: /var/lib/vector/ - # Vector config files. 
- - name: config-dir - projected: - sources: - - configMap: - name: vector-daemonset-managed-config - optional: true - - configMap: - name: vector-config - optional: true - - secret: - name: vector-config - optional: true diff --git a/distribution/kubernetes/vector.yaml b/distribution/kubernetes/vector.yaml new file mode 100644 index 00000000000000..91da12a2c67ec1 --- /dev/null +++ b/distribution/kubernetes/vector.yaml @@ -0,0 +1,188 @@ +# This file is generated from the Helm Chart by "scripts/kubernetes-yaml.sh". +# You might want to use the Helm Chart, see "distribution/helm/vector" or the +# documentation on our website at https://vector.dev/docs. +# If you copied this file into you local setup - feel free to change it however +# you please. +# If you want to create a PR to the Vector's Kubernetes config - please do not +# edit this file directly. Instead, apply your changes to the Helm Chart. +--- +# Source: vector/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: vector + labels: + helm.sh/chart: vector-0.0.0 + app.kubernetes.io/name: vector + app.kubernetes.io/instance: vector + app.kubernetes.io/version: "0.0.0" + app.kubernetes.io/managed-by: Helm +--- +# Source: vector/templates/configmap.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: vector + labels: + helm.sh/chart: vector-0.0.0 + app.kubernetes.io/name: vector + app.kubernetes.io/instance: vector + app.kubernetes.io/version: "0.0.0" + app.kubernetes.io/managed-by: Helm +data: + # We leave `vector.toml` file name available to let externally managed config + # maps to provide it. + managed.toml: | + # Configuration for vector. + # Docs: https://vector.dev/docs/ + + # Data dir is location controlled at the `DaemonSet`. + data_dir = "/vector-data-dir" + [log_schema] + host_key = "host" + message_key = "message" + source_type_key = "source_type" + timestamp_key = "timestamp" + # Ingest logs from Kubernetes. + [sources.kubernetes_logs] + type = "kubernetes_logs" +--- +# Source: vector/templates/rbac.yaml +# Permissions to use Kubernetes API. +# Requires that RBAC authorization is enabled. 
+apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: vector +rules: + - apiGroups: + - "" + resources: + - pods + verbs: + - watch +--- +# Source: vector/templates/rbac.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: vector + labels: + helm.sh/chart: vector-0.0.0 + app.kubernetes.io/name: vector + app.kubernetes.io/instance: vector + app.kubernetes.io/version: "0.0.0" + app.kubernetes.io/managed-by: Helm +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: vector +subjects: + - kind: ServiceAccount + name: vector + namespace: vector +--- +# Source: vector/templates/daemonset.yaml +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: vector + labels: + helm.sh/chart: vector-0.0.0 + app.kubernetes.io/name: vector + app.kubernetes.io/instance: vector + app.kubernetes.io/version: "0.0.0" + app.kubernetes.io/managed-by: Helm +spec: + selector: + matchLabels: + app.kubernetes.io/name: vector + app.kubernetes.io/instance: vector + minReadySeconds: 1 + updateStrategy: + type: RollingUpdate + template: + metadata: + labels: + app.kubernetes.io/name: vector + app.kubernetes.io/instance: vector + vector.dev/exclude: "true" + spec: + serviceAccountName: vector + securityContext: + {} + containers: + - name: vector + securityContext: + {} + image: "timberio/vector:latest-alpine" + imagePullPolicy: "IfNotPresent" + args: + - --config + - /etc/vector/*.toml + env: + - name: VECTOR_SELF_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: VECTOR_SELF_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: VECTOR_SELF_POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: LOG + value: info + resources: + {} + volumeMounts: + # Host log directory mount. + - name: var-log + mountPath: /var/log/ + readOnly: true + # Host mount for docker and containerd log file symlinks. + - name: var-lib + mountPath: /var/lib + readOnly: true + # Vector data dir mount. + - name: data-dir + mountPath: "/vector-data-dir" + # Vector config dir mount. + - name: config-dir + mountPath: /etc/vector + readOnly: true + terminationGracePeriodSeconds: 60 + tolerations: + - effect: NoSchedule + key: node-role.kubernetes.io/master + volumes: + # Log directory. + - name: var-log + hostPath: + path: /var/log/ + # Docker and containerd log files in Kubernetes are symlinks to this folder. + - name: var-lib + hostPath: + path: /var/lib/ + # Vector will store it's data here. + - name: data-dir + hostPath: + path: /var/lib/vector/ + # Vector config dir with a managed config map. + - name: config-dir + projected: + sources: + # Managed `ConfigMap`. + - configMap: + name: vector + optional: true + # Custom `ConfigMap`. + - configMap: + name: vector-config + optional: true + # Custom `Secret`. + - secret: + name: vector-config + optional: true diff --git a/distribution/rpm/vector.spec b/distribution/rpm/vector.spec index c5c818f5264e22..5da98c1154c597 100644 --- a/distribution/rpm/vector.spec +++ b/distribution/rpm/vector.spec @@ -22,7 +22,7 @@ URL: %{_url} %prep # We are currently in the BUILD dir -tar -xvf %{_sourcedir}/%{_source} --strip-components=2 +tar -xvf %{_sourcedir}/%{_source} --strip-components=1 cp -a %{_sourcedir}/systemd/. systemd %install @@ -33,7 +33,7 @@ mkdir -p %{buildroot}%{_bindir} mkdir -p %{buildroot}%{_sysconfdir}/%{_name} mkdir -p %{buildroot}%{_sharedstatedir}/%{_name} mkdir -p %{buildroot}%{_unitdir} -cp -a %{_builddir}/bin/. 
%{buildroot}%{_bindir} +cp -a %{_builddir}/bin/vector %{buildroot}%{_bindir} cp -a %{_builddir}/config/vector.toml %{buildroot}%{_sysconfdir}/%{_name}/vector.toml cp -a %{_builddir}/config/vector.spec.toml %{buildroot}%{_sysconfdir}/%{_name}/vector.spec.toml cp -a %{_builddir}/config/examples/. %{buildroot}%{_sysconfdir}/%{_name}/examples diff --git a/docker-compose.yml b/docker-compose.yml index 2f32403b9e0d20..c38501e80ede82 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -22,26 +22,6 @@ services: user: $USER command: scripts/build.sh - build-x86_64-unknown-linux-gnu: - build: - context: . - dockerfile: scripts/ci-docker-images/builder-x86_64-unknown-linux-gnu/Dockerfile - args: - USER: $USER - environment: - NATIVE_BUILD: "false" - TARGET: x86_64-unknown-linux-gnu - FEATURES: default - CARGO_TERM_COLOR: always - volumes: - - $PWD:$PWD - - ./target/x86_64-unknown-linux-gnu/cargo/registry:/usr/local/cargo/registry - - ./target/x86_64-unknown-linux-gnu/cargo/git:/usr/local/cargo/git - - ./target/x86_64-unknown-linux-gnu/rustup/tmp:/usr/local/rustup/tmp - working_dir: $PWD - user: $USER - command: scripts/build.sh - build-aarch64-unknown-linux-musl: build: context: . @@ -78,26 +58,6 @@ services: user: $USER command: ./scripts/package-archive.sh - package-archive-x86_64-unknown-linux-gnu: - build: - context: . - dockerfile: scripts/ci-docker-images/builder-x86_64-unknown-linux-gnu/Dockerfile - args: - USER: $USER - environment: - NATIVE_BUILD: "false" - TARGET: x86_64-unknown-linux-gnu - FEATURES: "" - CARGO_TERM_COLOR: always - volumes: - - $PWD:$PWD - - ./target/x86_64-unknown-linux-gnu/cargo/registry:/usr/local/cargo/registry - - ./target/x86_64-unknown-linux-gnu/cargo/git:/usr/local/cargo/git - - ./target/x86_64-unknown-linux-gnu/rustup/tmp:/usr/local/rustup/tmp - working_dir: $PWD - user: $USER - command: ./scripts/package-archive.sh - package-archive-aarch64-unknown-linux-musl: build: context: . 
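Illustrative sketch (not part of the upstream diff): the deleted `build-x86_64-unknown-linux-gnu` and `package-archive-x86_64-unknown-linux-gnu` compose services simply ran `scripts/build.sh` and `scripts/package-archive.sh` with a fixed environment. A rough shell equivalent of the removed packager service, with the variable values copied from the deleted service definition above, would be:

# What the removed compose service effectively did, minus the dedicated
# builder image; NATIVE_BUILD/TARGET/FEATURES/CARGO_TERM_COLOR come from the
# deleted YAML above, and a suitably configured host toolchain is assumed.
export NATIVE_BUILD=false TARGET=x86_64-unknown-linux-gnu FEATURES="" CARGO_TERM_COLOR=always
./scripts/package-archive.sh

Going forward, this target is instead expected to be built and archived through the Makefile's `target/artifacts` pattern rules shown earlier.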
@@ -297,209 +257,3 @@ services: dockerfile: scripts/ci-docker-images/loader-qemu-binfmt/Dockerfile privileged: true command: dpkg-reconfigure qemu-user-binfmt - - # - # Dependencies - # - - dependencies-all: - image: ubuntu:18.04 - command: sleep infinity - depends_on: - - dependencies-aws - - dependencies-clickhouse - - dependencies-elasticsearch - - dependencies-gcp - - dependencies-humio - - dependencies-influxdb - - dependencies-kafka - - dependencies-loki - - dependencies-pulsar - - dependencies-splunk - - dependencies-aws: - image: ubuntu:18.04 - command: sleep infinity - depends_on: - - ec2_metadata - - localstack - - mockwatchlogs - - dependencies-clickhouse: - image: ubuntu:18.04 - command: sleep infinity - depends_on: - - clickhouse - - dependencies-elasticsearch: - image: ubuntu:18.04 - command: sleep infinity - depends_on: - - elasticsearch - - elasticsearch-tls - - localstack - - dependencies-gcp: - image: ubuntu:18.04 - command: sleep infinity - depends_on: - - gcloud-pubsub - - dependencies-humio: - image: ubuntu:18.04 - command: sleep infinity - depends_on: - - humio - - dependencies-influxdb: - image: ubuntu:18.04 - command: sleep infinity - depends_on: - - influxdb_v1 - - influxdb_v2 - - dependencies-kafka: - image: ubuntu:18.04 - command: sleep infinity - depends_on: - - kafka - - zookeeper - - dependencies-loki: - image: ubuntu:18.04 - command: sleep infinity - depends_on: - - loki - - dependencies-pulsar: - image: ubuntu:18.04 - command: sleep infinity - depends_on: - - pulsar - - dependencies-splunk: - image: ubuntu:18.04 - command: sleep infinity - depends_on: - - splunk - - # - # Services - # - - localstack: - image: localstack/localstack@sha256:f21f1fc770ee4bfd5012afdc902154c56b7fb18c14cf672de151b65569c8251e - ports: - - "4568:4568" - - "4572:4572" - - "4582:4582" - - "4571:4571" - - "4573:4573" - environment: - SERVICES: kinesis:4568,s3:4572,cloudwatch:4582,elasticsearch:4571,firehose:4573 - mockwatchlogs: - image: luciofranco/mockwatchlogs:latest - ports: - - "6000:6000" - environment: - RUST_LOG: trace - zookeeper: - image: wurstmeister/zookeeper - ports: - - "2181:2181" - kafka: - image: wurstmeister/kafka - ports: - - "9091-9093:9091-9093" - environment: - KAFKA_BROKER_ID: 1 - KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 - KAFKA_LISTENERS: PLAINTEXT://:9091,SSL://:9092,SASL_PLAINTEXT://:9093 - KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://localhost:9091,SSL://localhost:9092,SASL_PLAINTEXT://localhost:9093 - KAFKA_SSL_KEYSTORE_LOCATION: /certs/localhost.p12 - KAFKA_SSL_KEYSTORE_PASSWORD: NOPASS - KAFKA_SSL_TRUSTSTORE_LOCATION: /certs/localhost.p12 - KAFKA_SSL_TRUSTSTORE_PASSWORD: NOPASS - KAFKA_SSL_KEY_PASSWORD: NOPASS - KAFKA_SSL_ENDPOINT_IDENTIFICATION_ALGORITHM: none - KAFKA_OPTS: "-Djava.security.auth.login.config=/etc/kafka/kafka_server_jaas.conf" - KAFKA_INTER_BROKER_LISTENER_NAME: SASL_PLAINTEXT - KAFKA_SASL_ENABLED_MECHANISMS: PLAIN - KAFKA_SASL_MECHANISM_INTER_BROKER_PROTOCOL: PLAIN - volumes: - - ./tests/data/localhost.p12:/certs/localhost.p12:ro - - ./tests/data/kafka_server_jaas.conf:/etc/kafka/kafka_server_jaas.conf - pulsar: - image: apachepulsar/pulsar - command: bin/pulsar standalone - ports: - - "6650:6650" - splunk: - image: timberio/splunk-hec-test:latest - ports: - - "8088:8088" - - "8000:8000" - - "8089:8089" - entrypoint: - [ - "sh", - "-c", - "./bin/splunk add index custom_index && ./bin/splunk set minfreemb 5 && ./entrypoint.sh", - ] - elasticsearch: - image: elasticsearch:6.6.2 - ports: - - "9200:9200" - - "9300:9300" - environment: - - 
discovery.type=single-node - elasticsearch-tls: - image: elasticsearch:6.6.2 - ports: - - "9201:9200" - - "9301:9300" - environment: - - discovery.type=single-node - - xpack.security.enabled=true - - xpack.security.http.ssl.enabled=true - - xpack.security.transport.ssl.enabled=true - - xpack.ssl.certificate=certs/localhost.crt - - xpack.ssl.key=certs/localhost.key - volumes: - - ./tests/data:/usr/share/elasticsearch/config/certs:ro - clickhouse: - image: yandex/clickhouse-server:19 - ports: - - "8123:8123" - ec2_metadata: - image: timberiodev/mock-ec2-metadata:latest - ports: - - "8111:8111" - gcloud-pubsub: - image: messagebird/gcloud-pubsub-emulator - ports: - - 8681-8682:8681-8682 - environment: - - PUBSUB_PROJECT1=testproject,topic1:subscription1 - loki: - image: grafana/loki:master - ports: - - "3100:3100" - command: -config.file=/etc/loki/loki-config.yaml - volumes: - - ./tests/data:/etc/loki - influxdb_v1: - image: influxdb:1.7 - ports: - - "8086:8086" - environment: - - INFLUXDB_REPORTING_DISABLED=true - influxdb_v2: - image: quay.io/influxdb/influxdb:2.0.0-beta - ports: - - "9999:9999" - command: influxd --reporting-disabled - humio: - image: humio/humio:1.13.1 - ports: - - "8080:8080" diff --git a/kustomization.yaml b/kustomization.yaml index 0ad86d586077de..f8976946473736 100644 --- a/kustomization.yaml +++ b/kustomization.yaml @@ -3,11 +3,12 @@ namespace: vector +bases: + - distribution/kubernetes + resources: - - distribution/kubernetes/vector-global.yaml - skaffold/manifests/namespace.yaml - skaffold/manifests/config.yaml - - distribution/kubernetes/vector-namespaced.yaml patchesStrategicMerge: - skaffold/manifests/patches/env.yaml diff --git a/scripts/Gemfile b/scripts/Gemfile index afffc9ac8e0b00..dc382354a0ebff 100644 --- a/scripts/Gemfile +++ b/scripts/Gemfile @@ -7,6 +7,5 @@ ruby '~> 2.7.0' source 'https://rubygems.org' gem 'git', '~> 1.7.0' # for scripts/check-version.rb -gem 'mdl', '~> 0.9' # for scripts/check-markdown.sh gem 'semantic', '~> 1.6.1' # for scripts/check-version.rb gem 'toml-rb', '~> 2.0' # for scripts/check-meta.rb diff --git a/scripts/Gemfile.lock b/scripts/Gemfile.lock index bfe8facf50997d..6deecf68f011ff 100644 --- a/scripts/Gemfile.lock +++ b/scripts/Gemfile.lock @@ -4,31 +4,16 @@ GEM citrus (3.0.2) git (1.7.0) rchardet (~> 1.8) - kramdown (2.3.0) - rexml - kramdown-parser-gfm (1.1.0) - kramdown (~> 2.0) - mdl (0.9.0) - kramdown (~> 2.0) - kramdown-parser-gfm (~> 1.0) - mixlib-cli (~> 2.1, >= 2.1.1) - mixlib-config (>= 2.2.1, < 4) - mixlib-cli (2.1.6) - mixlib-config (3.0.6) - tomlrb rchardet (1.8.0) - rexml (3.2.4) semantic (1.6.1) toml-rb (2.0.1) citrus (~> 3.0, > 3.0) - tomlrb (1.3.0) PLATFORMS ruby DEPENDENCIES git (~> 1.7.0) - mdl (~> 0.9) semantic (~> 1.6.1) toml-rb (~> 2.0) diff --git a/scripts/check-code.sh b/scripts/check-code.sh deleted file mode 100755 index 24945487ee3080..00000000000000 --- a/scripts/check-code.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash -set -euo pipefail - -# check-code.sh -# -# SUMMARY -# -# Checks all Vector code - -cargo check --workspace --all-targets diff --git a/scripts/deploy-kubernetes-test.sh b/scripts/deploy-kubernetes-test.sh index ef094dec529d40..d09f2d6ff40639 100755 --- a/scripts/deploy-kubernetes-test.sh +++ b/scripts/deploy-kubernetes-test.sh @@ -38,42 +38,38 @@ CUSTOM_RESOURCE_CONFIGS_FILE="${CUSTOM_RESOURCE_CONFIGS_FILE:-""}" # TODO: replace with `helm template | kubectl apply -f -` when Helm Chart is # available. 
-templated-config-global() { - sed "s|^ namespace: vector| namespace: $NAMESPACE|" < "distribution/kubernetes/vector-global.yaml" \ - | sed "s|^ name: vector| name: $NAMESPACE|" +templated-config() { + cat < "distribution/kubernetes/vector.yaml" \ + | sed "s|^ namespace: vector| namespace: $NAMESPACE|" } up() { # A Vector container image to use. CONTAINER_IMAGE="${CONTAINER_IMAGE:?"You must assign CONTAINER_IMAGE variable with the Vector container image name"}" - templated-config-global | $VECTOR_TEST_KUBECTL create -f - - $VECTOR_TEST_KUBECTL create namespace "$NAMESPACE" if [[ -n "$CUSTOM_RESOURCE_CONFIGS_FILE" ]]; then $VECTOR_TEST_KUBECTL create --namespace "$NAMESPACE" -f "$CUSTOM_RESOURCE_CONFIGS_FILE" fi - sed 's|image: timberio/vector:[^$]*$'"|image: $CONTAINER_IMAGE|" < "distribution/kubernetes/vector-namespaced.yaml" \ + templated-config \ + | sed -E 's|image: "?timberio/vector:[^$]*$'"|image: $CONTAINER_IMAGE|" \ | $VECTOR_TEST_KUBECTL create --namespace "$NAMESPACE" -f - } down() { - # A workaround for `kubectl` from a `snap` package. - cat < "distribution/kubernetes/vector-namespaced.yaml" | $VECTOR_TEST_KUBECTL delete --namespace "$NAMESPACE" -f - - if [[ -n "$CUSTOM_RESOURCE_CONFIGS_FILE" ]]; then $VECTOR_TEST_KUBECTL delete --namespace "$NAMESPACE" -f "$CUSTOM_RESOURCE_CONFIGS_FILE" fi - templated-config-global | $VECTOR_TEST_KUBECTL delete -f - + templated-config | $VECTOR_TEST_KUBECTL delete --namespace "$NAMESPACE" -f - $VECTOR_TEST_KUBECTL delete namespace "$NAMESPACE" } case "$COMMAND" in - up|down) + up|down|templated-config) "$COMMAND" "$@" ;; *) diff --git a/scripts/docker-compose-run.sh b/scripts/docker-compose-run.sh index 5dc1b24e337891..1a709883984815 100755 --- a/scripts/docker-compose-run.sh +++ b/scripts/docker-compose-run.sh @@ -9,12 +9,16 @@ set -euo pipefail SERVICE="$1" -DOCKER="${USE_CONTAINER:-"docker"}" -COMPOSE="${COMPOSE:-"${DOCKER}-compose"}" +cd "$(dirname "${BASH_SOURCE[0]}")/.." + +# A workaround to prevent docker from creating directories at `./target` as +# root. +# Ran unconditionally for consistency between docker and bare execution. +scripts/prepare-target-dir.sh USER="$(id -u):$(id -g)" export USER -$COMPOSE rm -svf "$SERVICE" 2>/dev/null || true -$COMPOSE up --build --abort-on-container-exit --exit-code-from "$SERVICE" "$SERVICE" \ +docker-compose rm -svf "$SERVICE" 2>/dev/null || true +docker-compose up --build --abort-on-container-exit --exit-code-from "$SERVICE" "$SERVICE" \ | sed $'s/^.*container exit...$/\033[0m\033[1A/' diff --git a/scripts/docker-run.sh b/scripts/docker-run.sh index 0cd051f697e52c..2f93356a8825ff 100755 --- a/scripts/docker-run.sh +++ b/scripts/docker-run.sh @@ -26,15 +26,14 @@ fi # Variables # -DOCKER="${USE_CONTAINER:-"docker"}" TAG="$1" IMAGE="timberiodev/vector-$TAG:latest" # # (Re)Build # -if ! $DOCKER inspect "$IMAGE" >/dev/null 2>&1 || [ "${REBUILD_CONTAINER_IMAGE:-"true"}" == "true" ]; then - $DOCKER build \ +if ! docker inspect "$IMAGE" >/dev/null 2>&1 || [ "${REBUILD_CONTAINER_IMAGE:-"true"}" == "true" ]; then + docker build \ --file "scripts/ci-docker-images/$TAG/Dockerfile" \ --tag "$IMAGE" \ . 
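Illustrative sketch (not part of the upstream diff): with the `USE_CONTAINER`/`$DOCKER` indirection removed, `scripts/docker-run.sh` always talks to `docker` directly; its first argument still selects a CI image directory under `scripts/ci-docker-images/`, and the image is rebuilt unless it already exists locally and `REBUILD_CONTAINER_IMAGE` is set to `false`. The tag below is a placeholder, since the rest of the invocation contract sits outside this excerpt:

# <some-ci-image-tag> must match a directory under scripts/ci-docker-images/;
# any remaining arguments are handled by the part of the script not shown in
# this excerpt.
REBUILD_CONTAINER_IMAGE=false scripts/docker-run.sh <some-ci-image-tag>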
@@ -69,7 +68,7 @@ for LINE in $(env | grep '^PASS_' | sed 's/^PASS_//'); do done unset IFS -$DOCKER run \ +docker run \ "${DOCKER_FLAGS[@]}" \ -w "$PWD" \ -v "$PWD":"$PWD" \ diff --git a/scripts/environment/bootstrap-ubuntu-20.04.sh b/scripts/environment/bootstrap-ubuntu-20.04.sh index dbb8eaf7056561..03360f06ae0ec2 100755 --- a/scripts/environment/bootstrap-ubuntu-20.04.sh +++ b/scripts/environment/bootstrap-ubuntu-20.04.sh @@ -55,3 +55,6 @@ add-apt-repository \ # Install those new things apt update --yes apt install --yes yarn docker-ce docker-ce-cli containerd.io + +# Apt cleanup +apt clean diff --git a/scripts/kubernetes-yaml.sh b/scripts/kubernetes-yaml.sh new file mode 100755 index 00000000000000..00f7e619b9e350 --- /dev/null +++ b/scripts/kubernetes-yaml.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash +set -euo pipefail + +# kubernetes-yaml.sh +# +# SUMMARY +# +# Manages the Kubernetes distribution YAML configs. +# See usage function in the code or run without arguments. + +cd "$(dirname "${BASH_SOURCE[0]}")/.." + +TARGET_FILE="distribution/kubernetes/vector.yaml" + +generate() { + # Print header. + cat < "$TARGET_FILE" +} + +check() { + GENERATED="$(generate)" + FILE="$(cat "$TARGET_FILE")" + + if [[ "$GENERATED" != "$FILE" ]]; then + echo "Error: Kubernetes YAML config ($TARGET_FILE) does not match the generated version" >&2 + exit 1 + fi +} + +usage() { + cat >&2 <<-EOF +Usage: $0 MODE + +Modes: + check - compare the current file contents and the generated config and + exit with non-zero exit code if they don't match + update - update the file with the generated config + generate - generate the config and print it to stdout +EOF + exit 1 +} + +MODE="${1:-}" +case "$MODE" in + update|check|generate) + "$MODE" + ;; + *) + usage + ;; +esac diff --git a/scripts/kubernetes-yaml/values.yaml b/scripts/kubernetes-yaml/values.yaml new file mode 100644 index 00000000000000..9de35849c347bb --- /dev/null +++ b/scripts/kubernetes-yaml/values.yaml @@ -0,0 +1,18 @@ +image: + # Use the latest release image. + version: latest + # Use the alpine flavor of vector image. + base: alpine + +env: + # Set a reasonable log level to avoid issues with internal logs + # overwriting console output at E2E tests. Feel free to change at + # a real deployment. + - name: LOG + value: info + +tolerations: + # This toleration is to have the daemonset runnable on master nodes. + # Remove it if your masters can't run pods. + - key: node-role.kubernetes.io/master + effect: NoSchedule diff --git a/scripts/run.sh b/scripts/run.sh deleted file mode 100755 index 9c2853c9a4334c..00000000000000 --- a/scripts/run.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# run.sh -# -# SUMMARY -# -# A simple script that runs a make target in a container environment based -# on the presence of the `USE_CONTAINER` environment variable. -# -# This helps to reduce friction for first-time contributors, since running -# basic commands through containers ensures they work. It is recommended -# that frequent contributors setup local environments to improve the speed -# of commands they are running frequently. This can be achieved by setting -# USE_CONTAINER to none: -# -# export USE_CONTAINER=none -# - -cd "$(dirname "${BASH_SOURCE[0]}")/.." - -# A workaround to prevent docker from creating directories at `./target` as -# root. -# Ran unconditionally for consistency between docker and bare execution. -scripts/prepare-target-dir.sh - -case "$USE_CONTAINER" in - docker | podman) - echo "Executing within $USE_CONTAINER. 
To disable set USE_CONTAINER to none" - echo "" - echo " make ... USE_CONTAINER=none" - echo "" - - scripts/docker-compose-run.sh "$1" - ;; - - *) - echo "Executing locally. To use Docker set USE_CONTAINER to docker or podman" - echo "" - echo " make ... USE_CONTAINER=docker" - echo " make ... USE_CONTAINER=podman" - echo "" - - FILE=$(find ./scripts -name "${1}.*") - - if [ -z "$FILE" ]; then - echo "Local invocation failed. Script not found!" - echo "" - echo " scripts/${1}.*" - echo "" - echo "To run the ${1} target locally you must place a script in the" - echo "/scripts folder that can be executed. Otherwise, you can use the" - echo "service defined in /docker-compose.yml." - exit 1 - fi - - ${FILE} - ;; -esac diff --git a/scripts/test-e2e-kubernetes.sh b/scripts/test-e2e-kubernetes.sh index ea29395b14e855..e67a91df000ee9 100755 --- a/scripts/test-e2e-kubernetes.sh +++ b/scripts/test-e2e-kubernetes.sh @@ -89,7 +89,7 @@ if [[ -z "${CONTAINER_IMAGE:-}" ]]; then else # Package a .deb file to build a docker container, unless skipped. if [[ -z "${SKIP_PACKAGE_DEB:-}" ]]; then - make package-deb-x86_64 USE_CONTAINER="${PACKAGE_DEB_USE_CONTAINER:-"docker"}" + make package-deb-x86_64 fi # Prepare test image parameters. diff --git a/scripts/test-integration-aws.sh b/scripts/test-integration-aws.sh deleted file mode 100755 index ce1d60c7d98d25..00000000000000 --- a/scripts/test-integration-aws.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# test-integration-aws.sh -# -# SUMMARY -# -# Run integration tests for AWS components only. - -docker-compose up -d dependencies-aws -cargo test --no-default-features --features aws-integration-tests diff --git a/scripts/test-integration-clickhouse.sh b/scripts/test-integration-clickhouse.sh deleted file mode 100755 index 86e0ba60a9cd5d..00000000000000 --- a/scripts/test-integration-clickhouse.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# test-integration-clickhouse.sh -# -# SUMMARY -# -# Run integration tests for Clickhouse components only. - -docker-compose up -d dependencies-clickhouse -cargo test --no-default-features --features clickhouse-integration-tests diff --git a/scripts/test-integration-docker.sh b/scripts/test-integration-docker.sh deleted file mode 100755 index d34ba58bb72e01..00000000000000 --- a/scripts/test-integration-docker.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# test-integration-docker.sh -# -# SUMMARY -# -# Run integration tests for Docker components only. - -cargo test --no-default-features --features docker-integration-tests diff --git a/scripts/test-integration-elasticsearch.sh b/scripts/test-integration-elasticsearch.sh deleted file mode 100755 index fc750e52c9e2bf..00000000000000 --- a/scripts/test-integration-elasticsearch.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# test-integration-elasticsearch.sh -# -# SUMMARY -# -# Run integration tests for Elasticsearch components only. - -docker-compose up -d dependencies-elasticsearch -cargo test --no-default-features --features es-integration-tests diff --git a/scripts/test-integration-gcp.sh b/scripts/test-integration-gcp.sh deleted file mode 100755 index 6373400dc13bcc..00000000000000 --- a/scripts/test-integration-gcp.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# test-integration-gcp.sh -# -# SUMMARY -# -# Run integration tests for GCP components only. 
- -docker-compose up -d dependencies-gcp -cargo test --no-default-features --features gcp-integration-tests diff --git a/scripts/test-integration-humio.sh b/scripts/test-integration-humio.sh deleted file mode 100755 index e77823dc20910a..00000000000000 --- a/scripts/test-integration-humio.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# test-integration-humio.sh -# -# SUMMARY -# -# Run integration tests for Humio components only. - -docker-compose up -d dependencies-humio -cargo test --no-default-features --features humio-integration-tests diff --git a/scripts/test-integration-influxdb.sh b/scripts/test-integration-influxdb.sh deleted file mode 100755 index 29bc4c34a9d39a..00000000000000 --- a/scripts/test-integration-influxdb.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# test-integration-influxdb.sh -# -# SUMMARY -# -# Run integration tests for InfluxDB components only. - -docker-compose up -d dependencies-influxdb -cargo test --no-default-features --features influxdb-integration-tests diff --git a/scripts/test-integration-kafka.sh b/scripts/test-integration-kafka.sh deleted file mode 100755 index a8e06338c41d35..00000000000000 --- a/scripts/test-integration-kafka.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# test-integration-kafka.sh -# -# SUMMARY -# -# Run integration tests for Kafka components only. - -docker-compose up -d dependencies-kafka -cargo test --no-default-features --features kafka-integration-tests,rdkafka-plain diff --git a/scripts/test-integration-loki.sh b/scripts/test-integration-loki.sh deleted file mode 100755 index 01e0a9a0555163..00000000000000 --- a/scripts/test-integration-loki.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# test-integration-loki.sh -# -# SUMMARY -# -# Run integration tests for Loki components only. - -docker-compose up -d dependencies-loki -cargo test --no-default-features --features loki-integration-tests diff --git a/scripts/test-integration-pulsar.sh b/scripts/test-integration-pulsar.sh deleted file mode 100755 index 4b93d52efc3e93..00000000000000 --- a/scripts/test-integration-pulsar.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# test-integration-pulsar.sh -# -# SUMMARY -# -# Run integration tests for Pulsar components only. - -docker-compose up -d dependencies-pulsar -cargo test --no-default-features --features pulsar-integration-tests diff --git a/scripts/test-integration-splunk.sh b/scripts/test-integration-splunk.sh deleted file mode 100755 index 38deea0bb3a5c5..00000000000000 --- a/scripts/test-integration-splunk.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# test-integration-splunk.sh -# -# SUMMARY -# -# Run integration tests for Splunk components only. - -docker-compose up -d dependencies-splunk -cargo test --no-default-features --features splunk-integration-tests diff --git a/src/event/log_schema.rs b/src/config/log_schema.rs similarity index 70% rename from src/event/log_schema.rs rename to src/config/log_schema.rs index ad9dcf0b1b15b7..b6a0e5706ea702 100644 --- a/src/event/log_schema.rs +++ b/src/config/log_schema.rs @@ -1,15 +1,18 @@ use getset::{Getters, Setters}; -use lazy_static::lazy_static; use once_cell::sync::OnceCell; use serde::{Deserialize, Serialize}; -use string_cache::DefaultAtom as Atom; +use string_cache::DefaultAtom; pub static LOG_SCHEMA: OnceCell = OnceCell::new(); -lazy_static! 
{ - static ref LOG_SCHEMA_DEFAULT: LogSchema = LogSchema::default(); +lazy_static::lazy_static! { + static ref LOG_SCHEMA_DEFAULT: LogSchema = LogSchema { + message_key: DefaultAtom::from("message"), + timestamp_key: DefaultAtom::from("timestamp"), + host_key: DefaultAtom::from("host"), + source_type_key: DefaultAtom::from("source_type"), + }; } - pub fn log_schema() -> &'static LogSchema { LOG_SCHEMA.get().unwrap_or(&LOG_SCHEMA_DEFAULT) } @@ -19,16 +22,16 @@ pub fn log_schema() -> &'static LogSchema { pub struct LogSchema { #[serde(default = "LogSchema::default_message_key")] #[getset(get = "pub", set = "pub(crate)")] - message_key: Atom, + message_key: DefaultAtom, #[serde(default = "LogSchema::default_timestamp_key")] #[getset(get = "pub", set = "pub(crate)")] - timestamp_key: Atom, + timestamp_key: DefaultAtom, #[serde(default = "LogSchema::default_host_key")] #[getset(get = "pub", set = "pub(crate)")] - host_key: Atom, + host_key: DefaultAtom, #[serde(default = "LogSchema::default_source_type_key")] #[getset(get = "pub", set = "pub(crate)")] - source_type_key: Atom, + source_type_key: DefaultAtom, } impl Default for LogSchema { @@ -43,20 +46,17 @@ impl Default for LogSchema { } impl LogSchema { - fn default_message_key() -> Atom { - Atom::from("message") + fn default_message_key() -> DefaultAtom { + DefaultAtom::from("message") } - - fn default_timestamp_key() -> Atom { - Atom::from("timestamp") + fn default_timestamp_key() -> DefaultAtom { + DefaultAtom::from("timestamp") } - - fn default_host_key() -> Atom { - Atom::from("host") + fn default_host_key() -> DefaultAtom { + DefaultAtom::from("host") } - - fn default_source_type_key() -> Atom { - Atom::from("source_type") + fn default_source_type_key() -> DefaultAtom { + DefaultAtom::from("source_type") } pub fn merge(&mut self, other: LogSchema) -> Result<(), Vec> { @@ -94,3 +94,17 @@ impl LogSchema { } } } + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn partial_log_schema() { + let toml = r#" + message_key = "message" + timestamp_key = "timestamp" + "#; + let _ = toml::from_str::(toml).unwrap(); + } +} diff --git a/src/config/mod.rs b/src/config/mod.rs index 36f70e9949dffd..bf124f0dab1dcf 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -1,10 +1,6 @@ use crate::{ - buffers::Acker, - conditions, - dns::Resolver, - event::{self, Metric}, - shutdown::ShutdownSignal, - sinks, sources, transforms, Pipeline, + buffers::Acker, conditions, dns::Resolver, event::Metric, shutdown::ShutdownSignal, sinks, + sources, transforms, Pipeline, }; use component::ComponentDescription; use indexmap::IndexMap; // IndexMap preserves insertion order, allowing us to output errors in the same order they are present in the file @@ -18,6 +14,7 @@ mod compiler; pub mod component; mod diff; mod loading; +mod log_schema; mod unit_test; mod validation; mod vars; @@ -26,6 +23,7 @@ pub mod watcher; pub use builder::ConfigBuilder; pub use diff::ConfigDiff; pub use loading::{load_from_paths, load_from_str, process_paths, CONFIG_PATHS}; +pub use log_schema::{log_schema, LogSchema, LOG_SCHEMA}; pub use unit_test::build_unit_tests_main as build_unit_tests; pub use validation::warnings; @@ -47,7 +45,7 @@ pub struct GlobalOptions { skip_serializing_if = "crate::serde::skip_serializing_if_default", default )] - pub log_schema: event::LogSchema, + pub log_schema: LogSchema, } pub fn default_data_dir() -> Option { diff --git a/src/config/unit_test.rs b/src/config/unit_test.rs index 292c873657452e..1af2b46a3ed4a3 100644 --- a/src/config/unit_test.rs +++ 
b/src/config/unit_test.rs @@ -14,7 +14,7 @@ pub fn build_unit_tests_main(path: PathBuf) -> Result, Vec let config = super::loading::load_builder_from_paths(&[path])?; // Ignore failures on calls other than the first - crate::event::LOG_SCHEMA + crate::config::LOG_SCHEMA .set(config.global.log_schema.clone()) .ok(); diff --git a/src/event/log_event.rs b/src/event/log_event.rs new file mode 100644 index 00000000000000..c0690805133ffd --- /dev/null +++ b/src/event/log_event.rs @@ -0,0 +1,179 @@ +use crate::event::{util, PathComponent, Value}; +use serde::{Serialize, Serializer}; +use std::collections::{BTreeMap, HashMap}; +use std::convert::{TryFrom, TryInto}; +use std::iter::FromIterator; +use string_cache::DefaultAtom; + +#[derive(PartialEq, Debug, Clone, Default)] +pub struct LogEvent { + fields: BTreeMap, +} + +impl LogEvent { + pub fn get(&self, key: &DefaultAtom) -> Option<&Value> { + util::log::get(&self.fields, key) + } + + pub fn get_flat(&self, key: impl AsRef) -> Option<&Value> { + self.fields.get(key.as_ref()) + } + + pub fn get_mut(&mut self, key: &DefaultAtom) -> Option<&mut Value> { + util::log::get_mut(&mut self.fields, key) + } + + pub fn contains(&self, key: &DefaultAtom) -> bool { + util::log::contains(&self.fields, key) + } + + pub fn insert(&mut self, key: K, value: V) -> Option + where + K: AsRef, + V: Into, + { + util::log::insert(&mut self.fields, key.as_ref(), value.into()) + } + + pub fn insert_path(&mut self, key: Vec, value: V) -> Option + where + V: Into, + { + util::log::insert_path(&mut self.fields, key, value.into()) + } + + pub fn insert_flat(&mut self, key: K, value: V) + where + K: Into, + V: Into, + { + self.fields.insert(key.into(), value.into()); + } + + pub fn try_insert(&mut self, key: &DefaultAtom, value: V) + where + V: Into, + { + if !self.contains(key) { + self.insert(key.clone(), value); + } + } + + pub fn remove(&mut self, key: &DefaultAtom) -> Option { + util::log::remove(&mut self.fields, &key, false) + } + + pub fn remove_prune(&mut self, key: &DefaultAtom, prune: bool) -> Option { + util::log::remove(&mut self.fields, &key, prune) + } + + pub fn keys<'a>(&'a self) -> impl Iterator + 'a { + util::log::keys(&self.fields) + } + + pub fn all_fields(&self) -> impl Iterator + Serialize { + util::log::all_fields(&self.fields) + } + + pub fn is_empty(&self) -> bool { + self.fields.is_empty() + } +} + +impl From> for LogEvent { + fn from(map: BTreeMap) -> Self { + LogEvent { fields: map } + } +} + +impl Into> for LogEvent { + fn into(self) -> BTreeMap { + let Self { fields } = self; + fields + } +} + +impl From> for LogEvent { + fn from(map: HashMap) -> Self { + LogEvent { + fields: map.into_iter().collect(), + } + } +} + +impl Into> for LogEvent { + fn into(self) -> HashMap { + self.fields.into_iter().collect() + } +} + +impl TryFrom for LogEvent { + type Error = crate::Error; + + fn try_from(map: serde_json::Value) -> Result { + match map { + serde_json::Value::Object(fields) => Ok(LogEvent::from( + fields + .into_iter() + .map(|(k, v)| (k, v.into())) + .collect::>(), + )), + _ => Err(crate::Error::from( + "Attempted to convert non-Object JSON into a LogEvent.", + )), + } + } +} + +impl TryInto for LogEvent { + type Error = crate::Error; + + fn try_into(self) -> Result { + let Self { fields } = self; + Ok(serde_json::to_value(fields)?) 
+ } +} + +impl std::ops::Index<&DefaultAtom> for LogEvent { + type Output = Value; + + fn index(&self, key: &DefaultAtom) -> &Value { + self.get(key).expect("Key is not found") + } +} + +impl, V: Into> Extend<(K, V)> for LogEvent { + fn extend>(&mut self, iter: I) { + for (k, v) in iter { + self.insert(k.into(), v.into()); + } + } +} + +// Allow converting any kind of appropriate key/value iterator directly into a LogEvent. +impl, V: Into> FromIterator<(K, V)> for LogEvent { + fn from_iter>(iter: T) -> Self { + let mut log_event = LogEvent::default(); + log_event.extend(iter); + log_event + } +} + +/// Converts event into an iterator over top-level key/value pairs. +impl IntoIterator for LogEvent { + type Item = (String, Value); + type IntoIter = std::collections::btree_map::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.fields.into_iter() + } +} + +impl Serialize for LogEvent { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.collect_map(self.fields.iter()) + } +} diff --git a/src/event/mod.rs b/src/event/mod.rs index 62ada1fa80b45e..bf05976dbab17a 100644 --- a/src/event/mod.rs +++ b/src/event/mod.rs @@ -1,24 +1,27 @@ use self::proto::{event_wrapper::Event as EventProto, metric::Value as MetricProto, Log}; +use crate::config::log_schema; use bytes::Bytes; use chrono::{DateTime, SecondsFormat, TimeZone, Utc}; use lazy_static::lazy_static; use metric::{MetricKind, MetricValue}; -use serde::{Serialize, Serializer}; -use serde_json::Value as JsonValue; -use std::{collections::BTreeMap, iter::FromIterator}; +use std::collections::{BTreeMap, HashMap}; use string_cache::DefaultAtom as Atom; pub mod discriminant; -mod log_schema; pub mod merge; pub mod merge_state; pub mod metric; pub mod util; -pub use log_schema::{log_schema, LogSchema, LOG_SCHEMA}; +mod log_event; +mod value; + +pub use log_event::LogEvent; pub use metric::{Metric, StatisticKind}; +use std::convert::{TryFrom, TryInto}; pub(crate) use util::log::PathComponent; pub(crate) use util::log::PathIter; +pub use value::Value; pub mod proto { include!(concat!(env!("OUT_DIR"), "/event.proto.rs")); @@ -36,11 +39,6 @@ pub enum Event { Metric(Metric), } -#[derive(PartialEq, Debug, Clone, Default)] -pub struct LogEvent { - fields: BTreeMap, -} - impl Event { pub fn new_empty_log() -> Self { Event::Log(LogEvent::default()) @@ -89,298 +87,6 @@ impl Event { } } -impl LogEvent { - pub fn get(&self, key: &Atom) -> Option<&Value> { - util::log::get(&self.fields, key) - } - - pub fn get_flat(&self, key: impl AsRef) -> Option<&Value> { - self.fields.get(key.as_ref()) - } - - pub fn get_mut(&mut self, key: &Atom) -> Option<&mut Value> { - util::log::get_mut(&mut self.fields, key) - } - - pub fn contains(&self, key: &Atom) -> bool { - util::log::contains(&self.fields, key) - } - - pub fn insert(&mut self, key: K, value: V) -> Option - where - K: AsRef, - V: Into, - { - util::log::insert(&mut self.fields, key.as_ref(), value.into()) - } - - pub fn insert_path(&mut self, key: Vec, value: V) -> Option - where - V: Into, - { - util::log::insert_path(&mut self.fields, key, value.into()) - } - - pub fn insert_flat(&mut self, key: K, value: V) - where - K: Into, - V: Into, - { - self.fields.insert(key.into(), value.into()); - } - - pub fn try_insert(&mut self, key: &Atom, value: V) - where - V: Into, - { - if !self.contains(key) { - self.insert(key.clone(), value); - } - } - - pub fn remove(&mut self, key: &Atom) -> Option { - util::log::remove(&mut self.fields, &key, false) - } - - pub fn 
remove_prune(&mut self, key: &Atom, prune: bool) -> Option { - util::log::remove(&mut self.fields, &key, prune) - } - - pub fn keys<'a>(&'a self) -> impl Iterator + 'a { - util::log::keys(&self.fields) - } - - pub fn all_fields(&self) -> impl Iterator + Serialize { - util::log::all_fields(&self.fields) - } - - pub fn is_empty(&self) -> bool { - self.fields.is_empty() - } -} - -impl std::ops::Index<&Atom> for LogEvent { - type Output = Value; - - fn index(&self, key: &Atom) -> &Value { - self.get(key).expect("Key is not found") - } -} - -impl, V: Into> Extend<(K, V)> for LogEvent { - fn extend>(&mut self, iter: I) { - for (k, v) in iter { - self.insert(k.into(), v.into()); - } - } -} - -// Allow converting any kind of appropriate key/value iterator directly into a LogEvent. -impl, V: Into> FromIterator<(K, V)> for LogEvent { - fn from_iter>(iter: T) -> Self { - let mut log_event = LogEvent::default(); - log_event.extend(iter); - log_event - } -} - -/// Converts event into an iterator over top-level key/value pairs. -impl IntoIterator for LogEvent { - type Item = (String, Value); - type IntoIter = std::collections::btree_map::IntoIter; - - fn into_iter(self) -> Self::IntoIter { - self.fields.into_iter() - } -} - -impl Serialize for LogEvent { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - serializer.collect_map(self.fields.iter()) - } -} - -#[derive(PartialEq, Debug, Clone)] -pub enum Value { - Bytes(Bytes), - Integer(i64), - Float(f64), - Boolean(bool), - Timestamp(DateTime), - Map(BTreeMap), - Array(Vec), - Null, -} - -impl Serialize for Value { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - match &self { - Value::Integer(i) => serializer.serialize_i64(*i), - Value::Float(f) => serializer.serialize_f64(*f), - Value::Boolean(b) => serializer.serialize_bool(*b), - Value::Bytes(_) | Value::Timestamp(_) => { - serializer.serialize_str(&self.to_string_lossy()) - } - Value::Map(m) => serializer.collect_map(m), - Value::Array(a) => serializer.collect_seq(a), - Value::Null => serializer.serialize_none(), - } - } -} - -impl From for Value { - fn from(bytes: Bytes) -> Self { - Value::Bytes(bytes) - } -} - -impl From> for Value { - fn from(bytes: Vec) -> Self { - Value::Bytes(bytes.into()) - } -} - -impl From for Value { - fn from(string: String) -> Self { - Value::Bytes(string.into()) - } -} - -// We only enable this in testing for convenience, since `"foo"` is a `&str`. -// In normal operation, it's better to let the caller decide where to clone and when, rather than -// hiding this from them. -#[cfg(test)] -impl From<&str> for Value { - fn from(s: &str) -> Self { - Value::Bytes(Vec::from(s.as_bytes()).into()) - } -} - -impl From> for Value { - fn from(timestamp: DateTime) -> Self { - Value::Timestamp(timestamp) - } -} - -impl From for Value { - fn from(value: f32) -> Self { - Value::Float(f64::from(value)) - } -} - -impl From for Value { - fn from(value: f64) -> Self { - Value::Float(value) - } -} - -impl From> for Value { - fn from(value: BTreeMap) -> Self { - Value::Map(value) - } -} - -impl From> for Value { - fn from(value: Vec) -> Self { - Value::Array(value) - } -} - -macro_rules! 
impl_valuekind_from_integer { - ($t:ty) => { - impl From<$t> for Value { - fn from(value: $t) -> Self { - Value::Integer(value as i64) - } - } - }; -} - -impl_valuekind_from_integer!(i64); -impl_valuekind_from_integer!(i32); -impl_valuekind_from_integer!(i16); -impl_valuekind_from_integer!(i8); -impl_valuekind_from_integer!(isize); - -impl From for Value { - fn from(value: bool) -> Self { - Value::Boolean(value) - } -} - -impl From for Value { - fn from(json_value: JsonValue) -> Self { - match json_value { - JsonValue::Bool(b) => Value::Boolean(b), - JsonValue::Number(n) => { - if let Some(i) = n.as_i64() { - Value::Integer(i) - } else if let Some(f) = n.as_f64() { - Value::Float(f) - } else { - Value::Bytes(n.to_string().into()) - } - } - JsonValue::String(s) => Value::Bytes(Bytes::from(s)), - JsonValue::Object(obj) => Value::Map( - obj.into_iter() - .map(|(key, value)| (key, Value::from(value))) - .collect(), - ), - JsonValue::Array(arr) => Value::Array(arr.into_iter().map(Value::from).collect()), - JsonValue::Null => Value::Null, - } - } -} - -impl Value { - // TODO: return Cow - pub fn to_string_lossy(&self) -> String { - match self { - Value::Bytes(bytes) => String::from_utf8_lossy(&bytes).into_owned(), - Value::Timestamp(timestamp) => timestamp_to_string(timestamp), - Value::Integer(num) => format!("{}", num), - Value::Float(num) => format!("{}", num), - Value::Boolean(b) => format!("{}", b), - Value::Map(map) => serde_json::to_string(map).expect("Cannot serialize map"), - Value::Array(arr) => serde_json::to_string(arr).expect("Cannot serialize array"), - Value::Null => "".to_string(), - } - } - - pub fn as_bytes(&self) -> Bytes { - match self { - Value::Bytes(bytes) => bytes.clone(), // cloning a Bytes is cheap - Value::Timestamp(timestamp) => Bytes::from(timestamp_to_string(timestamp)), - Value::Integer(num) => Bytes::from(format!("{}", num)), - Value::Float(num) => Bytes::from(format!("{}", num)), - Value::Boolean(b) => Bytes::from(format!("{}", b)), - Value::Map(map) => Bytes::from(serde_json::to_vec(map).expect("Cannot serialize map")), - Value::Array(arr) => { - Bytes::from(serde_json::to_vec(arr).expect("Cannot serialize array")) - } - Value::Null => Bytes::from(""), - } - } - - pub fn into_bytes(self) -> Bytes { - self.as_bytes() - } - - pub fn as_timestamp(&self) -> Option<&DateTime> { - match &self { - Value::Timestamp(ts) => Some(ts), - _ => None, - } - } -} - fn timestamp_to_string(timestamp: &DateTime) -> String { timestamp.to_rfc3339_opts(SecondsFormat::AutoSi, true) } @@ -428,6 +134,47 @@ fn decode_value(input: proto::Value) -> Option { } } +impl From> for Event { + fn from(map: BTreeMap) -> Self { + Self::Log(LogEvent::from(map)) + } +} + +impl From> for Event { + fn from(map: HashMap) -> Self { + Self::Log(LogEvent::from(map)) + } +} + +impl TryFrom for Event { + type Error = crate::Error; + + fn try_from(map: serde_json::Value) -> Result { + match map { + serde_json::Value::Object(fields) => Ok(Event::from( + fields + .into_iter() + .map(|(k, v)| (k, v.into())) + .collect::>(), + )), + _ => Err(crate::Error::from( + "Attempted to convert non-Object JSON into an Event.", + )), + } + } +} + +impl TryInto for Event { + type Error = serde_json::Error; + + fn try_into(self) -> Result { + match self { + Event::Log(fields) => serde_json::to_value(fields), + Event::Metric(metric) => serde_json::to_value(metric), + } + } +} + impl From for Event { fn from(proto: proto::EventWrapper) -> Self { let event = proto.event.unwrap(); @@ -440,7 +187,7 @@ impl From for Event { 
.filter_map(|(k, v)| decode_value(v).map(|value| (k, value))) .collect::>(); - Event::Log(LogEvent { fields }) + Event::Log(LogEvent::from(fields)) } EventProto::Metric(proto) => { let kind = match proto.kind() { @@ -540,8 +287,8 @@ fn encode_array(items: Vec) -> proto::ValueArray { impl From for proto::EventWrapper { fn from(event: Event) -> Self { match event { - Event::Log(LogEvent { fields }) => { - let fields = fields + Event::Log(log_event) => { + let fields = log_event .into_iter() .map(|(k, v)| (k, encode_value(v))) .collect::>(); @@ -631,30 +378,16 @@ impl From for proto::EventWrapper { } } -// TODO: should probably get rid of this -impl From for Vec { - fn from(event: Event) -> Vec { - event - .into_log() - .remove(&log_schema().message_key()) - .unwrap() - .as_bytes() - .to_vec() - } -} - impl From for Event { fn from(message: Bytes) -> Self { - let mut event = Event::Log(LogEvent { - fields: BTreeMap::new(), - }); + let mut event = Event::Log(LogEvent::from(BTreeMap::new())); event .as_mut_log() - .insert(log_schema().message_key().clone(), message); + .insert(log_schema().message_key(), message); event .as_mut_log() - .insert(log_schema().timestamp_key().clone(), Utc::now()); + .insert(log_schema().timestamp_key(), Utc::now()); event } @@ -686,7 +419,7 @@ impl From for Event { #[cfg(test)] mod test { - use super::{Atom, Event, LogSchema, Value}; + use super::*; use regex::Regex; use std::collections::HashSet; @@ -700,7 +433,7 @@ mod test { "message": "raw log line", "foo": "bar", "bar": "baz", - "timestamp": event.as_log().get(&super::log_schema().timestamp_key()), + "timestamp": event.as_log().get(&log_schema().timestamp_key()), }); let actual_all = serde_json::to_value(event.as_log().all_fields()).unwrap(); @@ -778,13 +511,4 @@ mod test { ] ); } - - #[test] - fn partial_log_schema() { - let toml = r#" -message_key = "message" -timestamp_key = "timestamp" -"#; - let _ = toml::from_str::(toml).unwrap(); - } } diff --git a/src/event/value.rs b/src/event/value.rs new file mode 100644 index 00000000000000..c0784ff6c0b974 --- /dev/null +++ b/src/event/value.rs @@ -0,0 +1,185 @@ +use crate::event::timestamp_to_string; +use bytes::Bytes; +use chrono::{DateTime, Utc}; +use serde::{Serialize, Serializer}; +use std::collections::BTreeMap; + +#[derive(PartialEq, Debug, Clone)] +pub enum Value { + Bytes(Bytes), + Integer(i64), + Float(f64), + Boolean(bool), + Timestamp(DateTime), + Map(BTreeMap), + Array(Vec), + Null, +} + +impl Serialize for Value { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + match &self { + Value::Integer(i) => serializer.serialize_i64(*i), + Value::Float(f) => serializer.serialize_f64(*f), + Value::Boolean(b) => serializer.serialize_bool(*b), + Value::Bytes(_) | Value::Timestamp(_) => { + serializer.serialize_str(&self.to_string_lossy()) + } + Value::Map(m) => serializer.collect_map(m), + Value::Array(a) => serializer.collect_seq(a), + Value::Null => serializer.serialize_none(), + } + } +} + +impl From for Value { + fn from(bytes: Bytes) -> Self { + Value::Bytes(bytes) + } +} + +impl From> for Value { + fn from(bytes: Vec) -> Self { + Value::Bytes(bytes.into()) + } +} + +impl From for Value { + fn from(string: String) -> Self { + Value::Bytes(string.into()) + } +} + +// We only enable this in testing for convenience, since `"foo"` is a `&str`. +// In normal operation, it's better to let the caller decide where to clone and when, rather than +// hiding this from them. 
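// A brief usage sketch of the distinction described above (function names are
// illustrative only, not part of this patch): tests lean on the `&str`
// conversion for terse fixtures, while production code makes the copy explicit
// so the caller chooses where the allocation happens.
#[cfg(test)]
fn fixture_value() -> Value {
    Value::from("foo") // relies on the test-only From<&str> impl
}

fn value_from_borrowed(input: &str) -> Value {
    // Explicit copy: the caller decides where the String allocation happens.
    Value::from(input.to_owned())
}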
+#[cfg(test)] +impl From<&str> for Value { + fn from(s: &str) -> Self { + Value::Bytes(Vec::from(s.as_bytes()).into()) + } +} + +impl From> for Value { + fn from(timestamp: DateTime) -> Self { + Value::Timestamp(timestamp) + } +} + +impl From for Value { + fn from(value: f32) -> Self { + Value::Float(f64::from(value)) + } +} + +impl From for Value { + fn from(value: f64) -> Self { + Value::Float(value) + } +} + +impl From> for Value { + fn from(value: BTreeMap) -> Self { + Value::Map(value) + } +} + +impl From> for Value { + fn from(value: Vec) -> Self { + Value::Array(value) + } +} + +macro_rules! impl_valuekind_from_integer { + ($t:ty) => { + impl From<$t> for Value { + fn from(value: $t) -> Self { + Value::Integer(value as i64) + } + } + }; +} + +impl_valuekind_from_integer!(i64); +impl_valuekind_from_integer!(i32); +impl_valuekind_from_integer!(i16); +impl_valuekind_from_integer!(i8); +impl_valuekind_from_integer!(isize); + +impl From for Value { + fn from(value: bool) -> Self { + Value::Boolean(value) + } +} + +impl From for Value { + fn from(json_value: serde_json::Value) -> Self { + match json_value { + serde_json::Value::Bool(b) => Value::Boolean(b), + serde_json::Value::Number(n) => { + if let Some(i) = n.as_i64() { + Value::Integer(i) + } else if let Some(f) = n.as_f64() { + Value::Float(f) + } else { + Value::Bytes(n.to_string().into()) + } + } + serde_json::Value::String(s) => Value::Bytes(Bytes::from(s)), + serde_json::Value::Object(obj) => Value::Map( + obj.into_iter() + .map(|(key, value)| (key, Value::from(value))) + .collect(), + ), + serde_json::Value::Array(arr) => { + Value::Array(arr.into_iter().map(Value::from).collect()) + } + serde_json::Value::Null => Value::Null, + } + } +} + +impl Value { + // TODO: return Cow + pub fn to_string_lossy(&self) -> String { + match self { + Value::Bytes(bytes) => String::from_utf8_lossy(&bytes).into_owned(), + Value::Timestamp(timestamp) => timestamp_to_string(timestamp), + Value::Integer(num) => format!("{}", num), + Value::Float(num) => format!("{}", num), + Value::Boolean(b) => format!("{}", b), + Value::Map(map) => serde_json::to_string(map).expect("Cannot serialize map"), + Value::Array(arr) => serde_json::to_string(arr).expect("Cannot serialize array"), + Value::Null => "".to_string(), + } + } + + pub fn as_bytes(&self) -> Bytes { + match self { + Value::Bytes(bytes) => bytes.clone(), // cloning a Bytes is cheap + Value::Timestamp(timestamp) => Bytes::from(timestamp_to_string(timestamp)), + Value::Integer(num) => Bytes::from(format!("{}", num)), + Value::Float(num) => Bytes::from(format!("{}", num)), + Value::Boolean(b) => Bytes::from(format!("{}", b)), + Value::Map(map) => Bytes::from(serde_json::to_vec(map).expect("Cannot serialize map")), + Value::Array(arr) => { + Bytes::from(serde_json::to_vec(arr).expect("Cannot serialize array")) + } + Value::Null => Bytes::from(""), + } + } + + pub fn into_bytes(self) -> Bytes { + self.as_bytes() + } + + pub fn as_timestamp(&self) -> Option<&DateTime> { + match &self { + Value::Timestamp(ts) => Some(ts), + _ => None, + } + } +} diff --git a/src/internal_events/dedupe.rs b/src/internal_events/dedupe.rs new file mode 100644 index 00000000000000..39df178d7c76ba --- /dev/null +++ b/src/internal_events/dedupe.rs @@ -0,0 +1,36 @@ +use super::InternalEvent; +use metrics::counter; + +#[derive(Debug)] +pub(crate) struct DedupeEventProcessed; + +impl InternalEvent for DedupeEventProcessed { + fn emit_metrics(&self) { + counter!("events_processed", 1, + "component_kind" => "transform", + 
"component_type" => "dedupe", + ); + } +} + +#[derive(Debug)] +pub(crate) struct DedupeEventDiscarded { + pub event: crate::Event, +} + +impl InternalEvent for DedupeEventDiscarded { + fn emit_logs(&self) { + warn!( + message = "Encountered duplicate event; discarding.", + rate_limit_secs = 30 + ); + trace!(message = "Encountered duplicate event; discarding.", event = ?self.event); + } + + fn emit_metrics(&self) { + counter!("duplicate_events_discarded", 1, + "component_kind" => "transform", + "component_type" => "dedupe", + ); + } +} diff --git a/src/internal_events/mod.rs b/src/internal_events/mod.rs index 4faa99f4d3c3a3..c052608ac3738b 100644 --- a/src/internal_events/mod.rs +++ b/src/internal_events/mod.rs @@ -10,6 +10,8 @@ mod blackhole; mod coercer; #[cfg(feature = "transforms-concat")] mod concat; +#[cfg(feature = "transforms-dedupe")] +mod dedupe; #[cfg(feature = "sources-docker")] mod docker; mod elasticsearch; @@ -33,6 +35,8 @@ mod lua; mod process; #[cfg(feature = "sources-prometheus")] mod prometheus; +#[cfg(feature = "transforms-reduce")] +mod reduce; #[cfg(feature = "transforms-regex_parser")] mod regex_parser; mod remap; @@ -61,6 +65,8 @@ mod syslog; #[cfg(feature = "transforms-tag_cardinality_limit")] mod tag_cardinality_limit; mod tcp; +#[cfg(feature = "transforms-tokenizer")] +mod tokenizer; mod unix; mod vector; #[cfg(feature = "wasm")] @@ -78,6 +84,8 @@ pub use self::blackhole::*; pub(crate) use self::coercer::*; #[cfg(feature = "transforms-concat")] pub use self::concat::*; +#[cfg(feature = "transforms-dedupe")] +pub(crate) use self::dedupe::*; #[cfg(feature = "sources-docker")] pub use self::docker::*; pub use self::elasticsearch::*; @@ -102,6 +110,8 @@ pub use self::lua::*; pub use self::process::*; #[cfg(feature = "sources-prometheus")] pub use self::prometheus::*; +#[cfg(feature = "transforms-reduce")] +pub(crate) use self::reduce::*; #[cfg(feature = "transforms-regex_parser")] pub(crate) use self::regex_parser::*; pub use self::remap::*; @@ -126,6 +136,8 @@ pub use self::syslog::*; #[cfg(feature = "transforms-tag_cardinality_limit")] pub(crate) use self::tag_cardinality_limit::*; pub use self::tcp::*; +#[cfg(feature = "transforms-tokenizer")] +pub(crate) use self::tokenizer::*; pub use self::unix::*; pub use self::vector::*; #[cfg(feature = "wasm")] diff --git a/src/internal_events/reduce.rs b/src/internal_events/reduce.rs new file mode 100644 index 00000000000000..22b25c0077e8a4 --- /dev/null +++ b/src/internal_events/reduce.rs @@ -0,0 +1,26 @@ +use super::InternalEvent; +use metrics::counter; + +#[derive(Debug)] +pub(crate) struct ReduceEventProcessed; + +impl InternalEvent for ReduceEventProcessed { + fn emit_metrics(&self) { + counter!("events_processed", 1, + "component_kind" => "transform", + "component_type" => "reduce", + ); + } +} + +#[derive(Debug)] +pub(crate) struct ReduceStaleEventFlushed; + +impl InternalEvent for ReduceStaleEventFlushed { + fn emit_metrics(&self) { + counter!("stale_events_flushed", 1, + "component_kind" => "transform", + "component_type" => "reduce", + ); + } +} diff --git a/src/internal_events/tokenizer.rs b/src/internal_events/tokenizer.rs new file mode 100644 index 00000000000000..e72944ff9b18b9 --- /dev/null +++ b/src/internal_events/tokenizer.rs @@ -0,0 +1,63 @@ +use super::InternalEvent; +use metrics::counter; +use string_cache::DefaultAtom as Atom; + +#[derive(Debug)] +pub(crate) struct TokenizerEventProcessed; + +impl InternalEvent for TokenizerEventProcessed { + fn emit_metrics(&self) { + counter!("events_processed", 1, + 
"component_kind" => "transform", + "component_type" => "tokenizer", + ); + } +} + +#[derive(Debug)] +pub(crate) struct TokenizerFieldMissing<'a> { + pub field: &'a Atom, +} + +impl<'a> InternalEvent for TokenizerFieldMissing<'a> { + fn emit_logs(&self) { + debug!( + message = "Field does not exist.", + field = %self.field, + rate_limit_secs = 10 + ); + } + + fn emit_metrics(&self) { + counter!("processing_errors", 1, + "component_kind" => "transform", + "component_type" => "tokenizer", + "error_type" => "field_missing", + ); + } +} + +#[derive(Debug)] +pub(crate) struct TokenizerConvertFailed<'a> { + pub field: &'a Atom, + pub error: crate::types::Error, +} + +impl<'a> InternalEvent for TokenizerConvertFailed<'a> { + fn emit_logs(&self) { + debug!( + message = "Could not convert types.", + field = %self.field, + error = %self.error, + rate_limit_secs = 10 + ); + } + + fn emit_metrics(&self) { + counter!("processing_errors", 1, + "component_kind" => "transform", + "component_type" => "tokenizer", + "error_type" => "convert_failed", + ); + } +} diff --git a/src/main.rs b/src/main.rs index bb20fe88b4c275..cc4ea9ade0e339 100644 --- a/src/main.rs +++ b/src/main.rs @@ -118,7 +118,7 @@ fn main() { std::process::exit(exitcode::CONFIG); }); - vector::event::LOG_SCHEMA + crate::config::LOG_SCHEMA .set(config.global.log_schema.clone()) .expect("Couldn't set schema"); diff --git a/src/sinks/aws_cloudwatch_logs/mod.rs b/src/sinks/aws_cloudwatch_logs/mod.rs index e44301581d0d88..efc32e3332834b 100644 --- a/src/sinks/aws_cloudwatch_logs/mod.rs +++ b/src/sinks/aws_cloudwatch_logs/mod.rs @@ -1,9 +1,9 @@ mod request; use crate::{ - config::{DataType, SinkConfig, SinkContext}, + config::{log_schema, DataType, SinkConfig, SinkContext}, dns::Resolver, - event::{self, Event, LogEvent, Value}, + event::{Event, LogEvent, Value}, region::RegionOrEndpoint, sinks::util::{ encoding::{EncodingConfig, EncodingConfiguration}, @@ -407,7 +407,7 @@ fn encode_log( mut log: LogEvent, encoding: &EncodingConfig, ) -> Result { - let timestamp = match log.remove(&event::log_schema().timestamp_key()) { + let timestamp = match log.remove(&log_schema().timestamp_key()) { Some(Value::Timestamp(ts)) => ts.timestamp_millis(), _ => Utc::now().timestamp_millis(), }; @@ -415,7 +415,7 @@ fn encode_log( let message = match encoding.codec() { Encoding::Json => serde_json::to_string(&log).unwrap(), Encoding::Text => log - .get(&event::log_schema().message_key()) + .get(&log_schema().message_key()) .map(|v| v.to_string_lossy()) .unwrap_or_else(|| "".into()), }; @@ -661,7 +661,7 @@ mod tests { use super::*; use crate::{ dns::Resolver, - event::{self, Event, Value}, + event::{Event, Value}, region::RegionOrEndpoint, }; use std::collections::HashMap; @@ -785,7 +785,7 @@ mod tests { event.insert("key", "value"); let encoded = encode_log(event.clone(), &Encoding::Json.into()).unwrap(); - let ts = if let Value::Timestamp(ts) = event[&event::log_schema().timestamp_key()] { + let ts = if let Value::Timestamp(ts) = event[&log_schema().timestamp_key()] { ts.timestamp_millis() } else { panic!() @@ -800,7 +800,7 @@ mod tests { event.insert("key", "value"); let encoded = encode_log(event, &Encoding::Json.into()).unwrap(); let map: HashMap = serde_json::from_str(&encoded.message[..]).unwrap(); - assert!(map.get(&event::log_schema().timestamp_key()).is_none()); + assert!(map.get(&log_schema().timestamp_key()).is_none()); } #[test] @@ -820,7 +820,7 @@ mod tests { let mut event = Event::new_empty_log(); event .as_mut_log() - 
.insert(&event::log_schema().timestamp_key(), timestamp); + .insert(&log_schema().timestamp_key(), timestamp); encode_log(event.into_log(), &Encoding::Text.into()).unwrap() }) .collect(); @@ -851,7 +851,7 @@ mod integration_tests { use rusoto_logs::{CloudWatchLogs, CreateLogGroupRequest, GetLogEventsRequest}; use std::convert::TryFrom; - const GROUP_NAME: &'static str = "vector-cw"; + const GROUP_NAME: &str = "vector-cw"; #[tokio::test] async fn cloudwatch_insert_log_event() { @@ -930,10 +930,9 @@ mod integration_tests { if doit { let timestamp = chrono::Utc::now() - chrono::Duration::days(1); - event.as_mut_log().insert( - event::log_schema().timestamp_key(), - Value::Timestamp(timestamp), - ); + event + .as_mut_log() + .insert(log_schema().timestamp_key(), Value::Timestamp(timestamp)); } doit = true; @@ -994,7 +993,7 @@ mod integration_tests { let mut event = Event::from(line.clone()); event .as_mut_log() - .insert(event::log_schema().timestamp_key(), now + offset); + .insert(log_schema().timestamp_key(), now + offset); events.push(event); line }; diff --git a/src/sinks/aws_kinesis_firehose.rs b/src/sinks/aws_kinesis_firehose.rs index d1c5851e1ef98a..ccda2141094215 100644 --- a/src/sinks/aws_kinesis_firehose.rs +++ b/src/sinks/aws_kinesis_firehose.rs @@ -1,7 +1,7 @@ use crate::{ config::{DataType, SinkConfig, SinkContext, SinkDescription}, dns::Resolver, - event::{self, Event}, + event::Event, region::RegionOrEndpoint, sinks::util::{ encoding::{EncodingConfig, EncodingConfiguration}, @@ -234,7 +234,7 @@ fn encode_event(mut event: Event, encoding: &EncodingConfig) -> Option Encoding::Json => serde_json::to_vec(&log).expect("Error encoding event as json."), Encoding::Text => log - .get(&event::log_schema().message_key()) + .get(&crate::config::log_schema().message_key()) .map(|v| v.as_bytes().to_vec()) .unwrap_or_default(), }; @@ -247,7 +247,7 @@ fn encode_event(mut event: Event, encoding: &EncodingConfig) -> Option #[cfg(test)] mod tests { use super::*; - use crate::event::{self, Event}; + use crate::event::Event; use std::collections::BTreeMap; #[test] @@ -267,7 +267,10 @@ mod tests { let map: BTreeMap = serde_json::from_slice(&event.data[..]).unwrap(); - assert_eq!(map[&event::log_schema().message_key().to_string()], message); + assert_eq!( + map[&crate::config::log_schema().message_key().to_string()], + message + ); assert_eq!(map["key"], "value".to_string()); } } diff --git a/src/sinks/aws_kinesis_streams.rs b/src/sinks/aws_kinesis_streams.rs index ce62f7d873adcc..df7268a62b9ddd 100644 --- a/src/sinks/aws_kinesis_streams.rs +++ b/src/sinks/aws_kinesis_streams.rs @@ -1,7 +1,7 @@ use crate::{ - config::{DataType, SinkConfig, SinkContext, SinkDescription}, + config::{log_schema, DataType, SinkConfig, SinkContext, SinkDescription}, dns::Resolver, - event::{self, Event}, + event::Event, internal_events::AwsKinesisStreamsEventSent, region::RegionOrEndpoint, sinks::util::{ @@ -276,7 +276,7 @@ fn encode_event( let data = match encoding.codec() { Encoding::Json => serde_json::to_vec(&log).expect("Error encoding event as json."), Encoding::Text => log - .get(&event::log_schema().message_key()) + .get(&log_schema().message_key()) .map(|v| v.as_bytes().to_vec()) .unwrap_or_default(), }; @@ -305,10 +305,7 @@ fn gen_partition_key() -> String { #[cfg(test)] mod tests { use super::*; - use crate::{ - event::{self, Event}, - test_util::random_string, - }; + use crate::{event::Event, test_util::random_string}; use std::collections::BTreeMap; #[test] @@ -328,7 +325,7 @@ mod tests { let map: BTreeMap = 
serde_json::from_slice(&event.data[..]).unwrap(); - assert_eq!(map[&event::log_schema().message_key().to_string()], message); + assert_eq!(map[&log_schema().message_key().to_string()], message); assert_eq!(map["key"], "value".to_string()); } diff --git a/src/sinks/aws_s3.rs b/src/sinks/aws_s3.rs index b1c11ede7f8578..8bdb4af79aee28 100644 --- a/src/sinks/aws_s3.rs +++ b/src/sinks/aws_s3.rs @@ -1,7 +1,7 @@ use crate::{ - config::{DataType, SinkConfig, SinkContext, SinkDescription}, + config::{log_schema, DataType, SinkConfig, SinkContext, SinkDescription}, dns::Resolver, - event::{self, Event}, + event::Event, region::RegionOrEndpoint, serde::to_string, sinks::util::{ @@ -396,7 +396,7 @@ fn encode_event( .expect("Failed to encode event as json, this is a bug!"), Encoding::Text => { let mut bytes = log - .get(&event::log_schema().message_key()) + .get(&log_schema().message_key()) .map(|v| v.as_bytes().to_vec()) .unwrap_or_default(); bytes.push(b'\n'); @@ -410,7 +410,7 @@ fn encode_event( #[cfg(test)] mod tests { use super::*; - use crate::event::{self, Event}; + use crate::event::Event; use std::collections::BTreeMap; @@ -442,7 +442,7 @@ mod tests { let (bytes, _) = bytes.into_parts(); let map: BTreeMap = serde_json::from_slice(&bytes[..]).unwrap(); - assert_eq!(map[&event::log_schema().message_key().to_string()], message); + assert_eq!(map[&log_schema().message_key().to_string()], message); assert_eq!(map["key"], "value".to_string()); } @@ -465,7 +465,7 @@ mod tests { let (bytes, _) = bytes.into_parts(); let map: BTreeMap = serde_json::from_slice(&bytes[..]).unwrap(); - assert_eq!(map[&event::log_schema().message_key().to_string()], message); + assert_eq!(map[&log_schema().message_key().to_string()], message); // assert_eq!(map["key"], "value".to_string()); } diff --git a/src/sinks/blackhole.rs b/src/sinks/blackhole.rs index 37c1f214a038d8..d39a3224a9e374 100644 --- a/src/sinks/blackhole.rs +++ b/src/sinks/blackhole.rs @@ -2,7 +2,7 @@ use crate::{ buffers::Acker, config::{DataType, SinkConfig, SinkContext, SinkDescription}, emit, - event::{self, Event}, + event::Event, internal_events::BlackholeEventReceived, }; use futures::{future, FutureExt}; @@ -61,7 +61,7 @@ impl Sink for BlackholeSink { fn start_send(&mut self, item: Self::SinkItem) -> StartSend { let message_len = match item { Event::Log(log) => log - .get(&event::log_schema().message_key()) + .get(&crate::config::log_schema().message_key()) .map(|v| v.as_bytes().len()) .unwrap_or(0), Event::Metric(metric) => serde_json::to_string(&metric).map(|v| v.len()).unwrap_or(0), diff --git a/src/sinks/clickhouse.rs b/src/sinks/clickhouse.rs index 21ccf2adfb3629..e3fa182b3dd5d0 100644 --- a/src/sinks/clickhouse.rs +++ b/src/sinks/clickhouse.rs @@ -237,8 +237,7 @@ mod tests { mod integration_tests { use super::*; use crate::{ - config::{SinkConfig, SinkContext}, - event, + config::{log_schema, SinkConfig, SinkContext}, event::Event, sinks::util::encoding::TimestampFormat, test_util::{random_string, trace_init}, @@ -337,11 +336,11 @@ mod integration_tests { let exp_event = input_event.as_mut_log(); exp_event.insert( - event::log_schema().timestamp_key().clone(), + log_schema().timestamp_key().clone(), format!( "{}", exp_event - .get(&event::log_schema().timestamp_key()) + .get(&log_schema().timestamp_key()) .unwrap() .as_timestamp() .unwrap() @@ -397,11 +396,11 @@ timestamp_format = "unix""#, let exp_event = input_event.as_mut_log(); exp_event.insert( - event::log_schema().timestamp_key().clone(), + log_schema().timestamp_key().clone(), format!( 
"{}", exp_event - .get(&event::log_schema().timestamp_key()) + .get(&log_schema().timestamp_key()) .unwrap() .as_timestamp() .unwrap() diff --git a/src/sinks/console.rs b/src/sinks/console.rs index 121db786eb4d34..eb1ad21f1dfccb 100644 --- a/src/sinks/console.rs +++ b/src/sinks/console.rs @@ -1,6 +1,6 @@ use crate::{ config::{DataType, SinkConfig, SinkContext, SinkDescription}, - event::{self, Event}, + event::Event, sinks::util::{ encoding::{EncodingConfig, EncodingConfiguration}, StreamSink, @@ -84,7 +84,7 @@ fn encode_event( Encoding::Json => serde_json::to_string(&log), Encoding::Text => { let s = log - .get(&event::log_schema().message_key()) + .get(&crate::config::log_schema().message_key()) .map(|v| v.to_string_lossy()) .unwrap_or_else(|| "".into()); Ok(s) diff --git a/src/sinks/datadog/logs.rs b/src/sinks/datadog/logs.rs index 5c0c63d7a1ccb5..2a658c1826c7d3 100644 --- a/src/sinks/datadog/logs.rs +++ b/src/sinks/datadog/logs.rs @@ -1,6 +1,6 @@ use crate::{ - config::{DataType, SinkConfig, SinkContext, SinkDescription}, - event::{log_schema, Event}, + config::{log_schema, DataType, SinkConfig, SinkContext, SinkDescription}, + event::Event, sinks::{ util::{ self, diff --git a/src/sinks/elasticsearch.rs b/src/sinks/elasticsearch.rs index 593335473182ff..08ebb3713089ec 100644 --- a/src/sinks/elasticsearch.rs +++ b/src/sinks/elasticsearch.rs @@ -596,7 +596,6 @@ mod integration_tests { use crate::{ config::{SinkConfig, SinkContext}, dns::Resolver, - event, sinks::util::http::HttpClient, test_util::{random_events_with_stream, random_string, trace_init}, tls::TlsOptions, @@ -677,7 +676,7 @@ mod integration_tests { let expected = json!({ "message": "raw log line", "foo": "bar", - "timestamp": input_event.as_log()[&event::log_schema().timestamp_key()], + "timestamp": input_event.as_log()[&crate::config::log_schema().timestamp_key()], }); assert_eq!(expected, value); } diff --git a/src/sinks/file/mod.rs b/src/sinks/file/mod.rs index 935622fa788d14..6d819c5e51f8a1 100644 --- a/src/sinks/file/mod.rs +++ b/src/sinks/file/mod.rs @@ -1,7 +1,7 @@ use crate::expiring_hash_map::ExpiringHashMap; use crate::{ - config::{DataType, SinkConfig, SinkContext, SinkDescription}, - event::{self, Event}, + config::{log_schema, DataType, SinkConfig, SinkContext, SinkDescription}, + event::Event, sinks::util::{ encoding::{EncodingConfigWithDefault, EncodingConfiguration}, StreamSink, @@ -296,7 +296,7 @@ pub fn encode_event(encoding: &EncodingConfigWithDefault, mut event: E match encoding.codec() { Encoding::Ndjson => serde_json::to_vec(&log).expect("Unable to encode event as JSON."), Encoding::Text => log - .get(&event::log_schema().message_key()) + .get(&log_schema().message_key()) .map(|v| v.to_string_lossy().into_bytes()) .unwrap_or_default(), } @@ -325,12 +325,9 @@ impl StreamingSink for FileSink { #[cfg(test)] mod tests { use super::*; - use crate::{ - event, - test_util::{ - lines_from_file, lines_from_gzip_file, random_events_with_stream, - random_lines_with_stream, temp_dir, temp_file, trace_init, - }, + use crate::test_util::{ + lines_from_file, lines_from_gzip_file, random_events_with_stream, random_lines_with_stream, + temp_dir, temp_file, trace_init, }; use futures::stream; use std::convert::TryInto; @@ -437,35 +434,35 @@ mod tests { ]; assert_eq!( - input[0].as_log()[&event::log_schema().message_key()], + input[0].as_log()[&log_schema().message_key()], From::<&str>::from(&output[0][0]) ); assert_eq!( - input[1].as_log()[&event::log_schema().message_key()], + 
input[1].as_log()[&log_schema().message_key()], From::<&str>::from(&output[1][0]) ); assert_eq!( - input[2].as_log()[&event::log_schema().message_key()], + input[2].as_log()[&log_schema().message_key()], From::<&str>::from(&output[0][1]) ); assert_eq!( - input[3].as_log()[&event::log_schema().message_key()], + input[3].as_log()[&log_schema().message_key()], From::<&str>::from(&output[3][0]) ); assert_eq!( - input[4].as_log()[&event::log_schema().message_key()], + input[4].as_log()[&log_schema().message_key()], From::<&str>::from(&output[2][0]) ); assert_eq!( - input[5].as_log()[&event::log_schema().message_key()], + input[5].as_log()[&log_schema().message_key()], From::<&str>::from(&output[2][1]) ); assert_eq!( - input[6].as_log()[&event::log_schema().message_key()], + input[6].as_log()[&log_schema().message_key()], From::<&str>::from(&output[4][0]) ); assert_eq!( - input[7].as_log()[&event::log_schema().message_key()], + input[7].as_log()[&log_schema().message_key()], From::<&str>::from(&output[5][0]) ); } diff --git a/src/sinks/gcp/cloud_storage.rs b/src/sinks/gcp/cloud_storage.rs index d0186e57045247..a6cc387b27a119 100644 --- a/src/sinks/gcp/cloud_storage.rs +++ b/src/sinks/gcp/cloud_storage.rs @@ -1,7 +1,7 @@ use super::{healthcheck_response, GcpAuthConfig, GcpCredentials, Scope}; use crate::{ config::{DataType, SinkConfig, SinkContext, SinkDescription}, - event::{self, Event}, + event::Event, serde::to_string, sinks::{ util::{ @@ -423,7 +423,7 @@ fn encode_event( .expect("Failed to encode event as json, this is a bug!"), Encoding::Text => { let mut bytes = log - .get(&event::log_schema().message_key()) + .get(&crate::config::log_schema().message_key()) .map(|v| v.as_bytes().to_vec()) .unwrap_or_default(); bytes.push(b'\n'); @@ -464,7 +464,7 @@ impl RetryLogic for GcsRetryLogic { #[cfg(test)] mod tests { use super::*; - use crate::event::{self, Event}; + use crate::event::Event; use std::collections::HashMap; @@ -497,7 +497,7 @@ mod tests { let map: HashMap = serde_json::from_slice(&bytes[..]).unwrap(); assert_eq!( - map.get(&event::log_schema().message_key().to_string()), + map.get(&crate::config::log_schema().message_key().to_string()), Some(&message) ); assert_eq!(map["key"], "value".to_string()); diff --git a/src/sinks/honeycomb.rs b/src/sinks/honeycomb.rs index 33a1d69dd4ab7e..5a6fe7c3f8ff07 100644 --- a/src/sinks/honeycomb.rs +++ b/src/sinks/honeycomb.rs @@ -1,6 +1,6 @@ use crate::{ - config::{DataType, SinkConfig, SinkContext, SinkDescription}, - event::{log_schema, Event, Value}, + config::{log_schema, DataType, SinkConfig, SinkContext, SinkDescription}, + event::{Event, Value}, sinks::util::{ http::{BatchedHttpSink, HttpClient, HttpSink}, BatchConfig, BatchSettings, BoxedRawValue, JsonArrayBuffer, TowerRequestConfig, UriSerde, diff --git a/src/sinks/http.rs b/src/sinks/http.rs index 7cfc390f9fa9dc..4d8ac58c784846 100644 --- a/src/sinks/http.rs +++ b/src/sinks/http.rs @@ -1,6 +1,6 @@ use crate::{ config::{DataType, SinkConfig, SinkContext, SinkDescription}, - event::{self, Event}, + event::Event, sinks::util::{ encoding::{EncodingConfig, EncodingConfiguration}, http::{Auth, BatchedHttpSink, HttpClient, HttpSink}, @@ -157,7 +157,7 @@ impl HttpSink for HttpSinkConfig { let body = match &self.encoding.codec() { Encoding::Text => { - if let Some(v) = event.get(&event::log_schema().message_key()) { + if let Some(v) = event.get(&crate::config::log_schema().message_key()) { let mut b = v.to_string_lossy().into_bytes(); b.push(b'\n'); b diff --git a/src/sinks/influxdb/logs.rs 
b/src/sinks/influxdb/logs.rs index e62fc584a54a77..8600fbec4d2367 100644 --- a/src/sinks/influxdb/logs.rs +++ b/src/sinks/influxdb/logs.rs @@ -1,6 +1,6 @@ use crate::{ - config::{DataType, SinkConfig, SinkContext, SinkDescription}, - event::{log_schema, Event, Value}, + config::{log_schema, DataType, SinkConfig, SinkContext, SinkDescription}, + event::{Event, Value}, sinks::{ influxdb::{ encode_namespace, encode_timestamp, healthcheck, influx_line_protocol, diff --git a/src/sinks/kafka.rs b/src/sinks/kafka.rs index 2d58871d897943..1e083e92478fae 100644 --- a/src/sinks/kafka.rs +++ b/src/sinks/kafka.rs @@ -1,7 +1,7 @@ use crate::{ buffers::Acker, - config::{DataType, SinkConfig, SinkContext, SinkDescription}, - event::{self, Event, Value}, + config::{log_schema, DataType, SinkConfig, SinkContext, SinkDescription}, + event::{Event, Value}, kafka::{KafkaAuthConfig, KafkaCompression}, serde::to_string, sinks::util::encoding::{EncodingConfig, EncodingConfigWithDefault, EncodingConfiguration}, @@ -152,8 +152,7 @@ impl Sink for KafkaSink { let mut record = FutureRecord::to(&topic).key(&key).payload(&body[..]); - if let Some(Value::Timestamp(timestamp)) = - item.as_log().get(&event::log_schema().timestamp_key()) + if let Some(Value::Timestamp(timestamp)) = item.as_log().get(&log_schema().timestamp_key()) { record = record.timestamp(timestamp.timestamp_millis()); } @@ -267,7 +266,7 @@ fn encode_event( Encoding::Json => serde_json::to_vec(&event.as_log()).unwrap(), Encoding::Text => event .as_log() - .get(&event::log_schema().message_key()) + .get(&log_schema().message_key()) .map(|v| v.as_bytes().to_vec()) .unwrap_or_default(), }; @@ -278,7 +277,7 @@ fn encode_event( #[cfg(test)] mod tests { use super::*; - use crate::event::{self, Event}; + use crate::event::Event; use std::collections::BTreeMap; #[test] @@ -311,7 +310,7 @@ mod tests { let map: BTreeMap = serde_json::from_slice(&bytes[..]).unwrap(); assert_eq!(&key[..], b"value"); - assert_eq!(map[&event::log_schema().message_key().to_string()], message); + assert_eq!(map[&log_schema().message_key().to_string()], message); assert_eq!(map["key"], "value".to_string()); assert_eq!(map["foo"], "bar".to_string()); } diff --git a/src/sinks/logdna.rs b/src/sinks/logdna.rs index c4a9cd28fb498d..3b8e3a987bd9a8 100644 --- a/src/sinks/logdna.rs +++ b/src/sinks/logdna.rs @@ -1,6 +1,6 @@ use crate::{ config::{DataType, SinkConfig, SinkContext, SinkDescription}, - event::{self, Event}, + event::Event, sinks::util::{ encoding::EncodingConfigWithDefault, http::{Auth, BatchedHttpSink, HttpClient, HttpSink}, @@ -105,10 +105,10 @@ impl HttpSink for LogdnaConfig { let mut log = event.into_log(); let line = log - .remove(&event::log_schema().message_key()) + .remove(&crate::config::log_schema().message_key()) .unwrap_or_else(|| String::from("").into()); let timestamp = log - .remove(&event::log_schema().timestamp_key()) + .remove(&crate::config::log_schema().timestamp_key()) .unwrap_or_else(|| chrono::Utc::now().into()); let mut map = serde_json::map::Map::new(); diff --git a/src/sinks/loki.rs b/src/sinks/loki.rs index 7053a12b10500f..700ccdedc14c84 100644 --- a/src/sinks/loki.rs +++ b/src/sinks/loki.rs @@ -13,7 +13,7 @@ //! does not match, we will add a default label `{agent="vector"}`. 
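// A minimal sketch (not the sink's actual implementation) of the defaulting
// rule the module docs above describe: when an event ends up with no labels,
// fall back to the static `{agent="vector"}` label.
fn ensure_default_label(labels: &mut Vec<(String, String)>) {
    if labels.is_empty() {
        labels.push(("agent".to_owned(), "vector".to_owned()));
    }
}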
use crate::{ - config::{DataType, SinkConfig, SinkContext, SinkDescription}, + config::{log_schema, DataType, SinkConfig, SinkContext, SinkDescription}, event::{self, Event, Value}, sinks::util::{ buffer::loki::{LokiBuffer, LokiEvent, LokiRecord}, @@ -134,15 +134,13 @@ impl HttpSink for LokiConfig { } } - let timestamp = match event.as_log().get(&event::log_schema().timestamp_key()) { + let timestamp = match event.as_log().get(&log_schema().timestamp_key()) { Some(event::Value::Timestamp(ts)) => ts.timestamp_nanos(), _ => chrono::Utc::now().timestamp_nanos(), }; if self.remove_timestamp { - event - .as_mut_log() - .remove(&event::log_schema().timestamp_key()); + event.as_mut_log().remove(&log_schema().timestamp_key()); } self.encoding.apply_rules(&mut event); @@ -152,7 +150,7 @@ impl HttpSink for LokiConfig { Encoding::Text => event .as_log() - .get(&event::log_schema().message_key()) + .get(&log_schema().message_key()) .map(Value::to_string_lossy) .unwrap_or_default(), }; diff --git a/src/sinks/papertrail.rs b/src/sinks/papertrail.rs index 15c4e6562eeb1c..469505cc9ad100 100644 --- a/src/sinks/papertrail.rs +++ b/src/sinks/papertrail.rs @@ -1,6 +1,5 @@ use crate::{ - config::{DataType, SinkConfig, SinkContext, SinkDescription}, - event::log_schema, + config::{log_schema, DataType, SinkConfig, SinkContext, SinkDescription}, sinks::util::{ encoding::{EncodingConfig, EncodingConfiguration}, tcp::TcpSink, diff --git a/src/sinks/pulsar.rs b/src/sinks/pulsar.rs index 4d68bfc9b74fcf..16d902de13857f 100644 --- a/src/sinks/pulsar.rs +++ b/src/sinks/pulsar.rs @@ -1,7 +1,7 @@ use crate::{ buffers::Acker, - config::{DataType, SinkConfig, SinkContext, SinkDescription}, - event::{self, Event}, + config::{log_schema, DataType, SinkConfig, SinkContext, SinkDescription}, + event::Event, sinks::util::encoding::{EncodingConfig, EncodingConfigWithDefault, EncodingConfiguration}, }; use futures::{lock::Mutex, FutureExt, TryFutureExt}; @@ -196,7 +196,7 @@ fn encode_event(item: Event, encoding: &EncodingConfig) -> crate::Resu Ok(match encoding.codec() { Encoding::Json => serde_json::to_vec(&log)?, Encoding::Text => log - .get(&event::log_schema().message_key()) + .get(&log_schema().message_key()) .map(|v| v.as_bytes().to_vec()) .unwrap_or_default(), }) @@ -214,7 +214,7 @@ mod tests { evt.as_mut_log().insert("key", "value"); let result = encode_event(evt, &EncodingConfig::from(Encoding::Json)).unwrap(); let map: HashMap = serde_json::from_slice(&result[..]).unwrap(); - assert_eq!(msg, map[&event::log_schema().message_key().to_string()]); + assert_eq!(msg, map[&log_schema().message_key().to_string()]); } #[test] diff --git a/src/sinks/sematext_logs.rs b/src/sinks/sematext_logs.rs index a42283c53f123d..aa71c2d5d00e3d 100644 --- a/src/sinks/sematext_logs.rs +++ b/src/sinks/sematext_logs.rs @@ -86,11 +86,11 @@ impl SinkConfig for SematextLogsConfig { fn map_timestamp(mut event: Event) -> impl Future { let log = event.as_mut_log(); - if let Some(ts) = log.remove(&crate::event::log_schema().timestamp_key()) { + if let Some(ts) = log.remove(&crate::config::log_schema().timestamp_key()) { log.insert("@timestamp", ts); } - if let Some(host) = log.remove(&crate::event::log_schema().host_key()) { + if let Some(host) = log.remove(&crate::config::log_schema().host_key()) { log.insert("os.host", host); } diff --git a/src/sinks/splunk_hec.rs b/src/sinks/splunk_hec.rs index 1d6a6fe894c5d3..ba8fda882fed93 100644 --- a/src/sinks/splunk_hec.rs +++ b/src/sinks/splunk_hec.rs @@ -1,6 +1,6 @@ use crate::{ - config::{DataType, 
SinkConfig, SinkContext, SinkDescription}, - event::{self, Event, LogEvent, Value}, + config::{log_schema, DataType, SinkConfig, SinkContext, SinkDescription}, + event::{Event, LogEvent, Value}, internal_events::{ SplunkEventEncodeError, SplunkEventSent, SplunkSourceMissingKeys, SplunkSourceTypeMissingKeys, @@ -76,7 +76,7 @@ pub enum Encoding { } fn default_host_key() -> Atom { - event::LogSchema::default().host_key().clone() + crate::config::LogSchema::default().host_key().clone() } inventory::submit! { @@ -153,7 +153,7 @@ impl HttpSink for HecSinkConfig { let host = event.get(&self.host_key).cloned(); - let timestamp = match event.remove(&event::log_schema().timestamp_key()) { + let timestamp = match event.remove(&log_schema().timestamp_key()) { Some(Value::Timestamp(ts)) => ts, _ => chrono::Utc::now(), }; @@ -168,7 +168,7 @@ impl HttpSink for HecSinkConfig { let event = match self.encoding.codec() { Encoding::Json => json!(event), Encoding::Text => json!(event - .get(&event::log_schema().message_key()) + .get(&log_schema().message_key()) .map(|v| v.to_string_lossy()) .unwrap_or_else(|| "".into())), }; @@ -315,11 +315,11 @@ mod tests { assert_eq!(kv, &"value".to_string()); assert_eq!( - event[&event::log_schema().message_key().to_string()], + event[&log_schema().message_key().to_string()], "hello world".to_string() ); assert!(event - .get(&event::log_schema().timestamp_key().to_string()) + .get(&log_schema().timestamp_key().to_string()) .is_none()); assert_eq!( diff --git a/src/sinks/util/auto_concurrency/controller.rs b/src/sinks/util/auto_concurrency/controller.rs index dc7d4f0163072f..916f95e1230cbe 100644 --- a/src/sinks/util/auto_concurrency/controller.rs +++ b/src/sinks/util/auto_concurrency/controller.rs @@ -5,7 +5,7 @@ use crate::internal_events::{ AutoConcurrencyAveragedRtt, AutoConcurrencyInFlight, AutoConcurrencyLimit, AutoConcurrencyObservedRtt, }; -use crate::sinks::util::retries::RetryLogic; +use crate::sinks::util::retries::{RetryAction, RetryLogic}; #[cfg(test)] use crate::test_util::stats::{TimeHistogram, TimeWeightedSum}; use std::future::Future; @@ -229,8 +229,14 @@ where start: Instant, response: &Result, ) { - let is_back_pressure = match response { - Ok(_) => false, + // It would be better to avoid generating the string in Retry(_) + // just to throw it away here, but it's probably not worth the + // effort. + let response_action = response + .as_ref() + .map(|resp| self.logic.should_retry_response(resp)); + let is_back_pressure = match &response_action { + Ok(action) => matches!(action, RetryAction::Retry(_)), Err(err) => { if let Some(err) = err.downcast_ref::() { self.logic.is_retriable_error(err) @@ -242,7 +248,7 @@ where } }; // Only adjust to the RTT when the request was successfully processed. - let use_rtt = response.is_ok(); + let use_rtt = matches!(response_action, Ok(RetryAction::Successful)); self.adjust_to_response_inner(start, is_back_pressure, use_rtt) } } diff --git a/src/sinks/util/auto_concurrency/tests.rs b/src/sinks/util/auto_concurrency/tests.rs index a5d3c07c7864dd..053539422cdc47 100644 --- a/src/sinks/util/auto_concurrency/tests.rs +++ b/src/sinks/util/auto_concurrency/tests.rs @@ -1,11 +1,7 @@ -// Only run the test suite on unix systems, as the timings on especially -// MacOS are too variable to produce reliable results in these tests. 
-#![cfg(all(test, not(target_os = "macos"), feature = "sources-generator"))] +#![cfg(all(test, feature = "sources-generator"))] use super::controller::ControllerStatistics; -use super::MAX_CONCURRENCY; use crate::{ - assert_within, config::{self, DataType, SinkConfig, SinkContext}, event::{metric::MetricValue, Event}, metrics::{self, capture_metrics, get_controller}, @@ -17,7 +13,10 @@ use crate::{ Healthcheck, VectorSink, }, sources::generator::GeneratorConfig, - test_util::{start_topology, stats::LevelTimeHistogram}, + test_util::{ + start_topology, + stats::{HistogramStats, LevelTimeHistogram, WeightedSumStats}, + }, }; use core::task::Context; use futures::{ @@ -27,20 +26,29 @@ use futures::{ }; use futures01::Sink; use rand::{distributions::Exp1, prelude::*}; -use serde::Serialize; +use serde::{Deserialize, Serialize}; use snafu::Snafu; -use std::collections::HashMap; -use std::sync::{Arc, Mutex}; -use std::task::Poll; -use std::time::{Duration, Instant}; -use tokio::time::{delay_for, delay_until}; +use std::{ + collections::HashMap, + fs::{read_dir, File}, + io::Read, + path::PathBuf, + sync::{Arc, Mutex}, + task::Poll, +}; +use tokio::time::{self, delay_until, Duration, Instant}; use tower::Service; -#[derive(Copy, Clone, Debug, Default, Serialize)] +#[derive(Copy, Clone, Debug, Default, Deserialize, Serialize)] struct TestParams { + // The number of requests to issue. + requests: usize, + + // The time interval between requests. + interval: Option, + // The delay is the base time every request takes return. - #[serde(default)] - delay: Duration, + delay: f64, // The jitter is the amount of per-request response time randomness, // as a fraction of `delay`. The average response time will be @@ -62,6 +70,13 @@ struct TestParams { // The number of outstanding requests at which requests will be dropped. 
#[serde(default)] concurrency_drop: usize, + + #[serde(default = "default_in_flight_limit")] + in_flight_limit: InFlightLimit, +} + +fn default_in_flight_limit() -> InFlightLimit { + InFlightLimit::Auto } #[derive(Debug, Default, Serialize)] @@ -160,26 +175,27 @@ impl Service> for TestSink { fn call(&mut self, _request: Vec) -> Self::Future { let now = Instant::now(); let mut stats = self.stats.lock().expect("Poisoned stats lock"); - stats.in_flight.adjust(1, now); + stats.in_flight.adjust(1, now.into()); let in_flight = stats.in_flight.level(); let params = self.params; - let delay = params.delay.mul_f64( - 1.0 + (in_flight - 1) as f64 * params.concurrency_scale - + thread_rng().sample(Exp1) * params.jitter, + let delay = Duration::from_secs_f64( + params.delay + * (1.0 + + (in_flight - 1) as f64 * params.concurrency_scale + + thread_rng().sample(Exp1) * params.jitter), ); - let delay = delay_until((now + delay).into()); if params.concurrency_drop > 0 && in_flight >= params.concurrency_drop { - stats.in_flight.adjust(-1, now); + stats.in_flight.adjust(-1, now.into()); Box::pin(pending()) } else { let stats2 = Arc::clone(&self.stats); Box::pin(async move { - delay.await; + delay_until(now + delay).await; let mut stats = stats2.lock().expect("Poisoned stats lock"); let in_flight = stats.in_flight.level(); - stats.in_flight.adjust(-1, Instant::now()); + stats.in_flight.adjust(-1, Instant::now().into()); if params.concurrency_defer > 0 && in_flight >= params.concurrency_defer { Err(Error::Deferred) @@ -220,26 +236,17 @@ struct Statistics { } #[derive(Debug)] -struct TestData { +struct TestResults { stats: Statistics, cstats: ControllerStatistics, } -async fn run_test(lines: usize, interval: Option, params: TestParams) -> TestData { - run_test4(lines, interval, params, InFlightLimit::Auto).await -} - -async fn run_test4( - lines: usize, - interval: Option, - params: TestParams, - in_flight_limit: InFlightLimit, -) -> TestData { +async fn run_test(params: TestParams) -> TestResults { let _ = metrics::init(); let test_config = TestConfig { request: TowerRequestConfig { - in_flight_limit, + in_flight_limit: params.in_flight_limit, rate_limit_num: Some(9999), timeout_secs: Some(1), ..Default::default() @@ -252,7 +259,8 @@ async fn run_test4( let cstats = Arc::clone(&test_config.controller_stats); let mut config = config::Config::builder(); - let generator = GeneratorConfig::repeat(vec!["line 1".into()], lines, interval); + let generator = + GeneratorConfig::repeat(vec!["line 1".into()], params.requests, params.interval); config.add_source("in", generator); config.add_sink("out", &["in"], test_config); @@ -260,11 +268,16 @@ async fn run_test4( let controller = get_controller().unwrap(); - // Give time for the generator to start and queue all its data. - let delay = interval.unwrap_or(0.0) * (lines as f64) + 1.0; - delay_for(Duration::from_secs_f64(delay)).await; + // Give time for the generator to start and queue all its data, and + // all the requests to resolve to a response. + let delay = params.interval.unwrap_or(0.0) * (params.requests as f64) + 1.0; + // This is crude and dumb, but it works, and the tests run fast and + // the results are highly repeatable. 
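// A self-contained sketch of the paused-clock technique this harness relies on,
// using the same tokio 0.2 `time::pause` / `time::advance` helpers imported
// above (the test name is illustrative): once the clock is paused, time moves
// only when the test advances it, so results no longer depend on wall-clock
// scheduling jitter.
#[tokio::test]
async fn clock_moves_only_when_advanced() {
    use tokio::time::{self, Duration, Instant};

    time::pause();
    let start = Instant::now();
    time::advance(Duration::from_millis(500)).await;
    assert_eq!(start.elapsed(), Duration::from_millis(500));
}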
+ let msecs = (delay * 1000.0) as usize; + for _ in 0..msecs { + time::advance(Duration::from_millis(1)).await; + } topology.stop().compat().await.unwrap(); - //shutdown_on_idle(rt); let stats = Arc::try_unwrap(stats) .expect("Failed to unwrap stats Arc") @@ -293,7 +306,7 @@ async fn run_test4( matches!(metrics.get("auto_concurrency_averaged_rtt").unwrap().value, MetricValue::Distribution { .. }) ); - if in_flight_limit == InFlightLimit::Auto { + if params.in_flight_limit == InFlightLimit::Auto { assert!( matches!(metrics.get("auto_concurrency_limit").unwrap().value, MetricValue::Distribution { .. }) @@ -304,336 +317,203 @@ async fn run_test4( MetricValue::Distribution { .. }) ); - TestData { stats, cstats } + TestResults { stats, cstats } } -#[tokio::test] -async fn fixed_concurrency() { - // Simulate a very jittery link, but with a fixed concurrency - let results = run_test4( - 200, - None, - TestParams { - delay: Duration::from_millis(100), - jitter: 0.5, - ..Default::default() - }, - InFlightLimit::Fixed(10), - ) - .await; - - let in_flight = results.stats.in_flight.stats().unwrap(); - assert_eq!(in_flight.max, 10, "{:#?}", results); - assert_eq!(in_flight.mode, 10, "{:#?}", results); - - // Even with jitter, the concurrency limit should never vary - let concurrency_limit = results.cstats.concurrency_limit.stats().unwrap(); - assert_eq!(concurrency_limit.min, 10, "{:#?}", results); - assert_eq!(concurrency_limit.max, 10, "{:#?}", results); - let in_flight = results.cstats.in_flight.stats().unwrap(); - assert_eq!(in_flight.max, 10, "{:#?}", results); - assert_eq!(in_flight.mode, 10, "{:#?}", results); +#[derive(Debug)] +enum FailureMode { + ExceededMinimum, + ExceededMaximum, } -#[tokio::test] -async fn constant_link() { - let results = run_test( - 500, - None, - TestParams { - delay: Duration::from_millis(100), - ..Default::default() - }, - ) - .await; - - // With a constant response time link and enough responses, the - // limiter will ramp up towards the maximum concurrency. 
- let in_flight = results.stats.in_flight.stats().unwrap(); - assert_within!(in_flight.max, 10, MAX_CONCURRENCY, "{:#?}", results); - assert_within!( - in_flight.mean, - 6.0, - MAX_CONCURRENCY as f64, - "{:#?}", - results - ); +#[derive(Debug)] +struct Failure { + stat_name: String, + mode: FailureMode, + value: f64, + reference: f64, +} - let observed_rtt = results.cstats.observed_rtt.stats().unwrap(); - assert_within!(observed_rtt.min, 0.090, 0.120, "{:#?}", results); - assert_within!(observed_rtt.max, 0.090, 0.130, "{:#?}", results); - assert_within!(observed_rtt.mean, 0.090, 0.120, "{:#?}", results); - let averaged_rtt = results.cstats.averaged_rtt.stats().unwrap(); - assert_within!(averaged_rtt.min, 0.090, 0.120, "{:#?}", results); - assert_within!(averaged_rtt.max, 0.090, 0.130, "{:#?}", results); - assert_within!(averaged_rtt.mean, 0.090, 0.120, "{:#?}", results); - let concurrency_limit = results.cstats.concurrency_limit.stats().unwrap(); - assert_within!(concurrency_limit.max, 9, MAX_CONCURRENCY, "{:#?}", results); - assert_within!( - concurrency_limit.mean, - 5.0, - MAX_CONCURRENCY as f64, - "{:#?}", - results - ); - let c_in_flight = results.cstats.in_flight.stats().unwrap(); - assert_within!(c_in_flight.max, 9, MAX_CONCURRENCY, "{:#?}", results); - assert_within!( - c_in_flight.mean, - 6.5, - MAX_CONCURRENCY as f64, - "{:#?}", - results - ); +#[derive(Clone, Copy, Debug, Deserialize)] +struct Range(f64, f64); + +impl Range { + fn assert_usize(&self, value: usize, name1: &str, name2: &str) -> Option { + if value < self.0 as usize { + Some(Failure { + stat_name: format!("{} {}", name1, name2), + mode: FailureMode::ExceededMinimum, + value: value as f64, + reference: self.0, + }) + } else if value > self.1 as usize { + Some(Failure { + stat_name: format!("{} {}", name1, name2), + mode: FailureMode::ExceededMaximum, + value: value as f64, + reference: self.1, + }) + } else { + None + } + } + + fn assert_f64(&self, value: f64, name1: &str, name2: &str) -> Option { + if value < self.0 { + Some(Failure { + stat_name: format!("{} {}", name1, name2), + mode: FailureMode::ExceededMinimum, + value, + reference: self.0, + }) + } else if value > self.1 { + Some(Failure { + stat_name: format!("{} {}", name1, name2), + mode: FailureMode::ExceededMaximum, + value, + reference: self.1, + }) + } else { + None + } + } } -#[tokio::test] -async fn defers_at_high_concurrency() { - let results = run_test( - 500, - None, - TestParams { - delay: Duration::from_millis(100), - concurrency_defer: 5, - ..Default::default() - }, - ) - .await; - - // With a constant time link that gives deferrals over a certain - // concurrency, the limiter will ramp up to that concurrency and - // then drop down repeatedly. Note that, due to the timing of the - // adjustment, this may actually occasionally go over the error - // limit above, but it will be rare. - let in_flight = results.stats.in_flight.stats().unwrap(); - assert_within!(in_flight.max, 4, 6, "{:#?}", results); - // Since the concurrency will drop down by half each time, the - // average will be below this maximum. 
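// A hedged sketch of the additive-increase / multiplicative-decrease shape the
// comments in these tests describe (this is not the controller's actual code):
// the limit creeps up while responses look healthy and is halved when a
// response is treated as backpressure.
fn next_concurrency_limit(current: usize, saw_backpressure: bool, max: usize) -> usize {
    if saw_backpressure {
        (current / 2).max(1) // drop down by half, but never below one
    } else {
        (current + 1).min(max) // ramp up towards the configured maximum
    }
}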
- assert_within!(in_flight.mode, 2, 4, "{:#?}", results); - assert_within!(in_flight.mean, 2.0, 4.0, "{:#?}", results); - - let observed_rtt = results.cstats.observed_rtt.stats().unwrap(); - assert_within!(observed_rtt.min, 0.090, 0.120, "{:#?}", results); - assert_within!(observed_rtt.max, 0.090, 0.130, "{:#?}", results); - assert_within!(observed_rtt.mean, 0.090, 0.120, "{:#?}", results); - let averaged_rtt = results.cstats.averaged_rtt.stats().unwrap(); - assert_within!(averaged_rtt.min, 0.090, 0.120, "{:#?}", results); - assert_within!(averaged_rtt.max, 0.090, 0.130, "{:#?}", results); - assert_within!(averaged_rtt.mean, 0.090, 0.120, "{:#?}", results); - let concurrency_limit = results.cstats.concurrency_limit.stats().unwrap(); - assert_within!(concurrency_limit.max, 5, 6, "{:#?}", results); - assert_within!(concurrency_limit.mode, 2, 5, "{:#?}", results); - assert_within!(concurrency_limit.mean, 2.0, 4.9, "{:#?}", results); - let c_in_flight = results.cstats.in_flight.stats().unwrap(); - assert_within!(c_in_flight.max, 5, 6, "{:#?}", results); - assert_within!(c_in_flight.mode, 2, 4, "{:#?}", results); - assert_within!(c_in_flight.mean, 2.0, 4.0, "{:#?}", results); +#[derive(Clone, Copy, Debug, Deserialize)] +struct ResultTest { + min: Option, + max: Option, + mode: Option, + mean: Option, } -#[tokio::test] -async fn drops_at_high_concurrency() { - let results = run_test( - 500, - None, - TestParams { - delay: Duration::from_millis(100), - concurrency_drop: 5, - ..Default::default() - }, - ) - .await; - - // Since our internal framework doesn't track the "dropped" - // requests, the values won't be representative of the actual number - // of requests in flight (tracked below in the internal stats). - let in_flight = results.stats.in_flight.stats().unwrap(); - assert_within!(in_flight.max, 4, 5, "{:#?}", results); - assert_within!(in_flight.mode, 3, 4, "{:#?}", results); - assert_within!(in_flight.mean, 1.5, 3.5, "{:#?}", results); - - let observed_rtt = results.cstats.observed_rtt.stats().unwrap(); - assert_within!(observed_rtt.min, 0.090, 0.125, "{:#?}", results); - assert_within!(observed_rtt.max, 0.090, 0.125, "{:#?}", results); - assert_within!(observed_rtt.mean, 0.090, 0.125, "{:#?}", results); - let averaged_rtt = results.cstats.averaged_rtt.stats().unwrap(); - assert_within!(averaged_rtt.min, 0.090, 0.125, "{:#?}", results); - assert_within!(averaged_rtt.max, 0.090, 0.125, "{:#?}", results); - assert_within!(averaged_rtt.mean, 0.090, 0.125, "{:#?}", results); - let concurrency_limit = results.cstats.concurrency_limit.stats().unwrap(); - assert_within!(concurrency_limit.max, 8, 15, "{:#?}", results); - assert_within!(concurrency_limit.mode, 5, 10, "{:#?}", results); - assert_within!(concurrency_limit.mean, 5.0, 10.0, "{:#?}", results); - let c_in_flight = results.cstats.in_flight.stats().unwrap(); - assert_within!(c_in_flight.max, 8, 15, "{:#?}", results); - assert_within!(c_in_flight.mode, 5, 10, "{:#?}", results); - assert_within!(c_in_flight.mean, 5.0, 10.0, "{:#?}", results); +impl ResultTest { + fn compare_histogram(&self, stat: HistogramStats, name: &str) -> Vec { + vec![ + self.min + .and_then(|range| range.assert_usize(stat.min, name, "min")), + self.max + .and_then(|range| range.assert_usize(stat.max, name, "max")), + self.mean + .and_then(|range| range.assert_f64(stat.mean, name, "mean")), + self.mode + .and_then(|range| range.assert_usize(stat.mode, name, "mode")), + ] + .into_iter() + .filter_map(|f| f) + .collect::>() + } + + fn compare_weighted_sum(&self, stat: 
WeightedSumStats, name: &str) -> Vec { + vec![ + self.min + .and_then(|range| range.assert_f64(stat.min, name, "min")), + self.max + .and_then(|range| range.assert_f64(stat.max, name, "max")), + self.mean + .and_then(|range| range.assert_f64(stat.mean, name, "mean")), + ] + .into_iter() + .filter_map(|f| f) + .collect::>() + } } -#[tokio::test] -async fn slow_link() { - let results = run_test( - 200, - None, - TestParams { - delay: Duration::from_millis(100), - concurrency_scale: 1.0, - ..Default::default() - }, - ) - .await; - - // With a link that slows down heavily as concurrency increases, the - // limiter will keep the concurrency low (timing skews occasionally - // has it reaching 3, but usually just 2), - let in_flight = results.stats.in_flight.stats().unwrap(); - assert_within!(in_flight.max, 1, 3, "{:#?}", results); - // and it will spend most of its time between 1 and 2. - assert_within!(in_flight.mode, 1, 2, "{:#?}", results); - assert_within!(in_flight.mean, 1.0, 2.0, "{:#?}", results); - - let observed_rtt = results.cstats.observed_rtt.stats().unwrap(); - assert_within!(observed_rtt.min, 0.090, 0.120, "{:#?}", results); - assert_within!(observed_rtt.mean, 0.090, 0.310, "{:#?}", results); - let averaged_rtt = results.cstats.averaged_rtt.stats().unwrap(); - assert_within!(averaged_rtt.min, 0.090, 0.120, "{:#?}", results); - assert_within!(averaged_rtt.mean, 0.090, 0.310, "{:#?}", results); - let concurrency_limit = results.cstats.concurrency_limit.stats().unwrap(); - assert_within!(concurrency_limit.mode, 1, 3, "{:#?}", results); - assert_within!(concurrency_limit.mean, 1.0, 2.0, "{:#?}", results); - let c_in_flight = results.cstats.in_flight.stats().unwrap(); - assert_within!(c_in_flight.max, 1, 3, "{:#?}", results); - assert_within!(c_in_flight.mode, 1, 2, "{:#?}", results); - assert_within!(c_in_flight.mean, 1.0, 2.0, "{:#?}", results); +#[derive(Debug, Deserialize)] +struct ControllerResults { + observed_rtt: Option, + averaged_rtt: Option, + concurrency_limit: Option, + in_flight: Option, } -#[tokio::test] -async fn slow_send_1() { - let results = run_test( - 100, - Some(0.100), - TestParams { - delay: Duration::from_millis(50), - ..Default::default() - }, - ) - .await; - - // With a generator running slower than the link can process, the - // limiter will never raise the concurrency above 1. 
- let in_flight = results.stats.in_flight.stats().unwrap(); - assert_eq!(in_flight.max, 1, "{:#?}", results); - assert_eq!(in_flight.mode, 1, "{:#?}", results); - assert_within!(in_flight.mean, 0.5, 1.0, "{:#?}", results); - - let observed_rtt = results.cstats.observed_rtt.stats().unwrap(); - assert_within!(observed_rtt.min, 0.045, 0.060, "{:#?}", results); - assert_within!(observed_rtt.mean, 0.045, 0.060, "{:#?}", results); - let averaged_rtt = results.cstats.averaged_rtt.stats().unwrap(); - assert_within!(averaged_rtt.min, 0.045, 0.060, "{:#?}", results); - assert_within!(averaged_rtt.mean, 0.045, 0.060, "{:#?}", results); - let concurrency_limit = results.cstats.concurrency_limit.stats().unwrap(); - assert_eq!(concurrency_limit.mode, 1, "{:#?}", results); - assert_eq!(concurrency_limit.mean, 1.0, "{:#?}", results); - let c_in_flight = results.cstats.in_flight.stats().unwrap(); - assert_eq!(c_in_flight.max, 1, "{:#?}", results); - assert_eq!(c_in_flight.mode, 1, "{:#?}", results); - assert_within!(c_in_flight.mean, 0.5, 1.0, "{:#?}", results); +#[derive(Debug, Deserialize)] +struct StatsResults { + in_flight: Option, } -#[tokio::test] -async fn slow_send_2() { - let results = run_test( - 100, - Some(0.050), - TestParams { - delay: Duration::from_millis(50), - ..Default::default() - }, - ) - .await; - - // With a generator running at the same speed as the link RTT, the - // limiter will keep the limit around 1-2 depending on timing jitter. - let in_flight = results.stats.in_flight.stats().unwrap(); - assert_within!(in_flight.max, 1, 3, "{:#?}", results); - assert_within!(in_flight.mode, 1, 2, "{:#?}", results); - assert_within!(in_flight.mean, 0.5, 2.0, "{:#?}", results); - - let observed_rtt = results.cstats.observed_rtt.stats().unwrap(); - assert_within!(observed_rtt.min, 0.045, 0.060, "{:#?}", results); - assert_within!(observed_rtt.mean, 0.045, 0.110, "{:#?}", results); - let averaged_rtt = results.cstats.averaged_rtt.stats().unwrap(); - assert_within!(averaged_rtt.min, 0.045, 0.060, "{:#?}", results); - assert_within!(averaged_rtt.mean, 0.045, 0.110, "{:#?}", results); - let concurrency_limit = results.cstats.concurrency_limit.stats().unwrap(); - assert_within!(concurrency_limit.mode, 1, 2, "{:#?}", results); - assert_within!(concurrency_limit.mean, 1.0, 2.0, "{:#?}", results); - let c_in_flight = results.cstats.in_flight.stats().unwrap(); - assert_within!(c_in_flight.max, 1, 3, "{:#?}", results); - assert_within!(c_in_flight.mode, 1, 2, "{:#?}", results); - assert_within!(c_in_flight.mean, 1.0, 2.0, "{:#?}", results); +#[derive(Debug, Deserialize)] +struct TestInput { + params: TestParams, + stats: StatsResults, + controller: ControllerResults, } -#[tokio::test] -async fn medium_send() { - let results = run_test( - 500, - Some(0.025), - TestParams { - delay: Duration::from_millis(100), - ..Default::default() - }, - ) - .await; - - let in_flight = results.stats.in_flight.stats().unwrap(); - // With a generator running at four times the speed as the link RTT, - // the limiter will keep around 4-5 requests in flight depending on - // timing jitter. 
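// A quick back-of-envelope check of the expectation above: this test sends one
// request every 25 ms against a link with a ~100 ms response time, so roughly
// 0.100 / 0.025 = 4 requests are outstanding at any instant, matching the 4-5
// in-flight range asserted below.
#[test]
fn expected_in_flight_from_rates() {
    let interval_secs = 0.025_f64;
    let delay_secs = 0.100_f64;
    assert_eq!((delay_secs / interval_secs).round() as usize, 4);
}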
- assert_within!(in_flight.mode, 4, 5, "{:#?}", results); - assert_within!(in_flight.mean, 4.0, 6.0, "{:#?}", results); - - let observed_rtt = results.cstats.observed_rtt.stats().unwrap(); - assert_within!(observed_rtt.min, 0.090, 0.120, "{:#?}", results); - assert_within!(observed_rtt.mean, 0.090, 0.120, "{:#?}", results); - let averaged_rtt = results.cstats.averaged_rtt.stats().unwrap(); - assert_within!(averaged_rtt.min, 0.090, 0.120, "{:#?}", results); - assert_within!(averaged_rtt.mean, 0.090, 0.500, "{:#?}", results); - let concurrency_limit = results.cstats.concurrency_limit.stats().unwrap(); - assert_within!(concurrency_limit.max, 4, MAX_CONCURRENCY, "{:#?}", results); - let c_in_flight = results.cstats.in_flight.stats().unwrap(); - assert_within!(c_in_flight.max, 4, MAX_CONCURRENCY, "{:#?}", results); - assert_within!(c_in_flight.mode, 4, 5, "{:#?}", results); - assert_within!(c_in_flight.mean, 4.0, 5.0, "{:#?}", results); +async fn run_compare(file_path: PathBuf, input: TestInput) { + eprintln!("Running test in {:?}", file_path); + + let results = run_test(input.params).await; + + let mut failures = Vec::new(); + + if let Some(test) = input.stats.in_flight { + let in_flight = results.stats.in_flight.stats().unwrap(); + failures.extend(test.compare_histogram(in_flight, "stats in_flight")); + } + + if let Some(test) = input.controller.in_flight { + let in_flight = results.cstats.in_flight.stats().unwrap(); + failures.extend(test.compare_histogram(in_flight, "controller in_flight")); + } + + if let Some(test) = input.controller.concurrency_limit { + let concurrency_limit = results.cstats.concurrency_limit.stats().unwrap(); + failures.extend(test.compare_histogram(concurrency_limit, "controller concurrency_limit")); + } + + if let Some(test) = input.controller.observed_rtt { + let observed_rtt = results.cstats.observed_rtt.stats().unwrap(); + failures.extend(test.compare_weighted_sum(observed_rtt, "controller observed_rtt")); + } + + if let Some(test) = input.controller.averaged_rtt { + let averaged_rtt = results.cstats.averaged_rtt.stats().unwrap(); + failures.extend(test.compare_weighted_sum(averaged_rtt, "controller averaged_rtt")); + } + + for failure in &failures { + let mode = match failure.mode { + FailureMode::ExceededMinimum => "minimum", + FailureMode::ExceededMaximum => "maximum", + }; + eprintln!( + "Comparison failed: {} = {}; {} = {}", + failure.stat_name, failure.value, mode, failure.reference + ); + } + assert!(failures.is_empty(), "{:#?}", results); } #[tokio::test] -async fn jittery_link_small() { - let results = run_test( - 500, - None, - TestParams { - delay: Duration::from_millis(100), - jitter: 0.1, - ..Default::default() - }, - ) - .await; - - // Jitter can cause concurrency management to vary widely, though it - // will typically reach the maximum of 10 in flight. 
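// Aside, not part of this patch: the `all_tests` harness that follows calls
// `time::pause()`, so the simulated link delays complete on tokio's virtual
// clock rather than in real time. A minimal sketch of that idea, assuming a
// tokio version with the `test-util` feature enabled (API names here are from
// current tokio and may differ from the version pinned in this tree):
use std::time::Duration;
use tokio::time;

#[tokio::test]
async fn paused_clock_sleep_is_instant() {
    time::pause();
    let start = time::Instant::now();
    // With the clock paused and no other work pending, tokio auto-advances its
    // virtual time, so this completes without a real 60 second wait.
    time::sleep(Duration::from_secs(60)).await;
    assert!(start.elapsed() >= Duration::from_secs(60));
}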
- let in_flight = results.stats.in_flight.stats().unwrap(); - assert_within!(in_flight.max, 15, 30, "{:#?}", results); - assert_within!(in_flight.mean, 4.0, 20.0, "{:#?}", results); - - let observed_rtt = results.cstats.observed_rtt.stats().unwrap(); - assert_within!(observed_rtt.mean, 0.090, 0.130, "{:#?}", results); - let averaged_rtt = results.cstats.averaged_rtt.stats().unwrap(); - assert_within!(averaged_rtt.mean, 0.090, 0.130, "{:#?}", results); - let concurrency_limit = results.cstats.concurrency_limit.stats().unwrap(); - assert_within!(concurrency_limit.max, 10, 30, "{:#?}", results); - assert_within!(concurrency_limit.mean, 6.0, 20.0, "{:#?}", results); - let c_in_flight = results.cstats.in_flight.stats().unwrap(); - assert_within!(c_in_flight.max, 15, 30, "{:#?}", results); - assert_within!(c_in_flight.mean, 6.0, 20.0, "{:#?}", results); +async fn all_tests() { + const PATH: &str = "tests/data/auto-concurrency"; + + // Read and parse everything first + let mut entries = read_dir(PATH) + .expect("Could not open data directory") + .map(|entry| entry.expect("Could not read data directory").path()) + .filter_map(|file_path| { + if (file_path.extension().map(|ext| ext == "toml")).unwrap_or(false) { + let mut data = String::new(); + File::open(&file_path) + .unwrap() + .read_to_string(&mut data) + .unwrap(); + let input: TestInput = toml::from_str(&data) + .unwrap_or_else(|error| panic!("Invalid TOML in {:?}: {:?}", file_path, error)); + Some((file_path, input)) + } else { + None + } + }) + .collect::>(); + + entries.sort_unstable_by_key(|entry| entry.0.to_string_lossy().to_string()); + + time::pause(); + + // Then run all the tests + for (file_path, input) in entries { + run_compare(file_path, input).await; + } } diff --git a/src/sinks/util/encoding/mod.rs b/src/sinks/util/encoding/mod.rs index cd4eb77d8a92c9..f9ded879f11e1d 100644 --- a/src/sinks/util/encoding/mod.rs +++ b/src/sinks/util/encoding/mod.rs @@ -156,7 +156,8 @@ pub enum TimestampFormat { #[cfg(test)] mod tests { use super::*; - use crate::event; + use crate::config::log_schema; + #[derive(Deserialize, Serialize, Debug, Eq, PartialEq, Clone)] enum TestEncoding { Snoot, @@ -282,7 +283,7 @@ mod tests { let mut event = Event::from("Demo"); let timestamp = event .as_mut_log() - .get(&event::log_schema().timestamp_key()) + .get(&log_schema().timestamp_key()) .unwrap() .clone(); let timestamp = timestamp.as_timestamp().unwrap(); @@ -294,7 +295,7 @@ mod tests { match event .as_mut_log() - .get(&event::log_schema().timestamp_key()) + .get(&log_schema().timestamp_key()) .unwrap() { Value::Integer(_) => {} diff --git a/src/sinks/util/mod.rs b/src/sinks/util/mod.rs index 0fbc0ff37163a3..deb6a42ecfb925 100644 --- a/src/sinks/util/mod.rs +++ b/src/sinks/util/mod.rs @@ -16,7 +16,7 @@ pub mod udp; pub mod unix; pub mod uri; -use crate::event::{self, Event}; +use crate::event::Event; use bytes::Bytes; use encoding::{EncodingConfig, EncodingConfiguration}; use serde::{Deserialize, Serialize}; @@ -67,7 +67,7 @@ pub fn encode_event(mut event: Event, encoding: &EncodingConfig) -> Op Encoding::Json => serde_json::to_vec(&log), Encoding::Text => { let bytes = log - .get(&event::log_schema().message_key()) + .get(&crate::config::log_schema().message_key()) .map(|v| v.as_bytes().to_vec()) .unwrap_or_default(); Ok(bytes) diff --git a/src/sources/docker.rs b/src/sources/docker.rs index c257edd3fda06a..a3bfabb4388847 100644 --- a/src/sources/docker.rs +++ b/src/sources/docker.rs @@ -1,6 +1,6 @@ use super::util::MultilineConfig; use crate::{ - 
config::{DataType, GlobalOptions, SourceConfig, SourceDescription}, + config::{log_schema, DataType, GlobalOptions, SourceConfig, SourceDescription}, event::merge_state::LogEventMergeState, event::{self, Event, LogEvent, Value}, internal_events::{ @@ -820,17 +820,17 @@ impl ContainerLogInfo { let mut log_event = LogEvent::default(); // Source type - log_event.insert(event::log_schema().source_type_key(), Bytes::from("docker")); + log_event.insert(log_schema().source_type_key(), Bytes::from("docker")); // The log message. - log_event.insert(event::log_schema().message_key().clone(), bytes_message); + log_event.insert(log_schema().message_key(), bytes_message); // Stream we got the message from. log_event.insert(STREAM.clone(), stream); // Timestamp of the event. if let Some(timestamp) = timestamp { - log_event.insert(event::log_schema().timestamp_key().clone(), timestamp); + log_event.insert(log_schema().timestamp_key(), timestamp); } // Container ID. @@ -868,10 +868,8 @@ impl ContainerLogInfo { // Otherwise, create a new partial event merge state with the // current message being the initial one. if let Some(partial_event_merge_state) = partial_event_merge_state { - partial_event_merge_state.merge_in_next_event( - log_event, - &[event::log_schema().message_key().clone()], - ); + partial_event_merge_state + .merge_in_next_event(log_event, &[log_schema().message_key().clone()]); } else { *partial_event_merge_state = Some(LogEventMergeState::new(log_event)); }; @@ -884,7 +882,7 @@ impl ContainerLogInfo { // Otherwise it's just a regular event that we return as-is. match partial_event_merge_state.take() { Some(partial_event_merge_state) => partial_event_merge_state - .merge_in_final_event(log_event, &[event::log_schema().message_key().clone()]), + .merge_in_final_event(log_event, &[log_schema().message_key().clone()]), None => log_event, } } else { @@ -974,7 +972,7 @@ fn line_agg_adapter( let mut log_event = event.into_log(); let message_value = log_event - .remove(event::log_schema().message_key()) + .remove(log_schema().message_key()) .expect("message must exist in the event"); let stream_value = log_event .get(&STREAM) @@ -986,7 +984,7 @@ fn line_agg_adapter( }); let line_agg_out = LineAgg::<_, Bytes, LogEvent>::new(line_agg_in, logic); line_agg_out.map(|(_, message, mut log_event)| { - log_event.insert(event::log_schema().message_key(), message); + log_event.insert(log_schema().message_key(), message); Event::Log(log_event) }) } @@ -1223,7 +1221,7 @@ mod tests { // Wait for before message let events = collect_n(out, 1).await.unwrap(); assert_eq!( - events[0].as_log()[&event::log_schema().message_key()], + events[0].as_log()[&log_schema().message_key()], "before".into() ); @@ -1253,14 +1251,14 @@ mod tests { container_remove(&id, &docker).await; let log = events[0].as_log(); - assert_eq!(log[&event::log_schema().message_key()], message.into()); + assert_eq!(log[&log_schema().message_key()], message.into()); assert_eq!(log[&super::CONTAINER], id.into()); assert!(log.get(&super::CREATED_AT).is_some()); assert_eq!(log[&super::IMAGE], "busybox".into()); assert!(log.get(&format!("label.{}", label).into()).is_some()); assert_eq!(events[0].as_log()[&super::NAME], name.into()); assert_eq!( - events[0].as_log()[event::log_schema().source_type_key()], + events[0].as_log()[log_schema().source_type_key()], "docker".into() ); } @@ -1281,11 +1279,11 @@ mod tests { container_remove(&id, &docker).await; assert_eq!( - events[0].as_log()[&event::log_schema().message_key()], + 
events[0].as_log()[&log_schema().message_key()], message.into() ); assert_eq!( - events[1].as_log()[&event::log_schema().message_key()], + events[1].as_log()[&log_schema().message_key()], message.into() ); } @@ -1309,7 +1307,7 @@ mod tests { container_remove(&id1, &docker).await; assert_eq!( - events[0].as_log()[&event::log_schema().message_key()], + events[0].as_log()[&log_schema().message_key()], message.into() ); } @@ -1334,7 +1332,7 @@ mod tests { container_remove(&id1, &docker).await; assert_eq!( - events[0].as_log()[&event::log_schema().message_key()], + events[0].as_log()[&log_schema().message_key()], message.into() ); } @@ -1356,14 +1354,14 @@ mod tests { container_remove(&id, &docker).await; let log = events[0].as_log(); - assert_eq!(log[&event::log_schema().message_key()], message.into()); + assert_eq!(log[&log_schema().message_key()], message.into()); assert_eq!(log[&super::CONTAINER], id.into()); assert!(log.get(&super::CREATED_AT).is_some()); assert_eq!(log[&super::IMAGE], "busybox".into()); assert!(log.get(&format!("label.{}", label).into()).is_some()); assert_eq!(events[0].as_log()[&super::NAME], name.into()); assert_eq!( - events[0].as_log()[event::log_schema().source_type_key()], + events[0].as_log()[log_schema().source_type_key()], "docker".into() ); } @@ -1389,7 +1387,7 @@ mod tests { container_remove(&id, &docker).await; assert_eq!( - events[0].as_log()[&event::log_schema().message_key()], + events[0].as_log()[&log_schema().message_key()], message.into() ); } @@ -1463,7 +1461,7 @@ mod tests { container_remove(&id, &docker).await; let log = events[0].as_log(); - assert_eq!(log[&event::log_schema().message_key()], message.into()); + assert_eq!(log[&log_schema().message_key()], message.into()); } #[tokio::test] @@ -1516,7 +1514,7 @@ mod tests { .map(|event| { event .into_log() - .remove(event::log_schema().message_key()) + .remove(crate::config::log_schema().message_key()) .unwrap() .to_string_lossy() }) diff --git a/src/sources/file.rs b/src/sources/file.rs index f9c8649322cd44..003b19714b8d87 100644 --- a/src/sources/file.rs +++ b/src/sources/file.rs @@ -1,7 +1,7 @@ use super::util::MultilineConfig; use crate::{ - config::{DataType, GlobalOptions, SourceConfig, SourceDescription}, - event::{self, Event}, + config::{log_schema, DataType, GlobalOptions, SourceConfig, SourceDescription}, + event::Event, internal_events::{FileEventReceived, FileSourceInternalEventsEmitter}, line_agg::{self, LineAgg}, shutdown::ShutdownSignal, @@ -205,7 +205,7 @@ pub fn file_source( let host_key = config .host_key .clone() - .unwrap_or_else(|| event::log_schema().host_key().to_string()); + .unwrap_or_else(|| log_schema().host_key().to_string()); let hostname = hostname::get_hostname(); let include = config.include.clone(); @@ -297,7 +297,7 @@ fn create_event( // Add source type event .as_mut_log() - .insert(event::log_schema().source_type_key(), Bytes::from("file")); + .insert(log_schema().source_type_key(), Bytes::from("file")); if let Some(file_key) = &file_key { event.as_mut_log().insert(file_key.clone(), file); @@ -313,9 +313,7 @@ fn create_event( #[cfg(test)] mod tests { use super::*; - use crate::{ - config::Config, event, shutdown::ShutdownSignal, sources::file, test_util::trace_init, - }; + use crate::{config::Config, shutdown::ShutdownSignal, sources::file, test_util::trace_init}; use futures01::Stream; use pretty_assertions::assert_eq; use std::{ @@ -433,11 +431,8 @@ mod tests { assert_eq!(log[&"file".into()], "some_file.rs".into()); assert_eq!(log[&"host".into()], 
"Some.Machine".into()); - assert_eq!( - log[&event::log_schema().message_key()], - "hello world".into() - ); - assert_eq!(log[event::log_schema().source_type_key()], "file".into()); + assert_eq!(log[&log_schema().message_key()], "hello world".into()); + assert_eq!(log[log_schema().source_type_key()], "file".into()); } #[tokio::test] @@ -477,7 +472,7 @@ mod tests { let mut goodbye_i = 0; for event in received { - let line = event.as_log()[&event::log_schema().message_key()].to_string_lossy(); + let line = event.as_log()[&log_schema().message_key()].to_string_lossy(); if line.starts_with("hello") { assert_eq!(line, format!("hello {}", hello_i)); assert_eq!( @@ -547,7 +542,7 @@ mod tests { path.to_str().unwrap() ); - let line = event.as_log()[&event::log_schema().message_key()].to_string_lossy(); + let line = event.as_log()[&log_schema().message_key()].to_string_lossy(); if pre_trunc { assert_eq!(line, format!("pretrunc {}", i)); @@ -613,7 +608,7 @@ mod tests { path.to_str().unwrap() ); - let line = event.as_log()[&event::log_schema().message_key()].to_string_lossy(); + let line = event.as_log()[&log_schema().message_key()].to_string_lossy(); if pre_rot { assert_eq!(line, format!("prerot {}", i)); @@ -672,7 +667,7 @@ mod tests { let mut is = [0; 3]; for event in received { - let line = event.as_log()[&event::log_schema().message_key()].to_string_lossy(); + let line = event.as_log()[&log_schema().message_key()].to_string_lossy(); let mut split = line.split(' '); let file = split.next().unwrap().parse::().unwrap(); assert_ne!(file, 4); @@ -794,10 +789,10 @@ mod tests { assert_eq!( received.as_log().keys().collect::>(), vec![ - event::log_schema().host_key().to_string(), - event::log_schema().message_key().to_string(), - event::log_schema().timestamp_key().to_string(), - event::log_schema().source_type_key().to_string() + log_schema().host_key().to_string(), + log_schema().message_key().to_string(), + log_schema().timestamp_key().to_string(), + log_schema().source_type_key().to_string() ] .into_iter() .collect::>() @@ -835,7 +830,7 @@ mod tests { let received = wait_with_timeout(rx.collect().compat()).await; let lines = received .into_iter() - .map(|event| event.as_log()[&event::log_schema().message_key()].to_string_lossy()) + .map(|event| event.as_log()[&log_schema().message_key()].to_string_lossy()) .collect::>(); assert_eq!(lines, vec!["zeroth line", "first line"]); } @@ -856,7 +851,7 @@ mod tests { let received = wait_with_timeout(rx.collect().compat()).await; let lines = received .into_iter() - .map(|event| event.as_log()[&event::log_schema().message_key()].to_string_lossy()) + .map(|event| event.as_log()[&log_schema().message_key()].to_string_lossy()) .collect::>(); assert_eq!(lines, vec!["second line"]); } @@ -882,7 +877,7 @@ mod tests { let received = wait_with_timeout(rx.collect().compat()).await; let lines = received .into_iter() - .map(|event| event.as_log()[&event::log_schema().message_key()].to_string_lossy()) + .map(|event| event.as_log()[&log_schema().message_key()].to_string_lossy()) .collect::>(); assert_eq!( lines, @@ -919,7 +914,7 @@ mod tests { let received = wait_with_timeout(rx.collect().compat()).await; let lines = received .into_iter() - .map(|event| event.as_log()[&event::log_schema().message_key()].to_string_lossy()) + .map(|event| event.as_log()[&log_schema().message_key()].to_string_lossy()) .collect::>(); assert_eq!(lines, vec!["first line"]); } @@ -944,7 +939,7 @@ mod tests { let received = wait_with_timeout(rx.collect().compat()).await; let lines = received 
.into_iter() - .map(|event| event.as_log()[&event::log_schema().message_key()].to_string_lossy()) + .map(|event| event.as_log()[&log_schema().message_key()].to_string_lossy()) .collect::>(); assert_eq!(lines, vec!["second line"]); } @@ -1022,7 +1017,7 @@ mod tests { .to_string_lossy() .ends_with("before") }) - .map(|event| event.as_log()[&event::log_schema().message_key()].to_string_lossy()) + .map(|event| event.as_log()[&log_schema().message_key()].to_string_lossy()) .collect::>(); let after_lines = received .iter() @@ -1031,7 +1026,7 @@ mod tests { .to_string_lossy() .ends_with("after") }) - .map(|event| event.as_log()[&event::log_schema().message_key()].to_string_lossy()) + .map(|event| event.as_log()[&log_schema().message_key()].to_string_lossy()) .collect::>(); assert_eq!(before_lines, vec!["second line"]); assert_eq!(after_lines, vec!["_first line", "_second line"]); @@ -1080,7 +1075,7 @@ mod tests { rx.map(|event| { event .as_log() - .get(&event::log_schema().message_key()) + .get(&log_schema().message_key()) .unwrap() .clone() }) @@ -1142,7 +1137,7 @@ mod tests { rx.map(|event| { event .as_log() - .get(&event::log_schema().message_key()) + .get(&log_schema().message_key()) .unwrap() .clone() }) @@ -1217,7 +1212,7 @@ mod tests { rx.map(|event| { event .as_log() - .get(&event::log_schema().message_key()) + .get(&log_schema().message_key()) .unwrap() .clone() }) @@ -1284,7 +1279,7 @@ mod tests { rx.map(|event| { event .as_log() - .get(&event::log_schema().message_key()) + .get(&log_schema().message_key()) .unwrap() .clone() }) @@ -1349,7 +1344,7 @@ mod tests { rx.map(|event| { event .as_log() - .get(&event::log_schema().message_key()) + .get(&log_schema().message_key()) .unwrap() .clone() }) @@ -1393,7 +1388,7 @@ mod tests { rx.map(|event| { event .as_log() - .get(&event::log_schema().message_key()) + .get(&log_schema().message_key()) .unwrap() .clone() }) diff --git a/src/sources/generator.rs b/src/sources/generator.rs index 49bbf3be36a5f2..a30771e1928c6b 100644 --- a/src/sources/generator.rs +++ b/src/sources/generator.rs @@ -109,7 +109,7 @@ impl GeneratorConfig { #[cfg(test)] mod tests { use super::*; - use crate::{event, shutdown::ShutdownSignal, Pipeline}; + use crate::{config::log_schema, shutdown::ShutdownSignal, Pipeline}; use futures::compat::Future01CompatExt; use futures01::{stream::Stream, sync::mpsc, Async::*}; use std::time::{Duration, Instant}; @@ -127,7 +127,7 @@ mod tests { #[tokio::test] async fn copies_lines() { - let message_key = event::log_schema().message_key(); + let message_key = log_schema().message_key(); let mut rx = runit( r#"lines = ["one", "two"] count = 1"#, @@ -166,7 +166,7 @@ mod tests { #[tokio::test] async fn adds_sequence() { - let message_key = event::log_schema().message_key(); + let message_key = log_schema().message_key(); let mut rx = runit( r#"lines = ["one", "two"] count = 2 diff --git a/src/sources/http.rs b/src/sources/http.rs index 8e16ea0d139b79..d621c7d599a31a 100644 --- a/src/sources/http.rs +++ b/src/sources/http.rs @@ -1,6 +1,6 @@ use crate::{ - config::{DataType, GlobalOptions, SourceConfig, SourceDescription}, - event::{self, Event}, + config::{log_schema, DataType, GlobalOptions, SourceConfig, SourceDescription}, + event::Event, shutdown::ShutdownSignal, sources::util::{ErrorMessage, HttpSource}, tls::TlsConfig, @@ -52,7 +52,7 @@ impl HttpSource for SimpleHttpSource { .map(|events| add_headers(events, &self.headers, header_map)) .map(|mut events| { // Add source type - let key = event::log_schema().source_type_key(); + let key = 
log_schema().source_type_key(); for event in events.iter_mut() { event.as_mut_log().try_insert(key, Bytes::from("http")); } @@ -162,7 +162,7 @@ fn decode_body(body: Bytes, enc: Encoding) -> Result, ErrorMessage> { fn json_parse_object(value: JsonValue) -> Result { let mut event = Event::new_empty_log(); let log = event.as_mut_log(); - log.insert(event::log_schema().timestamp_key().clone(), Utc::now()); // Add timestamp + log.insert(log_schema().timestamp_key().clone(), Utc::now()); // Add timestamp match value { JsonValue::Object(map) => { for (k, v) in map { @@ -215,8 +215,8 @@ mod tests { use crate::shutdown::ShutdownSignal; use crate::{ - config::{GlobalOptions, SourceConfig}, - event::{self, Event}, + config::{log_schema, GlobalOptions, SourceConfig}, + event::Event, test_util::{collect_n, next_addr, trace_init, wait_for_tcp}, Pipeline, }; @@ -293,19 +293,16 @@ mod tests { { let event = events.remove(0); let log = event.as_log(); - assert_eq!(log[&event::log_schema().message_key()], "test body".into()); - assert!(log.get(&event::log_schema().timestamp_key()).is_some()); - assert_eq!(log[event::log_schema().source_type_key()], "http".into()); + assert_eq!(log[&log_schema().message_key()], "test body".into()); + assert!(log.get(&log_schema().timestamp_key()).is_some()); + assert_eq!(log[log_schema().source_type_key()], "http".into()); } { let event = events.remove(0); let log = event.as_log(); - assert_eq!( - log[&event::log_schema().message_key()], - "test body 2".into() - ); - assert!(log.get(&event::log_schema().timestamp_key()).is_some()); - assert_eq!(log[event::log_schema().source_type_key()], "http".into()); + assert_eq!(log[&log_schema().message_key()], "test body 2".into()); + assert!(log.get(&log_schema().timestamp_key()).is_some()); + assert_eq!(log[log_schema().source_type_key()], "http".into()); } } @@ -324,19 +321,16 @@ mod tests { { let event = events.remove(0); let log = event.as_log(); - assert_eq!(log[&event::log_schema().message_key()], "test body".into()); - assert!(log.get(&event::log_schema().timestamp_key()).is_some()); - assert_eq!(log[event::log_schema().source_type_key()], "http".into()); + assert_eq!(log[&log_schema().message_key()], "test body".into()); + assert!(log.get(&log_schema().timestamp_key()).is_some()); + assert_eq!(log[log_schema().source_type_key()], "http".into()); } { let event = events.remove(0); let log = event.as_log(); - assert_eq!( - log[&event::log_schema().message_key()], - "test body 2".into() - ); - assert!(log.get(&event::log_schema().timestamp_key()).is_some()); - assert_eq!(log[event::log_schema().source_type_key()], "http".into()); + assert_eq!(log[&log_schema().message_key()], "test body 2".into()); + assert!(log.get(&log_schema().timestamp_key()).is_some()); + assert_eq!(log[log_schema().source_type_key()], "http".into()); } } @@ -356,12 +350,12 @@ mod tests { assert!(events .remove(1) .as_log() - .get(&event::log_schema().timestamp_key()) + .get(&log_schema().timestamp_key()) .is_some()); assert!(events .remove(0) .as_log() - .get(&event::log_schema().timestamp_key()) + .get(&log_schema().timestamp_key()) .is_some()); } @@ -379,15 +373,15 @@ mod tests { let event = events.remove(0); let log = event.as_log(); assert_eq!(log[&Atom::from("key")], "value".into()); - assert!(log.get(&event::log_schema().timestamp_key()).is_some()); - assert_eq!(log[event::log_schema().source_type_key()], "http".into()); + assert!(log.get(&log_schema().timestamp_key()).is_some()); + assert_eq!(log[log_schema().source_type_key()], "http".into()); } { let 
event = events.remove(0); let log = event.as_log(); assert_eq!(log[&Atom::from("key2")], "value2".into()); - assert!(log.get(&event::log_schema().timestamp_key()).is_some()); - assert_eq!(log[event::log_schema().source_type_key()], "http".into()); + assert!(log.get(&log_schema().timestamp_key()).is_some()); + assert_eq!(log[log_schema().source_type_key()], "http".into()); } } @@ -409,15 +403,15 @@ mod tests { let event = events.remove(0); let log = event.as_log(); assert_eq!(log[&Atom::from("key1")], "value1".into()); - assert!(log.get(&event::log_schema().timestamp_key()).is_some()); - assert_eq!(log[event::log_schema().source_type_key()], "http".into()); + assert!(log.get(&log_schema().timestamp_key()).is_some()); + assert_eq!(log[log_schema().source_type_key()], "http".into()); } { let event = events.remove(0); let log = event.as_log(); assert_eq!(log[&Atom::from("key2")], "value2".into()); - assert!(log.get(&event::log_schema().timestamp_key()).is_some()); - assert_eq!(log[event::log_schema().source_type_key()], "http".into()); + assert!(log.get(&log_schema().timestamp_key()).is_some()); + assert_eq!(log[log_schema().source_type_key()], "http".into()); } } @@ -455,8 +449,8 @@ mod tests { "false".into() ); assert_eq!(log[&Atom::from("AbsentHeader")], "".into()); - assert!(log.get(&event::log_schema().timestamp_key()).is_some()); - assert_eq!(log[event::log_schema().source_type_key()], "http".into()); + assert!(log.get(&log_schema().timestamp_key()).is_some()); + assert_eq!(log[log_schema().source_type_key()], "http".into()); } } } diff --git a/src/sources/journald.rs b/src/sources/journald.rs index 952a79714f1b86..3571472877b489 100644 --- a/src/sources/journald.rs +++ b/src/sources/journald.rs @@ -1,6 +1,5 @@ use crate::{ - config::{DataType, GlobalOptions, SourceConfig, SourceDescription}, - event, + config::{log_schema, DataType, GlobalOptions, SourceConfig, SourceDescription}, event::{Event, LogEvent, Value}, internal_events::{JournaldEventReceived, JournaldInvalidRecord}, shutdown::ShutdownSignal, @@ -205,10 +204,10 @@ fn create_event(record: Record) -> Event { let mut log = LogEvent::from_iter(record); // Convert some journald-specific field names into Vector standard ones. if let Some(message) = log.remove(&MESSAGE) { - log.insert(event::log_schema().message_key().clone(), message); + log.insert(log_schema().message_key().clone(), message); } if let Some(host) = log.remove(&HOSTNAME) { - log.insert(event::log_schema().host_key().clone(), host); + log.insert(log_schema().host_key().clone(), host); } // Translate the timestamp, and so leave both old and new names. 
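// Aside, not part of this patch: journald realtime timestamps are microseconds
// since the Unix epoch, so the conversion just below splits the value into
// whole seconds plus nanoseconds for chrono. A standalone sketch of that
// arithmetic (function name is illustrative only):
//
// use chrono::{DateTime, TimeZone, Utc};
//
// fn usec_to_datetime(usec: u64) -> DateTime<Utc> {
//     Utc.timestamp((usec / 1_000_000) as i64, (usec % 1_000_000) as u32 * 1_000)
// }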
if let Some(timestamp) = log @@ -222,17 +221,14 @@ fn create_event(record: Record) -> Event { (timestamp % 1_000_000) as u32 * 1_000, ); log.insert( - event::log_schema().timestamp_key().clone(), + log_schema().timestamp_key().clone(), Value::Timestamp(timestamp), ); } } } // Add source type - log.try_insert( - event::log_schema().source_type_key(), - Bytes::from("journald"), - ); + log.try_insert(log_schema().source_type_key(), Bytes::from("journald")); log.into() } @@ -666,7 +662,7 @@ mod tests { Value::Bytes("System Initialization".into()) ); assert_eq!( - received[0].as_log()[event::log_schema().source_type_key()], + received[0].as_log()[log_schema().source_type_key()], "journald".into() ); assert_eq!(timestamp(&received[0]), value_ts(1578529839, 140001000)); @@ -748,11 +744,11 @@ mod tests { } fn message(event: &Event) -> Value { - event.as_log()[&event::log_schema().message_key()].clone() + event.as_log()[&log_schema().message_key()].clone() } fn timestamp(event: &Event) -> Value { - event.as_log()[&event::log_schema().timestamp_key()].clone() + event.as_log()[&log_schema().timestamp_key()].clone() } fn value_ts(secs: i64, usecs: u32) -> Value { diff --git a/src/sources/kafka.rs b/src/sources/kafka.rs index 226220de1e5fbb..ecbd274d7a9786 100644 --- a/src/sources/kafka.rs +++ b/src/sources/kafka.rs @@ -1,6 +1,6 @@ use crate::{ - config::{DataType, GlobalOptions, SourceConfig, SourceDescription}, - event::{self, Event}, + config::{log_schema, DataType, GlobalOptions, SourceConfig, SourceDescription}, + event::Event, internal_events::{KafkaEventFailed, KafkaEventReceived, KafkaOffsetUpdateFailed}, kafka::KafkaAuthConfig, shutdown::ShutdownSignal, @@ -131,7 +131,7 @@ fn kafka_source( let mut event = Event::new_empty_log(); let log = event.as_mut_log(); - log.insert(event::log_schema().message_key().clone(), payload.to_vec()); + log.insert(log_schema().message_key().clone(), payload.to_vec()); // Extract timestamp from kafka message let timestamp = msg @@ -139,10 +139,10 @@ fn kafka_source( .to_millis() .and_then(|millis| Utc.timestamp_millis_opt(millis).latest()) .unwrap_or_else(Utc::now); - log.insert(event::log_schema().timestamp_key().clone(), timestamp); + log.insert(log_schema().timestamp_key().clone(), timestamp); // Add source type - log.insert(event::log_schema().source_type_key(), Bytes::from("kafka")); + log.insert(log_schema().source_type_key(), Bytes::from("kafka")); if let Some(key_field) = &key_field { match msg.key() { @@ -257,9 +257,8 @@ mod test { #[cfg(feature = "kafka-integration-tests")] #[cfg(test)] mod integration_test { - use super::{kafka_source, KafkaSourceConfig}; + use super::*; use crate::{ - event, shutdown::ShutdownSignal, test_util::{collect_n, random_string}, Pipeline, @@ -333,7 +332,7 @@ mod integration_test { let events = collect_n(rx, 1).await.unwrap(); assert_eq!( - events[0].as_log()[&event::log_schema().message_key()], + events[0].as_log()[&log_schema().message_key()], "my message".into() ); assert_eq!( @@ -341,12 +340,9 @@ mod integration_test { "my key".into() ); assert_eq!( - events[0].as_log()[event::log_schema().source_type_key()], + events[0].as_log()[log_schema().source_type_key()], "kafka".into() ); - assert_eq!( - events[0].as_log()[event::log_schema().timestamp_key()], - now.into() - ); + assert_eq!(events[0].as_log()[log_schema().timestamp_key()], now.into()); } } diff --git a/src/sources/kubernetes_logs/mod.rs b/src/sources/kubernetes_logs/mod.rs index d32592d5b8c9d4..f671e4cd177e67 100644 --- a/src/sources/kubernetes_logs/mod.rs +++ 
b/src/sources/kubernetes_logs/mod.rs @@ -5,7 +5,7 @@ #![deny(missing_docs)] -use crate::event::{self, Event}; +use crate::event::Event; use crate::internal_events::{ FileSourceInternalEventsEmitter, KubernetesLogsEventAnnotationFailed, KubernetesLogsEventReceived, @@ -289,7 +289,7 @@ fn create_event(line: Bytes, file: &str) -> Event { // Add source type. event.as_mut_log().insert( - event::log_schema().source_type_key(), + crate::config::log_schema().source_type_key(), COMPONENT_NAME.to_owned(), ); diff --git a/src/sources/kubernetes_logs/parser/cri.rs b/src/sources/kubernetes_logs/parser/cri.rs index a28164a9bfe5c6..20f3e7510852f8 100644 --- a/src/sources/kubernetes_logs/parser/cri.rs +++ b/src/sources/kubernetes_logs/parser/cri.rs @@ -38,7 +38,7 @@ impl Cri { rp_config.patterns = vec![pattern.to_owned()]; rp_config.types.insert( - event::log_schema().timestamp_key().clone(), + crate::config::log_schema().timestamp_key().clone(), "timestamp|%+".to_owned(), ); diff --git a/src/sources/kubernetes_logs/parser/docker.rs b/src/sources/kubernetes_logs/parser/docker.rs index 2327ed584db3a0..6fdc5a8ae6f3fb 100644 --- a/src/sources/kubernetes_logs/parser/docker.rs +++ b/src/sources/kubernetes_logs/parser/docker.rs @@ -1,4 +1,5 @@ use crate::{ + config::log_schema, event::{self, Event, LogEvent, Value}, internal_events::KubernetesLogsDockerFormatParseFailed, transforms::Transform, @@ -34,7 +35,7 @@ impl Transform for Docker { /// Parses `message` as json object and removes it. fn parse_json(log: &mut LogEvent) -> Option<()> { - let to_parse = log.remove(&event::log_schema().message_key())?.as_bytes(); + let to_parse = log.remove(&log_schema().message_key())?.as_bytes(); match serde_json::from_slice(to_parse.as_ref()) { Ok(JsonValue::Object(object)) => { @@ -61,10 +62,7 @@ fn normalize_event(log: &mut LogEvent) -> Result<(), NormalizationError> { }; let time = DateTime::parse_from_rfc3339(String::from_utf8_lossy(time.as_ref()).as_ref()) .context(TimeParsing)?; - log.insert( - event::log_schema().timestamp_key(), - time.with_timezone(&Utc), - ); + log.insert(log_schema().timestamp_key(), time.with_timezone(&Utc)); // Parse message, remove trailing newline and detect if it's partial. let message = log.remove(&LOG).context(LogFieldMissing)?; @@ -88,7 +86,7 @@ fn normalize_event(log: &mut LogEvent) -> Result<(), NormalizationError> { message.truncate(message.len() - 1); is_partial = false; }; - log.insert(event::log_schema().message_key(), message); + log.insert(log_schema().message_key(), message); // For partial messages add a partial event indicator. 
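// Aside, not part of this patch: in Docker's JSON log format a complete line
// ends with '\n', so a missing trailing newline marks the entry as partial.
// A minimal standalone version of the check performed above (name is
// illustrative only):
fn strip_newline_and_flag_partial(message: &mut Vec<u8>) -> bool {
    let is_partial = message.last() != Some(&b'\n');
    if !is_partial {
        message.pop(); // complete lines drop the terminating newline
    }
    is_partial
}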
if is_partial { @@ -109,9 +107,7 @@ enum NormalizationError { #[cfg(test)] pub mod tests { - use super::super::test_util; - use super::Docker; - use crate::event::LogEvent; + use super::{super::test_util, *}; fn make_long_string(base: &str, len: usize) -> String { base.chars().cycle().take(len).collect() diff --git a/src/sources/kubernetes_logs/parser/picker.rs b/src/sources/kubernetes_logs/parser/picker.rs index 168a2948375c63..c398d73ec7cbc6 100644 --- a/src/sources/kubernetes_logs/parser/picker.rs +++ b/src/sources/kubernetes_logs/parser/picker.rs @@ -1,6 +1,6 @@ use super::{cri::Cri, docker::Docker}; use crate::{ - event::{self, Event, Value}, + event::{Event, Value}, transforms::Transform, }; @@ -22,7 +22,7 @@ impl Transform for Picker { Picker::Init => { let message = event .as_log() - .get(event::log_schema().message_key()) + .get(crate::config::log_schema().message_key()) .expect("message key must be present"); let bytes = if let Value::Bytes(bytes) = message { bytes diff --git a/src/sources/kubernetes_logs/partial_events_merger.rs b/src/sources/kubernetes_logs/partial_events_merger.rs index 27e90bef39e986..5552378fcdee9c 100644 --- a/src/sources/kubernetes_logs/partial_events_merger.rs +++ b/src/sources/kubernetes_logs/partial_events_merger.rs @@ -14,7 +14,7 @@ pub fn build(enabled: bool) -> PartialEventsMerger { Some( MergeConfig { partial_event_marker_field: event::PARTIAL.clone(), - merge_fields: vec![event::log_schema().message_key().clone()], + merge_fields: vec![crate::config::log_schema().message_key().clone()], stream_discriminant_fields: vec![Atom::from(FILE_KEY)], } .into(), diff --git a/src/sources/logplex.rs b/src/sources/logplex.rs index a41a5684438f01..d5f0e124493725 100644 --- a/src/sources/logplex.rs +++ b/src/sources/logplex.rs @@ -1,6 +1,6 @@ use crate::{ - config::{DataType, GlobalOptions, SourceConfig}, - event::{self, Event}, + config::{log_schema, DataType, GlobalOptions, SourceConfig}, + event::Event, internal_events::{HerokuLogplexRequestReadError, HerokuLogplexRequestReceived}, shutdown::ShutdownSignal, sources::util::{ErrorMessage, HttpSource}, @@ -143,10 +143,10 @@ fn line_to_event(line: String) -> Event { let log = event.as_mut_log(); if let Ok(ts) = timestamp.parse::>() { - log.insert(event::log_schema().timestamp_key().clone(), ts); + log.insert(log_schema().timestamp_key().clone(), ts); } - log.insert(event::log_schema().host_key().clone(), hostname.to_owned()); + log.insert(log_schema().host_key().clone(), hostname.to_owned()); log.insert("app_name", app_name.to_owned()); log.insert("proc_id", proc_id.to_owned()); @@ -162,10 +162,9 @@ fn line_to_event(line: String) -> Event { }; // Add source type - event.as_mut_log().try_insert( - event::log_schema().source_type_key(), - Bytes::from("logplex"), - ); + event + .as_mut_log() + .try_insert(log_schema().source_type_key(), Bytes::from("logplex")); event } @@ -175,8 +174,8 @@ mod tests { use super::LogplexConfig; use crate::shutdown::ShutdownSignal; use crate::{ - config::{GlobalOptions, SourceConfig}, - event::{self, Event}, + config::{log_schema, GlobalOptions, SourceConfig}, + event::Event, test_util::{collect_n, next_addr, trace_init, wait_for_tcp}, Pipeline, }; @@ -237,18 +236,18 @@ mod tests { let log = event.as_log(); assert_eq!( - log[&event::log_schema().message_key()], + log[&log_schema().message_key()], r#"at=info method=GET path="/cart_link" host=lumberjack-store.timber.io request_id=05726858-c44e-4f94-9a20-37df73be9006 fwd="73.75.38.87" dyno=web.1 connect=1ms service=22ms status=304 
bytes=656 protocol=http"#.into() ); assert_eq!( - log[&event::log_schema().timestamp_key()], + log[&log_schema().timestamp_key()], "2020-01-08T22:33:57.353034+00:00" .parse::>() .unwrap() .into() ); - assert_eq!(log[&event::log_schema().host_key()], "host".into()); - assert_eq!(log[event::log_schema().source_type_key()], "logplex".into()); + assert_eq!(log[&log_schema().host_key()], "host".into()); + assert_eq!(log[log_schema().source_type_key()], "logplex".into()); } #[test] @@ -257,19 +256,16 @@ mod tests { let event = super::line_to_event(body.into()); let log = event.as_log(); + assert_eq!(log[&log_schema().message_key()], "foo bar baz".into()); assert_eq!( - log[&event::log_schema().message_key()], - "foo bar baz".into() - ); - assert_eq!( - log[&event::log_schema().timestamp_key()], + log[&log_schema().timestamp_key()], "2020-01-08T22:33:57.353034+00:00" .parse::>() .unwrap() .into() ); - assert_eq!(log[&event::log_schema().host_key()], "host".into()); - assert_eq!(log[event::log_schema().source_type_key()], "logplex".into()); + assert_eq!(log[&log_schema().host_key()], "host".into()); + assert_eq!(log[log_schema().source_type_key()], "logplex".into()); } #[test] @@ -279,11 +275,11 @@ mod tests { let log = event.as_log(); assert_eq!( - log[&event::log_schema().message_key()], + log[&log_schema().message_key()], "what am i doing here".into() ); - assert!(log.get(&event::log_schema().timestamp_key()).is_some()); - assert_eq!(log[event::log_schema().source_type_key()], "logplex".into()); + assert!(log.get(&log_schema().timestamp_key()).is_some()); + assert_eq!(log[log_schema().source_type_key()], "logplex".into()); } #[test] @@ -292,18 +288,15 @@ mod tests { let event = super::line_to_event(body.into()); let log = event.as_log(); + assert_eq!(log[&log_schema().message_key()], "i'm not that long".into()); assert_eq!( - log[&event::log_schema().message_key()], - "i'm not that long".into() - ); - assert_eq!( - log[&event::log_schema().timestamp_key()], + log[&log_schema().timestamp_key()], "2020-01-08T22:33:57.353034+00:00" .parse::>() .unwrap() .into() ); - assert_eq!(log[&event::log_schema().host_key()], "host".into()); - assert_eq!(log[event::log_schema().source_type_key()], "logplex".into()); + assert_eq!(log[&log_schema().host_key()], "host".into()); + assert_eq!(log[log_schema().source_type_key()], "logplex".into()); } } diff --git a/src/sources/socket/mod.rs b/src/sources/socket/mod.rs index 733d8a510044b6..af09373ca5ed3f 100644 --- a/src/sources/socket/mod.rs +++ b/src/sources/socket/mod.rs @@ -5,8 +5,7 @@ mod unix; use super::util::TcpSource; use crate::{ - config::{DataType, GlobalOptions, SourceConfig, SourceDescription}, - event, + config::{log_schema, DataType, GlobalOptions, SourceConfig, SourceDescription}, shutdown::ShutdownSignal, tls::MaybeTlsSettings, Pipeline, @@ -93,7 +92,7 @@ impl SourceConfig for SocketConfig { let host_key = config .host_key .clone() - .unwrap_or_else(|| event::log_schema().host_key().clone()); + .unwrap_or_else(|| log_schema().host_key().clone()); Ok(udp::udp( config.address, config.max_length, @@ -107,7 +106,7 @@ impl SourceConfig for SocketConfig { let host_key = config .host_key .clone() - .unwrap_or_else(|| event::log_schema().host_key().to_string()); + .unwrap_or_else(|| log_schema().host_key().to_string()); Ok(unix::unix( config.path, config.max_length, @@ -132,9 +131,8 @@ impl SourceConfig for SocketConfig { mod test { use super::{tcp::TcpConfig, udp::UdpConfig, SocketConfig}; use crate::{ - config::{GlobalOptions, SourceConfig}, + 
config::{log_schema, GlobalOptions, SourceConfig}, dns::Resolver, - event, shutdown::{ShutdownSignal, SourceShutdownCoordinator}, sinks::util::tcp::TcpSink, test_util::{ @@ -192,10 +190,7 @@ mod test { .unwrap(); let event = rx.compat().next().await.unwrap().unwrap(); - assert_eq!( - event.as_log()[&event::log_schema().host_key()], - "127.0.0.1".into() - ); + assert_eq!(event.as_log()[&log_schema().host_key()], "127.0.0.1".into()); } #[tokio::test] @@ -221,7 +216,7 @@ mod test { let event = rx.compat().next().await.unwrap().unwrap(); assert_eq!( - event.as_log()[event::log_schema().source_type_key()], + event.as_log()[log_schema().source_type_key()], "socket".into() ); } @@ -256,14 +251,11 @@ mod test { send_lines(addr, lines.into_iter()).await.unwrap(); let event = rx.next().await.unwrap().unwrap(); - assert_eq!( - event.as_log()[&event::log_schema().message_key()], - "short".into() - ); + assert_eq!(event.as_log()[&log_schema().message_key()], "short".into()); let event = rx.next().await.unwrap().unwrap(); assert_eq!( - event.as_log()[&event::log_schema().message_key()], + event.as_log()[&log_schema().message_key()], "more short".into() ); } @@ -308,14 +300,11 @@ mod test { .unwrap(); let event = rx.next().await.unwrap().unwrap(); - assert_eq!( - event.as_log()[&event::log_schema().message_key()], - "short".into() - ); + assert_eq!(event.as_log()[&log_schema().message_key()], "short".into()); let event = rx.next().await.unwrap().unwrap(); assert_eq!( - event.as_log()[&event::log_schema().message_key()], + event.as_log()[&log_schema().message_key()], "more short".into() ); } @@ -366,13 +355,13 @@ mod test { let event = rx.next().await.unwrap().unwrap(); assert_eq!( - event.as_log()[&event::log_schema().message_key()], + event.as_log()[&crate::config::log_schema().message_key()], "short".into() ); let event = rx.next().await.unwrap().unwrap(); assert_eq!( - event.as_log()[&event::log_schema().message_key()], + event.as_log()[&crate::config::log_schema().message_key()], "more short".into() ); } @@ -400,10 +389,7 @@ mod test { .unwrap(); let event = rx.compat().next().await.unwrap().unwrap(); - assert_eq!( - event.as_log()[&event::log_schema().message_key()], - "test".into() - ); + assert_eq!(event.as_log()[&log_schema().message_key()], "test".into()); // Now signal to the Source to shut down. 
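// Aside, not part of this patch: the shutdown handshake below is expected to
// finish within a deadline. The same bounded-wait idea expressed with plain
// tokio primitives (the function and parameter names are assumptions, not
// Vector's shutdown API):
use std::time::Duration;
use tokio::time::timeout;

async fn finished_within_deadline<F>(shutdown_complete: F) -> bool
where
    F: std::future::Future,
{
    // true if the component confirmed shutdown before the 10 second deadline
    timeout(Duration::from_secs(10), shutdown_complete).await.is_ok()
}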
let deadline = Instant::now() + Duration::from_secs(10); @@ -461,7 +447,7 @@ mod test { assert_eq!(100, events.len()); for event in events { assert_eq!( - event.as_log()[&event::log_schema().message_key()], + event.as_log()[&log_schema().message_key()], message.clone().into() ); } @@ -550,7 +536,7 @@ mod test { let events = collect_n(rx, 1).await.unwrap(); assert_eq!( - events[0].as_log()[&event::log_schema().message_key()], + events[0].as_log()[&log_schema().message_key()], "test".into() ); } @@ -564,11 +550,11 @@ mod test { let events = collect_n(rx, 2).await.unwrap(); assert_eq!( - events[0].as_log()[&event::log_schema().message_key()], + events[0].as_log()[&log_schema().message_key()], "test".into() ); assert_eq!( - events[1].as_log()[&event::log_schema().message_key()], + events[1].as_log()[&log_schema().message_key()], "test2".into() ); } @@ -582,11 +568,11 @@ mod test { let events = collect_n(rx, 2).await.unwrap(); assert_eq!( - events[0].as_log()[&event::log_schema().message_key()], + events[0].as_log()[&log_schema().message_key()], "test".into() ); assert_eq!( - events[1].as_log()[&event::log_schema().message_key()], + events[1].as_log()[&log_schema().message_key()], "test2".into() ); } @@ -600,7 +586,7 @@ mod test { let events = collect_n(rx, 1).await.unwrap(); assert_eq!( - events[0].as_log()[&event::log_schema().host_key()], + events[0].as_log()[&log_schema().host_key()], format!("{}", from).into() ); } @@ -614,7 +600,7 @@ mod test { let events = collect_n(rx, 1).await.unwrap(); assert_eq!( - events[0].as_log()[event::log_schema().source_type_key()], + events[0].as_log()[log_schema().source_type_key()], "socket".into() ); } @@ -631,7 +617,7 @@ mod test { let events = collect_n(rx, 1).await.unwrap(); assert_eq!( - events[0].as_log()[&event::log_schema().message_key()], + events[0].as_log()[&log_schema().message_key()], "test".into() ); @@ -668,10 +654,7 @@ mod test { let events = collect_n(rx, 100).await.unwrap(); assert_eq!(100, events.len()); for event in events { - assert_eq!( - event.as_log()[&event::log_schema().message_key()], - "test".into() - ); + assert_eq!(event.as_log()[&log_schema().message_key()], "test".into()); } let deadline = Instant::now() + Duration::from_secs(10); @@ -736,11 +719,11 @@ mod test { assert_eq!(1, events.len()); assert_eq!( - events[0].as_log()[&event::log_schema().message_key()], + events[0].as_log()[&log_schema().message_key()], "test".into() ); assert_eq!( - events[0].as_log()[event::log_schema().source_type_key()], + events[0].as_log()[log_schema().source_type_key()], "socket".into() ); } @@ -756,11 +739,11 @@ mod test { assert_eq!(2, events.len()); assert_eq!( - events[0].as_log()[&event::log_schema().message_key()], + events[0].as_log()[&log_schema().message_key()], "test".into() ); assert_eq!( - events[1].as_log()[&event::log_schema().message_key()], + events[1].as_log()[&log_schema().message_key()], "test2".into() ); } @@ -776,11 +759,11 @@ mod test { assert_eq!(2, events.len()); assert_eq!( - events[0].as_log()[&event::log_schema().message_key()], + events[0].as_log()[&log_schema().message_key()], "test".into() ); assert_eq!( - events[1].as_log()[&event::log_schema().message_key()], + events[1].as_log()[&log_schema().message_key()], "test2".into() ); } diff --git a/src/sources/socket/tcp.rs b/src/sources/socket/tcp.rs index b1af3634b7478a..da0ec1dafb0b6c 100644 --- a/src/sources/socket/tcp.rs +++ b/src/sources/socket/tcp.rs @@ -1,5 +1,5 @@ use crate::{ - event::{self, Event}, + event::Event, internal_events::{SocketEventReceived, 
SocketMode}, sources::util::{SocketListenAddr, TcpSource}, tls::TlsConfig, @@ -58,11 +58,13 @@ impl TcpSource for RawTcpSource { let byte_size = frame.len(); let mut event = Event::from(frame); - event - .as_mut_log() - .insert(event::log_schema().source_type_key(), Bytes::from("socket")); + event.as_mut_log().insert( + crate::config::log_schema().source_type_key(), + Bytes::from("socket"), + ); - let host_key = (self.config.host_key.as_ref()).unwrap_or(&event::log_schema().host_key()); + let host_key = + (self.config.host_key.as_ref()).unwrap_or(&crate::config::log_schema().host_key()); event.as_mut_log().insert(host_key.clone(), host); diff --git a/src/sources/socket/udp.rs b/src/sources/socket/udp.rs index 5da70257deda60..4ef7267079efb7 100644 --- a/src/sources/socket/udp.rs +++ b/src/sources/socket/udp.rs @@ -1,5 +1,5 @@ use crate::{ - event::{self, Event}, + event::Event, internal_events::{SocketEventReceived, SocketMode, SocketReceiveError}, shutdown::ShutdownSignal, sources::Source, @@ -78,7 +78,7 @@ pub fn udp( event .as_mut_log() - .insert(event::log_schema().source_type_key(), Bytes::from("socket")); + .insert(crate::config::log_schema().source_type_key(), Bytes::from("socket")); event .as_mut_log() .insert(host_key.clone(), address.to_string()); diff --git a/src/sources/socket/unix.rs b/src/sources/socket/unix.rs index 1bde97269bc890..7b210eac5a39f4 100644 --- a/src/sources/socket/unix.rs +++ b/src/sources/socket/unix.rs @@ -1,5 +1,5 @@ use crate::{ - event::{self, Event}, + event::Event, internal_events::{SocketEventReceived, SocketMode}, shutdown::ShutdownSignal, sources::{util::build_unix_source, Source}, @@ -40,9 +40,10 @@ impl UnixConfig { fn build_event(host_key: &str, received_from: Option, line: &str) -> Option { let byte_size = line.len(); let mut event = Event::from(line); - event - .as_mut_log() - .insert(event::log_schema().source_type_key(), Bytes::from("socket")); + event.as_mut_log().insert( + crate::config::log_schema().source_type_key(), + Bytes::from("socket"), + ); if let Some(host) = received_from { event.as_mut_log().insert(host_key, host); } diff --git a/src/sources/splunk_hec.rs b/src/sources/splunk_hec.rs index a3baa1ec4156af..a3d996298a42d3 100644 --- a/src/sources/splunk_hec.rs +++ b/src/sources/splunk_hec.rs @@ -1,6 +1,6 @@ use crate::{ - config::{DataType, GlobalOptions, SourceConfig}, - event::{self, Event, LogEvent, Value}, + config::{log_schema, DataType, GlobalOptions, SourceConfig}, + event::{Event, LogEvent, Value}, internal_events::{ SplunkHECEventReceived, SplunkHECRequestBodyInvalid, SplunkHECRequestError, SplunkHECRequestReceived, @@ -348,7 +348,7 @@ impl EventStream { extractors: [ DefaultExtractor::new_with( "host", - &event::log_schema().host_key(), + &log_schema().host_key(), host.map(|value| value.into_bytes().into()), ), DefaultExtractor::new("index", &INDEX), @@ -400,10 +400,7 @@ impl Stream for EventStream { let log = event.as_mut_log(); // Add source type - log.insert( - event::log_schema().source_type_key(), - Bytes::from("splunk_hec"), - ); + log.insert(log_schema().source_type_key(), Bytes::from("splunk_hec")); // Process event field match json.get_mut("event") { @@ -412,7 +409,7 @@ impl Stream for EventStream { if string.is_empty() { return Err(ApiError::EmptyEventField { event: self.events }.into()); } - log.insert(event::log_schema().message_key().clone(), string); + log.insert(log_schema().message_key().clone(), string); } JsonValue::Object(mut object) => { if object.is_empty() { @@ -427,7 +424,7 @@ impl Stream for 
EventStream { log.insert("line", line); } _ => { - log.insert(event::log_schema().message_key(), line); + log.insert(log_schema().message_key(), line); } } } @@ -483,8 +480,8 @@ impl Stream for EventStream { // Add time field match self.time.clone() { - Time::Provided(time) => log.insert(event::log_schema().timestamp_key().clone(), time), - Time::Now(time) => log.insert(event::log_schema().timestamp_key().clone(), time), + Time::Provided(time) => log.insert(log_schema().timestamp_key().clone(), time), + Time::Now(time) => log.insert(log_schema().timestamp_key().clone(), time), }; // Extract default extracted fields @@ -608,24 +605,23 @@ fn raw_event( let log = event.as_mut_log(); // Add message - log.insert(event::log_schema().message_key().clone(), message); + log.insert(log_schema().message_key().clone(), message); // Add channel log.insert(CHANNEL.clone(), channel.into_bytes()); // Add host if let Some(host) = host { - log.insert(event::log_schema().host_key().clone(), host.into_bytes()); + log.insert(log_schema().host_key().clone(), host.into_bytes()); } // Add timestamp - log.insert(event::log_schema().timestamp_key().clone(), Utc::now()); + log.insert(log_schema().timestamp_key().clone(), Utc::now()); // Add source type - event.as_mut_log().try_insert( - event::log_schema().source_type_key(), - Bytes::from("splunk_hec"), - ); + event + .as_mut_log() + .try_insert(log_schema().source_type_key(), Bytes::from("splunk_hec")); emit!(SplunkHECEventReceived); @@ -764,8 +760,8 @@ fn event_error(text: &str, code: u16, event: usize) -> Response { mod tests { use super::{parse_timestamp, SplunkConfig}; use crate::{ - config::{GlobalOptions, SinkConfig, SinkContext, SourceConfig}, - event::{self, Event}, + config::{log_schema, GlobalOptions, SinkConfig, SinkContext, SourceConfig}, + event::Event, shutdown::ShutdownSignal, sinks::{ splunk_hec::{Encoding, HecSinkConfig}, @@ -883,16 +879,10 @@ mod tests { let event = channel_n(vec![message], sink, source).await.remove(0); + assert_eq!(event.as_log()[&log_schema().message_key()], message.into()); + assert!(event.as_log().get(&log_schema().timestamp_key()).is_some()); assert_eq!( - event.as_log()[&event::log_schema().message_key()], - message.into() - ); - assert!(event - .as_log() - .get(&event::log_schema().timestamp_key()) - .is_some()); - assert_eq!( - event.as_log()[event::log_schema().source_type_key()], + event.as_log()[log_schema().source_type_key()], "splunk_hec".into() ); } @@ -906,16 +896,10 @@ mod tests { let event = channel_n(vec![message], sink, source).await.remove(0); + assert_eq!(event.as_log()[&log_schema().message_key()], message.into()); + assert!(event.as_log().get(&log_schema().timestamp_key()).is_some()); assert_eq!( - event.as_log()[&event::log_schema().message_key()], - message.into() - ); - assert!(event - .as_log() - .get(&event::log_schema().timestamp_key()) - .is_some()); - assert_eq!( - event.as_log()[event::log_schema().source_type_key()], + event.as_log()[log_schema().source_type_key()], "splunk_hec".into() ); } @@ -933,16 +917,10 @@ mod tests { let events = channel_n(messages.clone(), sink, source).await; for (msg, event) in messages.into_iter().zip(events.into_iter()) { + assert_eq!(event.as_log()[&log_schema().message_key()], msg.into()); + assert!(event.as_log().get(&log_schema().timestamp_key()).is_some()); assert_eq!( - event.as_log()[&event::log_schema().message_key()], - msg.into() - ); - assert!(event - .as_log() - .get(&event::log_schema().timestamp_key()) - .is_some()); - assert_eq!( - 
event.as_log()[event::log_schema().source_type_key()], + event.as_log()[log_schema().source_type_key()], "splunk_hec".into() ); } @@ -957,16 +935,10 @@ mod tests { let event = channel_n(vec![message], sink, source).await.remove(0); + assert_eq!(event.as_log()[&log_schema().message_key()], message.into()); + assert!(event.as_log().get(&log_schema().timestamp_key()).is_some()); assert_eq!( - event.as_log()[&event::log_schema().message_key()], - message.into() - ); - assert!(event - .as_log() - .get(&event::log_schema().timestamp_key()) - .is_some()); - assert_eq!( - event.as_log()[event::log_schema().source_type_key()], + event.as_log()[log_schema().source_type_key()], "splunk_hec".into() ); } @@ -984,16 +956,10 @@ mod tests { let events = channel_n(messages.clone(), sink, source).await; for (msg, event) in messages.into_iter().zip(events.into_iter()) { + assert_eq!(event.as_log()[&log_schema().message_key()], msg.into()); + assert!(event.as_log().get(&log_schema().timestamp_key()).is_some()); assert_eq!( - event.as_log()[&event::log_schema().message_key()], - msg.into() - ); - assert!(event - .as_log() - .get(&event::log_schema().timestamp_key()) - .is_some()); - assert_eq!( - event.as_log()[event::log_schema().source_type_key()], + event.as_log()[log_schema().source_type_key()], "splunk_hec".into() ); } @@ -1013,12 +979,9 @@ mod tests { let event = collect_n(source, 1).await.unwrap().remove(0); assert_eq!(event.as_log()[&"greeting".into()], "hello".into()); assert_eq!(event.as_log()[&"name".into()], "bob".into()); - assert!(event - .as_log() - .get(&event::log_schema().timestamp_key()) - .is_some()); + assert!(event.as_log().get(&log_schema().timestamp_key()).is_some()); assert_eq!( - event.as_log()[event::log_schema().source_type_key()], + event.as_log()[log_schema().source_type_key()], "splunk_hec".into() ); } @@ -1034,10 +997,7 @@ mod tests { sink.run(stream::once(future::ok(event))).await.unwrap(); let event = collect_n(source, 1).await.unwrap().remove(0); - assert_eq!( - event.as_log()[&event::log_schema().message_key()], - "hello".into() - ); + assert_eq!(event.as_log()[&log_schema().message_key()], "hello".into()); } #[tokio::test] @@ -1050,17 +1010,11 @@ mod tests { assert_eq!(200, post(address, "services/collector/raw", message).await); let event = collect_n(source, 1).await.unwrap().remove(0); - assert_eq!( - event.as_log()[&event::log_schema().message_key()], - message.into() - ); + assert_eq!(event.as_log()[&log_schema().message_key()], message.into()); assert_eq!(event.as_log()[&super::CHANNEL], "guid".into()); - assert!(event - .as_log() - .get(&event::log_schema().timestamp_key()) - .is_some()); + assert!(event.as_log().get(&log_schema().timestamp_key()).is_some()); assert_eq!( - event.as_log()[event::log_schema().source_type_key()], + event.as_log()[log_schema().source_type_key()], "splunk_hec".into() ); } @@ -1097,10 +1051,7 @@ mod tests { let event = channel_n(vec![message], sink, source).await.remove(0); - assert_eq!( - event.as_log()[&event::log_schema().message_key()], - message.into() - ); + assert_eq!(event.as_log()[&log_schema().message_key()], message.into()); } #[tokio::test] @@ -1116,16 +1067,10 @@ mod tests { ); let event = collect_n(source, 1).await.unwrap().remove(0); + assert_eq!(event.as_log()[&log_schema().message_key()], "first".into()); + assert!(event.as_log().get(&log_schema().timestamp_key()).is_some()); assert_eq!( - event.as_log()[&event::log_schema().message_key()], - "first".into() - ); - assert!(event - .as_log() - 
.get(&event::log_schema().timestamp_key()) - .is_some()); - assert_eq!( - event.as_log()[event::log_schema().source_type_key()], + event.as_log()[log_schema().source_type_key()], "splunk_hec".into() ); } @@ -1145,19 +1090,19 @@ mod tests { let events = collect_n(source, 3).await.unwrap(); assert_eq!( - events[0].as_log()[&event::log_schema().message_key()], + events[0].as_log()[&log_schema().message_key()], "first".into() ); assert_eq!(events[0].as_log()[&super::SOURCE], "main".into()); assert_eq!( - events[1].as_log()[&event::log_schema().message_key()], + events[1].as_log()[&log_schema().message_key()], "second".into() ); assert_eq!(events[1].as_log()[&super::SOURCE], "main".into()); assert_eq!( - events[2].as_log()[&event::log_schema().message_key()], + events[2].as_log()[&log_schema().message_key()], "third".into() ); assert_eq!(events[2].as_log()[&super::SOURCE], "secondary".into()); diff --git a/src/sources/stdin.rs b/src/sources/stdin.rs index b72f2a12f94917..b3fb51208a5246 100644 --- a/src/sources/stdin.rs +++ b/src/sources/stdin.rs @@ -1,6 +1,6 @@ use crate::{ - config::{DataType, GlobalOptions, SourceConfig, SourceDescription}, - event::{self, Event}, + config::{log_schema, DataType, GlobalOptions, SourceConfig, SourceDescription}, + event::Event, internal_events::{StdinEventReceived, StdinReadFailed}, shutdown::ShutdownSignal, Pipeline, @@ -72,7 +72,7 @@ where { let host_key = config .host_key - .unwrap_or_else(|| event::log_schema().host_key().to_string()); + .unwrap_or_else(|| log_schema().host_key().to_string()); let hostname = hostname::get_hostname(); let (mut sender, receiver) = channel(1024); @@ -113,7 +113,7 @@ fn create_event(line: Bytes, host_key: &str, hostname: &Option) -> Event // Add source type event .as_mut_log() - .insert(event::log_schema().source_type_key(), Bytes::from("stdin")); + .insert(log_schema().source_type_key(), Bytes::from("stdin")); if let Some(hostname) = &hostname { event.as_mut_log().insert(host_key, hostname.clone()); @@ -125,7 +125,7 @@ fn create_event(line: Bytes, host_key: &str, hostname: &Option) -> Event #[cfg(test)] mod tests { use super::*; - use crate::{event, test_util::trace_init, Pipeline}; + use crate::{test_util::trace_init, Pipeline}; use futures::compat::Future01CompatExt; use futures01::{Async::*, Stream}; use std::io::Cursor; @@ -140,11 +140,8 @@ mod tests { let log = event.into_log(); assert_eq!(log[&"host".into()], "Some.Machine".into()); - assert_eq!( - log[&event::log_schema().message_key()], - "hello world".into() - ); - assert_eq!(log[event::log_schema().source_type_key()], "stdin".into()); + assert_eq!(log[&log_schema().message_key()], "hello world".into()); + assert_eq!(log[log_schema().source_type_key()], "stdin".into()); } #[tokio::test] @@ -167,7 +164,7 @@ mod tests { assert_eq!( Ready(Some("hello world".into())), event.map(|event| event - .map(|event| event.as_log()[&event::log_schema().message_key()].to_string_lossy())) + .map(|event| event.as_log()[&log_schema().message_key()].to_string_lossy())) ); let event = rx.poll().unwrap(); @@ -175,7 +172,7 @@ mod tests { assert_eq!( Ready(Some("hello world again".into())), event.map(|event| event - .map(|event| event.as_log()[&event::log_schema().message_key()].to_string_lossy())) + .map(|event| event.as_log()[&log_schema().message_key()].to_string_lossy())) ); let event = rx.poll().unwrap(); diff --git a/src/sources/syslog.rs b/src/sources/syslog.rs index c2ac06b42ba076..2413a66bf4f357 100644 --- a/src/sources/syslog.rs +++ b/src/sources/syslog.rs @@ -2,8 +2,8 @@ use 
super::util::{SocketListenAddr, TcpSource}; #[cfg(unix)] use crate::sources::util::build_unix_source; use crate::{ - config::{DataType, GlobalOptions, SourceConfig, SourceDescription}, - event::{self, Event, Value}, + config::{log_schema, DataType, GlobalOptions, SourceConfig, SourceDescription}, + event::{Event, Value}, internal_events::{SyslogEventReceived, SyslogUdpReadError, SyslogUdpUtf8Error}, shutdown::ShutdownSignal, tls::{MaybeTlsSettings, TlsConfig}, @@ -88,7 +88,7 @@ impl SourceConfig for SyslogConfig { let host_key = self .host_key .clone() - .unwrap_or_else(|| event::log_schema().host_key().to_string()); + .unwrap_or_else(|| log_schema().host_key().to_string()); match self.mode.clone() { Mode::Tcp { address, tls } => { @@ -324,7 +324,7 @@ fn event_from_str(host_key: &str, default_host: Option, line: &str) -> Op // Add source type event .as_mut_log() - .insert(event::log_schema().source_type_key(), Bytes::from("syslog")); + .insert(log_schema().source_type_key(), Bytes::from("syslog")); if let Some(default_host) = default_host.clone() { event.as_mut_log().insert("source_ip", default_host); @@ -341,7 +341,7 @@ fn event_from_str(host_key: &str, default_host: Option, line: &str) -> Op .unwrap_or_else(Utc::now); event .as_mut_log() - .insert(event::log_schema().timestamp_key().clone(), timestamp); + .insert(log_schema().timestamp_key().clone(), timestamp); insert_fields_from_syslog(&mut event, parsed); @@ -397,7 +397,7 @@ fn insert_fields_from_syslog(event: &mut Event, parsed: Message<&str>) { #[cfg(test)] mod test { use super::{event_from_str, SyslogConfig}; - use crate::event::{self, Event}; + use crate::{config::log_schema, event::Event}; use chrono::TimeZone; #[test] @@ -454,10 +454,10 @@ mod test { { let expected = expected.as_mut_log(); expected.insert( - event::log_schema().timestamp_key().clone(), + log_schema().timestamp_key().clone(), chrono::Utc.ymd(2019, 2, 13).and_hms(19, 48, 34), ); - expected.insert(event::log_schema().source_type_key().clone(), "syslog"); + expected.insert(log_schema().source_type_key().clone(), "syslog"); expected.insert("host", "74794bfb6795"); expected.insert("hostname", "74794bfb6795"); @@ -492,12 +492,12 @@ mod test { { let expected = expected.as_mut_log(); expected.insert( - event::log_schema().timestamp_key().clone(), + log_schema().timestamp_key().clone(), chrono::Utc.ymd(2019, 2, 13).and_hms(19, 48, 34), ); - expected.insert(event::log_schema().host_key().clone(), "74794bfb6795"); + expected.insert(log_schema().host_key().clone(), "74794bfb6795"); expected.insert("hostname", "74794bfb6795"); - expected.insert(event::log_schema().source_type_key().clone(), "syslog"); + expected.insert(log_schema().source_type_key().clone(), "syslog"); expected.insert("severity", "notice"); expected.insert("facility", "user"); expected.insert("version", 1); @@ -583,11 +583,11 @@ mod test { { let expected = expected.as_mut_log(); expected.insert( - event::log_schema().timestamp_key().clone(), + log_schema().timestamp_key().clone(), chrono::Utc.ymd(2020, 2, 13).and_hms(20, 7, 26), ); - expected.insert(event::log_schema().host_key().clone(), "74794bfb6795"); - expected.insert(event::log_schema().source_type_key().clone(), "syslog"); + expected.insert(log_schema().host_key().clone(), "74794bfb6795"); + expected.insert(log_schema().source_type_key().clone(), "syslog"); expected.insert("hostname", "74794bfb6795"); expected.insert("severity", "notice"); expected.insert("facility", "user"); @@ -613,10 +613,10 @@ mod test { { let expected = expected.as_mut_log(); 
expected.insert( - event::log_schema().timestamp_key().clone(), + log_schema().timestamp_key().clone(), chrono::Utc.ymd(2020, 2, 13).and_hms(21, 31, 56), ); - expected.insert(event::log_schema().source_type_key().clone(), "syslog"); + expected.insert(log_schema().source_type_key().clone(), "syslog"); expected.insert("host", "74794bfb6795"); expected.insert("hostname", "74794bfb6795"); expected.insert("severity", "info"); @@ -646,12 +646,12 @@ mod test { { let expected = expected.as_mut_log(); expected.insert( - event::log_schema().timestamp_key().clone(), + log_schema().timestamp_key().clone(), chrono::Utc .ymd(2019, 2, 13) .and_hms_micro(21, 53, 30, 605_850), ); - expected.insert(event::log_schema().source_type_key().clone(), "syslog"); + expected.insert(log_schema().source_type_key().clone(), "syslog"); expected.insert("host", "74794bfb6795"); expected.insert("hostname", "74794bfb6795"); expected.insert("severity", "info"); diff --git a/src/template.rs b/src/template.rs index fbbb9a98405ac5..f1c8278a89ff39 100644 --- a/src/template.rs +++ b/src/template.rs @@ -1,7 +1,4 @@ -use crate::{ - event::{self, Value}, - Event, -}; +use crate::{config::log_schema, event::Value, Event}; use bytes::Bytes; use chrono::{ format::{strftime::StrftimeItems, Item}, @@ -164,7 +161,7 @@ fn render_fields(src: &str, event: &Event) -> Result> { fn render_timestamp(src: &str, event: &Event) -> String { let timestamp = match event { Event::Log(log) => log - .get(&event::log_schema().timestamp_key()) + .get(&log_schema().timestamp_key()) .and_then(Value::as_timestamp), _ => None, }; @@ -339,7 +336,7 @@ mod tests { let mut event = Event::from("hello world"); event .as_mut_log() - .insert(crate::event::log_schema().timestamp_key().clone(), ts); + .insert(log_schema().timestamp_key().clone(), ts); let template = Template::try_from("abcd-%F").unwrap(); @@ -353,7 +350,7 @@ mod tests { let mut event = Event::from("hello world"); event .as_mut_log() - .insert(crate::event::log_schema().timestamp_key().clone(), ts); + .insert(log_schema().timestamp_key().clone(), ts); let template = Template::try_from("abcd-%F_%T").unwrap(); @@ -371,7 +368,7 @@ mod tests { event.as_mut_log().insert("foo", "butts"); event .as_mut_log() - .insert(crate::event::log_schema().timestamp_key().clone(), ts); + .insert(log_schema().timestamp_key().clone(), ts); let template = Template::try_from("{{ foo }}-%F_%T").unwrap(); @@ -389,7 +386,7 @@ mod tests { event.as_mut_log().insert("format", "%F"); event .as_mut_log() - .insert(crate::event::log_schema().timestamp_key().clone(), ts); + .insert(log_schema().timestamp_key().clone(), ts); let template = Template::try_from("nested {{ format }} %T").unwrap(); @@ -407,7 +404,7 @@ mod tests { event.as_mut_log().insert("%F", "foo"); event .as_mut_log() - .insert(crate::event::log_schema().timestamp_key().clone(), ts); + .insert(log_schema().timestamp_key().clone(), ts); let template = Template::try_from("nested {{ %F }} %T").unwrap(); diff --git a/src/test_util/mod.rs b/src/test_util/mod.rs index f337dcde43750e..a6d3d34f36dcc8 100644 --- a/src/test_util/mod.rs +++ b/src/test_util/mod.rs @@ -50,68 +50,6 @@ macro_rules! assert_downcast_matches { }}; } -#[macro_export] -macro_rules! 
assert_within { - // Adapted from std::assert_eq - ($expr:expr, $low:expr, $high:expr) => ({ - match (&$expr, &$low, &$high) { - (expr, low, high) => { - if *expr < *low { - panic!( - r#"assertion failed: `(expr < low)` -expr: {} = `{:?}`, - low: `{:?}`"#, - stringify!($expr), - &*expr, - &*low - ); - } - if *expr > *high { - panic!( - r#"assertion failed: `(expr > high)` -expr: {} = `{:?}`, -high: `{:?}`"#, - stringify!($expr), - &*expr, - &*high - ); - } - } - } - }); - ($expr:expr, $low:expr, $high:expr, $($arg:tt)+) => ({ - match (&$expr, &$low, &$high) { - (expr, low, high) => { - if *expr < *low { - panic!( - r#"assertion failed: `(expr < low)` -expr: {} = `{:?}`, - low: `{:?}` -{}"#, - stringify!($expr), - &*expr, - &*low, - format_args!($($arg)+) - ); - } - if *expr > *high { - panic!( - r#"assertion failed: `(expr > high)` -expr: {} = `{:?}`, -high: `{:?}` -{}"#, - stringify!($expr), - &*expr, - &*high, - format_args!($($arg)+) - ); - } - } - } - }); - -} - pub fn next_addr() -> SocketAddr { let port = pick_unused_port().unwrap(); SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), port) diff --git a/src/transforms/ansi_stripper.rs b/src/transforms/ansi_stripper.rs index 9a4985eaa0fbe4..b3cf24d999eb45 100644 --- a/src/transforms/ansi_stripper.rs +++ b/src/transforms/ansi_stripper.rs @@ -1,7 +1,7 @@ use super::Transform; use crate::{ config::{DataType, TransformConfig, TransformContext, TransformDescription}, - event::{self, Value}, + event::Value, internal_events::{ ANSIStripperEventProcessed, ANSIStripperFailed, ANSIStripperFieldInvalid, ANSIStripperFieldMissing, @@ -27,7 +27,7 @@ impl TransformConfig for AnsiStripperConfig { let field = self .field .as_ref() - .unwrap_or(&event::log_schema().message_key()); + .unwrap_or(&crate::config::log_schema().message_key()); Ok(Box::new(AnsiStripper { field: field.clone(), @@ -79,7 +79,7 @@ impl Transform for AnsiStripper { mod tests { use super::AnsiStripper; use crate::{ - event::{self, Event, Value}, + event::{Event, Value}, transforms::Transform, }; @@ -94,7 +94,7 @@ mod tests { let event = transform.transform(event).unwrap(); assert_eq!( - event.into_log().remove(&event::log_schema().message_key()).unwrap(), + event.into_log().remove(&crate::config::log_schema().message_key()).unwrap(), Value::from("foo bar") ); )+ diff --git a/src/transforms/dedupe.rs b/src/transforms/dedupe.rs index 7af3bcf4f7d29d..fb9bc0697be206 100644 --- a/src/transforms/dedupe.rs +++ b/src/transforms/dedupe.rs @@ -1,8 +1,8 @@ use super::Transform; use crate::{ - config::{DataType, TransformConfig, TransformContext, TransformDescription}, - event, + config::{log_schema, DataType, TransformConfig, TransformContext, TransformDescription}, event::{Event, Value}, + internal_events::{DedupeEventDiscarded, DedupeEventProcessed}, }; use bytes::Bytes; use lru::LruCache; @@ -53,9 +53,9 @@ impl DedupeConfig { FieldMatchConfig::MatchFields(x) => FieldMatchConfig::MatchFields(x.clone()), FieldMatchConfig::IgnoreFields(y) => FieldMatchConfig::IgnoreFields(y.clone()), FieldMatchConfig::Default => FieldMatchConfig::MatchFields(vec![ - event::log_schema().timestamp_key().into(), - event::log_schema().host_key().into(), - event::log_schema().message_key().into(), + log_schema().timestamp_key().into(), + log_schema().host_key().into(), + log_schema().message_key().into(), ]), }; Self { @@ -183,13 +183,10 @@ fn build_cache_entry(event: &Event, fields: &FieldMatchConfig) -> CacheEntry { impl Transform for Dedupe { fn transform(&mut self, event: Event) -> Option { + 
emit!(DedupeEventProcessed); let cache_entry = build_cache_entry(&event, &self.config.fields); if self.cache.put(cache_entry, true).is_some() { - warn!( - message = "Encountered duplicate event; discarding", - rate_limit_secs = 30 - ); - trace!(message = "Encountered duplicate event; discarding", ?event); + emit!(DedupeEventDiscarded { event }); None } else { Some(event) diff --git a/src/transforms/grok_parser.rs b/src/transforms/grok_parser.rs index 25b8067aa266c8..6f60a162807fe3 100644 --- a/src/transforms/grok_parser.rs +++ b/src/transforms/grok_parser.rs @@ -1,7 +1,7 @@ use super::Transform; use crate::{ - config::{DataType, TransformConfig, TransformContext, TransformDescription}, - event::{self, Event, PathComponent, PathIter}, + config::{log_schema, DataType, TransformConfig, TransformContext, TransformDescription}, + event::{Event, PathComponent, PathIter}, internal_events::{ GrokParserConversionFailed, GrokParserEventProcessed, GrokParserFailedMatch, GrokParserMissingField, @@ -39,10 +39,7 @@ inventory::submit! { #[typetag::serde(name = "grok_parser")] impl TransformConfig for GrokParserConfig { fn build(&self, _cx: TransformContext) -> crate::Result> { - let field = self - .field - .as_ref() - .unwrap_or(&event::log_schema().message_key()); + let field = self.field.as_ref().unwrap_or(&log_schema().message_key()); let mut grok = grok::Grok::with_patterns(); @@ -131,7 +128,7 @@ mod tests { use super::GrokParserConfig; use crate::event::LogEvent; use crate::{ - config::{TransformConfig, TransformContext}, + config::{log_schema, TransformConfig, TransformContext}, event, Event, }; use pretty_assertions::assert_eq; @@ -195,9 +192,9 @@ mod tests { assert_eq!(2, event.keys().count()); assert_eq!( event::Value::from("Help I'm stuck in an HTTP server"), - event[&event::log_schema().message_key()] + event[&log_schema().message_key()] ); - assert!(!event[&event::log_schema().timestamp_key()] + assert!(!event[&log_schema().timestamp_key()] .to_string_lossy() .is_empty()); } @@ -242,9 +239,9 @@ mod tests { assert_eq!(2, event.keys().count()); assert_eq!( event::Value::from("i am the only field"), - event[&event::log_schema().message_key()] + event[&log_schema().message_key()] ); - assert!(!event[&event::log_schema().timestamp_key()] + assert!(!event[&log_schema().timestamp_key()] .to_string_lossy() .is_empty()); } diff --git a/src/transforms/json_parser.rs b/src/transforms/json_parser.rs index 82ec4868f1dfde..a7c35c845827a3 100644 --- a/src/transforms/json_parser.rs +++ b/src/transforms/json_parser.rs @@ -1,7 +1,7 @@ use super::Transform; use crate::{ - config::{DataType, TransformConfig, TransformContext, TransformDescription}, - event::{self, Event}, + config::{log_schema, DataType, TransformConfig, TransformContext, TransformDescription}, + event::Event, internal_events::{JsonParserEventProcessed, JsonParserFailedParse, JsonParserTargetExists}, }; use serde::{Deserialize, Serialize}; @@ -57,7 +57,7 @@ impl From for JsonParser { let field = if let Some(field) = &config.field { field } else { - &event::log_schema().message_key() + &log_schema().message_key() }; JsonParser { @@ -132,8 +132,7 @@ impl Transform for JsonParser { #[cfg(test)] mod test { use super::{JsonParser, JsonParserConfig}; - use crate::event::{self, Event}; - use crate::transforms::Transform; + use crate::{config::log_schema, event::Event, transforms::Transform}; use serde_json::json; use string_cache::DefaultAtom as Atom; @@ -145,10 +144,7 @@ mod test { let event = parser.transform(event).unwrap(); - assert!(event - 
.as_log() - .get(&event::log_schema().message_key()) - .is_none()); + assert!(event.as_log().get(&log_schema().message_key()).is_none()); } #[test] @@ -162,10 +158,7 @@ mod test { let event = parser.transform(event).unwrap(); - assert!(event - .as_log() - .get(&event::log_schema().message_key()) - .is_some()); + assert!(event.as_log().get(&log_schema().message_key()).is_some()); } #[test] @@ -182,7 +175,7 @@ mod test { assert_eq!(event.as_log()[&Atom::from("greeting")], "hello".into()); assert_eq!(event.as_log()[&Atom::from("name")], "bob".into()); assert_eq!( - event.as_log()[&event::log_schema().message_key()], + event.as_log()[&log_schema().message_key()], r#"{"greeting": "hello", "name": "bob"}"#.into() ); } @@ -229,7 +222,7 @@ mod test { assert_eq!(event.as_log()[&Atom::from("greeting")], "hello".into()); assert_eq!(event.as_log()[&Atom::from("name")], "bob".into()); assert_eq!( - event.as_log()[&event::log_schema().message_key()], + event.as_log()[&log_schema().message_key()], r#" {"greeting": "hello", "name": "bob"} "#.into() ); } @@ -310,10 +303,7 @@ mod test { let parsed = parser.transform(event.clone()).unwrap(); assert_eq!(event, parsed); - assert_eq!( - event.as_log()[&event::log_schema().message_key()], - invalid.into() - ); + assert_eq!(event.as_log()[&log_schema().message_key()], invalid.into()); // Field let mut parser = JsonParser::from(JsonParserConfig { diff --git a/src/transforms/log_to_metric.rs b/src/transforms/log_to_metric.rs index 8d958b5e00486b..41693c27a1e1e0 100644 --- a/src/transforms/log_to_metric.rs +++ b/src/transforms/log_to_metric.rs @@ -1,8 +1,8 @@ use super::Transform; use crate::{ - config::{DataType, TransformConfig, TransformContext, TransformDescription}, + config::{log_schema, DataType, TransformConfig, TransformContext, TransformDescription}, event::metric::{Metric, MetricKind, MetricValue, StatisticKind}, - event::{self, Value}, + event::Value, internal_events::{ LogToMetricEventProcessed, LogToMetricFieldNotFound, LogToMetricParseError, LogToMetricRenderError, LogToMetricTemplateError, @@ -155,7 +155,7 @@ fn to_metric(config: &MetricConfig, event: &Event) -> Result Self { Self { partial_event_marker_field: event::PARTIAL.clone(), - merge_fields: vec![event::log_schema().message_key().clone()], + merge_fields: vec![crate::config::log_schema().message_key().clone()], stream_discriminant_fields: vec![], } } diff --git a/src/transforms/reduce/mod.rs b/src/transforms/reduce/mod.rs index 57c79c53fcff0f..67bd2e112eaf8a 100644 --- a/src/transforms/reduce/mod.rs +++ b/src/transforms/reduce/mod.rs @@ -4,6 +4,7 @@ use crate::{ config::{DataType, TransformConfig, TransformContext, TransformDescription}, event::discriminant::Discriminant, event::{Event, LogEvent}, + internal_events::{ReduceEventProcessed, ReduceStaleEventFlushed}, }; use async_stream::stream; use futures::{ @@ -180,6 +181,7 @@ impl Reduce { } for k in &flush_discriminants { if let Some(t) = self.reduce_merge_states.remove(k) { + emit!(ReduceStaleEventFlushed); output.push(Event::from(t.flush())); } } @@ -230,6 +232,8 @@ impl Transform for Reduce { } } + emit!(ReduceEventProcessed); + self.flush_into(output); } diff --git a/src/transforms/regex_parser.rs b/src/transforms/regex_parser.rs index 1718af73bb0bbb..75dc91a595354a 100644 --- a/src/transforms/regex_parser.rs +++ b/src/transforms/regex_parser.rs @@ -1,7 +1,7 @@ use super::Transform; use crate::{ config::{DataType, TransformConfig, TransformContext, TransformDescription}, - event::{self, Event, Value}, + event::{Event, Value}, 
internal_events::{ RegexParserConversionFailed, RegexParserEventProcessed, RegexParserFailedMatch, RegexParserMissingField, RegexParserTargetExists, @@ -136,7 +136,7 @@ impl RegexParser { let field = config .field .as_ref() - .unwrap_or(&event::log_schema().message_key()); + .unwrap_or(&crate::config::log_schema().message_key()); let patterns = match (&config.regex, &config.patterns.len()) { (None, 0) => { diff --git a/src/transforms/sampler.rs b/src/transforms/sampler.rs index 6a10c2d09b6d08..57cac7f6d9e817 100644 --- a/src/transforms/sampler.rs +++ b/src/transforms/sampler.rs @@ -1,7 +1,7 @@ use super::Transform; use crate::{ - config::{DataType, TransformConfig, TransformContext, TransformDescription}, - event::{self, Event}, + config::{log_schema, DataType, TransformConfig, TransformContext, TransformDescription}, + event::Event, internal_events::{SamplerEventDiscarded, SamplerEventProcessed}, }; use regex::RegexSet; // TODO: use regex::bytes @@ -53,7 +53,7 @@ pub struct Sampler { impl Sampler { pub fn new(rate: u64, key_field: Option, pass_list: RegexSet) -> Self { - let key_field = key_field.unwrap_or_else(|| event::log_schema().message_key().clone()); + let key_field = key_field.unwrap_or_else(|| log_schema().message_key().clone()); Self { rate, key_field, @@ -91,8 +91,8 @@ impl Transform for Sampler { #[cfg(test)] mod tests { - use super::Sampler; - use crate::event::{self, Event}; + use super::*; + use crate::event::Event; use crate::transforms::Transform; use approx::assert_relative_eq; use regex::RegexSet; @@ -170,7 +170,7 @@ mod tests { let passing = events .into_iter() .filter(|s| { - !s.as_log()[&event::log_schema().message_key()] + !s.as_log()[&log_schema().message_key()] .to_string_lossy() .contains("na") }) @@ -183,7 +183,7 @@ mod tests { let passing = events .into_iter() .filter(|s| { - !s.as_log()[&event::log_schema().message_key()] + !s.as_log()[&log_schema().message_key()] .to_string_lossy() .contains("na") }) diff --git a/src/transforms/split.rs b/src/transforms/split.rs index 9196f6cc19a67d..7bc69c53a00666 100644 --- a/src/transforms/split.rs +++ b/src/transforms/split.rs @@ -1,7 +1,7 @@ use super::Transform; use crate::{ config::{DataType, TransformConfig, TransformContext, TransformDescription}, - event::{self, Event}, + event::Event, internal_events::{SplitConvertFailed, SplitEventProcessed, SplitFieldMissing}, types::{parse_check_conversion_map, Conversion}, }; @@ -30,7 +30,7 @@ impl TransformConfig for SplitConfig { let field = self .field .as_ref() - .unwrap_or(&event::log_schema().message_key()); + .unwrap_or(&crate::config::log_schema().message_key()); let types = parse_check_conversion_map(&self.types, &self.field_names) .map_err(|err| format!("{}", err))?; diff --git a/src/transforms/tokenizer.rs b/src/transforms/tokenizer.rs index 9047a2345ffe1b..7c3e75f53943f3 100644 --- a/src/transforms/tokenizer.rs +++ b/src/transforms/tokenizer.rs @@ -1,7 +1,8 @@ use super::Transform; use crate::{ config::{DataType, TransformConfig, TransformContext, TransformDescription}, - event::{self, Event, PathComponent, PathIter}, + event::{Event, PathComponent, PathIter}, + internal_events::{TokenizerConvertFailed, TokenizerEventProcessed, TokenizerFieldMissing}, types::{parse_check_conversion_map, Conversion}, }; use nom::{ @@ -36,7 +37,7 @@ impl TransformConfig for TokenizerConfig { let field = self .field .as_ref() - .unwrap_or(&event::log_schema().message_key()); + .unwrap_or(&crate::config::log_schema().message_key()); let types = parse_check_conversion_map(&self.types, 
&self.field_names)?; @@ -65,7 +66,7 @@ impl TransformConfig for TokenizerConfig { } pub struct Tokenizer { - field_names: Vec<(String, Vec, Conversion)>, + field_names: Vec<(Atom, Vec, Conversion)>, field: Atom, drop_field: bool, } @@ -82,7 +83,7 @@ impl Tokenizer { .map(|name| { let conversion = types.get(&name).unwrap_or(&Conversion::Bytes).clone(); let path: Vec = PathIter::new(&name).collect(); - (name.to_string(), path, conversion) + (name, path, conversion) }) .collect(); @@ -107,12 +108,7 @@ impl Transform for Tokenizer { event.as_mut_log().insert_path(path.clone(), value); } Err(error) => { - debug!( - message = "Could not convert types.", - path = &name[..], - %error, - rate_limit_secs = 30 - ); + emit!(TokenizerConvertFailed { field: name, error }); } } } @@ -120,12 +116,11 @@ impl Transform for Tokenizer { event.as_mut_log().remove(&self.field); } } else { - debug!( - message = "Field does not exist.", - field = self.field.as_ref(), - ); + emit!(TokenizerFieldMissing { field: &self.field }); }; + emit!(TokenizerEventProcessed); + Some(event) } } diff --git a/src/validate.rs b/src/validate.rs index f99d46fa8d9669..d8eb91718ebc00 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -1,6 +1,5 @@ use crate::{ config::{self, Config, ConfigDiff}, - event, topology::{self, builder::Pieces}, }; use colored::*; @@ -88,7 +87,7 @@ async fn validate_components( diff: &ConfigDiff, fmt: &mut Formatter, ) -> Option { - event::LOG_SCHEMA + crate::config::LOG_SCHEMA .set(config.global.log_schema.clone()) .expect("Couldn't set schema"); diff --git a/tests/data/auto-concurrency-template.toml b/tests/data/auto-concurrency-template.toml new file mode 100644 index 00000000000000..974f87d79d8447 --- /dev/null +++ b/tests/data/auto-concurrency-template.toml @@ -0,0 +1,35 @@ +[params] +requests = 000 +delay = 0.050 +# Delete any of these that are not needed +interval = +jitter = 0 +concurrency_scale = 0 +concurrency_defer = 0 +in_flight_limit = "auto" +concurrency_drop = 0 + +[stats.in_flight] +max = [,] +mean = [,] +mode = [,] + +[controller.concurrency_limit] +max = [,] +mean = [,] +mode = [,] + +[controller.in_flight] +max = [,] +mean = [,] +mode = [,] + +[controller.observed_rtt] +min = [,] +max = [,] +mean = [,] + +[controller.averaged_rtt] +min = [,] +max = [,] +mean = [,] diff --git a/tests/data/auto-concurrency/constant-link.toml b/tests/data/auto-concurrency/constant-link.toml new file mode 100644 index 00000000000000..0b3cecf9e68148 --- /dev/null +++ b/tests/data/auto-concurrency/constant-link.toml @@ -0,0 +1,29 @@ +# With a constant response time link and enough responses, the limiter +# will ramp up towards the maximum concurrency. 
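To make the expectation bands in this file easier to read, here is a rough sketch of the ramp-up the comment above describes. It is a simplified AIMD-style loop written for illustration only; the step size and RTT tolerance are invented and are not the values used by Vector's actual controller.

// Simplified AIMD-style sketch of the ramp-up described above. The constants
// (step size, RTT tolerance) are invented for illustration.
fn main() {
    let baseline_rtt = 0.100_f64; // matches `delay` in the params below
    let mut limit = 1.0_f64;

    for _request in 0..500 {
        let observed_rtt = 0.100_f64; // constant link: the RTT never degrades

        if observed_rtt <= baseline_rtt * 1.05 {
            limit += 0.5; // additive increase while the link keeps up
        } else {
            limit = (limit / 2.0).max(1.0); // multiplicative decrease (never taken here)
        }
    }

    // With a flat RTT the back-off branch is never taken, so the limit only
    // ever grows; the TOML bands below then pin how far the test actually
    // gets within 500 requests.
    println!("limit after 500 requests: {}", limit);
}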
+ +[params] +requests = 500 +delay = 0.100 + +[stats.in_flight] +max = [22, 29] +mean = [10.0, 13.0] + +[controller.in_flight] +max = [22, 29] +mean = [10.0, 13.0] + +[controller.concurrency_limit] +max = [22, 30] +mode = [10, 25] +mean = [10.0, 15.0] + +[controller.observed_rtt] +min = [0.100, 0.102] +max = [0.100, 0.102] +mean = [0.100, 0.102] + +[controller.averaged_rtt] +min = [0.100, 0.102] +max = [0.100, 0.102] +mean = [0.100, 0.102] diff --git a/tests/data/auto-concurrency/defers-at-high-concurrency.toml b/tests/data/auto-concurrency/defers-at-high-concurrency.toml new file mode 100644 index 00000000000000..f35e253371b996 --- /dev/null +++ b/tests/data/auto-concurrency/defers-at-high-concurrency.toml @@ -0,0 +1,36 @@ +[params] +requests = 500 +delay = 0.100 +concurrency_defer = 5 + +# With a constant time link that gives deferrals over a certain +# concurrency, the limiter will ramp up to that concurrency and then +# drop down repeatedly. Note that, due to the timing of the adjustment, +# this may actually occasionally go over the error limit above, but it +# will be rare. +[stats.in_flight] +max = [4, 6] +# Since the concurrency will drop down by half each time, the average +# will be below this maximum. +mode = [4, 4] +mean = [4.0, 5.0] + +[controller.in_flight] +max = [5, 6] +mode = [4, 4] +mean = [4.0, 5.0] + +[controller.concurrency_limit] +max = [5, 6] +mode = [2, 5] +mean = [4.0, 5.0] + +[controller.observed_rtt] +min = [0.100, 0.102] +max = [0.100, 0.102] +mean = [0.100, 0.102] + +[controller.averaged_rtt] +min = [0.100, 0.102] +max = [0.100, 0.102] +mean = [0.100, 0.102] diff --git a/tests/data/auto-concurrency/drops-at-high-concurrency.toml b/tests/data/auto-concurrency/drops-at-high-concurrency.toml new file mode 100644 index 00000000000000..b019f6fd6638ec --- /dev/null +++ b/tests/data/auto-concurrency/drops-at-high-concurrency.toml @@ -0,0 +1,33 @@ +[params] +requests = 500 +delay = 0.100 +concurrency_drop = 5 + +[stats.in_flight] +max = [4, 5] +mean = [2.5, 3.0] +mode = [3, 5] + +# Since our internal framework doesn't track the dropped requests, the +# values won't be representative of the actual number of requests in +# flight. + +[controller.in_flight] +max = [13, 15] +mean = [6.0, 7.0] +mode = [3, 10] + +[controller.concurrency_limit] +max = [10, 15] +mean = [7.0, 8.0] +mode = [4, 6] + +[controller.observed_rtt] +min = [0.100, 0.102] +max = [0.100, 0.102] +mean = [0.100, 0.102] + +[controller.averaged_rtt] +min = [0.100, 0.102] +max = [0.100, 0.102] +mean = [0.100, 0.102] diff --git a/tests/data/auto-concurrency/fixed-concurrency.toml b/tests/data/auto-concurrency/fixed-concurrency.toml new file mode 100644 index 00000000000000..8b96bf3eb01e67 --- /dev/null +++ b/tests/data/auto-concurrency/fixed-concurrency.toml @@ -0,0 +1,22 @@ +# Simulate a very jittery link, but with a fixed concurrency. Even with +# jitter, the concurrency limit should never vary. 
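The pinned-limit case is the simplest one to reason about: when in_flight_limit is a number rather than "auto", the adaptive logic has nothing left to decide, so jitter in the observed RTT cannot move the limit. A minimal sketch of that idea follows; the enum and function names are made up for illustration and are not Vector's API.

// Minimal sketch of why the pinned case is jitter-proof. The names here are
// invented for illustration; they are not Vector's actual types.
enum InFlightLimit {
    Auto,
    Fixed(usize),
}

fn next_limit(current: usize, observed_rtt: f64, averaged_rtt: f64, setting: &InFlightLimit) -> usize {
    match setting {
        // A pinned limit ignores the RTT samples entirely.
        InFlightLimit::Fixed(n) => *n,
        // Auto mode would apply some AIMD-style adjustment instead.
        InFlightLimit::Auto => {
            if observed_rtt <= averaged_rtt {
                current + 1
            } else {
                (current / 2).max(1)
            }
        }
    }
}

fn main() {
    let setting = InFlightLimit::Fixed(10);
    // Even wildly jittery RTT samples never move the pinned limit.
    for rtt in [0.05_f64, 0.30, 0.11, 0.22].iter() {
        assert_eq!(next_limit(10, *rtt, 0.10, &setting), 10);
    }
    // By contrast, auto mode reacts to a slow sample by halving the limit.
    assert_eq!(next_limit(10, 0.30, 0.10, &InFlightLimit::Auto), 5);
    println!("fixed limit held at 10");
}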
+[params] +requests = 200 +delay = 0.100 +jitter = 0.5 +in_flight_limit = 10 + +[stats.in_flight] +max = [10, 10] +mode = [10, 10] +mean = [8.5, 10.0] + +[controller.in_flight] +max = [10, 10] +mode = [10, 10] +mean = [8.5, 10.0] + +[controller.concurrency_limit] +min = [10, 10] +max = [10, 10] +mode = [10, 10] diff --git a/tests/data/auto-concurrency/jittery-link-small.toml b/tests/data/auto-concurrency/jittery-link-small.toml new file mode 100644 index 00000000000000..b8d7c393e865d5 --- /dev/null +++ b/tests/data/auto-concurrency/jittery-link-small.toml @@ -0,0 +1,25 @@ +[params] +requests = 1000 +delay = 0.100 +jitter = 0.1 + +# Jitter can cause concurrency management to vary widely, though it +# will typically reach high values of requests in flight. + +[stats.in_flight] +max = [20, 35] +mean = [10.0, 20.0] + +[controller.in_flight] +max = [20, 35] +mean = [10.0, 20.0] + +[controller.concurrency_limit] +max = [20, 35] +mean = [10.0, 20.0] + +[controller.observed_rtt] +mean = [0.100, 0.130] + +[controller.averaged_rtt] +mean = [0.100, 0.130] diff --git a/tests/data/auto-concurrency/medium-send.toml b/tests/data/auto-concurrency/medium-send.toml new file mode 100644 index 00000000000000..166c268cbedbff --- /dev/null +++ b/tests/data/auto-concurrency/medium-send.toml @@ -0,0 +1,29 @@ +[params] +requests = 500 +interval = 0.025 +delay = 0.100 + +# With a generator running at four times the speed as the link RTT, +# the limiter will keep around 4-5 requests in flight depending on +# timing jitter. + +[stats.in_flight] +max = [8, 8] +mode = [4, 5] +mean = [4.0, 4.5] + +[controller.in_flight] +max = [8, 8] +mode = [4, 5] +mean = [4.0, 4.5] + +[controller.concurrency_limit] +max = [8, 8] + +[controller.observed_rtt] +min = [0.100, 0.102] +max = [0.100, 0.102] + +[controller.averaged_rtt] +min = [0.100, 0.102] +max = [0.100, 0.102] diff --git a/tests/data/auto-concurrency/slow-link.toml b/tests/data/auto-concurrency/slow-link.toml new file mode 100644 index 00000000000000..a935df0f55524a --- /dev/null +++ b/tests/data/auto-concurrency/slow-link.toml @@ -0,0 +1,30 @@ +[params] +requests = 200 +delay = 0.100 +concurrency_scale = 1.0 + +# With a link that slows down heavily as concurrency increases, the +# limiter will keep the concurrency low (timing skews occasionally +# has it reaching 3, but usually just 2), +[stats.in_flight] +max = [2, 3] +# and it will spend most of its time between 1 and 2. +mode = [2, 2] +mean = [1.5, 2.0] + +[controller.in_flight] +max = [2, 3] +mode = [2, 2] +mean = [1.5, 2.0] + +[controller.concurrency_limit] +mode = [2, 3] +mean = [1.7, 2.0] + +[controller.observed_rtt] +min = [0.100, 0.102] +mean = [0.100, 0.310] + +[controller.averaged_rtt] +min = [0.100, 0.102] +mean = [0.100, 0.310] diff --git a/tests/data/auto-concurrency/slow-send-1.toml b/tests/data/auto-concurrency/slow-send-1.toml new file mode 100644 index 00000000000000..869c9b5a2e2066 --- /dev/null +++ b/tests/data/auto-concurrency/slow-send-1.toml @@ -0,0 +1,29 @@ +[params] +requests = 100 +interval = 0.100 +delay = 0.050 + +# With a generator running slower than the link can process, the +# limiter will never raise the concurrency above 1. 
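The expected mean below follows from simple arithmetic: each request occupies the link for `delay` seconds, but a new request is only generated every `interval` seconds, so the single in-flight slot is busy half the time and idle the rest. A quick check of that reasoning (not part of the test harness):

// Back-of-the-envelope check of the mean in-flight band below; this is not
// part of the test harness, just the arithmetic behind the comment above.
fn main() {
    let interval = 0.100_f64; // seconds between generated requests
    let delay = 0.050_f64; // constant response time of the link

    // The single in-flight slot is occupied `delay / interval` of the time.
    let expected_mean_in_flight = delay / interval;

    assert!((0.5..=0.55).contains(&expected_mean_in_flight));
    println!("expected mean in-flight ≈ {}", expected_mean_in_flight);
}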
+ +[stats.in_flight] +max = [1, 1] +mode = [1, 1] +mean = [0.5, 0.55] + +[controller.in_flight] +max = [1, 1] +mode = [1, 1] +mean = [0.5, 0.55] + +[controller.concurrency_limit] +mode = [1, 1] +mean = [1.0, 1.0] + +[controller.observed_rtt] +min = [0.050, 0.052] +mean = [0.050, 0.052] + +[controller.averaged_rtt] +min = [0.050, 0.052] +mean = [0.050, 0.052] diff --git a/tests/data/auto-concurrency/slow-send-2.toml b/tests/data/auto-concurrency/slow-send-2.toml new file mode 100644 index 00000000000000..82b61780c2f1d6 --- /dev/null +++ b/tests/data/auto-concurrency/slow-send-2.toml @@ -0,0 +1,29 @@ +# With a generator running at the same speed as the link RTT, the +# limiter will keep the limit around 2. + +[params] +requests = 100 +interval = 0.050 +delay = 0.050 + +[stats.in_flight] +max = [1, 2] +mode = [1, 1] +mean = [1.0, 1.2] + +[controller.in_flight] +max = [1, 2] +mode = [1, 1] +mean = [1.0, 2.0] + +[controller.concurrency_limit] +mode = [2, 2] +mean = [1.9, 2.0] + +[controller.observed_rtt] +min = [0.050, 0.052] +mean = [0.050, 0.052] + +[controller.averaged_rtt] +min = [0.050, 0.052] +mean = [0.050, 0.052] diff --git a/tests/support/mod.rs b/tests/support/mod.rs index 897920a0360f20..6ae06eea7580c1 100644 --- a/tests/support/mod.rs +++ b/tests/support/mod.rs @@ -16,7 +16,7 @@ use vector::config::{ DataType, GlobalOptions, SinkConfig, SinkContext, SourceConfig, TransformConfig, TransformContext, }; -use vector::event::{self, metric::MetricValue, Event, Value}; +use vector::event::{metric::MetricValue, Event, Value}; use vector::shutdown::ShutdownSignal; use vector::sinks::{util::StreamSink, Healthcheck, VectorSink}; use vector::sources::Source; @@ -156,11 +156,14 @@ impl Transform for MockTransform { match &mut event { Event::Log(log) => { let mut v = log - .get(&event::log_schema().message_key()) + .get(&vector::config::log_schema().message_key()) .unwrap() .to_string_lossy(); v.push_str(&self.suffix); - log.insert(event::log_schema().message_key().clone(), Value::from(v)); + log.insert( + vector::config::log_schema().message_key().clone(), + Value::from(v), + ); } Event::Metric(metric) => match metric.value { MetricValue::Counter { ref mut value } => { diff --git a/tests/topology.rs b/tests/topology.rs index 8c559fc2682203..39a42a00c01271 100644 --- a/tests/topology.rs +++ b/tests/topology.rs @@ -13,12 +13,7 @@ use std::{ }, }; use tokio::time::{delay_for, Duration}; -use vector::{ - config::Config, - event::{self, Event}, - test_util::start_topology, - topology, -}; +use vector::{config::Config, event::Event, test_util::start_topology, topology}; fn basic_config() -> Config { let mut config = Config::builder(); @@ -37,7 +32,7 @@ fn basic_config_with_sink_failing_healthcheck() -> Config { fn into_message(event: Event) -> String { event .as_log() - .get(&event::log_schema().message_key()) + .get(&vector::config::log_schema().message_key()) .unwrap() .to_string_lossy() } @@ -87,7 +82,7 @@ async fn topology_shutdown_while_active() { ); for event in processed_events { assert_eq!( - event.as_log()[&event::log_schema().message_key()], + event.as_log()[&vector::config::log_schema().message_key()], "test transformed".to_owned().into() ); }
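For reference, the log-schema refactor that runs through most of the hunks above reduces to one change at the call sites: the accessor is now imported from the config module rather than the event module. A minimal sketch of the new-style usage, assuming the vector crate as a dependency in the same way the integration tests above do:

// Illustrative only: the accessor now lives under `config`, not `event`.
use vector::{config::log_schema, event::Event};

fn main() {
    let event = Event::from("hello world");
    let message = event.as_log()[&log_schema().message_key()].to_string_lossy();
    assert_eq!(message, "hello world");
}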