Skip to content

Commit

Permalink
[receiver/dockerstats] Featuregate for new mdatagen implementation + …
Browse files Browse the repository at this point in the history
…semantic convention alignment (#12743)

- Allow using the new implementation of scrape (scrapeV2) via a feature gate
- Align the units with the system metrics semantic convention
- Enhance the tests to test for correct units and descriptions (for the new implementation only)

Co-authored-by: Sean Marciniak <[email protected]>
  • Loading branch information
jamesmoessis and MovieStoreGuy authored Aug 1, 2022
1 parent bab6d87 commit 651712b
Show file tree
Hide file tree
Showing 9 changed files with 443 additions and 240 deletions.
15 changes: 15 additions & 0 deletions receiver/dockerstatsreceiver/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,18 @@ with detailed sample configurations [here](./testdata/config.yaml).
[alpha]: https://github.com/open-telemetry/opentelemetry-collector#alpha
[contrib]: https://github.com/open-telemetry/opentelemetry-collector-releases/tree/main/distributions/otelcol-contrib
## Feature Gates
See the [Collector feature gates](https://github.com/open-telemetry/opentelemetry-collector/blob/main/service/featuregate/README.md#collector-feature-gates) for an overview of feature gates in the collector.
**ALPHA**: `receiver.dockerstats.useScraperV2`

The feature gate `receiver.dockerstats.useScraperV2`, once enabled, allows selective collection of the metrics described in [documentation.md](./documentation.md). When the feature gate is disabled, the metrics settings are mostly ignored and are not configurable, and there are minor variations in metric names and attributes.

This is considered a breaking change for existing users of this receiver, and it is recommended to migrate to the new implementation when possible. Any new users planning to adopt this receiver should enable this feature gate to avoid having to migrate any visualisations or alerts.

This feature gate will eventually be enabled by default, and the old implementation will later be removed. It aims
to give users time to migrate to the new implementation. The target release for this feature gate to be enabled by default
is 0.60.0.
104 changes: 52 additions & 52 deletions receiver/dockerstatsreceiver/documentation.md

Large diffs are not rendered by default.

21 changes: 18 additions & 3 deletions receiver/dockerstatsreceiver/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,25 @@ import (
"go.opentelemetry.io/collector/config"
"go.opentelemetry.io/collector/consumer"
"go.opentelemetry.io/collector/receiver/scraperhelper"
"go.opentelemetry.io/collector/service/featuregate"

"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/dockerstatsreceiver/internal/metadata"
)

const (
typeStr = "docker_stats"
stability = component.StabilityLevelAlpha
typeStr = "docker_stats"
stability = component.StabilityLevelAlpha
useScraperV2ID = "receiver.dockerstats.useScraperV2"
)

// init registers the useScraperV2ID feature gate with the collector's
// feature gate registry. The gate is registered as disabled, so the
// receiver keeps using the legacy scrape function unless the gate is
// explicitly turned on.
func init() {
	scraperV2Gate := featuregate.Gate{
		ID:          useScraperV2ID,
		Description: "When enabled, the receiver will use the function ScrapeV2 to collect metrics. This allows each metric to be turned off/on via config. The new metrics are slightly different to the legacy implementation.",
		Enabled:     false,
	}
	featuregate.GetRegistry().MustRegister(scraperV2Gate)
}

func NewFactory() component.ReceiverFactory {
return component.NewReceiverFactory(
typeStr,
Expand Down Expand Up @@ -59,7 +69,12 @@ func createMetricsReceiver(
dockerConfig := config.(*Config)
dsr := newReceiver(params, dockerConfig)

scrp, err := scraperhelper.NewScraper(typeStr, dsr.scrape, scraperhelper.WithStart(dsr.start))
scrapeFunc := dsr.scrape
if featuregate.GetRegistry().IsEnabled(useScraperV2ID) {
scrapeFunc = dsr.scrapeV2
}

scrp, err := scraperhelper.NewScraper(typeStr, scrapeFunc, scraperhelper.WithStart(dsr.start))
if err != nil {
return nil, err
}
Expand Down
104 changes: 52 additions & 52 deletions receiver/dockerstatsreceiver/internal/metadata/generated_metrics_v2.go

Large diffs are not rendered by default.

44 changes: 22 additions & 22 deletions receiver/dockerstatsreceiver/metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,15 +85,15 @@ metrics:
container.cpu.throttling_data.periods:
enabled: true
description: "Number of periods with throttling active."
unit: "1"
unit: "{periods}"
sum:
value_type: int
monotonic: true
aggregation: cumulative
container.cpu.throttling_data.throttled_periods:
enabled: true
description: "Number of periods when the container hits its throttling limit."
unit: "1"
unit: "{periods}"
sum:
value_type: int
monotonic: true
Expand Down Expand Up @@ -179,7 +179,7 @@ metrics:
enabled: true
description: "Number of pages read from disk by the cgroup."
extended_documentation: "[More docs](https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt)."
unit: "1"
unit: "{operations}"
sum:
value_type: int
aggregation: cumulative
Expand All @@ -188,7 +188,7 @@ metrics:
enabled: true
description: "Number of pages written to disk by the cgroup."
extended_documentation: "[More docs](https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt)."
unit: "1"
unit: "{operations}"
sum:
value_type: int
aggregation: cumulative
Expand All @@ -202,15 +202,15 @@ metrics:
container.memory.pgfault:
enabled: true
description: "Indicate the number of times that a process of the cgroup triggered a page fault."
unit: "1"
unit: "{faults}"
sum:
value_type: int
aggregation: cumulative
monotonic: true
container.memory.pgmajfault:
enabled: true
description: "Indicate the number of times that a process of the cgroup triggered a major fault."
unit: "1"
unit: "{faults}"
sum:
value_type: int
aggregation: cumulative
Expand Down Expand Up @@ -298,15 +298,15 @@ metrics:
container.memory.total_pgpgin:
enabled: true
description: "Number of pages read from disk by the cgroup and descendant groups."
unit: "1"
unit: "{operations}"
sum:
value_type: int
aggregation: cumulative
monotonic: true
container.memory.total_pgpgout:
enabled: true
description: "Number of pages written to disk by the cgroup and descendant groups."
unit: "1"
unit: "{operations}"
sum:
value_type: int
aggregation: cumulative
Expand All @@ -320,15 +320,15 @@ metrics:
container.memory.total_pgfault:
enabled: true
description: "Indicate the number of times that a process of the cgroup (or descendant cgroups) triggered a page fault."
unit: "1"
unit: "{faults}"
sum:
value_type: int
aggregation: cumulative
monotonic: true
container.memory.total_pgmajfault:
enabled: true
description: "Indicate the number of times that a process of the cgroup (or descendant cgroups) triggered a major fault."
unit: "1"
unit: "{faults}"
sum:
value_type: int
aggregation: cumulative
Expand Down Expand Up @@ -372,7 +372,7 @@ metrics:
enabled: true
description: "Number of bios/requests merged into requests belonging to this cgroup and its descendant cgroups."
extended_documentation: "[More docs](https://www.kernel.org/doc/Documentation/cgroup-v1/blkio-controller.txt)."
unit: "1"
unit: "{operations}"
sum:
value_type: int
monotonic: true
Expand All @@ -390,7 +390,7 @@ metrics:
enabled: true
description: "Number of requests queued up for this cgroup and its descendant cgroups."
extended_documentation: "[More docs](https://www.kernel.org/doc/Documentation/cgroup-v1/blkio-controller.txt)."
unit: "1"
unit: "{operations}"
sum:
value_type: int
monotonic: true
Expand Down Expand Up @@ -426,7 +426,7 @@ metrics:
enabled: true
description: "Total amount of time in nanoseconds between request dispatch and request completion for the IOs done by this cgroup and descendant cgroups."
extended_documentation: "[More docs](https://www.kernel.org/doc/Documentation/cgroup-v1/blkio-controller.txt)."
unit: "1" # Preserving legacy incorrect unit for now. Should be nanoseconds eventually.
unit: ns
sum:
value_type: int
monotonic: true
Expand All @@ -444,7 +444,7 @@ metrics:
enabled: true
description: "Number of IOs (bio) issued to the disk by the group and descendant groups."
extended_documentation: "[More docs](https://www.kernel.org/doc/Documentation/cgroup-v1/blkio-controller.txt)."
unit: "1"
unit: "{operations}"
sum:
value_type: int
monotonic: true
Expand Down Expand Up @@ -480,7 +480,7 @@ metrics:
enabled: true
description: "Total amount of time the IOs for this cgroup (and descendant cgroups) spent waiting in the scheduler queues for service."
extended_documentation: "[More docs](https://www.kernel.org/doc/Documentation/cgroup-v1/blkio-controller.txt)."
unit: "1" # Should be in ns but preserving legacy mistake for now
unit: ns
sum:
value_type: int
monotonic: true
Expand All @@ -498,7 +498,7 @@ metrics:
enabled: true
description: "Number of sectors transferred to/from disk by the group and descendant groups."
extended_documentation: "[More docs](https://www.kernel.org/doc/Documentation/cgroup-v1/blkio-controller.txt)."
unit: "1"
unit: "{sectors}"
sum:
value_type: int
monotonic: true
Expand Down Expand Up @@ -536,7 +536,7 @@ metrics:
container.network.io.usage.rx_dropped:
enabled: true
description: "Incoming packets dropped."
unit: "1"
unit: "{packets}"
sum:
value_type: int
monotonic: true
Expand All @@ -546,7 +546,7 @@ metrics:
container.network.io.usage.tx_dropped:
enabled: true
description: "Outgoing packets dropped."
unit: "1"
unit: "{packets}"
sum:
value_type: int
monotonic: true
Expand All @@ -556,7 +556,7 @@ metrics:
container.network.io.usage.rx_errors:
enabled: true
description: "Received errors."
unit: "1"
unit: "{errors}"
sum:
value_type: int
monotonic: true
Expand All @@ -566,7 +566,7 @@ metrics:
container.network.io.usage.tx_errors:
enabled: true
description: "Sent errors."
unit: "1"
unit: "{errors}"
sum:
value_type: int
monotonic: true
Expand All @@ -576,7 +576,7 @@ metrics:
container.network.io.usage.rx_packets:
enabled: true
description: "Packets received."
unit: "1"
unit: "{packets}"
sum:
value_type: int
monotonic: true
Expand All @@ -586,7 +586,7 @@ metrics:
container.network.io.usage.tx_packets:
enabled: true
description: "Packets sent."
unit: "1"
unit: "{packets}"
sum:
value_type: int
monotonic: true
Expand Down
46 changes: 4 additions & 42 deletions receiver/dockerstatsreceiver/receiver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,12 @@ import (
"net/http/httptest"
"os"
"path/filepath"
"strings"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.opentelemetry.io/collector/component/componenttest"
"go.opentelemetry.io/collector/pdata/pmetric"
"go.opentelemetry.io/collector/receiver/scraperhelper"

"github.com/open-telemetry/opentelemetry-collector-contrib/internal/scrapertest"
Expand Down Expand Up @@ -77,7 +75,7 @@ func TestErrorsInStart(t *testing.T) {
assert.Contains(t, err.Error(), "context deadline exceeded")
}

func TestScrapes(t *testing.T) {
func TestScrapeV2(t *testing.T) {
containerIDs := []string{
"10b703fb312b25e8368ab5a3bce3a1610d1cee5d71a94920f1a7adbc5b0cb326",
"89d28931fd8b95c8806343a532e9e76bf0a0b76ee8f19452b8f75dee1ebcebb7",
Expand All @@ -103,39 +101,16 @@ func TestScrapes(t *testing.T) {

testCases := []struct {
desc string
scrape func(*receiver) (pmetric.Metrics, error)
expectedMetricsFile string
mockDockerEngine *httptest.Server
}{
{
desc: "scrapeV1_single_container",
scrape: func(rcv *receiver) (pmetric.Metrics, error) {
return rcv.scrape(context.Background())
},
desc: "scrapeV2_single_container",
expectedMetricsFile: filepath.Join(mockFolder, "single_container", "expected_metrics.json"),
mockDockerEngine: singleContainerEngineMock,
},
{
desc: "scrapeV2_single_container",
scrape: func(rcv *receiver) (pmetric.Metrics, error) {
return rcv.scrapeV2(context.Background())
},
expectedMetricsFile: filepath.Join(mockFolder, "single_container", "expected_metrics.json"),
mockDockerEngine: singleContainerEngineMock,
},
{
desc: "scrapeV1_two_containers",
scrape: func(rcv *receiver) (pmetric.Metrics, error) {
return rcv.scrape(context.Background())
},
expectedMetricsFile: filepath.Join(mockFolder, "two_containers", "expected_metrics.json"),
mockDockerEngine: twoContainerEngineMock,
},
{
desc: "scrapeV2_two_containers",
scrape: func(rcv *receiver) (pmetric.Metrics, error) {
return rcv.scrapeV2(context.Background())
},
desc: "scrapeV2_two_containers",
expectedMetricsFile: filepath.Join(mockFolder, "two_containers", "expected_metrics.json"),
mockDockerEngine: twoContainerEngineMock,
},
Expand All @@ -153,24 +128,11 @@ func TestScrapes(t *testing.T) {
err := receiver.start(context.Background(), componenttest.NewNopHost())
require.NoError(t, err)

actualMetrics, err := tc.scrape(receiver)
actualMetrics, err := receiver.scrapeV2(context.Background())
require.NoError(t, err)

expectedMetrics, err := golden.ReadMetrics(tc.expectedMetricsFile)

if !strings.HasPrefix(tc.desc, "scrapeV1") {
// Unset various fields for comparison purposes (non-mdatagen implementation doesn't have these set)
for i := 0; i < actualMetrics.ResourceMetrics().Len(); i++ {
for j := 0; j < actualMetrics.ResourceMetrics().At(i).ScopeMetrics().Len(); j++ {
sm := actualMetrics.ResourceMetrics().At(i).ScopeMetrics().At(j)
sm.Scope().SetName("")
sm.Scope().SetVersion("")
for k := 0; k < sm.Metrics().Len(); k++ {
sm.Metrics().At(k).SetDescription("")
}
}
}
}
assert.NoError(t, err)
assert.NoError(t, scrapertest.CompareMetrics(expectedMetrics, actualMetrics))
})
Expand Down
Loading

0 comments on commit 651712b

Please sign in to comment.