diff --git a/CHANGELOG.md b/CHANGELOG.md index 14e2029b..b11178cb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ## Unreleased +### Added + - Add `WithMetricsBuiltinsEnabled()` option and environment variable `LS_METRICS_BUILTINS_ENABLED`, which defaults to true. When metrics builtins are enabled, @@ -15,6 +17,9 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm and [host](https://github.com/open-telemetry/opentelemetry-go-contrib/tree/main/instrumentation/host) metrics instrumentation will be reported automatically. + [#265](https://github.com/lightstep/otel-launcher-go/pull/265) +- Proposed replacement for go-contrib instrumentation/runtime added as lightstep/instrumentation/runtime. + [#267](https://github.com/lightstep/otel-launcher-go/pull/267) ## [1.10.1](https://github.com/lightstep/otel-launcher-go/releases/tag/v1.10.1) - 2022-08-29 diff --git a/go.mod b/go.mod index 77837835..2ab30da7 100644 --- a/go.mod +++ b/go.mod @@ -10,6 +10,7 @@ require ( go.opentelemetry.io/otel v1.9.0 go.opentelemetry.io/otel/metric v0.31.0 go.opentelemetry.io/otel/sdk v1.9.0 + go.opentelemetry.io/otel/sdk/metric v0.31.1-0.20220826135333-55b49c407e07 go.opentelemetry.io/otel/trace v1.9.0 ) @@ -39,7 +40,6 @@ require ( go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.31.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.9.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.9.0 // indirect - go.opentelemetry.io/otel/sdk/metric v0.31.1-0.20220826135333-55b49c407e07 // indirect go.opentelemetry.io/proto/otlp v0.18.0 // indirect go.uber.org/atomic v1.7.0 // indirect go.uber.org/multierr v1.8.0 // indirect diff --git a/lightstep/instrumentation/runtime/builtin.go b/lightstep/instrumentation/runtime/builtin.go new file mode 100644 index 00000000..5597073d --- /dev/null +++ 
b/lightstep/instrumentation/runtime/builtin.go @@ -0,0 +1,273 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package runtime // import "github.com/lightstep/otel-launcher-go/lightstep/instrumentation/runtime" + +import ( + "context" + "fmt" + "runtime/metrics" + "strings" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/metric/global" + "go.opentelemetry.io/otel/metric/instrument" + "go.opentelemetry.io/otel/metric/unit" +) + +// LibraryName is the value of instrumentation.Library.Name. +const LibraryName = "otel-launcher-go/runtime" + +// config contains optional settings for reporting runtime metrics. +type config struct { + // MeterProvider sets the metric.MeterProvider. If nil, the global + // Provider will be used. + MeterProvider metric.MeterProvider +} + +// Option supports configuring optional settings for runtime metrics. +type Option interface { + apply(*config) +} + +// WithMeterProvider sets the Metric implementation to use for +// reporting. If this option is not used, the global metric.MeterProvider +// will be used. `provider` must be non-nil. 
+func WithMeterProvider(provider metric.MeterProvider) Option { + return metricProviderOption{provider} +} + +type metricProviderOption struct{ metric.MeterProvider } + +func (o metricProviderOption) apply(c *config) { + if o.MeterProvider != nil { + c.MeterProvider = o.MeterProvider + } +} + +// newConfig computes a config from the supplied Options. +func newConfig(opts ...Option) config { + c := config{ + MeterProvider: global.MeterProvider(), + } + for _, opt := range opts { + opt.apply(&c) + } + return c +} + +// Start initializes reporting of runtime metrics using the supplied config. +func Start(opts ...Option) error { + c := newConfig(opts...) + if c.MeterProvider == nil { + c.MeterProvider = global.MeterProvider() + } + meter := c.MeterProvider.Meter( + LibraryName, + ) + + r := newBuiltinRuntime(meter, metrics.All, metrics.Read) + return r.register() +} + +type allFunc = func() []metrics.Description +type readFunc = func([]metrics.Sample) + +type builtinRuntime struct { + meter metric.Meter + allFunc allFunc + readFunc readFunc +} + +type int64Observer interface { + Observe(ctx context.Context, x int64, attrs ...attribute.KeyValue) +} + +type float64Observer interface { + Observe(ctx context.Context, x float64, attrs ...attribute.KeyValue) +} + +func newBuiltinRuntime(meter metric.Meter, af allFunc, rf readFunc) *builtinRuntime { + return &builtinRuntime{ + meter: meter, + allFunc: af, + readFunc: rf, + } +} + +func getAttributeName(n string) string { + x := strings.Split(n, ".") + // It's a plural, make it singular. 
+ switch x[len(x)-1] { + case "cycles": + return "cycle" + case "classes": + return "class" + } + panic("unrecognized attribute name") +} + +func (r *builtinRuntime) register() error { + all := r.allFunc() + totals := map[string]bool{} + counts := map[string]int{} + toName := func(in string) (string, string) { + n, statedUnits, _ := strings.Cut(in, ":") + n = "process.runtime.go" + strings.ReplaceAll(n, "/", ".") + return n, statedUnits + } + + for _, m := range all { + name, _ := toName(m.Name) + + // Totals map includes the '.' suffix. + if strings.HasSuffix(name, ".total") { + totals[name[:len(name)-len("total")]] = true + } + + counts[name]++ + } + + var samples []metrics.Sample + var instruments []instrument.Asynchronous + var totalAttrs [][]attribute.KeyValue + + for _, m := range all { + n, statedUnits := toName(m.Name) + + if strings.HasSuffix(n, ".total") { + continue + } + + var u string + switch statedUnits { + case "bytes", "seconds": + // Real units + u = statedUnits + default: + // Pseudo-units + u = "{" + statedUnits + "}" + } + + // Remove any ".total" suffix, this is redundant for Prometheus. + var totalAttrVal string + for totalize := range totals { + if strings.HasPrefix(n, totalize) { + // Units is unchanged. + // Name becomes the overall prefix. + // Remember which attribute to use. + totalAttrVal = n[len(totalize):] + n = totalize[:len(totalize)-1] + break + } + } + + if counts[n] > 1 { + if totalAttrVal != "" { + // This has not happened, hopefully never will. + // Indicates the special case for objects/bytes + // overlaps with the special case for total. + panic("special case collision") + } + + // This is treated as a special case, we know this happens + // with "objects" and "bytes" in the standard Go 1.19 runtime. + switch statedUnits { + case "objects": + // In this case, use `.objects` suffix. + n = n + ".objects" + u = "{objects}" + case "bytes": + // In this case, use no suffix. In Prometheus this will + // be appended as a suffix. 
+ default: + panic(fmt.Sprint( + "unrecognized duplicate metrics names, ", + "attention required: ", + n, + )) + } + } + + opts := []instrument.Option{ + instrument.WithUnit(unit.Unit(u)), + instrument.WithDescription(m.Description), + } + var inst instrument.Asynchronous + var err error + if m.Cumulative { + switch m.Kind { + case metrics.KindUint64: + inst, err = r.meter.AsyncInt64().Counter(n, opts...) + case metrics.KindFloat64: + inst, err = r.meter.AsyncFloat64().Counter(n, opts...) + case metrics.KindFloat64Histogram: + // Not implemented Histogram[float64]. + continue + } + } else { + switch m.Kind { + case metrics.KindUint64: + inst, err = r.meter.AsyncInt64().UpDownCounter(n, opts...) + case metrics.KindFloat64: + // Note: this has never been used. + inst, err = r.meter.AsyncFloat64().Gauge(n, opts...) + case metrics.KindFloat64Histogram: + // Not implemented GaugeHistogram[float64]. + continue + } + } + if err != nil { + return err + } + + samp := metrics.Sample{ + Name: m.Name, + } + samples = append(samples, samp) + instruments = append(instruments, inst) + if totalAttrVal == "" { + totalAttrs = append(totalAttrs, nil) + } else { + // Append a singleton list. + totalAttrs = append(totalAttrs, []attribute.KeyValue{ + attribute.String(getAttributeName(n), totalAttrVal), + }) + } + } + + if err := r.meter.RegisterCallback(instruments, func(ctx context.Context) { + r.readFunc(samples) + + for idx, samp := range samples { + + switch samp.Value.Kind() { + case metrics.KindUint64: + instruments[idx].(int64Observer).Observe(ctx, int64(samp.Value.Uint64()), totalAttrs[idx]...) + case metrics.KindFloat64: + instruments[idx].(float64Observer).Observe(ctx, samp.Value.Float64(), totalAttrs[idx]...) + default: + // KindFloat64Histogram (unsupported in OTel) and KindBad + // (unsupported by runtime/metrics). Neither should happen + // if runtime/metrics and the code above are working correctly. 
+ otel.Handle(fmt.Errorf("invalid runtime/metrics value kind: %v", samp.Value.Kind())) + } + } + }); err != nil { + return err + } + return nil +} diff --git a/lightstep/instrumentation/runtime/builtin_118_test.go b/lightstep/instrumentation/runtime/builtin_118_test.go new file mode 100644 index 00000000..fb4acf7c --- /dev/null +++ b/lightstep/instrumentation/runtime/builtin_118_test.go @@ -0,0 +1,30 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build go1.18 && !go1.19 + +package runtime + +var expectRuntimeMetrics = map[string]int{ + "gc.cycles": 2, + "gc.heap.allocs": 1, + "gc.heap.allocs.objects": 1, + "gc.heap.frees": 1, + "gc.heap.frees.objects": 1, + "gc.heap.goal": 1, + "gc.heap.objects": 1, + "gc.heap.tiny.allocs": 1, + "memory.classes": 13, + "sched.goroutines": 1, +} diff --git a/lightstep/instrumentation/runtime/builtin_119_test.go b/lightstep/instrumentation/runtime/builtin_119_test.go new file mode 100644 index 00000000..860f8369 --- /dev/null +++ b/lightstep/instrumentation/runtime/builtin_119_test.go @@ -0,0 +1,34 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build go1.19 + +package runtime + +var expectRuntimeMetrics = map[string]int{ + "cgo-to-c-calls": 1, + "gc.cycles": 2, + "gc.heap.allocs": 1, + "gc.heap.allocs.objects": 1, + "gc.heap.frees": 1, + "gc.heap.frees.objects": 1, + "gc.heap.goal": 1, + "gc.heap.objects": 1, + "gc.heap.tiny.allocs": 1, + "gc.limiter.last-enabled": 1, + "gc.stack.starting-size": 1, + "memory.classes": 13, + "sched.gomaxprocs": 1, + "sched.goroutines": 1, +} diff --git a/lightstep/instrumentation/runtime/builtin_test.go b/lightstep/instrumentation/runtime/builtin_test.go new file mode 100644 index 00000000..e9fa12a5 --- /dev/null +++ b/lightstep/instrumentation/runtime/builtin_test.go @@ -0,0 +1,216 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package runtime + +import ( + "context" + "runtime/metrics" + "strings" + "testing" + + "github.com/stretchr/testify/require" + + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/sdk/metric/metrictest" +) + +// prefix is mandatory for this library, however the "go." part is not. +const expectPrefix = "process.runtime.go." + +var expectLib = metrictest.Scope{ + InstrumentationName: "otel-launcher-go/runtime", + InstrumentationVersion: "", + SchemaURL: "", +} + +// TestBuiltinRuntimeMetrics tests the real output of the library to +// ensure expected prefix, instrumentation scope, and empty +// attributes. +func TestBuiltinRuntimeMetrics(t *testing.T) { + provider, exp := metrictest.NewTestMeterProvider() + + err := Start(WithMeterProvider(provider)) + + require.NoError(t, err) + + require.NoError(t, exp.Collect(context.Background())) + + // Counts are >1 for metrics that are totalized. + expect := expectRuntimeMetrics + allNames := map[string]int{} + + // Note: metrictest library lacks a way to distinguish + // monotonic vs not or to test the unit. This will be fixed in + // the new SDK, all the pieces untested here. + for _, rec := range exp.Records { + require.True(t, strings.HasPrefix(rec.InstrumentName, expectPrefix), "%s", rec.InstrumentName) + name := rec.InstrumentName[len(expectPrefix):] + + require.Equal(t, expectLib, rec.InstrumentationLibrary) + + if expect[name] > 1 { + require.Equal(t, 1, len(rec.Attributes)) + } else { + require.Equal(t, 1, expect[name]) + require.Equal(t, []attribute.KeyValue(nil), rec.Attributes) + } + allNames[name]++ + } + + require.Equal(t, expect, allNames) +} + +func makeTestCase() (allFunc, readFunc, map[string]map[string]metrics.Value) { + // Note: the library provides no way to generate values, so use the + // builtin library to get some. Since we can't generate a Float64 value + // we can't even test the Gauge logic in this package. 
+ ints := map[metrics.Value]bool{} + + real := metrics.All() + realSamples := make([]metrics.Sample, len(real)) + for i := range real { + realSamples[i].Name = real[i].Name + } + metrics.Read(realSamples) + for i, rs := range realSamples { + switch real[i].Kind { + case metrics.KindUint64: + ints[rs.Value] = true + default: + // Histograms and Floats are not tested. + // The 1.19 runtime generates no Floats and + // exports no test constructors. + } + } + + var allInts []metrics.Value + + for iv := range ints { + allInts = append(allInts, iv) + } + + af := func() []metrics.Description { + return []metrics.Description{ + { + Name: "/cntr/things:things", + Description: "a counter of things", + Kind: metrics.KindUint64, + Cumulative: true, + }, + { + Name: "/updowncntr/things:things", + Description: "an updowncounter of things", + Kind: metrics.KindUint64, + Cumulative: false, + }, + { + Name: "/process/count:objects", + Description: "a process counter of objects", + Kind: metrics.KindUint64, + Cumulative: true, + }, + { + Name: "/process/count:bytes", + Description: "a process counter of bytes", + Kind: metrics.KindUint64, + Cumulative: true, + }, + } + } + mapping := map[string]metrics.Value{ + "/cntr/things:things": allInts[0], + "/updowncntr/things:things": allInts[1], + "/process/count:objects": allInts[2], + "/process/count:bytes": allInts[3], + "/waste/cycles/ocean:cycles": allInts[4], + "/waste/cycles/sea:cycles": allInts[5], + "/waste/cycles/lake:cycles": allInts[6], + "/waste/cycles/pond:cycles": allInts[7], + "/waste/cycles/puddle:cycles": allInts[8], + "/waste/cycles/total:cycles": allInts[9], + } + rf := func(samples []metrics.Sample) { + for i := range samples { + v, ok := mapping[samples[i].Name] + if ok { + samples[i].Value = v + } else { + panic("outcome uncertain") + } + } + } + return af, rf, map[string]map[string]metrics.Value{ + "cntr.things": {"": allInts[0]}, + "updowncntr.things": {"": allInts[1]}, + "process.count.objects": {"": allInts[2]}, + 
"process.count": {"": allInts[3]}, + + // This uses "cycles", one of the two known + // multi-variate metrics as of go-1.19. + "waste.cycles": { + "ocean": allInts[4], + "sea": allInts[5], + "lake": allInts[6], + "pond": allInts[7], + "puddle": allInts[8], + }, + } +} + +// TestMetricTranslation validates the translation logic using +// synthetic metric names and values. +func TestMetricTranslation(t *testing.T) { + provider, exp := metrictest.NewTestMeterProvider() + + af, rf, mapping := makeTestCase() + br := newBuiltinRuntime(provider.Meter("test"), af, rf) + br.register() + + expectRecords := 0 + for _, values := range mapping { + expectRecords += len(values) + if len(values) > 1 { + // Counts the total + expectRecords++ + } + } + + require.NoError(t, exp.Collect(context.Background())) + require.Equal(t, 10, expectRecords) + + for _, rec := range exp.Records { + // Test the special cases are present always: + + require.True(t, strings.HasPrefix(rec.InstrumentName, expectPrefix), "%s", rec.InstrumentName) + name := rec.InstrumentName[len(expectPrefix):] + + // Note: only int64 is tested, we have no way to + // generate Float64 values and Float64Hist values are + // not implemented for testing. 
+ m := mapping[name] + if len(m) == 1 { + require.Equal(t, mapping[name][""].Uint64(), uint64(rec.Sum.AsInt64())) + + // no attributes + require.Equal(t, []attribute.KeyValue(nil), rec.Attributes) + } else { + require.Equal(t, 5, len(m)) + require.Equal(t, 1, len(rec.Attributes)) + require.Equal(t, rec.Attributes[0].Key, "class") + feature := rec.Attributes[0].Value.AsString() + require.Equal(t, mapping[name][feature].Uint64(), uint64(rec.Sum.AsInt64())) + } + } +} diff --git a/lightstep/instrumentation/runtime/doc.go b/lightstep/instrumentation/runtime/doc.go new file mode 100644 index 00000000..c2771873 --- /dev/null +++ b/lightstep/instrumentation/runtime/doc.go @@ -0,0 +1,63 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package runtime generates metrics from the Golang runtime/metrics package. +// +// There are two special policies that are used to translate these +// metrics into the OpenTelemetry model. +// +// 1. The runtime/metrics name is split into its name and unit part; +// when there are two metrics with the same name and different +// units, the only known case is where "objects" and "bytes" are +// present. In this case, the outputs are a unitless metric (with +// suffix, e.g., ending `gc.heap.allocs.objects`) and a unitful +// metric with no suffix (e.g., ending `gc.heap.allocs` having +// bytes units). +// 2.
When there are >= 2 metrics with the same prefix and one +// matching `prefix.total`, the total is skipped and the other +// members are assembled into a single Counter or UpDownCounter +// metric with multiple attribute values. The supported cases +// are for `class` and `cycle` attributes. +// +// The following metrics are generated in go-1.19. +// +// Name Unit Instrument +// ------------------------------------------------------------------------------------ +// process.runtime.go.cgo.go-to-c-calls {calls} Counter[int64] +// process.runtime.go.gc.cycles{cycle=forced,automatic} {gc-cycles} Counter[int64] +// process.runtime.go.gc.heap.allocs bytes (*) Counter[int64] +// process.runtime.go.gc.heap.allocs.objects {objects} (*) Counter[int64] +// process.runtime.go.gc.heap.allocs-by-size bytes Histogram[float64] (**) +// process.runtime.go.gc.heap.frees bytes (*) Counter[int64] +// process.runtime.go.gc.heap.frees.objects {objects} (*) Counter[int64] +// process.runtime.go.gc.heap.frees-by-size bytes Histogram[float64] (**) +// process.runtime.go.gc.heap.goal bytes UpDownCounter[int64] +// process.runtime.go.gc.heap.objects {objects} UpDownCounter[int64] +// process.runtime.go.gc.heap.tiny.allocs {objects} Counter[int64] +// process.runtime.go.gc.limiter.last-enabled {gc-cycle} UpDownCounter[int64] +// process.runtime.go.gc.pauses seconds Histogram[float64] (**) +// process.runtime.go.gc.stack.starting-size bytes UpDownCounter[int64] +// process.runtime.go.memory.classes{class=...} bytes UpDownCounter[int64] +// process.runtime.go.sched.gomaxprocs {threads} UpDownCounter[int64] +// process.runtime.go.sched.goroutines {goroutines} UpDownCounter[int64] +// process.runtime.go.sched.latencies seconds GaugeHistogram[float64] (**) +// +// (*) Marks the cases where runtime/metrics produces names that are +// duplicates when the unit string is ignored (see policy #1).
+// (**) Histograms are not currently implemented, see the related +// issues for an explanation: +// https://github.com/open-telemetry/opentelemetry-specification/issues/2713 +// https://github.com/open-telemetry/opentelemetry-specification/issues/2714 + +package runtime // import "github.com/lightstep/otel-launcher-go/lightstep/instrumentation/runtime"