
feat: add agent injector telemetry #703

Merged · 5 commits · Dec 2, 2024 · Changes from 1 commit
27 changes: 27 additions & 0 deletions agent-inject/handler.go
@@ -9,6 +9,7 @@ import (
"io"
"net/http"
"strings"
"time"

"github.com/hashicorp/go-hclog"
"github.com/hashicorp/vault-k8s/agent-inject/agent"
@@ -25,6 +26,8 @@ import (
"k8s.io/client-go/kubernetes"
)

const warningInitOnlyInjection = "workload uses initContainer only; consider migrating to VSO"

var (
admissionScheme = runtime.NewScheme()
deserializer = func() runtime.Decoder {
@@ -85,6 +88,14 @@ type Handler struct {
func (h *Handler) Handle(w http.ResponseWriter, r *http.Request) {
h.Log.Info("Request received", "Method", r.Method, "URL", r.URL)

// Measure request processing duration and monitor request queue
requestQueue.Inc()
requestStart := time.Now()
defer func() {
requestProcessingTime.Observe(float64(time.Since(requestStart).Milliseconds()))
Contributor:

I wonder if it would be worth tracking the failures as well. We do something like this in VSO here: https://github.com/hashicorp/vault-secrets-operator/blob/6ab056c22acb5dc4812620233e3b141fb9e02f9c/vault/client.go#L810

Contributor Author:

Makes sense, and it would unblock @tvoran's feedback here.

@benashz Given that it goes beyond the scope we were asked to cover, is it OK if I push that to a follow-on PR?
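
For discussion, a rough sketch of what that follow-on failure tracking could look like inside the existing defer, loosely following the VSO pattern linked above (illustrative only; `requestFailuresTotal` is an assumed counter that does not exist in this PR):

```go
// Sketch only: requestFailuresTotal is a hypothetical counter, not defined in
// this PR. Each error path in Handle would set handleErr before returning.
var handleErr error

requestQueue.Inc()
requestStart := time.Now()
defer func() {
	requestProcessingTime.Observe(float64(time.Since(requestStart).Milliseconds()))
	if handleErr != nil {
		requestFailuresTotal.Inc()
	}
	requestQueue.Dec()
}()
```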

requestQueue.Dec()
}()

if ct := r.Header.Get("Content-Type"); ct != "application/json" {
msg := fmt.Sprintf("Invalid content-type: %q", ct)
http.Error(w, msg, http.StatusBadRequest)
@@ -142,11 +153,20 @@ func (h *Handler) Handle(w http.ResponseWriter, r *http.Request) {
msg := fmt.Sprintf("error marshalling admission response: %s", err)
http.Error(w, msg, http.StatusInternalServerError)
h.Log.Error("error on request", "Error", msg, "Code", http.StatusInternalServerError)
incrementInjectionFailures(admReq.Request.Namespace)
Member:

We may also want to call incrementInjectionFailures() for the error returns further up in this function too? But this should suffice as-is; I could even see those being a different error metric added in the future.
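
To illustrate that suggestion (hypothetical, not part of this change): the earlier error returns could record a failure as well, though before the AdmissionReview is decoded the namespace is not yet known, so a placeholder label value, or the separate error metric mentioned above, would be needed. For example:

```go
// Sketch only: an earlier error path from this handler with the failure
// counter added. "unknown" stands in for the namespace, which has not been
// decoded at this point.
if ct := r.Header.Get("Content-Type"); ct != "application/json" {
	msg := fmt.Sprintf("Invalid content-type: %q", ct)
	http.Error(w, msg, http.StatusBadRequest)
	incrementInjectionFailures("unknown")
	return
}
```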

return
}

if _, err := w.Write(resp); err != nil {
h.Log.Error("error writing response", "Error", err)
incrementInjectionFailures(admReq.Request.Namespace)
return
}

if admResp.Response.Allowed {
incrementInjections(admReq.Request.Namespace, *admResp.Response)
} else {
incrementInjectionFailures(admReq.Request.Namespace)
}
}

@@ -245,6 +265,13 @@ func (h *Handler) Mutate(req *admissionv1.AdmissionRequest) *admissionv1.AdmissionResponse {
return admissionError(req.UID, err)
}

// Expose when workloads are injecting initContainers only. This is useful to identify
// workloads that are likely good candidates for migration to Vault Secrets Operator for
// greater scalability and performance.
if agentSidecar.PrePopulateOnly {
resp.Warnings = append(resp.Warnings, warningInitOnlyInjection)
}

resp.Patch = patch
patchType := admissionv1.PatchTypeJSONPatch
resp.PatchType = &patchType
73 changes: 73 additions & 0 deletions agent-inject/metrics.go
@@ -0,0 +1,73 @@
package agent_inject

import (
"slices"

"github.com/prometheus/client_golang/prometheus"
admissionv1 "k8s.io/api/admission/v1"
)

const (
metricsNamespace = "vault"
metricsSubsystem = "agent_injector"
metricsLabelNamespace = "namespace"
metricsLabelType = "injection_type"
metricsLabelTypeDefault = "sidecar"
metricsLabelTypeInitOnly = "init_only"
)

var (
requestQueue = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: metricsNamespace,
Subsystem: metricsSubsystem,
Name: "request_queue_total",
Help: "Total count of webhook requests in the queue",
})

requestProcessingTime = prometheus.NewHistogram(prometheus.HistogramOpts{
Namespace: metricsNamespace,
Subsystem: metricsSubsystem,
Name: "request_processing_duration_ms",
Help: "Histogram of webhook request processing times in milliseconds",
Buckets: []float64{5, 10, 25, 50, 75, 100, 250, 500, 1000, 2500, 5000, 7500, 10000},
})

injectionsByNamespace = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: metricsNamespace,
Subsystem: metricsSubsystem,
Name: "injections_by_namespace_total",
Help: "Total count of Agent Sidecar injections by namespace",
}, []string{metricsLabelNamespace, metricsLabelType})
Contributor:

I suppose that the number of namespaces should be relatively low, so we should be okay with this high cardinality metric.


failedInjectionsByNamespace = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: metricsNamespace,
Subsystem: metricsSubsystem,
Name: "failed_injections_by_namespace_total",
Help: "Total count of failed Agent Sidecar injections by namespace",
}, []string{metricsLabelNamespace})
)

func incrementInjections(namespace string, res admissionv1.AdmissionResponse) {
typeLabel := metricsLabelTypeDefault
if slices.Contains(res.Warnings, warningInitOnlyInjection) {
typeLabel = metricsLabelTypeInitOnly
}

injectionsByNamespace.With(prometheus.Labels{
metricsLabelNamespace: namespace,
metricsLabelType: typeLabel,
}).Inc()
}

func incrementInjectionFailures(namespace string) {
failedInjectionsByNamespace.With(prometheus.Labels{metricsLabelNamespace: namespace}).Inc()
}

func MustRegisterInjectorMetrics(registry prometheus.Registerer) {
registry.MustRegister(
requestQueue,
requestProcessingTime,
injectionsByNamespace,
failedInjectionsByNamespace,
)
}
2 changes: 2 additions & 0 deletions subcommand/injector/command.go
@@ -27,6 +27,7 @@ import (
"github.com/hashicorp/vault-k8s/leader"
"github.com/hashicorp/vault-k8s/version"
"github.com/mitchellh/cli"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
adminv1 "k8s.io/api/admissionregistration/v1"
adminv1beta "k8s.io/api/admissionregistration/v1beta1"
@@ -231,6 +232,7 @@ func (c *Command) Run(args []string) int {

// Registering path to expose metrics
if c.flagTelemetryPath != "" {
agentInject.MustRegisterInjectorMetrics(prometheus.DefaultRegisterer)
c.UI.Info(fmt.Sprintf("Registering telemetry path on %q", c.flagTelemetryPath))
mux.Handle(c.flagTelemetryPath, promhttp.Handler())
}
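
One benefit of MustRegisterInjectorMetrics taking a prometheus.Registerer instead of always using the default registerer: tests can exercise the metrics against an isolated registry. A minimal sketch (assumed test, not included in this PR):

```go
package agent_inject_test

import (
	"testing"

	"github.com/prometheus/client_golang/prometheus"

	agentInject "github.com/hashicorp/vault-k8s/agent-inject"
)

// Sketch only: this test does not exist in the PR. It registers the injector
// metrics on a fresh registry and checks that they can be gathered.
func TestMustRegisterInjectorMetrics(t *testing.T) {
	registry := prometheus.NewRegistry()
	agentInject.MustRegisterInjectorMetrics(registry)

	if _, err := registry.Gather(); err != nil {
		t.Fatalf("gathering injector metrics: %v", err)
	}
}
```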