From 3af7ea1b6164e5d636d0a80cb4ac83c5171f4c05 Mon Sep 17 00:00:00 2001 From: Aditya Kolhar Date: Thu, 30 Sep 2021 12:17:10 +0530 Subject: [PATCH] Changes to add 2 metrics to external-dns. 1. external_dns_controller_current_errors - Indicates whether there are any source/registry errors at this point in time. Value is either 0 or 1, with 1 indicating the presence of an error - Gauge Metric 2. external_dns_controller_verified_records - Number of DNS A-records that exist in both source and registry - Gauge Metric --- controller/controller.go | 50 ++++++++++++++++++++++++++++++++++- controller/controller_test.go | 29 ++++++++++++++++++++ docs/faq.md | 4 +++ 3 files changed, 82 insertions(+), 1 deletion(-) diff --git a/controller/controller.go b/controller/controller.go index 7d083cce15..3777372025 100644 --- a/controller/controller.go +++ b/controller/controller.go @@ -94,6 +94,22 @@ var ( Help: "Number of Source errors.", }, ) + errorIndicator = prometheus.NewGauge( + prometheus.GaugeOpts{ + Namespace: "external_dns", + Subsystem: "controller", + Name: "current_errors", + Help: "Indicates if there are any source/registry errors at this point in time.", + }, + ) + verifiedRecords = prometheus.NewGauge( + prometheus.GaugeOpts{ + Namespace: "external_dns", + Subsystem: "controller", + Name: "verified_records", + Help: "No of DNS A-records that exists both in source and registry.", + }, + ) ) func init() { @@ -105,6 +121,8 @@ func init() { prometheus.MustRegister(deprecatedRegistryErrors) prometheus.MustRegister(deprecatedSourceErrors) prometheus.MustRegister(controllerNoChangesTotal) + prometheus.MustRegister(errorIndicator) + prometheus.MustRegister(verifiedRecords) } // Controller is responsible for orchestrating the different components. @@ -134,8 +152,10 @@ type Controller struct { // RunOnce runs a single iteration of a reconciliation loop. 
func (c *Controller) RunOnce(ctx context.Context) error { + errorIndicator.Set(0) records, err := c.Registry.Records(ctx) if err != nil { + errorIndicator.Set(1) registryErrorsTotal.Inc() deprecatedRegistryErrors.Inc() return err @@ -146,12 +166,14 @@ func (c *Controller) RunOnce(ctx context.Context) error { endpoints, err := c.Source.Endpoints(ctx) if err != nil { + errorIndicator.Set(1) sourceErrorsTotal.Inc() deprecatedSourceErrors.Inc() return err } sourceEndpointsTotal.Set(float64(len(endpoints))) - + vRecords := verifyARecords(endpoints, records) + verifiedRecords.Set(float64(len(vRecords))) endpoints = c.Registry.AdjustEndpoints(endpoints) plan := &plan.Plan{ @@ -181,6 +203,32 @@ func (c *Controller) RunOnce(ctx context.Context) error { return nil } +func verifyARecords(endpoints []*endpoint.Endpoint, registryRecords []*endpoint.Endpoint) []string { + aRecords := filterARecords(endpoints) + recordsMap := make(map[string]struct{}) + for _, regRecord := range registryRecords{ + recordsMap[regRecord.DNSName] = struct{}{} + } + var cm []string + for _, sourceRecord := range aRecords{ + if _, found := recordsMap[sourceRecord]; found { + cm = append(cm, sourceRecord) + } + } + return cm +} + +func filterARecords(endpoints []*endpoint.Endpoint) []string { + var aRecords []string + for _, endPoint := range endpoints{ + if endPoint.RecordType == endpoint.RecordTypeA { + aRecords = append(aRecords, endPoint.DNSName) + } + } + return aRecords + +} + // ScheduleRunOnce makes sure execution happens at most once per interval. 
func (c *Controller) ScheduleRunOnce(now time.Time) { c.nextRunAtMux.Lock() diff --git a/controller/controller_test.go b/controller/controller_test.go index dff2a6f62b..6f6846c23c 100644 --- a/controller/controller_test.go +++ b/controller/controller_test.go @@ -19,6 +19,8 @@ package controller import ( "context" "errors" + "github.com/prometheus/client_golang/prometheus" + "math" "reflect" "testing" "time" @@ -49,6 +51,10 @@ type filteredMockProvider struct { ApplyChangesCalls []*plan.Changes } +type errorMockProvider struct { + mockProvider +} + func (p *filteredMockProvider) GetDomainFilter() endpoint.DomainFilterInterface { return p.domainFilter } @@ -70,6 +76,10 @@ func (p *mockProvider) Records(ctx context.Context) ([]*endpoint.Endpoint, error return p.RecordsStore, nil } +func (p *errorMockProvider) Records(ctx context.Context) ([]*endpoint.Endpoint, error) { + return nil, errors.New("error for testing") +} + // ApplyChanges validates that the passed in changes satisfy the assumptions. func (p *mockProvider) ApplyChanges(ctx context.Context, changes *plan.Changes) error { if len(changes.Create) != len(p.ExpectChanges.Create) { @@ -180,7 +190,26 @@ func TestRunOnce(t *testing.T) { // Validate that the mock source was called. 
source.AssertExpectations(t) + // check the verified records + assert.Equal(t, math.Float64bits(1), valueFromMetric(verifiedRecords)) } +// Test the errors currently in the system +func TestCurrentErrors(t *testing.T) { + provider := &errorMockProvider{} + r, _ := registry.NewNoopRegistry(provider) + ctrl := &Controller{Interval: 10 * time.Minute, MinEventSyncInterval: 5 * time.Second, Registry: r} + assert.Error(t, ctrl.RunOnce(context.Background()), "error for testing") + ref := reflect.ValueOf(errorIndicator) + actualValue := (reflect.Indirect(ref).FieldByName("valBits").Uint()) + intendedValue := math.Float64bits(1) + assert.Equal(t, actualValue, intendedValue) +} + +func valueFromMetric(metric prometheus.Gauge) uint64 { + ref := reflect.ValueOf(metric) + return reflect.Indirect(ref).FieldByName("valBits").Uint() +} + func TestShouldRunOnce(t *testing.T) { ctrl := &Controller{Interval: 10 * time.Minute, MinEventSyncInterval: 5 * time.Second} diff --git a/docs/faq.md b/docs/faq.md index 67662a782b..e1e9ca32f7 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -185,6 +185,10 @@ Here is the full list of available metrics provided by ExternalDNS: | external_dns_registry_errors_total | Number of Registry errors | Counter | | external_dns_source_endpoints_total | Number of Endpoints in the registry | Gauge | | external_dns_source_errors_total | Number of Source errors | Counter | +| external_dns_controller_current_errors | Indicates if there are any source/registry errors at | Gauge | +| | this point in time. Value can be either 0 or 1, | | +| | 1 indicating presence of an error | | +| external_dns_controller_verified_records | Number of DNS A-records that exist in both source and registry | Gauge | ### How can I run ExternalDNS under a specific GCP Service Account, e.g. to access DNS records in other projects?