Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(analysis): Add Measurements Retention Limit Option for Metrics #1729

Merged
merged 1 commit into from
Jan 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 20 additions & 5 deletions analysis/analysis.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,16 @@ func (c *Controller) reconcileAnalysisRun(origRun *v1alpha1.AnalysisRun) *v1alph
return run
}

measurementRetentionMetricsMap, err := analysisutil.GetMeasurementRetentionMetrics(run.Spec.MeasurementRetention, resolvedMetrics)
if err != nil {
message := fmt.Sprintf("Analysis spec invalid: %v", err)
logger.Warn(message)
run.Status.Phase = v1alpha1.AnalysisPhaseError
run.Status.Message = message
c.recordAnalysisRunCompletionEvent(run)
return run
}

tasks := generateMetricTasks(run, resolvedMetrics)
logger.Infof("Taking %d Measurement(s)...", len(tasks))
err = c.runMeasurements(run, tasks, dryRunMetricsMap)
Expand All @@ -101,7 +111,7 @@ func (c *Controller) reconcileAnalysisRun(origRun *v1alpha1.AnalysisRun) *v1alph
}
}

err = c.garbageCollectMeasurements(run, DefaultMeasurementHistoryLimit)
err = c.garbageCollectMeasurements(run, measurementRetentionMetricsMap, DefaultMeasurementHistoryLimit)
if err != nil {
// TODO(jessesuen): surface errors to controller so they can be retried
logger.Warnf("Failed to garbage collect measurements: %v", err)
Expand Down Expand Up @@ -693,7 +703,7 @@ func calculateNextReconcileTime(run *v1alpha1.AnalysisRun, metrics []v1alpha1.Me
}

// garbageCollectMeasurements trims the measurement history to the specified limit and GCs old measurements
func (c *Controller) garbageCollectMeasurements(run *v1alpha1.AnalysisRun, limit int) error {
func (c *Controller) garbageCollectMeasurements(run *v1alpha1.AnalysisRun, measurementRetentionMetricNamesMap map[string]*v1alpha1.MeasurementRetention, limit int) error {
var errors []error

metricsByName := make(map[string]v1alpha1.Metric)
Expand All @@ -703,7 +713,12 @@ func (c *Controller) garbageCollectMeasurements(run *v1alpha1.AnalysisRun, limit

for i, result := range run.Status.MetricResults {
length := len(result.Measurements)
if length > limit {
measurementRetentionObject := measurementRetentionMetricNamesMap[result.Name]
measurementsLimit := limit
if measurementRetentionObject != nil && measurementRetentionObject.Limit > 0 {
measurementsLimit = measurementRetentionObject.Limit
}
if length > measurementsLimit {
metric, ok := metricsByName[result.Name]
if !ok {
continue
Expand All @@ -714,11 +729,11 @@ func (c *Controller) garbageCollectMeasurements(run *v1alpha1.AnalysisRun, limit
errors = append(errors, err)
continue
}
err = provider.GarbageCollect(run, metric, limit)
err = provider.GarbageCollect(run, metric, measurementsLimit)
if err != nil {
return err
}
result.Measurements = result.Measurements[length-limit : length]
result.Measurements = result.Measurements[length-measurementsLimit : length]
}
run.Status.MetricResults[i] = result
}
Expand Down
71 changes: 69 additions & 2 deletions analysis/analysis_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1055,7 +1055,8 @@ func TestTrimMeasurementHistory(t *testing.T) {

{
run := newRun()
c.garbageCollectMeasurements(run, 2)
err := c.garbageCollectMeasurements(run, map[string]*v1alpha1.MeasurementRetention{}, 2)
assert.Nil(t, err)
assert.Len(t, run.Status.MetricResults[0].Measurements, 1)
assert.Equal(t, "1", run.Status.MetricResults[0].Measurements[0].Value)
assert.Len(t, run.Status.MetricResults[1].Measurements, 2)
Expand All @@ -1064,12 +1065,37 @@ func TestTrimMeasurementHistory(t *testing.T) {
}
{
run := newRun()
c.garbageCollectMeasurements(run, 1)
err := c.garbageCollectMeasurements(run, map[string]*v1alpha1.MeasurementRetention{}, 1)
assert.Nil(t, err)
assert.Len(t, run.Status.MetricResults[0].Measurements, 1)
assert.Equal(t, "1", run.Status.MetricResults[0].Measurements[0].Value)
assert.Len(t, run.Status.MetricResults[1].Measurements, 1)
assert.Equal(t, "3", run.Status.MetricResults[1].Measurements[0].Value)
}
{
run := newRun()
var measurementRetentionMetricsMap = map[string]*v1alpha1.MeasurementRetention{}
measurementRetentionMetricsMap["metric2"] = &v1alpha1.MeasurementRetention{MetricName: "*", Limit: 2}
err := c.garbageCollectMeasurements(run, measurementRetentionMetricsMap, 1)
assert.Nil(t, err)
assert.Len(t, run.Status.MetricResults[0].Measurements, 1)
assert.Equal(t, "1", run.Status.MetricResults[0].Measurements[0].Value)
assert.Len(t, run.Status.MetricResults[1].Measurements, 2)
assert.Equal(t, "2", run.Status.MetricResults[1].Measurements[0].Value)
assert.Equal(t, "3", run.Status.MetricResults[1].Measurements[1].Value)
}
{
run := newRun()
var measurementRetentionMetricsMap = map[string]*v1alpha1.MeasurementRetention{}
measurementRetentionMetricsMap["metric2"] = &v1alpha1.MeasurementRetention{MetricName: "metric2", Limit: 2}
err := c.garbageCollectMeasurements(run, measurementRetentionMetricsMap, 1)
assert.Nil(t, err)
assert.Len(t, run.Status.MetricResults[0].Measurements, 1)
assert.Equal(t, "1", run.Status.MetricResults[0].Measurements[0].Value)
assert.Len(t, run.Status.MetricResults[1].Measurements, 2)
assert.Equal(t, "2", run.Status.MetricResults[1].Measurements[0].Value)
assert.Equal(t, "3", run.Status.MetricResults[1].Measurements[1].Value)
}
}

func TestResolveMetricArgsUnableToSubstitute(t *testing.T) {
Expand Down Expand Up @@ -1675,3 +1701,44 @@ func TestInvalidDryRunConfigThrowsError(t *testing.T) {
assert.Equal(t, v1alpha1.AnalysisPhaseError, newRun.Status.Phase)
assert.Equal(t, "Analysis spec invalid: dryRun[0]: Rule didn't match any metric name(s)", newRun.Status.Message)
}

func TestInvalidMeasurementsRetentionConfigThrowsError(t *testing.T) {
f := newFixture(t)
defer f.Close()
c, _, _ := f.newController(noResyncPeriodFunc)

// Mocks terminate to cancel the in-progress measurement
f.provider.On("Terminate", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(newMeasurement(v1alpha1.AnalysisPhaseSuccessful), nil)

var measurementsRetentionArray []v1alpha1.MeasurementRetention
measurementsRetentionArray = append(measurementsRetentionArray, v1alpha1.MeasurementRetention{MetricName: "error-rate"})
now := metav1.Now()
run := &v1alpha1.AnalysisRun{
Spec: v1alpha1.AnalysisRunSpec{
Terminate: true,
Args: []v1alpha1.Argument{
{
Name: "service",
Value: pointer.StringPtr("rollouts-demo-canary.default.svc.cluster.local"),
},
},
Metrics: []v1alpha1.Metric{{
Name: "success-rate",
InitialDelay: "20s",
Interval: "20s",
SuccessCondition: "result[0] > 0.90",
Provider: v1alpha1.MetricProvider{
Web: &v1alpha1.WebMetric{},
},
}},
MeasurementRetention: measurementsRetentionArray,
},
Status: v1alpha1.AnalysisRunStatus{
StartedAt: &now,
Phase: v1alpha1.AnalysisPhaseRunning,
},
}
newRun := c.reconcileAnalysisRun(run)
assert.Equal(t, v1alpha1.AnalysisPhaseError, newRun.Status.Phase)
assert.Equal(t, "Analysis spec invalid: measurementRetention[0]: Rule didn't match any metric name(s)", newRun.Status.Message)
}
73 changes: 73 additions & 0 deletions docs/features/analysis.md
Original file line number Diff line number Diff line change
Expand Up @@ -679,6 +679,79 @@ spec:
- metricName: test.*
```

## Measurements Retention

!!! important
Available since v1.2

`measurementRetention` can be used to retain other than the latest ten results for the metrics running in any mode
(dry/non-dry). Setting this option to `0` would disable it and, the controller will revert to the existing behavior of
retaining the latest ten measurements.

The following example queries Prometheus every 5 minutes to get the total number of 4XX and 5XX errors and retains the
latest twenty measurements for the 5XX metric run results instead of the default ten.

```yaml hl_lines="1 2 3"
measurementRetention:
- metricName: total-5xx-errors
limit: 20
metrics:
- name: total-5xx-errors
interval: 5m
failureCondition: result[0] >= 10
failureLimit: 3
provider:
prometheus:
address: http://prometheus.example.com:9090
query: |
sum(irate(
istio_requests_total{reporter="source",destination_service=~"{{args.service-name}}",response_code~"5.*"}[5m]
))
- name: total-4xx-errors
interval: 5m
failureCondition: result[0] >= 10
failureLimit: 3
provider:
prometheus:
address: http://prometheus.example.com:9090
query: |
sum(irate(
istio_requests_total{reporter="source",destination_service=~"{{args.service-name}}",response_code~"4.*"}[5m]
))
```

RegEx matches are also supported. `.*` can be used to apply the same retention rule to all the metrics. In the following
example, the controller will retain the latest twenty run results for all the metrics instead of the default ten results.

```yaml hl_lines="1 2 3"
measurementRetention:
- metricName: .*
limit: 20
metrics:
- name: total-5xx-errors
interval: 5m
failureCondition: result[0] >= 10
failureLimit: 3
provider:
prometheus:
address: http://prometheus.example.com:9090
query: |
sum(irate(
istio_requests_total{reporter="source",destination_service=~"{{args.service-name}}",response_code~"5.*"}[5m]
))
- name: total-4xx-errors
interval: 5m
failureCondition: result[0] >= 10
failureLimit: 3
provider:
prometheus:
address: http://prometheus.example.com:9090
query: |
sum(irate(
istio_requests_total{reporter="source",destination_service=~"{{args.service-name}}",response_code~"4.*"}[5m]
))
```

## Inconclusive Runs

Analysis runs can also be considered `Inconclusive`, which indicates the run was neither successful,
Expand Down
12 changes: 12 additions & 0 deletions manifests/crds/analysis-run-crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,18 @@ spec:
- metricName
type: object
type: array
measurementRetention:
items:
properties:
limit:
type: integer
metricName:
type: string
required:
- limit
- metricName
type: object
type: array
metrics:
items:
properties:
Expand Down
12 changes: 12 additions & 0 deletions manifests/crds/analysis-template-crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,18 @@ spec:
- metricName
type: object
type: array
measurementRetention:
items:
properties:
limit:
type: integer
metricName:
type: string
required:
- limit
- metricName
type: object
type: array
metrics:
items:
properties:
Expand Down
12 changes: 12 additions & 0 deletions manifests/crds/cluster-analysis-template-crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,18 @@ spec:
- metricName
type: object
type: array
measurementRetention:
items:
properties:
limit:
type: integer
metricName:
type: string
required:
- limit
- metricName
type: object
type: array
metrics:
items:
properties:
Expand Down
36 changes: 36 additions & 0 deletions manifests/install.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,18 @@ spec:
- metricName
type: object
type: array
measurementRetention:
items:
properties:
limit:
type: integer
metricName:
type: string
required:
- limit
- metricName
type: object
type: array
metrics:
items:
properties:
Expand Down Expand Up @@ -2815,6 +2827,18 @@ spec:
- metricName
type: object
type: array
measurementRetention:
items:
properties:
limit:
type: integer
metricName:
type: string
required:
- limit
- metricName
type: object
type: array
metrics:
items:
properties:
Expand Down Expand Up @@ -5442,6 +5466,18 @@ spec:
- metricName
type: object
type: array
measurementRetention:
items:
properties:
limit:
type: integer
metricName:
type: string
required:
- limit
- metricName
type: object
type: array
metrics:
items:
properties:
Expand Down
Loading