-
Notifications
You must be signed in to change notification settings - Fork 83
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adaptive Load Controller protos #398
Changes from all commits
8ea442d
5ac755a
b8c25a5
1c19c68
7050686
0776563
16fd8f6
c383010
6e1a483
4ef1140
4111bf4
871a959
1fd77c1
edc36b2
4d0364e
aed6d94
d9ae87d
a05a6f5
8cd4d21
d814a96
5f5a885
7e20a78
9048267
306c0ec
d33f543
442cca9
677b783
cefb366
f3684df
b71f060
eb495b8
3659817
bbc8d32
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
load("@envoy_api//bazel:api_build_system.bzl", "api_cc_py_proto_library") | ||
|
||
licenses(["notice"]) # Apache 2 | ||
|
||
# Single proto library target bundling all adaptive load controller protos, | ||
# declared through the api_cc_py_proto_library macro loaded from envoy_api. | ||
api_cc_py_proto_library( | ||
name = "adaptive_load_proto", | ||
srcs = [ | ||
"adaptive_load.proto", | ||
"benchmark_result.proto", | ||
"input_variable_setter_impl.proto", | ||
"metric_spec.proto", | ||
"metrics_plugin_impl.proto", | ||
"scoring_function_impl.proto", | ||
"step_controller_impl.proto", | ||
], | ||
# Public so that targets in any package can depend on these protos. | ||
visibility = ["//visibility:public"], | ||
deps = [ | ||
# NOTE(review): presumably supplies envoy/config/core/v3/extension.proto | ||
# (TypedExtensionConfig) -- confirm. | ||
"@envoy_api//envoy/config/core/v3:pkg", | ||
# NOTE(review): presumably supplies api/client/options.proto and | ||
# api/client/output.proto -- confirm. | ||
"@nighthawk//api/client:base", | ||
], | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
// Top-level session spec and output protos for the adaptive load controller. | ||
|
||
syntax = "proto3"; | ||
eric846 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
package nighthawk.adaptive_load; | ||
|
||
import "api/adaptive_load/benchmark_result.proto"; | ||
import "api/adaptive_load/metric_spec.proto"; | ||
import "api/client/options.proto"; | ||
import "envoy/config/core/v3/extension.proto"; | ||
import "google/protobuf/duration.proto"; | ||
import "google/rpc/status.proto"; | ||
import "validate/validate.proto"; | ||
|
||
// Parameters describing the adjusting and testing stages of an adaptive load | ||
// session, which consists of a series of Nighthawk benchmarks probing for | ||
// the optimal load on the system, followed by a longer benchmark to validate | ||
// the values. Load adjustments are calculated by the selected StepController | ||
// plugin. Metrics can come from Nighthawk stats and counters or | ||
// platform-specific data sources via MetricsPlugins. | ||
message AdaptiveLoadSessionSpec { | ||
eric846 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// Settings for MetricsPlugins that obtain metrics from outside sources. | ||
// An entry is required for every plugin referred to by metric_thresholds, | ||
// other than the "nighthawk.builtin" plugin. Optional. | ||
repeated envoy.config.core.v3.TypedExtensionConfig metrics_plugin_configs = 1; | ||
// Metrics and thresholds that determine load adjustments. The order of | ||
// metrics is not significant. Required: validation rejects an empty list. | ||
repeated MetricSpecWithThreshold metric_thresholds = 2 [(validate.rules).repeated .min_items = 1]; | ||
// Metrics that are collected and included in the output but not taken into | ||
// account when adjusting the load. May be used for debugging or | ||
// visualization. Optional. | ||
repeated MetricSpec informational_metric_specs = 3; | ||
eric846 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// A proto describing Nighthawk Service traffic. See | ||
// https://github.com/envoyproxy/nighthawk/blob/master/api/client/options.proto | ||
// | ||
// The adaptive load controller will return an error if the |duration| or | ||
// |open_loop| fields are set within |nighthawk_traffic_template|. The | ||
// controller will also be configured to overwrite at least one of the | ||
// numerical fields during the search, such as requests_per_second, so any | ||
// value of those fields specified here will be ignored. | ||
// | ||
// All other fields in |nighthawk_traffic_template| are passed through to the | ||
// Nighthawk Service. | ||
// | ||
// Note that |concurrency| in |nighthawk_traffic_template| multiplies the total | ||
// RPS sent. | ||
// | ||
// Required. | ||
nighthawk.client.CommandLineOptions nighthawk_traffic_template = 4 | ||
eric846 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
[(validate.rules).message.required = true]; | ||
// The duration of each short benchmark during the adjusting stage. Optional, default 10 seconds. | ||
// Must be positive when set. | ||
google.protobuf.Duration measuring_period = 5 [(validate.rules).duration.gt.seconds = 0]; | ||
oschaaf marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// Maximum amount of time the adjusting stage should wait for convergence | ||
// before returning an error. Optional, default 300 seconds. Must be positive | ||
// when set. | ||
google.protobuf.Duration convergence_deadline = 6 [(validate.rules).duration.gt.seconds = 0]; | ||
// The duration of the single benchmark session of the testing stage to | ||
// confirm the performance at the level of load found in the adjusting stage. | ||
// Optional, default 30 seconds. Must be positive when set, consistent with | ||
// the other duration fields in this message. | ||
google.protobuf.Duration testing_stage_duration = 7 [(validate.rules).duration.gt.seconds = 0]; | ||
// Selects and configures a StepController plugin. Required. | ||
envoy.config.core.v3.TypedExtensionConfig step_controller_config = 8 | ||
[(validate.rules).message.required = true]; | ||
} | ||
|
||
// Complete description of an adaptive load session, including metric scores | ||
// for every degree of load attempted during the adjusting stage. | ||
message AdaptiveLoadSessionOutput { | ||
// Overall status of the session with error detail. INVALID_ARGUMENT if the input spec contained | ||
// errors, DEADLINE_EXCEEDED if convergence did not occur before the deadline, ABORTED if the step | ||
// controller determined it can never converge. | ||
google.rpc.Status session_status = 1; | ||
// Results of each short benchmark performed during the adjusting stage. | ||
repeated BenchmarkResult adjusting_stage_results = 2; | ||
// Result of the single benchmark of the testing stage. | ||
// NOTE(review): presumably left unset when the adjusting stage fails -- confirm. | ||
BenchmarkResult testing_stage_result = 3; | ||
// Metrics and thresholds that were used to determine load adjustments, as referenced in the | ||
// BenchmarkResults. Copied directly from |metric_thresholds| in the input spec so that an | ||
// archived output proto keeps the thresholds alongside the recorded values and scores; it is | ||
// redundant only when the full input proto is archived together with the output. | ||
repeated MetricSpecWithThreshold metric_thresholds = 4; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can this be different than the AdaptiveLoadSessionSpec metric_thresholds. If not, then maybe this is redundant. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's copied directly from metric_thresholds in the input. I included it in the output to preserve the context when dumping the output proto to an archive. Otherwise the archive would only have the value and the score, but not the threshold. If somebody just archived the full input proto alongside the output proto, this field would be redundant. |
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
// Protos describing the results of running a single Nighthawk benchmark and | ||
// scoring the resulting metrics against thresholds. | ||
|
||
syntax = "proto3"; | ||
|
||
package nighthawk.adaptive_load; | ||
|
||
import "api/client/output.proto"; | ||
import "google/rpc/status.proto"; | ||
|
||
// Records the status of a single metric during a benchmark session. | ||
message MetricEvaluation { | ||
// Identifier for the metric that was evaluated (<plugin name>/<metric name>). | ||
string metric_id = 1; | ||
// Numerical value of the metric measured during this benchmark session. | ||
double metric_value = 2; | ||
// Score returned by a ScoringFunction plugin. This expresses how close the | ||
// metric was to the threshold by an arbitrary formula selected and | ||
// configured in the ThresholdSpec, such as a sigmoid curve. The controller | ||
// can choose to make larger input adjustments when the score is larger, in | ||
// order to converge faster. Not set if the metric was only informational. | ||
// | ||
// NOTE: this is a plain proto3 double without presence tracking, so a score | ||
// that was never set reads back as 0.0, the same value a scoring function | ||
// yields when the metric sits exactly on its threshold. Use |weight| to tell | ||
// an informational metric apart from a scored one. | ||
double threshold_score = 3; | ||
// Configured weight of the metric. 0.0 if the metric was only informational. | ||
double weight = 4; | ||
} | ||
|
||
// Summary of a single Nighthawk Service benchmark session with evaluation | ||
// results. | ||
message BenchmarkResult { | ||
// Raw Nighthawk Service output. Includes start/end times and full Nighthawk | ||
// Service input spec. | ||
nighthawk.client.Output nighthawk_service_output = 1; | ||
// Execution status of this call to the Nighthawk Service. This will record errors connecting to | ||
// the Nighthawk Service and internal errors returned from the Nighthawk Service. | ||
// Session-level outcomes such as convergence status are recorded in | ||
// AdaptiveLoadSessionOutput.session_status rather than in this field. | ||
google.rpc.Status status = 2; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we add more comments on what the status should we get for special errors. For example what is the error if we never converge? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Clarified the comment on this field, which applies narrowly to failures to call the Nighthawk Service and internal errors returned by Nighthawk Service. Also extended the comment in AdaptiveLoadSessionOutput where things like convergence status are recorded. |
||
// Status of all declared metrics during this benchmark session. Not present | ||
// in the event of Nighthawk Service errors. | ||
// NOTE(review): presumably one entry per threshold metric and per | ||
// informational metric from the session spec -- confirm. | ||
repeated MetricEvaluation metric_evaluations = 3; | ||
eric846 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
// Plugin-specific config protos for InputVariableSetter plugins. | ||
|
||
syntax = "proto3"; | ||
|
||
package nighthawk.adaptive_load; | ||
|
||
// Configuration for RequestsPerSecondInputVariableSetter (plugin name: "nighthawk.rps") | ||
// that sets |requests_per_second| within CommandLineOptions to a numeric value | ||
// being varied by a StepController. | ||
message RequestsPerSecondInputVariableSetterConfig { | ||
// Intentionally empty: this plugin does not need any configuration. | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
// Protos for identifying metrics and specifying thresholds. | ||
|
||
syntax = "proto3"; | ||
|
||
package nighthawk.adaptive_load; | ||
|
||
import "envoy/config/core/v3/extension.proto"; | ||
import "google/protobuf/wrappers.proto"; | ||
import "validate/validate.proto"; | ||
|
||
// Identifies a feedback metric. | ||
message MetricSpec { | ||
// Name of the metric to evaluate. For the set of built-in metric names, see | ||
// source/adaptive_load/metrics_plugin_impl.cc. Required. | ||
// Validation rejects an empty name. Each MetricsPlugin supports its own fixed | ||
// set of metric names. | ||
string metric_name = 1 [(validate.rules).string.min_len = 1]; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How do plugins and metric names relate? Does each plugin only export out a fixed number of metric names? Or is metric name an opaque ID? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Each plugin will support its own fixed set of metric names. We will query each plugin for its metric names at startup in order to validate the MetricSpec in the adaptive load session input proto. For nighthawk.builtin:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. SGTM. I'm going to be OOTO next two weeks, so please go ahead with this set of protos. I think as long as we're the only consumers, which is highly likely for the near future, it's fine to iterate as needed. |
||
// Name of the MetricsPlugin providing the metric. Optional, default "nighthawk.builtin". | ||
// As a plain proto3 string, an empty value cannot be told apart from an | ||
// omitted one. | ||
string metrics_plugin_name = 2; | ||
} | ||
|
||
// Specifies how to score a metric against a threshold. | ||
message ThresholdSpec { | ||
// Selection and configuration of a ScoringFunction that measures proximity | ||
// to a threshold. 0.0 means the value equals the threshold, positive means | ||
// the value is within the threshold so the input should ramp up, and | ||
// negative means the value is outside the threshold so input should ramp | ||
// down. Required: validation rejects a missing scoring function. | ||
envoy.config.core.v3.TypedExtensionConfig scoring_function = 1 | ||
[(validate.rules).message.required = true]; | ||
// Relative importance of this threshold when adjusting based on multiple | ||
// metrics. Optional, default 1.0. | ||
// Declared as a DoubleValue wrapper so an omitted weight can be told apart | ||
// from an explicit one; an explicit weight must be greater than 0.0. | ||
google.protobuf.DoubleValue weight = 2 [(validate.rules).double.gt = 0.0]; | ||
} | ||
|
||
// Identifies a feedback metric and specifies a threshold for it. | ||
message MetricSpecWithThreshold { | ||
// Identifies a metric to collect and evaluate. Required: validation rejects | ||
// a missing metric_spec. | ||
MetricSpec metric_spec = 1 [(validate.rules).message.required = true]; | ||
// Specifies a threshold for this metric. Required: validation rejects a | ||
// missing threshold_spec. | ||
ThresholdSpec threshold_spec = 2 [(validate.rules).message.required = true]; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
// Plugin-specific config protos for MetricsPlugin plugins. | ||
|
||
syntax = "proto3"; | ||
|
||
package nighthawk.adaptive_load; | ||
|
||
// Plugin-specific config protos for MetricsPlugins that ship with Nighthawk should go here. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
// Plugin-specific config protos for ScoringFunction plugins. | ||
|
||
syntax = "proto3"; | ||
|
||
package nighthawk.adaptive_load; | ||
|
||
import "google/protobuf/wrappers.proto"; | ||
|
||
// Configuration for BinaryScoringFunction (plugin name: "nighthawk.binary") that is 1.0 | ||
// when the value is within thresholds and -1.0 otherwise. | ||
message BinaryScoringFunctionConfig { | ||
// Both bounds are DoubleValue wrappers, so an omitted bound is distinguishable | ||
// from an explicit 0.0. | ||
// NOTE(review): whether a value exactly equal to a bound counts as within | ||
// thresholds is not stated here -- confirm against the plugin. | ||
// | ||
// The minimum allowed value of the metric. Optional, default -infinity. | ||
google.protobuf.DoubleValue lower_threshold = 1; | ||
// The maximum allowed value of the metric. Optional, default infinity. | ||
google.protobuf.DoubleValue upper_threshold = 2; | ||
} | ||
|
||
// Configuration for LinearScoringFunction (plugin name: "nighthawk.linear") that | ||
// calculates a metric score as k * (threshold - value), where k is a scaling | ||
// constant. The score is 0.0 when the value exactly equals the threshold, | ||
// positive below the threshold (meaning input should ramp up), and negative | ||
// above the threshold. The score is proportional to the difference from the | ||
// threshold. | ||
message LinearScoringFunctionConfig { | ||
// The target value of the metric. Required. | ||
// Plain proto3 double: an omitted threshold cannot be told apart from an | ||
// explicit 0.0, and no validation rule enforces that it is set. | ||
double threshold = 1; | ||
// Scaling constant: k in k * (threshold - value). Use this in combination | ||
// with step controller constants to produce reasonable input increments for | ||
// reasonable differences from the threshold. Required. | ||
// If left at 0.0 (the proto3 default), the formula yields 0.0 for every | ||
// value. NOTE(review): presumably k is expected to be positive so that the | ||
// score is positive below the threshold -- confirm. | ||
double k = 2; | ||
} | ||
|
||
// Configuration for SigmoidScoringFunction (plugin name: "nighthawk.sigmoid") that | ||
// calculates a metric score as 1 - 2 / (1 + exp(-k(value - threshold))), an | ||
// upside-down sigmoid curve centered on a threshold. The output is 0.0 when the | ||
// metric equals the threshold, approaches 1.0 for values far below the | ||
// threshold, and approaches -1.0 for values far above the threshold. | ||
message SigmoidScoringFunctionConfig { | ||
// The target value of the metric. Required. | ||
// Plain proto3 double: an omitted threshold cannot be told apart from an | ||
// explicit 0.0, and no validation rule enforces that it is set. | ||
double threshold = 1; | ||
// Tuning constant: k in 1 - 2 / (1 + exp(-k(value - threshold))). k should | ||
// be around the same size as 1/threshold. Required. | ||
// If left at 0.0 (the proto3 default), the formula evaluates to 0.0 for | ||
// every value. | ||
double k = 2; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
// Plugin-specific config protos for StepController plugins. | ||
|
||
syntax = "proto3"; | ||
|
||
package nighthawk.adaptive_load; | ||
|
||
import "envoy/config/core/v3/extension.proto"; | ||
|
||
// Configuration for ExponentialSearchStepController (plugin name: | ||
// "nighthawk.exponential-search") that performs an exponential search for the optimal | ||
// value of a single Nighthawk input variable (e.g. RPS). Exponential search | ||
// starts with the input set to |initial_value| and increases the input by | ||
// |exponential_factor| until the metric goes outside thresholds at some input | ||
// value X (i.e. the sign of the score becomes negative); then it performs a | ||
// binary search with input values between (X/exponential_factor) and X for the | ||
// highest input value for which the metric is within thresholds. | ||
message ExponentialSearchStepControllerConfig { | ||
// Selects a plugin that knows how to apply a numeric value generated by the | ||
// StepController within CommandLineOptions. Optional, defaults to "nighthawk.rps" | ||
// plugin, which sets |requests_per_second| in CommandLineOptions. | ||
envoy.config.core.v3.TypedExtensionConfig input_variable_setter = 1; | ||
// Initial value of the input variable that should be attempted. Required. | ||
// Plain proto3 double: an omitted value cannot be told apart from an explicit | ||
// 0.0, and no validation rule enforces that it is set. | ||
// NOTE(review): presumably must be positive, since scaling 0.0 by a factor | ||
// would never increase the input -- confirm. | ||
double initial_value = 2; | ||
// Factor to increase the input variable during the exponential phase. | ||
// Optional, default 2.0. | ||
// NOTE(review): presumably an unset value (0.0) selects the default and the | ||
// factor is expected to be greater than 1.0 -- confirm against the plugin. | ||
double exponential_factor = 3; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have some nits but LGTM