diff --git a/api/envoy/config/cluster/v3/cluster.proto b/api/envoy/config/cluster/v3/cluster.proto index 6123bd59c14e..b4ea53bb0933 100644 --- a/api/envoy/config/cluster/v3/cluster.proto +++ b/api/envoy/config/cluster/v3/cluster.proto @@ -43,7 +43,7 @@ message ClusterCollection { } // Configuration for a single upstream cluster. -// [#next-free-field: 49] +// [#next-free-field: 50] message Cluster { option (udpa.annotations.versioning).previous_message_type = "envoy.api.v2.Cluster"; @@ -856,7 +856,12 @@ message Cluster { // request. These show what percentage of a request's per try and global timeout was used. A value // of 0 would indicate that none of the timeout was used or that the timeout was infinite. A value // of 100 would indicate that the request took the entirety of the timeout given to it. - bool track_timeout_budgets = 47; + // + // .. attention:: + // + // This field has been deprecated in favor of `timeout_budgets`, part of + // :ref:`track_cluster_stats `. + bool track_timeout_budgets = 47 [deprecated = true]; // Optional customization and configuration of upstream connection pool, and upstream type. // @@ -876,6 +881,9 @@ message Cluster { // CONNECT only if a custom filter indicates it is appropriate, the custom factories // can be registered and configured here. core.v3.TypedExtensionConfig upstream_config = 48; + + // Configuration to track optional cluster stats. + TrackClusterStats track_cluster_stats = 49; } // [#not-implemented-hide:] Extensible load balancing policy configuration. @@ -936,3 +944,17 @@ message UpstreamConnectionOptions { // If set then set SO_KEEPALIVE on the socket to enable TCP Keepalives. core.v3.TcpKeepalive tcp_keepalive = 1; } + +message TrackClusterStats { + // If timeout_budgets is true, the :ref:`timeout budget histograms + // ` will be published for each + // request. These show what percentage of a request's per try and global timeout was used. A value + // of 0 would indicate that none of the timeout was used or that the timeout was infinite. A value + // of 100 would indicate that the request took the entirety of the timeout given to it. + bool timeout_budgets = 1; + + // If request_response_sizes is true, then the :ref:`histograms + // ` tracking header and body sizes + // of requests and responses will be published. + bool request_response_sizes = 2; +} diff --git a/api/envoy/config/cluster/v4alpha/cluster.proto b/api/envoy/config/cluster/v4alpha/cluster.proto index 6c1302d28941..4172b07e0538 100644 --- a/api/envoy/config/cluster/v4alpha/cluster.proto +++ b/api/envoy/config/cluster/v4alpha/cluster.proto @@ -45,7 +45,7 @@ message ClusterCollection { } // Configuration for a single upstream cluster. -// [#next-free-field: 49] +// [#next-free-field: 50] message Cluster { option (udpa.annotations.versioning).previous_message_type = "envoy.config.cluster.v3.Cluster"; @@ -545,9 +545,9 @@ message Cluster { google.protobuf.Duration max_interval = 2 [(validate.rules).duration = {gt {nanos: 1000000}}]; } - reserved 12, 15, 7, 11, 35; + reserved 12, 15, 7, 11, 35, 47; - reserved "hosts", "tls_context", "extension_protocol_options"; + reserved "hosts", "tls_context", "extension_protocol_options", "track_timeout_budgets"; // Configuration to use different transport sockets for different endpoints. // The entry of *envoy.transport_socket_match* in the @@ -855,13 +855,6 @@ message Cluster { // from the LRS stream here.] 
core.v4alpha.ConfigSource lrs_server = 42; - // If track_timeout_budgets is true, the :ref:`timeout budget histograms - // ` will be published for each - // request. These show what percentage of a request's per try and global timeout was used. A value - // of 0 would indicate that none of the timeout was used or that the timeout was infinite. A value - // of 100 would indicate that the request took the entirety of the timeout given to it. - bool track_timeout_budgets = 47; - // Optional customization and configuration of upstream connection pool, and upstream type. // // Currently this field only applies for HTTP traffic but is designed for eventual use for custom @@ -880,6 +873,9 @@ message Cluster { // CONNECT only if a custom filter indicates it is appropriate, the custom factories // can be registered and configured here. core.v4alpha.TypedExtensionConfig upstream_config = 48; + + // Configuration to track optional cluster stats. + TrackClusterStats track_cluster_stats = 49; } // [#not-implemented-hide:] Extensible load balancing policy configuration. @@ -942,3 +938,20 @@ message UpstreamConnectionOptions { // If set then set SO_KEEPALIVE on the socket to enable TCP Keepalives. core.v4alpha.TcpKeepalive tcp_keepalive = 1; } + +message TrackClusterStats { + option (udpa.annotations.versioning).previous_message_type = + "envoy.config.cluster.v3.TrackClusterStats"; + + // If timeout_budgets is true, the :ref:`timeout budget histograms + // ` will be published for each + // request. These show what percentage of a request's per try and global timeout was used. A value + // of 0 would indicate that none of the timeout was used or that the timeout was infinite. A value + // of 100 would indicate that the request took the entirety of the timeout given to it. + bool timeout_budgets = 1; + + // If request_response_sizes is true, then the :ref:`histograms + // ` tracking header and body sizes + // of requests and responses will be published. + bool request_response_sizes = 2; +} diff --git a/docs/root/configuration/upstream/cluster_manager/cluster_stats.rst b/docs/root/configuration/upstream/cluster_manager/cluster_stats.rst index 8fe03e0b55a4..5d956c28d2b3 100644 --- a/docs/root/configuration/upstream/cluster_manager/cluster_stats.rst +++ b/docs/root/configuration/upstream/cluster_manager/cluster_stats.rst @@ -314,3 +314,20 @@ Statistics for monitoring effective host weights when using the min_entries_per_host, Gauge, Minimum number of entries for a single host max_entries_per_host, Gauge, Maximum number of entries for a single host + +.. _config_cluster_manager_cluster_stats_request_response_sizes: + +Request Response Size statistics +-------------------------------- + +If :ref:`request response size statistics ` are tracked, +statistics will be added to *cluster.* and contain the following: + +.. 
csv-table:: + :header: Name, Type, Description + :widths: 1, 1, 2 + + upstream_rq_headers_size, Histogram, Request headers size in bytes per upstream + upstream_rq_body_size, Histogram, Request body size in bytes per upstream + upstream_rs_headers_size, Histogram, Response headers size in bytes per upstream + upstream_rs_body_size, Histogram, Response body size in bytes per upstream diff --git a/docs/root/version_history/current.rst b/docs/root/version_history/current.rst index e3263fee3d41..47e8e2fbec95 100644 --- a/docs/root/version_history/current.rst +++ b/docs/root/version_history/current.rst @@ -36,13 +36,16 @@ Removed Config or Runtime New Features ------------ - * ext_authz filter: added support for emitting dynamic metadata for both :ref:`HTTP ` and :ref:`network ` filters. * grpc-json: support specifying `response_body` field in for `google.api.HttpBody` message. * http: introduced new HTTP/1 and HTTP/2 codec implementations that will remove the use of exceptions for control flow due to high risk factors and instead use error statuses. The old behavior is deprecated, but can be used during the removal period by setting the runtime feature `envoy.reloadable_features.new_codec_behavior` to false. The removal period will be one month. * load balancer: added a :ref:`configuration` option to specify the active request bias used by the least request load balancer. * redis: added fault injection support :ref:`fault injection for redis proxy `, described further in :ref:`configuration documentation `. +* stats: added optional histograms to :ref:`cluster stats ` + that track headers and body sizes of requests and responses. * tap: added :ref:`generic body matcher` to scan http requests and responses for text or hex patterns. Deprecated ---------- +* The :ref:`track_timeout_budgets ` + field has been deprecated in favor of `timeout_budgets` part of an :ref:`Optional Configuration `. diff --git a/generated_api_shadow/envoy/config/cluster/v3/cluster.proto b/generated_api_shadow/envoy/config/cluster/v3/cluster.proto index cf6b9cb652b3..ac93934e72bf 100644 --- a/generated_api_shadow/envoy/config/cluster/v3/cluster.proto +++ b/generated_api_shadow/envoy/config/cluster/v3/cluster.proto @@ -44,7 +44,7 @@ message ClusterCollection { } // Configuration for a single upstream cluster. -// [#next-free-field: 49] +// [#next-free-field: 50] message Cluster { option (udpa.annotations.versioning).previous_message_type = "envoy.api.v2.Cluster"; @@ -854,7 +854,12 @@ message Cluster { // request. These show what percentage of a request's per try and global timeout was used. A value // of 0 would indicate that none of the timeout was used or that the timeout was infinite. A value // of 100 would indicate that the request took the entirety of the timeout given to it. - bool track_timeout_budgets = 47; + // + // .. attention:: + // + // This field has been deprecated in favor of `timeout_budgets`, part of + // :ref:`track_cluster_stats `. + bool track_timeout_budgets = 47 [deprecated = true]; // Optional customization and configuration of upstream connection pool, and upstream type. // @@ -875,6 +880,9 @@ message Cluster { // can be registered and configured here. core.v3.TypedExtensionConfig upstream_config = 48; + // Configuration to track optional cluster stats. 
+ TrackClusterStats track_cluster_stats = 49; + repeated core.v3.Address hidden_envoy_deprecated_hosts = 7 [deprecated = true]; envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext hidden_envoy_deprecated_tls_context = @@ -940,3 +948,17 @@ message UpstreamConnectionOptions { // If set then set SO_KEEPALIVE on the socket to enable TCP Keepalives. core.v3.TcpKeepalive tcp_keepalive = 1; } + +message TrackClusterStats { + // If timeout_budgets is true, the :ref:`timeout budget histograms + // ` will be published for each + // request. These show what percentage of a request's per try and global timeout was used. A value + // of 0 would indicate that none of the timeout was used or that the timeout was infinite. A value + // of 100 would indicate that the request took the entirety of the timeout given to it. + bool timeout_budgets = 1; + + // If request_response_sizes is true, then the :ref:`histograms + // ` tracking header and body sizes + // of requests and responses will be published. + bool request_response_sizes = 2; +} diff --git a/generated_api_shadow/envoy/config/cluster/v4alpha/cluster.proto b/generated_api_shadow/envoy/config/cluster/v4alpha/cluster.proto index 6c1302d28941..facc5d38d16c 100644 --- a/generated_api_shadow/envoy/config/cluster/v4alpha/cluster.proto +++ b/generated_api_shadow/envoy/config/cluster/v4alpha/cluster.proto @@ -45,7 +45,7 @@ message ClusterCollection { } // Configuration for a single upstream cluster. -// [#next-free-field: 49] +// [#next-free-field: 50] message Cluster { option (udpa.annotations.versioning).previous_message_type = "envoy.config.cluster.v3.Cluster"; @@ -860,7 +860,12 @@ message Cluster { // request. These show what percentage of a request's per try and global timeout was used. A value // of 0 would indicate that none of the timeout was used or that the timeout was infinite. A value // of 100 would indicate that the request took the entirety of the timeout given to it. - bool track_timeout_budgets = 47; + // + // .. attention:: + // + // This field has been deprecated in favor of `timeout_budgets`, part of + // :ref:`track_cluster_stats `. + bool hidden_envoy_deprecated_track_timeout_budgets = 47 [deprecated = true]; // Optional customization and configuration of upstream connection pool, and upstream type. // @@ -880,6 +885,9 @@ message Cluster { // CONNECT only if a custom filter indicates it is appropriate, the custom factories // can be registered and configured here. core.v4alpha.TypedExtensionConfig upstream_config = 48; + + // Configuration to track optional cluster stats. + TrackClusterStats track_cluster_stats = 49; } // [#not-implemented-hide:] Extensible load balancing policy configuration. @@ -942,3 +950,20 @@ message UpstreamConnectionOptions { // If set then set SO_KEEPALIVE on the socket to enable TCP Keepalives. core.v4alpha.TcpKeepalive tcp_keepalive = 1; } + +message TrackClusterStats { + option (udpa.annotations.versioning).previous_message_type = + "envoy.config.cluster.v3.TrackClusterStats"; + + // If timeout_budgets is true, the :ref:`timeout budget histograms + // ` will be published for each + // request. These show what percentage of a request's per try and global timeout was used. A value + // of 0 would indicate that none of the timeout was used or that the timeout was infinite. A value + // of 100 would indicate that the request took the entirety of the timeout given to it. 
+ bool timeout_budgets = 1; + + // If request_response_sizes is true, then the :ref:`histograms + // <config_cluster_manager_cluster_stats_request_response_sizes>` tracking header and body sizes + // of requests and responses will be published. + bool request_response_sizes = 2; +} diff --git a/include/envoy/upstream/upstream.h b/include/envoy/upstream/upstream.h index 70c9dd9755c8..cd15d0bb3dff 100644 --- a/include/envoy/upstream/upstream.h +++ b/include/envoy/upstream/upstream.h @@ -622,6 +622,15 @@ class PrioritySet { REMAINING_GAUGE(remaining_retries, Accumulate) \ REMAINING_GAUGE(remaining_rq, Accumulate) +/** + * All stats tracking request/response headers and body sizes. Not used by default. + */ +#define ALL_CLUSTER_REQUEST_RESPONSE_SIZE_STATS(HISTOGRAM) \ + HISTOGRAM(upstream_rq_headers_size, Bytes) \ + HISTOGRAM(upstream_rq_body_size, Bytes) \ + HISTOGRAM(upstream_rs_headers_size, Bytes) \ + HISTOGRAM(upstream_rs_body_size, Bytes) + /** * All stats around timeout budgets. Not used by default. */ @@ -650,6 +659,17 @@ struct ClusterCircuitBreakersStats { ALL_CLUSTER_CIRCUIT_BREAKERS_STATS(GENERATE_GAUGE_STRUCT, GENERATE_GAUGE_STRUCT) }; +/** + * Struct definition for cluster request/response size stats. @see stats_macros.h + */ +struct ClusterRequestResponseSizeStats { + ALL_CLUSTER_REQUEST_RESPONSE_SIZE_STATS(GENERATE_HISTOGRAM_STRUCT) +}; + +using ClusterRequestResponseSizeStatsPtr = std::unique_ptr<ClusterRequestResponseSizeStats>; +using ClusterRequestResponseSizeStatsOptRef = + absl::optional<std::reference_wrapper<ClusterRequestResponseSizeStats>>; + /** * Struct definition for cluster timeout budget stats. @see stats_macros.h */ @@ -657,6 +677,10 @@ struct ClusterTimeoutBudgetStats { ALL_CLUSTER_TIMEOUT_BUDGET_STATS(GENERATE_HISTOGRAM_STRUCT) }; +using ClusterTimeoutBudgetStatsPtr = std::unique_ptr<ClusterTimeoutBudgetStats>; +using ClusterTimeoutBudgetStatsOptRef = + absl::optional<std::reference_wrapper<ClusterTimeoutBudgetStats>>; + /** * All extension protocol specific options returned by the method at * NamedNetworkFilterConfigFactory::createProtocolOptions @@ -851,9 +875,16 @@ class ClusterInfo { virtual ClusterLoadReportStats& loadReportStats() const PURE; /** - * @return absl::optional<ClusterTimeoutBudgetStats>& stats on timeout budgets for this cluster. + * @return absl::optional<std::reference_wrapper<ClusterRequestResponseSizeStats>> stats to track + * headers/body sizes of request/response for this cluster. + */ + virtual ClusterRequestResponseSizeStatsOptRef requestResponseSizeStats() const PURE; + + /** + * @return absl::optional<std::reference_wrapper<ClusterTimeoutBudgetStats>> stats on timeout + * budgets for this cluster. */ - virtual const absl::optional<ClusterTimeoutBudgetStats>& timeoutBudgetStats() const PURE; + virtual ClusterTimeoutBudgetStatsOptRef timeoutBudgetStats() const PURE; /** * Returns an optional source address for upstream connections to bind to.
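Both accessors above return an optional reference wrapper rather than an optional value, so callers check for presence once and then record through the reference. A minimal consumer sketch against this interface follows; the free function name recordRequestSizes and its arguments are illustrative and not part of the patch:

void recordRequestSizes(const Upstream::ClusterInfo& cluster, uint64_t headers_bytes,
                        uint64_t body_bytes) {
  // Only present when the cluster enables track_cluster_stats.request_response_sizes.
  Upstream::ClusterRequestResponseSizeStatsOptRef stats = cluster.requestResponseSizeStats();
  if (stats.has_value()) {
    stats->get().upstream_rq_headers_size_.recordValue(headers_bytes);
    stats->get().upstream_rq_body_size_.recordValue(body_bytes);
  }
}

The connection manager and router changes below follow the same guarded pattern for both stat families.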
diff --git a/source/common/http/conn_manager_impl.cc b/source/common/http/conn_manager_impl.cc index b37a31afe165..c633ff28ca68 100644 --- a/source/common/http/conn_manager_impl.cc +++ b/source/common/http/conn_manager_impl.cc @@ -607,6 +607,17 @@ ConnectionManagerImpl::ActiveStream::ActiveStream(ConnectionManagerImpl& connect ConnectionManagerImpl::ActiveStream::~ActiveStream() { stream_info_.onRequestComplete(); + Upstream::HostDescriptionConstSharedPtr upstream_host = + connection_manager_.read_callbacks_->upstreamHost(); + + if (upstream_host != nullptr) { + Upstream::ClusterRequestResponseSizeStatsOptRef req_resp_stats = + upstream_host->cluster().requestResponseSizeStats(); + if (req_resp_stats.has_value()) { + req_resp_stats->get().upstream_rq_body_size_.recordValue(stream_info_.bytesReceived()); + req_resp_stats->get().upstream_rs_body_size_.recordValue(stream_info_.bytesSent()); + } + } // A downstream disconnect can be identified for HTTP requests when the upstream returns with a 0 // response code and when no other response flags are set. @@ -722,6 +733,17 @@ void ConnectionManagerImpl::ActiveStream::chargeStats(const ResponseHeaderMap& h return; } + Upstream::HostDescriptionConstSharedPtr upstream_host = + connection_manager_.read_callbacks_->upstreamHost(); + + if (upstream_host != nullptr) { + Upstream::ClusterRequestResponseSizeStatsOptRef req_resp_stats = + upstream_host->cluster().requestResponseSizeStats(); + if (req_resp_stats.has_value()) { + req_resp_stats->get().upstream_rs_headers_size_.recordValue(headers.byteSize()); + } + } + connection_manager_.stats_.named_.downstream_rq_completed_.inc(); connection_manager_.listener_stats_.downstream_rq_completed_.inc(); if (CodeUtility::is1xx(response_code)) { @@ -769,6 +791,16 @@ void ConnectionManagerImpl::ActiveStream::decodeHeaders(RequestHeaderMapPtr&& he ScopeTrackerScopeState scope(this, connection_manager_.read_callbacks_->connection().dispatcher()); request_headers_ = std::move(headers); + Upstream::HostDescriptionConstSharedPtr upstream_host = + connection_manager_.read_callbacks_->upstreamHost(); + + if (upstream_host != nullptr) { + Upstream::ClusterRequestResponseSizeStatsOptRef req_resp_stats = + upstream_host->cluster().requestResponseSizeStats(); + if (req_resp_stats.has_value()) { + req_resp_stats->get().upstream_rq_headers_size_.recordValue(request_headers_->byteSize()); + } + } // Both saw_connection_close_ and is_head_request_ affect local replies: set // them as early as possible. 
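The histograms recorded above are only instantiated when the owning cluster opts in through the new track_cluster_stats field. A minimal cluster entry enabling both optional stat families, modeled on the YAML used in the unit tests later in this patch (the cluster name is a placeholder), would look like:

  name: some_service
  connect_timeout: 0.25s
  type: STRICT_DNS
  lb_policy: ROUND_ROBIN
  track_cluster_stats:
    timeout_budgets: true
    request_response_sizes: true

With this in place the new histograms are published under the cluster's stat prefix, for example cluster.some_service.upstream_rq_headers_size.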
diff --git a/source/common/router/router.cc b/source/common/router/router.cc index b2d587c33434..c7fd54d418ba 100644 --- a/source/common/router/router.cc +++ b/source/common/router/router.cc @@ -948,12 +948,13 @@ void Filter::chargeUpstreamAbort(Http::Code code, bool dropped, UpstreamRequest& void Filter::onUpstreamTimeoutAbort(StreamInfo::ResponseFlag response_flags, absl::string_view details) { - if (cluster_->timeoutBudgetStats().has_value()) { + Upstream::ClusterTimeoutBudgetStatsOptRef tb_stats = cluster()->timeoutBudgetStats(); + if (tb_stats.has_value()) { Event::Dispatcher& dispatcher = callbacks_->dispatcher(); std::chrono::milliseconds response_time = std::chrono::duration_cast( dispatcher.timeSource().monotonicTime() - downstream_request_complete_time_); - cluster_->timeoutBudgetStats()->upstream_rq_timeout_budget_percent_used_.recordValue( + tb_stats->get().upstream_rq_timeout_budget_percent_used_.recordValue( FilterUtility::percentageOfTimeout(response_time, timeout_.global_timeout_)); } @@ -1340,8 +1341,9 @@ void Filter::onUpstreamComplete(UpstreamRequest& upstream_request) { std::chrono::milliseconds response_time = std::chrono::duration_cast( dispatcher.timeSource().monotonicTime() - downstream_request_complete_time_); - if (cluster_->timeoutBudgetStats().has_value()) { - cluster_->timeoutBudgetStats()->upstream_rq_timeout_budget_percent_used_.recordValue( + Upstream::ClusterTimeoutBudgetStatsOptRef tb_stats = cluster()->timeoutBudgetStats(); + if (tb_stats.has_value()) { + tb_stats->get().upstream_rq_timeout_budget_percent_used_.recordValue( FilterUtility::percentageOfTimeout(response_time, timeout_.global_timeout_)); } diff --git a/source/common/router/upstream_request.cc b/source/common/router/upstream_request.cc index e4d97c82f0bb..c2db2ca31af8 100644 --- a/source/common/router/upstream_request.cc +++ b/source/common/router/upstream_request.cc @@ -90,10 +90,9 @@ UpstreamRequest::~UpstreamRequest() { const MonotonicTime end_time = dispatcher.timeSource().monotonicTime(); const std::chrono::milliseconds response_time = std::chrono::duration_cast(end_time - start_time_); - parent_.cluster() - ->timeoutBudgetStats() - ->upstream_rq_timeout_budget_per_try_percent_used_.recordValue( - FilterUtility::percentageOfTimeout(response_time, parent_.timeout().per_try_timeout_)); + Upstream::ClusterTimeoutBudgetStatsOptRef tb_stats = parent_.cluster()->timeoutBudgetStats(); + tb_stats->get().upstream_rq_timeout_budget_per_try_percent_used_.recordValue( + FilterUtility::percentageOfTimeout(response_time, parent_.timeout().per_try_timeout_)); } stream_info_.setUpstreamTiming(upstream_timing_); diff --git a/source/common/upstream/upstream_impl.cc b/source/common/upstream/upstream_impl.cc index 269745bd765a..44fc4860a931 100644 --- a/source/common/upstream/upstream_impl.cc +++ b/source/common/upstream/upstream_impl.cc @@ -26,6 +26,7 @@ #include "envoy/ssl/context_manager.h" #include "envoy/stats/scope.h" #include "envoy/upstream/health_checker.h" +#include "envoy/upstream/upstream.h" #include "common/common/enum_to_int.h" #include "common/common/fmt.h" @@ -618,6 +619,11 @@ ClusterStats ClusterInfoImpl::generateStats(Stats::Scope& scope) { return {ALL_CLUSTER_STATS(POOL_COUNTER(scope), POOL_GAUGE(scope), POOL_HISTOGRAM(scope))}; } +ClusterRequestResponseSizeStats +ClusterInfoImpl::generateRequestResponseSizeStats(Stats::Scope& scope) { + return {ALL_CLUSTER_REQUEST_RESPONSE_SIZE_STATS(POOL_HISTOGRAM(scope))}; +} + ClusterLoadReportStats 
ClusterInfoImpl::generateLoadReportStats(Stats::Scope& scope) { return {ALL_CLUSTER_LOAD_REPORT_STATS(POOL_COUNTER(scope))}; } @@ -687,10 +693,9 @@ ClusterInfoImpl::ClusterInfoImpl( socket_matcher_(std::move(socket_matcher)), stats_scope_(std::move(stats_scope)), stats_(generateStats(*stats_scope_)), load_report_stats_store_(stats_scope_->symbolTable()), load_report_stats_(generateLoadReportStats(load_report_stats_store_)), - timeout_budget_stats_(config.track_timeout_budgets() - ? absl::make_optional<ClusterTimeoutBudgetStats>( - generateTimeoutBudgetStats(*stats_scope_)) - : absl::nullopt), + optional_cluster_stats_((config.has_track_cluster_stats() || config.track_timeout_budgets()) + ? std::make_unique<OptionalClusterStats>(config, *stats_scope_) + : nullptr), features_(parseFeatures(config)), http1_settings_(Http::Utility::parseHttp1Settings(config.http_protocol_options())), http2_options_(Http2::Utility::initializeAndValidateOptions(config.http2_protocol_options())), @@ -1095,6 +1100,17 @@ void ClusterImplBase::validateEndpointsForZoneAwareRouting( } } +ClusterInfoImpl::OptionalClusterStats::OptionalClusterStats( + const envoy::config::cluster::v3::Cluster& config, Stats::Scope& stats_scope) + : timeout_budget_stats_( + (config.track_cluster_stats().timeout_budgets() || config.track_timeout_budgets()) + ? std::make_unique<ClusterTimeoutBudgetStats>(generateTimeoutBudgetStats(stats_scope)) + : nullptr), + request_response_size_stats_(config.track_cluster_stats().request_response_sizes() + ? std::make_unique<ClusterRequestResponseSizeStats>( + generateRequestResponseSizeStats(stats_scope)) + : nullptr) {} + ClusterInfoImpl::ResourceManagers::ResourceManagers( const envoy::config::cluster::v3::Cluster& config, Runtime::Loader& runtime, const std::string& cluster_name, Stats::Scope& stats_scope) { diff --git a/source/common/upstream/upstream_impl.h b/source/common/upstream/upstream_impl.h index 5cdb994b3f41..3b42d06818c3 100644 --- a/source/common/upstream/upstream_impl.h +++ b/source/common/upstream/upstream_impl.h @@ -523,6 +523,7 @@ class ClusterInfoImpl : public ClusterInfo, protected Logger::Loggable<Logger::Id::upstream> { + static ClusterRequestResponseSizeStats generateRequestResponseSizeStats(Stats::Scope& scope); + ClusterRequestResponseSizeStatsOptRef requestResponseSizeStats() const override { + if (optional_cluster_stats_ == nullptr || + optional_cluster_stats_->request_response_size_stats_ == nullptr) { + return absl::nullopt; + } + + return std::ref(*(optional_cluster_stats_->request_response_size_stats_)); + } + ClusterLoadReportStats& loadReportStats() const override { return load_report_stats_; } - const absl::optional<ClusterTimeoutBudgetStats>& timeoutBudgetStats() const override { - return timeout_budget_stats_; + + ClusterTimeoutBudgetStatsOptRef timeoutBudgetStats() const override { + if (optional_cluster_stats_ == nullptr || + optional_cluster_stats_->timeout_budget_stats_ == nullptr) { + return absl::nullopt; + } + + return std::ref(*(optional_cluster_stats_->timeout_budget_stats_)); } + const Network::Address::InstanceConstSharedPtr& sourceAddress() const override { return source_address_; }; @@ -622,6 +640,13 @@ class ClusterInfoImpl : public ClusterInfo, protected Logger::Loggable<Logger::Id::upstream> { - absl::optional<ClusterTimeoutBudgetStats> timeout_budget_stats_; + const std::unique_ptr<OptionalClusterStats> optional_cluster_stats_; const uint64_t features_; const Http::Http1Settings http1_settings_; const envoy::config::core::v3::Http2ProtocolOptions http2_options_; diff --git a/test/common/http/conn_manager_impl_test.cc b/test/common/http/conn_manager_impl_test.cc index af2c7325c46f..88b6712252d9 100644 --- a/test/common/http/conn_manager_impl_test.cc +++ b/test/common/http/conn_manager_impl_test.cc @@ -64,6 +64,7 @@ using testing::InSequence; using testing::Invoke; using testing::InvokeWithoutArgs; using testing::NiceMock; +using testing::Property; using testing::Ref; using testing::Return; using testing::ReturnRef; @@ -6060,6 +6061,188 @@
TEST_F(HttpConnectionManagerImplTest, NewConnection) { EXPECT_EQ(1U, stats_.named_.downstream_cx_http3_active_.value()); } +TEST_F(HttpConnectionManagerImplTest, TestUpstreamRequestHeadersSize) { + // Test with Headers only request, No Data, No response. + setup(false, ""); + EXPECT_CALL(*codec_, dispatch(_)).WillOnce(Invoke([&](Buffer::Instance&) -> Http::Status { + RequestDecoder* decoder = &conn_manager_->newStream(response_encoder_); + RequestHeaderMapPtr headers{ + new TestRequestHeaderMapImpl{{":authority", "host"}, {":path", "/"}, {":method", "GET"}}}; + decoder->decodeHeaders(std::move(headers), true); + return Http::okStatus(); + })); + + setupFilterChain(1, 0); + + EXPECT_CALL(*decoder_filters_[0], decodeHeaders(_, true)) + .WillOnce(Return(FilterHeadersStatus::StopIteration)); + EXPECT_CALL(*decoder_filters_[0], decodeComplete()); + + std::shared_ptr> host_{ + new NiceMock()}; + filter_callbacks_.upstreamHost(host_); + + EXPECT_CALL( + host_->cluster_.request_response_size_stats_store_, + deliverHistogramToSinks(Property(&Stats::Metric::name, "upstream_rq_headers_size"), 30)); + EXPECT_CALL(host_->cluster_.request_response_size_stats_store_, + deliverHistogramToSinks(Property(&Stats::Metric::name, "upstream_rq_body_size"), 0)); + EXPECT_CALL(host_->cluster_.request_response_size_stats_store_, + deliverHistogramToSinks(Property(&Stats::Metric::name, "upstream_rs_body_size"), 0)); + + Buffer::OwnedImpl fake_input("1234"); + conn_manager_->onData(fake_input, false); +} + +TEST_F(HttpConnectionManagerImplTest, TestUpstreamRequestBodySize) { + // Test Request with Headers and Data, No response. + setup(false, ""); + EXPECT_CALL(*codec_, dispatch(_)).WillOnce(Invoke([&](Buffer::Instance&) -> Http::Status { + RequestDecoder* decoder = &conn_manager_->newStream(response_encoder_); + RequestHeaderMapPtr headers{ + new TestRequestHeaderMapImpl{{":authority", "host"}, {":path", "/"}, {":method", "GET"}}}; + decoder->decodeHeaders(std::move(headers), false); + + Buffer::OwnedImpl fake_data("12345"); + decoder->decodeData(fake_data, true); + return Http::okStatus(); + })); + + setupFilterChain(1, 0); + + EXPECT_CALL(*decoder_filters_[0], decodeHeaders(_, false)) + .WillOnce(Return(FilterHeadersStatus::StopIteration)); + EXPECT_CALL(*decoder_filters_[0], decodeData(_, true)) + .WillOnce(Return(FilterDataStatus::StopIterationNoBuffer)); + + EXPECT_CALL(*decoder_filters_[0], decodeComplete()); + + std::shared_ptr> host_{ + new NiceMock()}; + filter_callbacks_.upstreamHost(host_); + + EXPECT_CALL( + host_->cluster_.request_response_size_stats_store_, + deliverHistogramToSinks(Property(&Stats::Metric::name, "upstream_rq_headers_size"), 30)); + EXPECT_CALL(host_->cluster_.request_response_size_stats_store_, + deliverHistogramToSinks(Property(&Stats::Metric::name, "upstream_rq_body_size"), 5)); + EXPECT_CALL(host_->cluster_.request_response_size_stats_store_, + deliverHistogramToSinks(Property(&Stats::Metric::name, "upstream_rs_body_size"), 0)); + + Buffer::OwnedImpl fake_input("1234"); + conn_manager_->onData(fake_input, false); +} + +TEST_F(HttpConnectionManagerImplTest, TestUpstreamResponseHeadersSize) { + // Test with Header only response. 
+ setup(false, ""); + + EXPECT_CALL(*codec_, dispatch(_)).WillOnce(Invoke([&](Buffer::Instance&) -> Http::Status { + RequestDecoder* decoder = &conn_manager_->newStream(response_encoder_); + RequestHeaderMapPtr headers{ + new TestRequestHeaderMapImpl{{":authority", "host"}, {":path", "/"}, {":method", "GET"}}}; + decoder->decodeHeaders(std::move(headers), false); + + Buffer::OwnedImpl fake_data("1234"); + decoder->decodeData(fake_data, true); + + return Http::okStatus(); + })); + + setupFilterChain(1, 0); + + EXPECT_CALL(*decoder_filters_[0], decodeHeaders(_, false)) + .WillOnce(Return(FilterHeadersStatus::StopIteration)); + EXPECT_CALL(*decoder_filters_[0], decodeData(_, true)) + .WillOnce(Return(FilterDataStatus::StopIterationNoBuffer)); + + EXPECT_CALL(*decoder_filters_[0], decodeComplete()); + + std::shared_ptr> host_{ + new NiceMock()}; + filter_callbacks_.upstreamHost(host_); + + EXPECT_CALL( + host_->cluster_.request_response_size_stats_store_, + deliverHistogramToSinks(Property(&Stats::Metric::name, "upstream_rq_headers_size"), 30)); + + // Response headers are internally mutated and we record final response headers. + // for example in the below test case, response headers are modified as + // {':status', '200' 'date', 'Mon, 06 Jul 2020 06:08:55 GMT' 'server', ''} + // whose size is 49 instead of original response headers size 10({":status", "200"}). + EXPECT_CALL( + host_->cluster_.request_response_size_stats_store_, + deliverHistogramToSinks(Property(&Stats::Metric::name, "upstream_rs_headers_size"), 49)); + EXPECT_CALL(host_->cluster_.request_response_size_stats_store_, + deliverHistogramToSinks(Property(&Stats::Metric::name, "upstream_rq_body_size"), 4)); + EXPECT_CALL(host_->cluster_.request_response_size_stats_store_, + deliverHistogramToSinks(Property(&Stats::Metric::name, "upstream_rs_body_size"), 0)); + + Buffer::OwnedImpl fake_input("1234"); + conn_manager_->onData(fake_input, false); + + EXPECT_CALL(response_encoder_, encodeHeaders(_, true)); + expectOnDestroy(); + + decoder_filters_[0]->callbacks_->encodeHeaders( + ResponseHeaderMapPtr{new TestResponseHeaderMapImpl{{":status", "200"}}}, true); +} + +TEST_F(HttpConnectionManagerImplTest, TestUpstreamResponseBodySize) { + // Test with response headers and body. 
+ setup(false, ""); + + EXPECT_CALL(*codec_, dispatch(_)).WillOnce(Invoke([&](Buffer::Instance&) -> Http::Status { + RequestDecoder* decoder = &conn_manager_->newStream(response_encoder_); + RequestHeaderMapPtr headers{ + new TestRequestHeaderMapImpl{{":authority", "host"}, {":path", "/"}, {":method", "GET"}}}; + decoder->decodeHeaders(std::move(headers), false); + + Buffer::OwnedImpl fake_data("1234"); + decoder->decodeData(fake_data, true); + + return Http::okStatus(); + })); + + setupFilterChain(1, 0); + + EXPECT_CALL(*decoder_filters_[0], decodeHeaders(_, false)) + .WillOnce(Return(FilterHeadersStatus::StopIteration)); + EXPECT_CALL(*decoder_filters_[0], decodeData(_, true)) + .WillOnce(Return(FilterDataStatus::StopIterationNoBuffer)); + + EXPECT_CALL(*decoder_filters_[0], decodeComplete()); + + std::shared_ptr> host_{ + new NiceMock()}; + filter_callbacks_.upstreamHost(host_); + + EXPECT_CALL( + host_->cluster_.request_response_size_stats_store_, + deliverHistogramToSinks(Property(&Stats::Metric::name, "upstream_rq_headers_size"), 30)); + EXPECT_CALL( + host_->cluster_.request_response_size_stats_store_, + deliverHistogramToSinks(Property(&Stats::Metric::name, "upstream_rs_headers_size"), 49)); + EXPECT_CALL(host_->cluster_.request_response_size_stats_store_, + deliverHistogramToSinks(Property(&Stats::Metric::name, "upstream_rq_body_size"), 4)); + EXPECT_CALL(host_->cluster_.request_response_size_stats_store_, + deliverHistogramToSinks(Property(&Stats::Metric::name, "upstream_rs_body_size"), 11)); + + Buffer::OwnedImpl fake_input("1234"); + conn_manager_->onData(fake_input, false); + + EXPECT_CALL(response_encoder_, encodeHeaders(_, false)); + + decoder_filters_[0]->callbacks_->encodeHeaders( + ResponseHeaderMapPtr{new TestResponseHeaderMapImpl{{":status", "200"}}}, false); + + EXPECT_CALL(response_encoder_, encodeData(_, true)); + expectOnDestroy(); + + Buffer::OwnedImpl fake_response("hello-world"); + decoder_filters_[0]->callbacks_->encodeData(fake_response, true); +} + TEST_F(HttpConnectionManagerImplTest, HeaderOnlyRequestAndResponseUsingHttp3) { setup(false, "envoy-custom-server", false); diff --git a/test/common/upstream/upstream_impl_test.cc b/test/common/upstream/upstream_impl_test.cc index 624637edcbee..2adbf136be49 100644 --- a/test/common/upstream/upstream_impl_test.cc +++ b/test/common/upstream/upstream_impl_test.cc @@ -13,6 +13,7 @@ #include "envoy/http/codec.h" #include "envoy/stats/scope.h" #include "envoy/upstream/cluster_manager.h" +#include "envoy/upstream/upstream.h" #include "common/config/metadata.h" #include "common/network/utility.h" @@ -2411,6 +2412,63 @@ TEST_F(ClusterInfoImplTest, OneofExtensionProtocolOptionsForUnknownFilter) { "Only one of typed_extension_protocol_options or " "extension_protocol_options can be specified"); } + +TEST_F(ClusterInfoImplTest, TestTrackRequestResponseSizesNotSetInConfig) { + const std::string yaml_disabled = R"EOF( + name: name + connect_timeout: 0.25s + type: STRICT_DNS + lb_policy: ROUND_ROBIN + )EOF"; + + auto cluster = makeCluster(yaml_disabled); + // By default, histograms tracking request/response sizes are not published. 
+ EXPECT_FALSE(cluster->info()->requestResponseSizeStats().has_value()); + + const std::string yaml_disabled2 = R"EOF( + name: name + connect_timeout: 0.25s + type: STRICT_DNS + lb_policy: ROUND_ROBIN + track_cluster_stats: { timeout_budgets : true } + )EOF"; + + cluster = makeCluster(yaml_disabled2); + EXPECT_FALSE(cluster->info()->requestResponseSizeStats().has_value()); + + const std::string yaml_disabled3 = R"EOF( + name: name + connect_timeout: 0.25s + type: STRICT_DNS + lb_policy: ROUND_ROBIN + track_cluster_stats: { request_response_sizes : false } + )EOF"; + + cluster = makeCluster(yaml_disabled3); + EXPECT_FALSE(cluster->info()->requestResponseSizeStats().has_value()); +} + +TEST_F(ClusterInfoImplTest, TestTrackRequestResponseSizes) { + const std::string yaml = R"EOF( + name: name + connect_timeout: 0.25s + type: STRICT_DNS + lb_policy: ROUND_ROBIN + track_cluster_stats: { request_response_sizes : true } + )EOF"; + + auto cluster = makeCluster(yaml); + // The stats should be created. + ASSERT_TRUE(cluster->info()->requestResponseSizeStats().has_value()); + + Upstream::ClusterRequestResponseSizeStats req_resp_stats = + cluster->info()->requestResponseSizeStats()->get(); + + EXPECT_EQ(Stats::Histogram::Unit::Bytes, req_resp_stats.upstream_rq_headers_size_.unit()); + EXPECT_EQ(Stats::Histogram::Unit::Bytes, req_resp_stats.upstream_rq_body_size_.unit()); + EXPECT_EQ(Stats::Histogram::Unit::Bytes, req_resp_stats.upstream_rs_body_size_.unit()); +} + TEST_F(ClusterInfoImplTest, TestTrackRemainingResourcesGauges) { const std::string yaml = R"EOF( name: name @@ -2502,7 +2560,64 @@ TEST_F(ClusterInfoImplTest, Timeouts) { EXPECT_FALSE(cluster3->info()->idleTimeout().has_value()); } +TEST_F(ClusterInfoImplTest, TestTrackTimeoutBudgetsNotSetInConfig) { + // Check that without the flag specified, the histogram is null. + const std::string yaml_disabled = R"EOF( + name: name + connect_timeout: 0.25s + type: STRICT_DNS + lb_policy: ROUND_ROBIN + )EOF"; + + auto cluster = makeCluster(yaml_disabled); + // The stats will be null if they have not been explicitly turned on. + EXPECT_FALSE(cluster->info()->timeoutBudgetStats().has_value()); + + const std::string yaml_disabled2 = R"EOF( + name: name + connect_timeout: 0.25s + type: STRICT_DNS + lb_policy: ROUND_ROBIN + track_cluster_stats: { request_response_sizes : true } + )EOF"; + + cluster = makeCluster(yaml_disabled2); + EXPECT_FALSE(cluster->info()->timeoutBudgetStats().has_value()); + + const std::string yaml_disabled3 = R"EOF( + name: name + connect_timeout: 0.25s + type: STRICT_DNS + lb_policy: ROUND_ROBIN + track_cluster_stats: { timeout_budgets : false } + )EOF"; + + cluster = makeCluster(yaml_disabled3); + EXPECT_FALSE(cluster->info()->timeoutBudgetStats().has_value()); +} + TEST_F(ClusterInfoImplTest, TestTrackTimeoutBudgets) { + // Check that with the flag, the histogram is created. + const std::string yaml = R"EOF( + name: name + connect_timeout: 0.25s + type: STRICT_DNS + lb_policy: ROUND_ROBIN + track_cluster_stats: { timeout_budgets : true } + )EOF"; + + auto cluster = makeCluster(yaml); + // The stats should be created. 
+ ASSERT_TRUE(cluster->info()->timeoutBudgetStats().has_value()); + + Upstream::ClusterTimeoutBudgetStats tb_stats = cluster->info()->timeoutBudgetStats()->get(); + EXPECT_EQ(Stats::Histogram::Unit::Unspecified, + tb_stats.upstream_rq_timeout_budget_percent_used_.unit()); + EXPECT_EQ(Stats::Histogram::Unit::Unspecified, + tb_stats.upstream_rq_timeout_budget_per_try_percent_used_.unit()); +} + +TEST_F(ClusterInfoImplTest, DEPRECATED_FEATURE_TEST(TestTrackTimeoutBudgetsOld)) { // Check that without the flag specified, the histogram is null. const std::string yaml_disabled = R"EOF( name: name @@ -2526,9 +2641,13 @@ TEST_F(ClusterInfoImplTest, TestTrackTimeoutBudgets) { cluster = makeCluster(yaml); // The stats should be created. - EXPECT_TRUE(cluster->info()->timeoutBudgetStats().has_value()); + ASSERT_TRUE(cluster->info()->timeoutBudgetStats().has_value()); + + Upstream::ClusterTimeoutBudgetStats tb_stats = cluster->info()->timeoutBudgetStats()->get(); + EXPECT_EQ(Stats::Histogram::Unit::Unspecified, + tb_stats.upstream_rq_timeout_budget_percent_used_.unit()); EXPECT_EQ(Stats::Histogram::Unit::Unspecified, - cluster->info()->timeoutBudgetStats()->upstream_rq_timeout_budget_percent_used_.unit()); + tb_stats.upstream_rq_timeout_budget_per_try_percent_used_.unit()); } // Validates HTTP2 SETTINGS config. diff --git a/test/extensions/upstreams/http/tcp/upstream_request_test.cc b/test/extensions/upstreams/http/tcp/upstream_request_test.cc index 114648c00b00..cb5ee535357d 100644 --- a/test/extensions/upstreams/http/tcp/upstream_request_test.cc +++ b/test/extensions/upstreams/http/tcp/upstream_request_test.cc @@ -35,7 +35,8 @@ class MockRouterFilterInterface : public RouterFilterInterface { MockRouterFilterInterface() : config_("prefix.", context_, ShadowWriterPtr(new MockShadowWriter()), router_proto) { auto cluster_info = new NiceMock(); - cluster_info->timeout_budget_stats_ = absl::nullopt; + cluster_info->timeout_budget_stats_ = nullptr; + ON_CALL(*cluster_info, timeoutBudgetStats()).WillByDefault(Return(absl::nullopt)); cluster_info_.reset(cluster_info); ON_CALL(*this, callbacks()).WillByDefault(Return(&callbacks_)); ON_CALL(*this, config()).WillByDefault(ReturnRef(config_)); diff --git a/test/integration/stats_integration_test.cc b/test/integration/stats_integration_test.cc index 9b339b8e4dc1..8d4fe14d4d41 100644 --- a/test/integration/stats_integration_test.cc +++ b/test/integration/stats_integration_test.cc @@ -284,6 +284,8 @@ TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithFakeSymbolTable) { // in callback manager. // 2020/07/07 11252 44971 46000 Introduce Least Request LB active request bias config // 2020/07/15 11748 45003 46000 Stream error on invalid messaging + // 2020/07/20 11559 44747 46000 stats: add histograms for request/response headers + // and body sizes. // Note: when adjusting this value: EXPECT_MEMORY_EQ is active only in CI // 'release' builds, where we control the platform and tool-chain. So you @@ -301,7 +303,7 @@ TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithFakeSymbolTable) { // We only run the exact test for ipv6 because ipv4 in some cases may allocate a // different number of bytes. We still run the approximate test. if (ip_version_ != Network::Address::IpVersion::v6) { - EXPECT_MEMORY_EQ(m_per_cluster, 45003); + EXPECT_MEMORY_EQ(m_per_cluster, 44747); } EXPECT_MEMORY_LE(m_per_cluster, 46000); // Round up to allow platform variations. 
} @@ -357,6 +359,8 @@ TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithRealSymbolTable) { // in callback manager. // 2020/07/07 11252 37083 38000 Introduce Least Request LB active request bias config // 2020/07/15 11748 37115 38000 Stream error on invalid messaging + // 2020/07/20 11559 36859 38000 stats: add histograms for request/response headers + // and body sizes. // Note: when adjusting this value: EXPECT_MEMORY_EQ is active only in CI // 'release' builds, where we control the platform and tool-chain. So you @@ -374,7 +378,7 @@ TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithRealSymbolTable) { // We only run the exact test for ipv6 because ipv4 in some cases may allocate a // different number of bytes. We still run the approximate test. if (ip_version_ != Network::Address::IpVersion::v6) { - EXPECT_MEMORY_EQ(m_per_cluster, 37115); + EXPECT_MEMORY_EQ(m_per_cluster, 36859); } EXPECT_MEMORY_LE(m_per_cluster, 38000); // Round up to allow platform variations. } diff --git a/test/mocks/upstream/cluster_info.cc b/test/mocks/upstream/cluster_info.cc index 168395895ec7..e45cfe0bc521 100644 --- a/test/mocks/upstream/cluster_info.cc +++ b/test/mocks/upstream/cluster_info.cc @@ -42,7 +42,9 @@ MockClusterInfo::MockClusterInfo() stats_(ClusterInfoImpl::generateStats(stats_store_)), transport_socket_matcher_(new NiceMock()), load_report_stats_(ClusterInfoImpl::generateLoadReportStats(load_report_stats_store_)), - timeout_budget_stats_(absl::make_optional( + request_response_size_stats_(std::make_unique( + ClusterInfoImpl::generateRequestResponseSizeStats(request_response_size_stats_store_))), + timeout_budget_stats_(std::make_unique( ClusterInfoImpl::generateTimeoutBudgetStats(timeout_budget_stats_store_))), circuit_breakers_stats_( ClusterInfoImpl::generateCircuitBreakersStats(stats_store_, "default", true)), @@ -71,7 +73,12 @@ MockClusterInfo::MockClusterInfo() .WillByDefault( Invoke([this]() -> TransportSocketMatcher& { return *transport_socket_matcher_; })); ON_CALL(*this, loadReportStats()).WillByDefault(ReturnRef(load_report_stats_)); - ON_CALL(*this, timeoutBudgetStats()).WillByDefault(ReturnRef(timeout_budget_stats_)); + ON_CALL(*this, requestResponseSizeStats()) + .WillByDefault(Return( + std::reference_wrapper(*request_response_size_stats_))); + ON_CALL(*this, timeoutBudgetStats()) + .WillByDefault( + Return(std::reference_wrapper(*timeout_budget_stats_))); ON_CALL(*this, sourceAddress()).WillByDefault(ReturnRef(source_address_)); ON_CALL(*this, resourceManager(_)) .WillByDefault(Invoke( diff --git a/test/mocks/upstream/cluster_info.h b/test/mocks/upstream/cluster_info.h index f8bbe8363a81..e8f3d47869ca 100644 --- a/test/mocks/upstream/cluster_info.h +++ b/test/mocks/upstream/cluster_info.h @@ -119,7 +119,8 @@ class MockClusterInfo : public ClusterInfo { MOCK_METHOD(ClusterStats&, stats, (), (const)); MOCK_METHOD(Stats::Scope&, statsScope, (), (const)); MOCK_METHOD(ClusterLoadReportStats&, loadReportStats, (), (const)); - MOCK_METHOD(absl::optional&, timeoutBudgetStats, (), (const)); + MOCK_METHOD(ClusterRequestResponseSizeStatsOptRef, requestResponseSizeStats, (), (const)); + MOCK_METHOD(ClusterTimeoutBudgetStatsOptRef, timeoutBudgetStats, (), (const)); MOCK_METHOD(const Network::Address::InstanceConstSharedPtr&, sourceAddress, (), (const)); MOCK_METHOD(const LoadBalancerSubsetInfo&, lbSubsetInfo, (), (const)); MOCK_METHOD(const envoy::config::core::v3::Metadata&, metadata, (), (const)); @@ -150,8 +151,10 @@ class MockClusterInfo : public ClusterInfo { 
Upstream::TransportSocketMatcherPtr transport_socket_matcher_; NiceMock<Stats::MockIsolatedStatsStore> load_report_stats_store_; ClusterLoadReportStats load_report_stats_; + NiceMock<Stats::MockIsolatedStatsStore> request_response_size_stats_store_; + ClusterRequestResponseSizeStatsPtr request_response_size_stats_; NiceMock<Stats::MockIsolatedStatsStore> timeout_budget_stats_store_; - absl::optional<ClusterTimeoutBudgetStats> timeout_budget_stats_; + ClusterTimeoutBudgetStatsPtr timeout_budget_stats_; ClusterCircuitBreakersStats circuit_breakers_stats_; NiceMock<Runtime::MockLoader> runtime_; std::unique_ptr<Upstream::ResourceManager> resource_manager_;
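For operators following the deprecation noted in the version history: the ClusterInfoImpl constructor treats the old boolean and the new message as equivalent for timeout budget stats, so migration is a small change on each cluster entry. A sketch of the before and after (other cluster fields elided):

  # Deprecated spelling:
  track_timeout_budgets: true

  # Replacement:
  track_cluster_stats:
    timeout_budgets: true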