From a90028aa541eb12939bf75b4be386e443e784e9e Mon Sep 17 00:00:00 2001
From: Kamil Kisiela
Date: Thu, 21 Jul 2022 14:39:23 +0200
Subject: [PATCH] graphql: metrics for validation and parsing phases

---
 graph/src/components/graphql.rs  |   8 ++
 graph/src/lib.rs                 |   2 +-
 graphql/src/execution/query.rs   |   5 +-
 graphql/src/lib.rs               |   6 +-
 graphql/src/metrics.rs           | 136 +++++++++++++++++++++++++++++++
 graphql/src/runner.rs            |  74 ++++-------------
 graphql/src/store/prefetch.rs    |   6 +-
 graphql/src/store/resolver.rs    |  14 ++--
 graphql/src/subscription/mod.rs  |  27 +++---
 graphql/tests/introspection.rs   |  12 ++-
 graphql/tests/query.rs           |   4 +-
 node/src/main.rs                 |  10 +--
 server/http/src/server.rs        |  14 +---
 server/http/src/service.rs       |  92 +++++----------------
 server/http/tests/server.rs      |  28 ++++---
 server/index-node/src/service.rs |  18 +++-
 store/test-store/src/store.rs    |  11 +--
 17 files changed, 271 insertions(+), 196 deletions(-)
 create mode 100644 graphql/src/metrics.rs

diff --git a/graph/src/components/graphql.rs b/graph/src/components/graphql.rs
index dadcac02ea7..78c5472cf71 100644
--- a/graph/src/components/graphql.rs
+++ b/graph/src/components/graphql.rs
@@ -43,6 +43,14 @@ pub trait GraphQlRunner: Send + Sync + 'static {
     ) -> Result;

     fn load_manager(&self) -> Arc;
+
+    fn metrics(&self) -> Arc;
+}
+
+pub trait GraphQLMetrics: Send + Sync + 'static {
+    fn observe_query_execution(&self, duration: Duration, results: &QueryResults);
+    fn observe_query_parsing(&self, duration: Duration, results: &QueryResults);
+    fn observe_query_validation(&self, duration: Duration, id: &DeploymentHash);
 }

 #[async_trait]
diff --git a/graph/src/lib.rs b/graph/src/lib.rs
index b4435dfc9c5..451a33356cc 100644
--- a/graph/src/lib.rs
+++ b/graph/src/lib.rs
@@ -103,7 +103,7 @@ pub mod prelude {
         LightEthereumBlockExt,
     };
     pub use crate::components::graphql::{
-        GraphQlRunner, QueryLoadManager, SubscriptionResultFuture,
+        GraphQLMetrics, GraphQlRunner, QueryLoadManager, SubscriptionResultFuture,
     };
     pub use crate::components::link_resolver::{JsonStreamValue, JsonValueStream, LinkResolver};
     pub use crate::components::metrics::{
diff --git a/graphql/src/execution/query.rs b/graphql/src/execution/query.rs
index 71b7713b396..f0d4edd5b82 100644
--- a/graphql/src/execution/query.rs
+++ b/graphql/src/execution/query.rs
@@ -18,7 +18,7 @@ use graph::data::query::QueryExecutionError;
 use graph::data::query::{Query as GraphDataQuery, QueryVariables};
 use graph::data::schema::ApiSchema;
 use graph::prelude::{
-    info, o, q, r, s, warn, BlockNumber, CheapClone, Logger, TryFromValue, ENV_VARS,
+    info, o, q, r, s, warn, BlockNumber, CheapClone, GraphQLMetrics, Logger, TryFromValue, ENV_VARS,
 };

 use crate::execution::ast as a;
@@ -204,8 +204,11 @@ impl Query {
         query: GraphDataQuery,
         max_complexity: Option,
         max_depth: u8,
+        metrics: Arc,
     ) -> Result, Vec> {
+        let validation_phase_start = Instant::now();
         validate_query(logger, &query, &schema.document())?;
+        metrics.observe_query_validation(validation_phase_start.elapsed(), schema.id());

         let mut operation = None;
         let mut fragments = HashMap::new();
diff --git a/graphql/src/lib.rs b/graphql/src/lib.rs
index 2979da473e4..310f58a98d4 100644
--- a/graphql/src/lib.rs
+++ b/graphql/src/lib.rs
@@ -24,6 +24,9 @@ mod store;
 /// The external interface for actually running queries
 mod runner;

+/// Utilities for working with Prometheus.
+mod metrics;
+
 /// Prelude that exports the most important traits and types.
 pub mod prelude {
     pub use super::execution::{ast as a, ExecutionContext, Query, Resolver};
@@ -34,12 +37,13 @@ pub mod prelude {
     pub use super::subscription::SubscriptionExecutionOptions;
     pub use super::values::MaybeCoercible;

+    pub use super::metrics::GraphQLMetrics;
     pub use super::runner::GraphQlRunner;
     pub use graph::prelude::s::ObjectType;
 }

 #[cfg(debug_assertions)]
 pub mod test_support {
-    pub use super::runner::ResultSizeMetrics;
+    pub use super::metrics::GraphQLMetrics;
     pub use super::runner::INITIAL_DEPLOYMENT_STATE_FOR_TESTS;
 }
diff --git a/graphql/src/metrics.rs b/graphql/src/metrics.rs
new file mode 100644
index 00000000000..5163e8cc0ba
--- /dev/null
+++ b/graphql/src/metrics.rs
@@ -0,0 +1,136 @@
+use std::collections::HashMap;
+use std::fmt;
+use std::sync::Arc;
+use std::time::Duration;
+
+use graph::data::query::QueryResults;
+use graph::prelude::{DeploymentHash, GraphQLMetrics as GraphQLMetricsTrait, MetricsRegistry};
+use graph::prometheus::{Gauge, Histogram, HistogramVec};
+
+pub struct GraphQLMetrics {
+    query_execution_time: Box,
+    query_parsing_time: Box,
+    query_validation_time: Box,
+    query_result_size: Box,
+    query_result_size_max: Box,
+}
+
+impl fmt::Debug for GraphQLMetrics {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "GraphQLMetrics {{ }}")
+    }
+}
+
+impl GraphQLMetricsTrait for GraphQLMetrics {
+    fn observe_query_execution(&self, duration: Duration, results: &QueryResults) {
+        let id = results
+            .deployment_hash()
+            .map(|h| h.as_str())
+            .unwrap_or_else(|| {
+                if results.not_found() {
+                    "notfound"
+                } else {
+                    "unknown"
+                }
+            });
+        let status = if results.has_errors() {
+            "failed"
+        } else {
+            "success"
+        };
+        self.query_execution_time
+            .with_label_values(&[id, status])
+            .observe(duration.as_secs_f64());
+    }
+
+    fn observe_query_parsing(&self, duration: Duration, results: &QueryResults) {
+        let id = results
+            .deployment_hash()
+            .map(|h| h.as_str())
+            .unwrap_or_else(|| {
+                if results.not_found() {
+                    "notfound"
+                } else {
+                    "unknown"
+                }
+            });
+        self.query_parsing_time
+            .with_label_values(&[id])
+            .observe(duration.as_secs_f64());
+    }
+
+    fn observe_query_validation(&self, duration: Duration, id: &DeploymentHash) {
+        self.query_validation_time
+            .with_label_values(&[id.as_str()])
+            .observe(duration.as_secs_f64());
+    }
+}
+
+impl GraphQLMetrics {
+    pub fn new(registry: Arc) -> Self {
+        let query_execution_time = registry
+            .new_histogram_vec(
+                "query_execution_time",
+                "Execution time for successful GraphQL queries",
+                vec![String::from("deployment"), String::from("status")],
+                vec![0.1, 0.5, 1.0, 10.0, 100.0],
+            )
+            .expect("failed to create `query_execution_time` histogram");
+        let query_parsing_time = registry
+            .new_histogram_vec(
+                "query_parsing_time",
+                "Parsing time for GraphQL queries",
+                vec![String::from("deployment")],
+                vec![0.1, 0.5, 1.0, 10.0, 100.0],
+            )
+            .expect("failed to create `query_parsing_time` histogram");
+
+        let query_validation_time = registry
+            .new_histogram_vec(
+                "query_validation_time",
+                "Validation time for GraphQL queries",
+                vec![String::from("deployment")],
+                vec![0.1, 0.5, 1.0, 10.0, 100.0],
+            )
+            .expect("failed to create `query_validation_time` histogram");
+
+        let bins = (10..32).map(|n| 2u64.pow(n) as f64).collect::>();
+        let query_result_size = registry
+            .new_histogram(
+                "query_result_size",
+                "the size of the result of successful GraphQL queries (in CacheWeight)",
+                bins,
+            )
+            .unwrap();
+
+        let query_result_size_max = registry
+            .new_gauge(
+                "query_result_max",
size of a query result (in CacheWeight)", + HashMap::new(), + ) + .unwrap(); + + Self { + query_execution_time, + query_parsing_time, + query_validation_time, + query_result_size, + query_result_size_max, + } + } + + // Tests need to construct one of these, but normal code doesn't + #[cfg(debug_assertions)] + pub fn make(registry: Arc) -> Self { + Self::new(registry) + } + + pub fn observe_query_result_size(&self, size: usize) { + let size = size as f64; + self.query_result_size.observe(size); + if self.query_result_size_max.get() < size { + self.query_result_size_max.set(size); + } + } +} diff --git a/graphql/src/runner.rs b/graphql/src/runner.rs index 90410eae8ba..b83943159a9 100644 --- a/graphql/src/runner.rs +++ b/graphql/src/runner.rs @@ -1,17 +1,17 @@ -use std::collections::HashMap; use std::sync::Arc; use std::time::Instant; +use crate::metrics::GraphQLMetrics; use crate::prelude::{QueryExecutionOptions, StoreResolver, SubscriptionExecutionOptions}; use crate::query::execute_query; use crate::subscription::execute_prepared_subscription; use graph::prelude::MetricsRegistry; -use graph::prometheus::{Gauge, Histogram}; use graph::{ components::store::SubscriptionManager, prelude::{ - async_trait, o, CheapClone, DeploymentState, GraphQlRunner as GraphQlRunnerTrait, Logger, - Query, QueryExecutionError, Subscription, SubscriptionError, SubscriptionResult, ENV_VARS, + async_trait, o, CheapClone, DeploymentState, GraphQLMetrics as GraphQLMetricsTrait, + GraphQlRunner as GraphQlRunnerTrait, Logger, Query, QueryExecutionError, Subscription, + SubscriptionError, SubscriptionResult, ENV_VARS, }, }; use graph::{data::graphql::effort::LoadManager, prelude::QueryStoreManager}; @@ -20,59 +20,13 @@ use graph::{ prelude::QueryStore, }; -pub struct ResultSizeMetrics { - histogram: Box, - max_gauge: Box, -} - -impl ResultSizeMetrics { - fn new(registry: Arc) -> Self { - // Divide the Histogram into exponentially sized buckets between 1k and 4G - let bins = (10..32).map(|n| 2u64.pow(n) as f64).collect::>(); - let histogram = registry - .new_histogram( - "query_result_size", - "the size of the result of successful GraphQL queries (in CacheWeight)", - bins, - ) - .unwrap(); - - let max_gauge = registry - .new_gauge( - "query_result_max", - "the maximum size of a query result (in CacheWeight)", - HashMap::new(), - ) - .unwrap(); - - Self { - histogram, - max_gauge, - } - } - - // Tests need to construct one of these, but normal code doesn't - #[cfg(debug_assertions)] - pub fn make(registry: Arc) -> Self { - Self::new(registry) - } - - pub fn observe(&self, size: usize) { - let size = size as f64; - self.histogram.observe(size); - if self.max_gauge.get() < size { - self.max_gauge.set(size); - } - } -} - /// GraphQL runner implementation for The Graph. 
 pub struct GraphQlRunner {
     logger: Logger,
     store: Arc,
     subscription_manager: Arc,
     load_manager: Arc,
-    result_size: Arc,
+    graphql_metrics: Arc,
 }

 #[cfg(debug_assertions)]
@@ -95,13 +49,13 @@ where
         registry: Arc,
     ) -> Self {
         let logger = logger.new(o!("component" => "GraphQlRunner"));
-        let result_size = Arc::new(ResultSizeMetrics::new(registry));
+        let graphql_metrics = Arc::new(GraphQLMetrics::new(registry));
         GraphQlRunner {
             logger,
             store,
             subscription_manager,
             load_manager,
-            result_size,
+            graphql_metrics,
         }
     }

@@ -143,7 +97,7 @@ where
         max_depth: Option,
         max_first: Option,
         max_skip: Option,
-        result_size: Arc,
+        metrics: Arc,
     ) -> Result {
         // We need to use the same `QueryStore` for the entire query to ensure
         // we have a consistent view if the world, even when replicas, which
@@ -175,6 +129,7 @@ where
             query,
             max_complexity,
             max_depth,
+            metrics.cheap_clone(),
         )?;
         self.load_manager
             .decide(
@@ -197,7 +152,7 @@ where
                 bc,
                 error_policy,
                 query.schema.id().clone(),
-                result_size.cheap_clone(),
+                metrics.cheap_clone(),
             )
             .await?;
             max_block = max_block.max(resolver.block_number());
@@ -259,7 +214,7 @@ where
             max_depth,
             max_first,
             max_skip,
-            self.result_size.cheap_clone(),
+            self.graphql_metrics.clone(),
         )
         .await
         .unwrap_or_else(|e| e)
@@ -281,6 +236,7 @@ where
             subscription.query,
             ENV_VARS.graphql.max_complexity,
             ENV_VARS.graphql.max_depth,
+            self.graphql_metrics.cheap_clone(),
         )?;

         if let Err(err) = self
@@ -306,7 +262,7 @@ where
                 max_depth: ENV_VARS.graphql.max_depth,
                 max_first: ENV_VARS.graphql.max_first,
                 max_skip: ENV_VARS.graphql.max_skip,
-                result_size: self.result_size.clone(),
+                graphql_metrics: self.graphql_metrics.clone(),
             },
         )
     }
@@ -314,4 +270,8 @@ where
     fn load_manager(&self) -> Arc {
         self.load_manager.clone()
     }
+
+    fn metrics(&self) -> Arc {
+        self.graphql_metrics.clone()
+    }
 }
diff --git a/graphql/src/store/prefetch.rs b/graphql/src/store/prefetch.rs
index 0866ca9bf3d..a4e0e122da9 100644
--- a/graphql/src/store/prefetch.rs
+++ b/graphql/src/store/prefetch.rs
@@ -24,7 +24,7 @@ use graph::{
 };

 use crate::execution::{ast as a, ExecutionContext, Resolver};
-use crate::runner::ResultSizeMetrics;
+use crate::metrics::GraphQLMetrics;
 use crate::schema::ast as sast;
 use crate::store::query::build_query;
 use crate::store::StoreResolver;
@@ -480,10 +480,10 @@ pub fn run(
     resolver: &StoreResolver,
     ctx: &ExecutionContext,
     selection_set: &a::SelectionSet,
-    result_size: &ResultSizeMetrics,
+    graphql_metrics: &GraphQLMetrics,
 ) -> Result> {
     execute_root_selection_set(resolver, ctx, selection_set).map(|nodes| {
-        result_size.observe(nodes.weight());
+        graphql_metrics.observe_query_result_size(nodes.weight());
         let obj = Object::from_iter(
             nodes
                 .into_iter()
diff --git a/graphql/src/store/resolver.rs b/graphql/src/store/resolver.rs
index a9781e9ac5c..75ff96b4739 100644
--- a/graphql/src/store/resolver.rs
+++ b/graphql/src/store/resolver.rs
@@ -11,8 +11,8 @@ use graph::prelude::*;
 use graph::{components::store::*, data::schema::BLOCK_FIELD_TYPE};

 use crate::execution::ast as a;
+use crate::metrics::GraphQLMetrics;
 use crate::query::ext::BlockConstraint;
-use crate::runner::ResultSizeMetrics;
 use crate::schema::ast as sast;
 use crate::{prelude::*, schema::api::ErrorPolicy};

@@ -29,7 +29,7 @@ pub struct StoreResolver {
     deployment: DeploymentHash,
     has_non_fatal_errors: bool,
     error_policy: ErrorPolicy,
-    result_size: Arc,
+    graphql_metrics: Arc,
 }

 impl CheapClone for StoreResolver {}
@@ -44,7 +44,7 @@ impl StoreResolver {
         deployment: DeploymentHash,
         store: Arc,
         subscription_manager: Arc,
-        result_size: Arc,
+        graphql_metrics: Arc,
     ) -> Self {
         StoreResolver {
             logger: logger.new(o!("component" => "StoreResolver")),
@@ -56,7 +56,7 @@ impl StoreResolver {
             // Checking for non-fatal errors does not work with subscriptions.
             has_non_fatal_errors: false,
             error_policy: ErrorPolicy::Deny,
-            result_size,
+            graphql_metrics,
         }
     }

@@ -73,7 +73,7 @@ impl StoreResolver {
         bc: BlockConstraint,
         error_policy: ErrorPolicy,
         deployment: DeploymentHash,
-        result_size: Arc,
+        graphql_metrics: Arc,
     ) -> Result {
         let store_clone = store.cheap_clone();
         let block_ptr = Self::locate_block(store_clone.as_ref(), bc, state).await?;
@@ -90,7 +90,7 @@ impl StoreResolver {
             deployment,
             has_non_fatal_errors,
             error_policy,
-            result_size,
+            graphql_metrics,
         };
         Ok(resolver)
     }
@@ -221,7 +221,7 @@ impl Resolver for StoreResolver {
         ctx: &ExecutionContext,
         selection_set: &a::SelectionSet,
     ) -> Result, Vec> {
-        super::prefetch::run(self, ctx, selection_set, &self.result_size).map(Some)
+        super::prefetch::run(self, ctx, selection_set, &self.graphql_metrics).map(Some)
     }

     fn resolve_objects(
diff --git a/graphql/src/subscription/mod.rs b/graphql/src/subscription/mod.rs
index 6f6dbe7cff5..a6d405dbc0c 100644
--- a/graphql/src/subscription/mod.rs
+++ b/graphql/src/subscription/mod.rs
@@ -4,7 +4,7 @@ use std::time::{Duration, Instant};
 use graph::components::store::UnitStream;
 use graph::{components::store::SubscriptionManager, prelude::*};

-use crate::runner::ResultSizeMetrics;
+use crate::metrics::GraphQLMetrics;
 use crate::{
     execution::ast as a,
     execution::*,
@@ -37,7 +37,7 @@ pub struct SubscriptionExecutionOptions {
     /// Maximum value for the `skip` argument.
     pub max_skip: u32,

-    pub result_size: Arc,
+    pub graphql_metrics: Arc,
 }

 pub fn execute_subscription(
@@ -52,6 +52,7 @@ pub fn execute_subscription(
         subscription.query,
         options.max_complexity,
         options.max_depth,
+        options.graphql_metrics.cheap_clone(),
     )?;
     execute_prepared_subscription(query, options)
 }
@@ -86,7 +87,7 @@ fn create_source_event_stream(
         query.schema.id().clone(),
         options.store.clone(),
         options.subscription_manager.cheap_clone(),
-        options.result_size.cheap_clone(),
+        options.graphql_metrics.cheap_clone(),
     );
     let ctx = ExecutionContext {
         logger: options.logger.cheap_clone(),
@@ -152,7 +153,7 @@ fn map_source_to_response_stream(
         max_depth: _,
         max_first,
         max_skip,
-        result_size,
+        graphql_metrics,
     } = options;

     trigger_stream
@@ -166,7 +167,7 @@ fn map_source_to_response_stream(
                 timeout,
                 max_first,
                 max_skip,
-                result_size.cheap_clone(),
+                graphql_metrics.cheap_clone(),
             )
             .boxed()
         })
@@ -181,14 +182,14 @@ async fn execute_subscription_event(
     timeout: Option,
     max_first: u32,
     max_skip: u32,
-    result_size: Arc,
+    metrics: Arc,
 ) -> Arc {
     async fn make_resolver(
         store: Arc,
         logger: &Logger,
         subscription_manager: Arc,
         query: &Arc,
-        result_size: Arc,
+        metrics: Arc,
     ) -> Result {
         let state = store.deployment_state().await?;
         StoreResolver::at_block(
@@ -199,16 +200,16 @@ async fn execute_subscription_event(
             BlockConstraint::Latest,
             ErrorPolicy::Deny,
             query.schema.id().clone(),
-            result_size,
+            metrics,
         )
         .await
     }

-    let resolver =
-        match make_resolver(store, &logger, subscription_manager, &query, result_size).await {
-            Ok(resolver) => resolver,
-            Err(e) => return Arc::new(e.into()),
-        };
+    let resolver = match make_resolver(store, &logger, subscription_manager, &query, metrics).await
+    {
+        Ok(resolver) => resolver,
+        Err(e) => return Arc::new(e.into()),
+    };

     let block_ptr = resolver.block_ptr.clone();

diff --git a/graphql/tests/introspection.rs b/graphql/tests/introspection.rs
index d72f05f99a5..06283ab0f22 100644
--- a/graphql/tests/introspection.rs
+++ b/graphql/tests/introspection.rs
@@ -12,6 +12,7 @@ use graph_graphql::prelude::{
     a, api_schema, execute_query, ExecutionContext, Query as PreparedQuery, QueryExecutionOptions,
     Resolver,
 };
+use test_store::graphql_metrics;
 use test_store::LOAD_MANAGER;

 /// Mock resolver used in tests that don't need a resolver.
@@ -571,10 +572,13 @@ async fn introspection_query(schema: Schema, query: &str) -> QueryResult {
     };

     let schema = Arc::new(ApiSchema::from_api_schema(schema).unwrap());
-    let result = match PreparedQuery::new(&logger, schema, None, query, None, 100) {
-        Ok(query) => Ok(Arc::try_unwrap(execute_query(query, None, None, options).await).unwrap()),
-        Err(e) => Err(e),
-    };
+    let result =
+        match PreparedQuery::new(&logger, schema, None, query, None, 100, graphql_metrics()) {
+            Ok(query) => {
+                Ok(Arc::try_unwrap(execute_query(query, None, None, options).await).unwrap())
+            }
+            Err(e) => Err(e),
+        };

     QueryResult::from(result)
 }
diff --git a/graphql/tests/query.rs b/graphql/tests/query.rs
index 125f1956c61..8681a5f2cc3 100644
--- a/graphql/tests/query.rs
+++ b/graphql/tests/query.rs
@@ -33,7 +33,7 @@ use graph::{
 };
 use graph_graphql::{prelude::*, subscription::execute_subscription};
 use test_store::{
-    deployment_state, execute_subgraph_query_with_deadline, result_size_metrics, revert_block,
+    deployment_state, execute_subgraph_query_with_deadline, graphql_metrics, revert_block,
     run_test_sequentially, transact_errors, Store, BLOCK_ONE, GENESIS_PTR, LOAD_MANAGER, LOGGER,
     METRICS_REGISTRY, STORE, SUBSCRIPTION_MANAGER,
 };
@@ -405,7 +405,7 @@ async fn run_subscription(
         max_depth: 100,
         max_first: std::u32::MAX,
         max_skip: std::u32::MAX,
-        result_size: result_size_metrics(),
+        graphql_metrics: graphql_metrics(),
     };

     let schema = STORE.subgraph_store().api_schema(&deployment.hash).unwrap();
diff --git a/node/src/main.rs b/node/src/main.rs
index 23e37429ccf..ce2470073cc 100644
--- a/node/src/main.rs
+++ b/node/src/main.rs
@@ -329,14 +329,10 @@ async fn main() {
         network_store.clone(),
         subscription_manager.clone(),
         load_manager,
-        metrics_registry.clone(),
-    ));
-    let mut graphql_server = GraphQLQueryServer::new(
-        &logger_factory,
         graphql_metrics_registry,
-        graphql_runner.clone(),
-        node_id.clone(),
-    );
+    ));
+    let mut graphql_server =
+        GraphQLQueryServer::new(&logger_factory, graphql_runner.clone(), node_id.clone());

     let subscription_server =
         GraphQLSubscriptionServer::new(&logger, graphql_runner.clone(), network_store.clone());
diff --git a/server/http/src/server.rs b/server/http/src/server.rs
index 24133cb0667..e605f798c2c 100644
--- a/server/http/src/server.rs
+++ b/server/http/src/server.rs
@@ -3,7 +3,7 @@ use std::net::{Ipv4Addr, SocketAddrV4};
 use hyper::service::make_service_fn;
 use hyper::Server;

-use crate::service::{GraphQLService, GraphQLServiceMetrics};
+use crate::service::GraphQLService;
 use graph::prelude::{GraphQLServer as GraphQLServerTrait, *};
 use thiserror::Error;

@@ -23,19 +23,13 @@ impl From for GraphQLServeError {
 /// A GraphQL server based on Hyper.
 pub struct GraphQLServer {
     logger: Logger,
-    metrics: Arc,
     graphql_runner: Arc,
     node_id: NodeId,
 }

 impl GraphQLServer {
     /// Creates a new GraphQL server.
-    pub fn new(
-        logger_factory: &LoggerFactory,
-        metrics_registry: Arc,
-        graphql_runner: Arc,
-        node_id: NodeId,
-    ) -> Self {
+    pub fn new(logger_factory: &LoggerFactory, graphql_runner: Arc, node_id: NodeId) -> Self {
         let logger = logger_factory.component_logger(
             "GraphQLServer",
             Some(ComponentLoggerConfig {
                 }),
             }),
         );
-        let metrics = Arc::new(GraphQLServiceMetrics::new(metrics_registry));
         GraphQLServer {
             logger,
-            metrics,
             graphql_runner,
             node_id,
         }
     }
@@ -78,12 +70,10 @@ where
         // incoming queries to the query sink.
         let logger_for_service = self.logger.clone();
         let graphql_runner = self.graphql_runner.clone();
-        let metrics = self.metrics.clone();
         let node_id = self.node_id.clone();
         let new_service = make_service_fn(move |_| {
             futures03::future::ok::<_, Error>(GraphQLService::new(
                 logger_for_service.clone(),
-                metrics.clone(),
                 graphql_runner.clone(),
                 ws_port,
                 node_id.clone(),
diff --git a/server/http/src/service.rs b/server/http/src/service.rs
index d8ac4b50f29..588f76ffb5b 100644
--- a/server/http/src/service.rs
+++ b/server/http/src/service.rs
@@ -1,11 +1,9 @@
 use std::convert::TryFrom;
-use std::fmt;
 use std::pin::Pin;
 use std::task::Context;
 use std::task::Poll;
 use std::time::Instant;

-use graph::data::query::QueryResults;
 use graph::prelude::*;
 use graph::{components::server::query::GraphQLServerError, data::query::QueryTarget};
 use http::header;
@@ -18,54 +16,6 @@ use hyper::{Body, Method, Request, Response, StatusCode};

 use crate::request::parse_graphql_request;

-pub struct GraphQLServiceMetrics {
-    query_execution_time: Box,
-}
-
-impl fmt::Debug for GraphQLServiceMetrics {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "GraphQLServiceMetrics {{ }}")
-    }
-}
-
-impl GraphQLServiceMetrics {
-    pub fn new(registry: Arc) -> Self {
-        let query_execution_time = registry
-            .new_histogram_vec(
-                "query_execution_time",
-                "Execution time for successful GraphQL queries",
-                vec![String::from("deployment"), String::from("status")],
-                vec![0.1, 0.5, 1.0, 10.0, 100.0],
-            )
-            .expect("failed to create `query_execution_time` histogram");
-
-        Self {
-            query_execution_time,
-        }
-    }
-
-    pub fn observe_query(&self, duration: Duration, results: &QueryResults) {
-        let id = results
-            .deployment_hash()
-            .map(|h| h.as_str())
-            .unwrap_or_else(|| {
-                if results.not_found() {
-                    "notfound"
-                } else {
-                    "unknown"
-                }
-            });
-        let status = if results.has_errors() {
-            "failed"
-        } else {
-            "success"
-        };
-        self.query_execution_time
-            .with_label_values(&[id, status])
-            .observe(duration.as_secs_f64());
-    }
-}
-
 pub type GraphQLServiceResult = Result, GraphQLServerError>;
 /// An asynchronous response to a GraphQL request.
 pub type GraphQLServiceResponse =
@@ -75,7 +25,6 @@ pub type GraphQLServiceResponse =
 #[derive(Debug)]
 pub struct GraphQLService {
     logger: Logger,
-    metrics: Arc,
     graphql_runner: Arc,
     ws_port: u16,
     node_id: NodeId,
 }

 impl Clone for GraphQLService {
     fn clone(&self) -> Self {
         Self {
             logger: self.logger.clone(),
-            metrics: self.metrics.clone(),
             graphql_runner: self.graphql_runner.clone(),
             ws_port: self.ws_port,
             node_id: self.node_id.clone(),
         }
     }
 }

 impl GraphQLService
 where
     Q: GraphQlRunner,
 {
     /// Creates a new GraphQL service.
-    pub fn new(
-        logger: Logger,
-        metrics: Arc,
-        graphql_runner: Arc,
-        ws_port: u16,
-        node_id: NodeId,
-    ) -> Self {
+    pub fn new(logger: Logger, graphql_runner: Arc, ws_port: u16, node_id: NodeId) -> Self {
         GraphQLService {
             logger,
-            metrics,
             graphql_runner,
             ws_port,
             node_id,
         }
     }
@@ -182,13 +123,13 @@ where
         request_body: Body,
     ) -> GraphQLServiceResult {
         let service = self.clone();
-        let service_metrics = self.metrics.clone();
         let start = Instant::now();
         let body = hyper::body::to_bytes(request_body)
             .map_err(|_| GraphQLServerError::InternalError("Failed to read request body".into()))
             .await?;
         let query = parse_graphql_request(&body);
+        let query_parsing_time = start.elapsed();

         let result = match query {
             Ok(query) => service.graphql_runner.run_query(query, target).await,
             Err(GraphQLServerError::QueryError(e)) => QueryResults::from(e),
             Err(e) => return Err(e),
         };

-        service_metrics.observe_query(start.elapsed(), &result);
+        self.graphql_runner
+            .metrics()
+            .observe_query_parsing(query_parsing_time, &result);
+        self.graphql_runner
+            .metrics()
+            .observe_query_execution(start.elapsed(), &result);

         Ok(result.as_http_response())
     }
@@ -369,20 +315,26 @@ mod tests {
         query::{QueryResults, QueryTarget},
     };
     use graph::prelude::*;
-    use graph_mock::MockMetricsRegistry;

     use crate::test_utils;

     use super::GraphQLService;
-    use super::GraphQLServiceMetrics;

     /// A simple stupid query runner for testing.
     pub struct TestGraphQlRunner;

+    pub struct TestGraphQLMetrics;
+
     lazy_static! {
         static ref USERS: DeploymentHash = DeploymentHash::new("users").unwrap();
     }

+    impl GraphQLMetrics for TestGraphQLMetrics {
+        fn observe_query_execution(&self, _duration: Duration, _results: &QueryResults) {}
+        fn observe_query_parsing(&self, _duration: Duration, _results: &QueryResults) {}
+        fn observe_query_validation(&self, _duration: Duration, _id: &DeploymentHash) {}
+    }
+
     #[async_trait]
     impl GraphQlRunner for TestGraphQlRunner {
         async fn run_query_with_complexity(
@@ -418,18 +370,20 @@ mod tests {
         fn load_manager(&self) -> Arc {
             unimplemented!()
         }
+
+        fn metrics(&self) -> Arc {
+            Arc::new(TestGraphQLMetrics)
+        }
     }

     #[test]
     fn posting_invalid_query_yields_error_response() {
         let logger = Logger::root(slog::Discard, o!());
-        let metrics_registry = Arc::new(MockMetricsRegistry::new());
-        let metrics = Arc::new(GraphQLServiceMetrics::new(metrics_registry));
         let subgraph_id = USERS.clone();
         let graphql_runner = Arc::new(TestGraphQlRunner);
         let node_id = NodeId::new("test").unwrap();
-        let mut service = GraphQLService::new(logger, metrics, graphql_runner, 8001, node_id);
+        let mut service = GraphQLService::new(logger, graphql_runner, 8001, node_id);

         let request = Request::builder()
             .method(Method::POST)
@@ -455,13 +409,11 @@ mod tests {
     #[tokio::test(flavor = "multi_thread")]
     async fn posting_valid_queries_yields_result_response() {
         let logger = Logger::root(slog::Discard, o!());
-        let metrics_registry = Arc::new(MockMetricsRegistry::new());
-        let metrics = Arc::new(GraphQLServiceMetrics::new(metrics_registry));
         let subgraph_id = USERS.clone();
         let graphql_runner = Arc::new(TestGraphQlRunner);
         let node_id = NodeId::new("test").unwrap();
-        let mut service = GraphQLService::new(logger, metrics, graphql_runner, 8001, node_id);
+        let mut service = GraphQLService::new(logger, graphql_runner, 8001, node_id);

         let request = Request::builder()
             .method(Method::POST)
diff --git a/server/http/tests/server.rs b/server/http/tests/server.rs
index cde7b3d4c95..856a4e9aac2 100644
--- a/server/http/tests/server.rs
+++ b/server/http/tests/server.rs
@@ -14,6 +14,14 @@
 use graph_server_http::GraphQLServer as HyperGraphQLServer;
 use tokio::time::sleep;

+pub struct TestGraphQLMetrics;
+
+impl GraphQLMetrics for TestGraphQLMetrics {
+    fn observe_query_execution(&self, _duration: Duration, _results: &QueryResults) {}
+    fn observe_query_parsing(&self, _duration: Duration, _results: &QueryResults) {}
+    fn observe_query_validation(&self, _duration: Duration, _id: &DeploymentHash) {}
+}
+
 /// A simple stupid query runner for testing.
 pub struct TestGraphQlRunner;
@@ -72,12 +80,15 @@ impl GraphQlRunner for TestGraphQlRunner {
     fn load_manager(&self) -> Arc {
         unimplemented!()
     }
+
+    fn metrics(&self) -> Arc {
+        Arc::new(TestGraphQLMetrics)
+    }
 }

 #[cfg(test)]
 mod test {
     use super::*;
-    use graph_mock::MockMetricsRegistry;

     lazy_static! {
         static ref USERS: DeploymentHash = DeploymentHash::new("users").unwrap();
     }
@@ -90,11 +101,10 @@ mod test {
             .block_on(async {
                 let logger = Logger::root(slog::Discard, o!());
                 let logger_factory = LoggerFactory::new(logger, None);
-                let metrics_registry = Arc::new(MockMetricsRegistry::new());
                 let id = USERS.clone();
                 let query_runner = Arc::new(TestGraphQlRunner);
                 let node_id = NodeId::new("test").unwrap();
-                let mut server = HyperGraphQLServer::new(&logger_factory, metrics_registry, query_runner, node_id);
+                let mut server = HyperGraphQLServer::new(&logger_factory, query_runner, node_id);
                 let http_server = server
                     .serve(8007, 8008)
                     .expect("Failed to start GraphQL server");
@@ -132,12 +142,10 @@ mod test {
         runtime.block_on(async {
             let logger = Logger::root(slog::Discard, o!());
             let logger_factory = LoggerFactory::new(logger, None);
-            let metrics_registry = Arc::new(MockMetricsRegistry::new());
             let id = USERS.clone();
             let query_runner = Arc::new(TestGraphQlRunner);
             let node_id = NodeId::new("test").unwrap();
-            let mut server =
-                HyperGraphQLServer::new(&logger_factory, metrics_registry, query_runner, node_id);
+            let mut server = HyperGraphQLServer::new(&logger_factory, query_runner, node_id);
             let http_server = server
                 .serve(8002, 8003)
                 .expect("Failed to start GraphQL server");
@@ -214,12 +222,10 @@ mod test {
         runtime.block_on(async {
             let logger = Logger::root(slog::Discard, o!());
             let logger_factory = LoggerFactory::new(logger, None);
-            let metrics_registry = Arc::new(MockMetricsRegistry::new());
             let id = USERS.clone();
             let query_runner = Arc::new(TestGraphQlRunner);
             let node_id = NodeId::new("test").unwrap();
-            let mut server =
-                HyperGraphQLServer::new(&logger_factory, metrics_registry, query_runner, node_id);
+            let mut server = HyperGraphQLServer::new(&logger_factory, query_runner, node_id);
             let http_server = server
                 .serve(8003, 8004)
                 .expect("Failed to start GraphQL server");
@@ -261,12 +267,10 @@ mod test {
         let _ = runtime.block_on(async {
             let logger = Logger::root(slog::Discard, o!());
             let logger_factory = LoggerFactory::new(logger, None);
-            let metrics_registry = Arc::new(MockMetricsRegistry::new());
             let id = USERS.clone();
             let query_runner = Arc::new(TestGraphQlRunner);
             let node_id = NodeId::new("test").unwrap();
-            let mut server =
-                HyperGraphQLServer::new(&logger_factory, metrics_registry, query_runner, node_id);
+            let mut server = HyperGraphQLServer::new(&logger_factory, query_runner, node_id);
             let http_server = server
                 .serve(8005, 8006)
                 .expect("Failed to start GraphQL server");
diff --git a/server/index-node/src/service.rs b/server/index-node/src/service.rs
index 96931647f1f..81ae88d113d 100644
--- a/server/index-node/src/service.rs
+++ b/server/index-node/src/service.rs
@@ -22,6 +22,14 @@ use crate::explorer::Explorer;
 use crate::resolver::IndexNodeResolver;
 use crate::schema::SCHEMA;

+struct NoopGraphQLMetrics;
+
+impl GraphQLMetrics for NoopGraphQLMetrics {
+    fn observe_query_execution(&self, _duration: Duration, _results: &QueryResults) {}
+    fn observe_query_parsing(&self, _duration: Duration, _results: &QueryResults) {}
+    fn observe_query_validation(&self, _duration: Duration, _id: &DeploymentHash) {}
+}
+
 /// An asynchronous response to a GraphQL request.
 pub type IndexNodeServiceResponse = DynTryFuture<'static, Response, GraphQLServerError>;

@@ -120,7 +128,15 @@ where
         let validated = ValidatedRequest::new(body, &req_parts.headers)?;
         let query = validated.query;

-        let query = match PreparedQuery::new(&self.logger, schema, None, query, None, 100) {
+        let query = match PreparedQuery::new(
+            &self.logger,
+            schema,
+            None,
+            query,
+            None,
+            100,
+            Arc::new(NoopGraphQLMetrics),
+        ) {
             Ok(query) => query,
             Err(e) => return Ok(QueryResults::from(QueryResult::from(e)).as_http_response()),
         };
diff --git a/store/test-store/src/store.rs b/store/test-store/src/store.rs
index ae68bdf18ef..350e6f90326 100644
--- a/store/test-store/src/store.rs
+++ b/store/test-store/src/store.rs
@@ -15,7 +15,7 @@ use graph::{
 };
 use graph_graphql::prelude::{
     execute_query, Query as PreparedQuery, QueryExecutionOptions, StoreResolver,
 };
-use graph_graphql::test_support::ResultSizeMetrics;
+use graph_graphql::test_support::GraphQLMetrics;
 use graph_mock::MockMetricsRegistry;
 use graph_node::config::{Config, Opt};
 use graph_node::store_builder::StoreBuilder;
@@ -410,8 +410,8 @@ macro_rules! return_err {
     };
 }

-pub fn result_size_metrics() -> Arc {
-    Arc::new(ResultSizeMetrics::make(METRICS_REGISTRY.clone()))
+pub fn graphql_metrics() -> Arc {
+    Arc::new(GraphQLMetrics::make(METRICS_REGISTRY.clone()))
 }

 async fn execute_subgraph_query_internal(
@@ -439,7 +439,8 @@ async fn execute_subgraph_query_internal(
         network,
         query,
         max_complexity,
-        100
+        100,
+        graphql_metrics(),
     ));
     let mut result = QueryResults::empty();
     let deployment = query.schema.id().clone();
@@ -460,7 +461,7 @@ async fn execute_subgraph_query_internal(
             bc,
             error_policy,
             query.schema.id().clone(),
-            result_size_metrics()
+            graphql_metrics()
         )
         .await
     );