Skip to content

Commit 38c60e6

Browse files
authored
graphql: metrics for validation and parsing phases (#3760)
1 parent 782efc6 commit 38c60e6

File tree

17 files changed

+271
-196
lines changed

17 files changed

+271
-196
lines changed

graph/src/components/graphql.rs

+8
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,14 @@ pub trait GraphQlRunner: Send + Sync + 'static {
4343
) -> Result<SubscriptionResult, SubscriptionError>;
4444

4545
fn load_manager(&self) -> Arc<LoadManager>;
46+
47+
fn metrics(&self) -> Arc<dyn GraphQLMetrics>;
48+
}
49+
50+
/// Sink for timing measurements taken around the phases of a GraphQL query.
///
/// Implementors must be `Send + Sync + 'static`; the runner trait hands them
/// out as `Arc<dyn GraphQLMetrics>`.
pub trait GraphQLMetrics: Send + Sync + 'static {
51+
/// Records that the execution phase took `duration` and produced `results`.
fn observe_query_execution(&self, duration: Duration, results: &QueryResults);
52+
/// Records that the parsing phase took `duration`; `results` is passed along
/// so the implementation can inspect the query outcome.
fn observe_query_parsing(&self, duration: Duration, results: &QueryResults);
53+
/// Records that the validation phase took `duration` for the deployment `id`.
fn observe_query_validation(&self, duration: Duration, id: &DeploymentHash);
4654
}
4755

4856
#[async_trait]

graph/src/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ pub mod prelude {
103103
LightEthereumBlockExt,
104104
};
105105
pub use crate::components::graphql::{
106-
GraphQlRunner, QueryLoadManager, SubscriptionResultFuture,
106+
GraphQLMetrics, GraphQlRunner, QueryLoadManager, SubscriptionResultFuture,
107107
};
108108
pub use crate::components::link_resolver::{JsonStreamValue, JsonValueStream, LinkResolver};
109109
pub use crate::components::metrics::{

graphql/src/execution/query.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ use graph::data::query::QueryExecutionError;
1818
use graph::data::query::{Query as GraphDataQuery, QueryVariables};
1919
use graph::data::schema::ApiSchema;
2020
use graph::prelude::{
21-
info, o, q, r, s, warn, BlockNumber, CheapClone, Logger, TryFromValue, ENV_VARS,
21+
info, o, q, r, s, warn, BlockNumber, CheapClone, GraphQLMetrics, Logger, TryFromValue, ENV_VARS,
2222
};
2323

2424
use crate::execution::ast as a;
@@ -204,8 +204,11 @@ impl Query {
204204
query: GraphDataQuery,
205205
max_complexity: Option<u64>,
206206
max_depth: u8,
207+
metrics: Arc<dyn GraphQLMetrics>,
207208
) -> Result<Arc<Self>, Vec<QueryExecutionError>> {
209+
let validation_phase_start = Instant::now();
208210
validate_query(logger, &query, &schema.document())?;
211+
metrics.observe_query_validation(validation_phase_start.elapsed(), schema.id());
209212

210213
let mut operation = None;
211214
let mut fragments = HashMap::new();

graphql/src/lib.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ mod store;
2424
/// The external interface for actually running queries
2525
mod runner;
2626

27+
/// Utilities for working with Prometheus.
28+
mod metrics;
29+
2730
/// Prelude that exports the most important traits and types.
2831
pub mod prelude {
2932
pub use super::execution::{ast as a, ExecutionContext, Query, Resolver};
@@ -34,12 +37,13 @@ pub mod prelude {
3437
pub use super::subscription::SubscriptionExecutionOptions;
3538
pub use super::values::MaybeCoercible;
3639

40+
pub use super::metrics::GraphQLMetrics;
3741
pub use super::runner::GraphQlRunner;
3842
pub use graph::prelude::s::ObjectType;
3943
}
4044

4145
#[cfg(debug_assertions)]
4246
pub mod test_support {
43-
pub use super::runner::ResultSizeMetrics;
47+
pub use super::metrics::GraphQLMetrics;
4448
pub use super::runner::INITIAL_DEPLOYMENT_STATE_FOR_TESTS;
4549
}

graphql/src/metrics.rs

+136
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
use std::collections::HashMap;
2+
use std::fmt;
3+
use std::sync::Arc;
4+
use std::time::Duration;
5+
6+
use graph::data::query::QueryResults;
7+
use graph::prelude::{DeploymentHash, GraphQLMetrics as GraphQLMetricsTrait, MetricsRegistry};
8+
use graph::prometheus::{Gauge, Histogram, HistogramVec};
9+
10+
/// Prometheus metrics collected while serving GraphQL queries.
///
/// All fields are created and registered in `GraphQLMetrics::new`.
pub struct GraphQLMetrics {
11+
/// Histogram `query_execution_time`, labelled by `deployment` and `status`.
query_execution_time: Box<HistogramVec>,
12+
/// Histogram `query_parsing_time`, labelled by `deployment`.
query_parsing_time: Box<HistogramVec>,
13+
/// Histogram `query_validation_time`, labelled by `deployment`.
query_validation_time: Box<HistogramVec>,
14+
/// Histogram `query_result_size` of observed result sizes (in CacheWeight).
query_result_size: Box<Histogram>,
15+
/// Gauge `query_result_max` holding the largest result size observed so far.
query_result_size_max: Box<Gauge>,
16+
}
17+
18+
impl fmt::Debug for GraphQLMetrics {
19+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
20+
write!(f, "GraphQLMetrics {{ }}")
21+
}
22+
}
23+
24+
impl GraphQLMetricsTrait for GraphQLMetrics {
25+
fn observe_query_execution(&self, duration: Duration, results: &QueryResults) {
26+
let id = results
27+
.deployment_hash()
28+
.map(|h| h.as_str())
29+
.unwrap_or_else(|| {
30+
if results.not_found() {
31+
"notfound"
32+
} else {
33+
"unknown"
34+
}
35+
});
36+
let status = if results.has_errors() {
37+
"failed"
38+
} else {
39+
"success"
40+
};
41+
self.query_execution_time
42+
.with_label_values(&[id, status])
43+
.observe(duration.as_secs_f64());
44+
}
45+
46+
fn observe_query_parsing(&self, duration: Duration, results: &QueryResults) {
47+
let id = results
48+
.deployment_hash()
49+
.map(|h| h.as_str())
50+
.unwrap_or_else(|| {
51+
if results.not_found() {
52+
"notfound"
53+
} else {
54+
"unknown"
55+
}
56+
});
57+
self.query_parsing_time
58+
.with_label_values(&[id])
59+
.observe(duration.as_secs_f64());
60+
}
61+
62+
fn observe_query_validation(&self, duration: Duration, id: &DeploymentHash) {
63+
self.query_validation_time
64+
.with_label_values(&[id.as_str()])
65+
.observe(duration.as_secs_f64());
66+
}
67+
}
68+
69+
impl GraphQLMetrics {
70+
pub fn new(registry: Arc<dyn MetricsRegistry>) -> Self {
71+
let query_execution_time = registry
72+
.new_histogram_vec(
73+
"query_execution_time",
74+
"Execution time for successful GraphQL queries",
75+
vec![String::from("deployment"), String::from("status")],
76+
vec![0.1, 0.5, 1.0, 10.0, 100.0],
77+
)
78+
.expect("failed to create `query_execution_time` histogram");
79+
let query_parsing_time = registry
80+
.new_histogram_vec(
81+
"query_parsing_time",
82+
"Parsing time for GraphQL queries",
83+
vec![String::from("deployment")],
84+
vec![0.1, 0.5, 1.0, 10.0, 100.0],
85+
)
86+
.expect("failed to create `query_parsing_time` histogram");
87+
88+
let query_validation_time = registry
89+
.new_histogram_vec(
90+
"query_validation_time",
91+
"Validation time for GraphQL queries",
92+
vec![String::from("deployment")],
93+
vec![0.1, 0.5, 1.0, 10.0, 100.0],
94+
)
95+
.expect("failed to create `query_validation_time` histogram");
96+
97+
let bins = (10..32).map(|n| 2u64.pow(n) as f64).collect::<Vec<_>>();
98+
let query_result_size = registry
99+
.new_histogram(
100+
"query_result_size",
101+
"the size of the result of successful GraphQL queries (in CacheWeight)",
102+
bins,
103+
)
104+
.unwrap();
105+
106+
let query_result_size_max = registry
107+
.new_gauge(
108+
"query_result_max",
109+
"the maximum size of a query result (in CacheWeight)",
110+
HashMap::new(),
111+
)
112+
.unwrap();
113+
114+
Self {
115+
query_execution_time,
116+
query_parsing_time,
117+
query_validation_time,
118+
query_result_size,
119+
query_result_size_max,
120+
}
121+
}
122+
123+
// Tests need to construct one of these, but normal code doesn't
124+
#[cfg(debug_assertions)]
125+
pub fn make(registry: Arc<dyn MetricsRegistry>) -> Self {
126+
Self::new(registry)
127+
}
128+
129+
pub fn observe_query_result_size(&self, size: usize) {
130+
let size = size as f64;
131+
self.query_result_size.observe(size);
132+
if self.query_result_size_max.get() < size {
133+
self.query_result_size_max.set(size);
134+
}
135+
}
136+
}

graphql/src/runner.rs

+17-57
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,17 @@
1-
use std::collections::HashMap;
21
use std::sync::Arc;
32
use std::time::Instant;
43

4+
use crate::metrics::GraphQLMetrics;
55
use crate::prelude::{QueryExecutionOptions, StoreResolver, SubscriptionExecutionOptions};
66
use crate::query::execute_query;
77
use crate::subscription::execute_prepared_subscription;
88
use graph::prelude::MetricsRegistry;
9-
use graph::prometheus::{Gauge, Histogram};
109
use graph::{
1110
components::store::SubscriptionManager,
1211
prelude::{
13-
async_trait, o, CheapClone, DeploymentState, GraphQlRunner as GraphQlRunnerTrait, Logger,
14-
Query, QueryExecutionError, Subscription, SubscriptionError, SubscriptionResult, ENV_VARS,
12+
async_trait, o, CheapClone, DeploymentState, GraphQLMetrics as GraphQLMetricsTrait,
13+
GraphQlRunner as GraphQlRunnerTrait, Logger, Query, QueryExecutionError, Subscription,
14+
SubscriptionError, SubscriptionResult, ENV_VARS,
1515
},
1616
};
1717
use graph::{data::graphql::effort::LoadManager, prelude::QueryStoreManager};
@@ -20,59 +20,13 @@ use graph::{
2020
prelude::QueryStore,
2121
};
2222

23-
pub struct ResultSizeMetrics {
24-
histogram: Box<Histogram>,
25-
max_gauge: Box<Gauge>,
26-
}
27-
28-
impl ResultSizeMetrics {
29-
fn new(registry: Arc<dyn MetricsRegistry>) -> Self {
30-
// Divide the Histogram into exponentially sized buckets between 1k and 4G
31-
let bins = (10..32).map(|n| 2u64.pow(n) as f64).collect::<Vec<_>>();
32-
let histogram = registry
33-
.new_histogram(
34-
"query_result_size",
35-
"the size of the result of successful GraphQL queries (in CacheWeight)",
36-
bins,
37-
)
38-
.unwrap();
39-
40-
let max_gauge = registry
41-
.new_gauge(
42-
"query_result_max",
43-
"the maximum size of a query result (in CacheWeight)",
44-
HashMap::new(),
45-
)
46-
.unwrap();
47-
48-
Self {
49-
histogram,
50-
max_gauge,
51-
}
52-
}
53-
54-
// Tests need to construct one of these, but normal code doesn't
55-
#[cfg(debug_assertions)]
56-
pub fn make(registry: Arc<dyn MetricsRegistry>) -> Self {
57-
Self::new(registry)
58-
}
59-
60-
pub fn observe(&self, size: usize) {
61-
let size = size as f64;
62-
self.histogram.observe(size);
63-
if self.max_gauge.get() < size {
64-
self.max_gauge.set(size);
65-
}
66-
}
67-
}
68-
6923
/// GraphQL runner implementation for The Graph.
7024
pub struct GraphQlRunner<S, SM> {
7125
logger: Logger,
7226
store: Arc<S>,
7327
subscription_manager: Arc<SM>,
7428
load_manager: Arc<LoadManager>,
75-
result_size: Arc<ResultSizeMetrics>,
29+
graphql_metrics: Arc<GraphQLMetrics>,
7630
}
7731

7832
#[cfg(debug_assertions)]
@@ -95,13 +49,13 @@ where
9549
registry: Arc<dyn MetricsRegistry>,
9650
) -> Self {
9751
let logger = logger.new(o!("component" => "GraphQlRunner"));
98-
let result_size = Arc::new(ResultSizeMetrics::new(registry));
52+
let graphql_metrics = Arc::new(GraphQLMetrics::new(registry));
9953
GraphQlRunner {
10054
logger,
10155
store,
10256
subscription_manager,
10357
load_manager,
104-
result_size,
58+
graphql_metrics,
10559
}
10660
}
10761

@@ -143,7 +97,7 @@ where
14397
max_depth: Option<u8>,
14498
max_first: Option<u32>,
14599
max_skip: Option<u32>,
146-
result_size: Arc<ResultSizeMetrics>,
100+
metrics: Arc<GraphQLMetrics>,
147101
) -> Result<QueryResults, QueryResults> {
148102
// We need to use the same `QueryStore` for the entire query to ensure
149103
// we have a consistent view of the world, even when replicas, which
@@ -175,6 +129,7 @@ where
175129
query,
176130
max_complexity,
177131
max_depth,
132+
metrics.cheap_clone(),
178133
)?;
179134
self.load_manager
180135
.decide(
@@ -197,7 +152,7 @@ where
197152
bc,
198153
error_policy,
199154
query.schema.id().clone(),
200-
result_size.cheap_clone(),
155+
metrics.cheap_clone(),
201156
)
202157
.await?;
203158
max_block = max_block.max(resolver.block_number());
@@ -259,7 +214,7 @@ where
259214
max_depth,
260215
max_first,
261216
max_skip,
262-
self.result_size.cheap_clone(),
217+
self.graphql_metrics.clone(),
263218
)
264219
.await
265220
.unwrap_or_else(|e| e)
@@ -281,6 +236,7 @@ where
281236
subscription.query,
282237
ENV_VARS.graphql.max_complexity,
283238
ENV_VARS.graphql.max_depth,
239+
self.graphql_metrics.cheap_clone(),
284240
)?;
285241

286242
if let Err(err) = self
@@ -306,12 +262,16 @@ where
306262
max_depth: ENV_VARS.graphql.max_depth,
307263
max_first: ENV_VARS.graphql.max_first,
308264
max_skip: ENV_VARS.graphql.max_skip,
309-
result_size: self.result_size.clone(),
265+
graphql_metrics: self.graphql_metrics.clone(),
310266
},
311267
)
312268
}
313269

314270
/// Returns another handle to the `LoadManager` held by this runner.
fn load_manager(&self) -> Arc<LoadManager> {
    Arc::clone(&self.load_manager)
}
273+
274+
/// Returns another handle to this runner's metrics, exposed through the
/// `GraphQLMetricsTrait` trait object.
fn metrics(&self) -> Arc<dyn GraphQLMetricsTrait> {
    // Clone the concrete `Arc<GraphQLMetrics>` first, then unsize it explicitly.
    self.graphql_metrics.clone() as Arc<dyn GraphQLMetricsTrait>
}
317277
}

graphql/src/store/prefetch.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ use graph::{
2424
};
2525

2626
use crate::execution::{ast as a, ExecutionContext, Resolver};
27-
use crate::runner::ResultSizeMetrics;
27+
use crate::metrics::GraphQLMetrics;
2828
use crate::schema::ast as sast;
2929
use crate::store::query::build_query;
3030
use crate::store::StoreResolver;
@@ -480,10 +480,10 @@ pub fn run(
480480
resolver: &StoreResolver,
481481
ctx: &ExecutionContext<impl Resolver>,
482482
selection_set: &a::SelectionSet,
483-
result_size: &ResultSizeMetrics,
483+
graphql_metrics: &GraphQLMetrics,
484484
) -> Result<r::Value, Vec<QueryExecutionError>> {
485485
execute_root_selection_set(resolver, ctx, selection_set).map(|nodes| {
486-
result_size.observe(nodes.weight());
486+
graphql_metrics.observe_query_result_size(nodes.weight());
487487
let obj = Object::from_iter(
488488
nodes
489489
.into_iter()

0 commit comments

Comments
 (0)