Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

graphql: metrics for validation and parsing phases #3760

Merged
merged 1 commit into from
Jul 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions graph/src/components/graphql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,14 @@ pub trait GraphQlRunner: Send + Sync + 'static {
) -> Result<SubscriptionResult, SubscriptionError>;

fn load_manager(&self) -> Arc<LoadManager>;

fn metrics(&self) -> Arc<dyn GraphQLMetrics>;
}

/// Observers for the phases of the GraphQL query pipeline. Implementations
/// record how long parsing, validation and execution of a query take.
pub trait GraphQLMetrics: Send + Sync + 'static {
    /// Record the time spent executing a query whose outcome is `results`.
    fn observe_query_execution(&self, duration: Duration, results: &QueryResults);
    /// Record the time spent parsing a query whose outcome is `results`.
    fn observe_query_parsing(&self, duration: Duration, results: &QueryResults);
    /// Record the time spent validating a query against the schema of the
    /// deployment `id`.
    fn observe_query_validation(&self, duration: Duration, id: &DeploymentHash);
}

#[async_trait]
Expand Down
2 changes: 1 addition & 1 deletion graph/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ pub mod prelude {
LightEthereumBlockExt,
};
pub use crate::components::graphql::{
GraphQlRunner, QueryLoadManager, SubscriptionResultFuture,
GraphQLMetrics, GraphQlRunner, QueryLoadManager, SubscriptionResultFuture,
};
pub use crate::components::link_resolver::{JsonStreamValue, JsonValueStream, LinkResolver};
pub use crate::components::metrics::{
Expand Down
5 changes: 4 additions & 1 deletion graphql/src/execution/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ use graph::data::query::QueryExecutionError;
use graph::data::query::{Query as GraphDataQuery, QueryVariables};
use graph::data::schema::ApiSchema;
use graph::prelude::{
info, o, q, r, s, warn, BlockNumber, CheapClone, Logger, TryFromValue, ENV_VARS,
info, o, q, r, s, warn, BlockNumber, CheapClone, GraphQLMetrics, Logger, TryFromValue, ENV_VARS,
};

use crate::execution::ast as a;
Expand Down Expand Up @@ -204,8 +204,11 @@ impl Query {
query: GraphDataQuery,
max_complexity: Option<u64>,
max_depth: u8,
metrics: Arc<dyn GraphQLMetrics>,
) -> Result<Arc<Self>, Vec<QueryExecutionError>> {
let validation_phase_start = Instant::now();
validate_query(logger, &query, &schema.document())?;
metrics.observe_query_validation(validation_phase_start.elapsed(), schema.id());

let mut operation = None;
let mut fragments = HashMap::new();
Expand Down
6 changes: 5 additions & 1 deletion graphql/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ mod store;
/// The external interface for actually running queries
mod runner;

/// Utilities for working with Prometheus.
mod metrics;

/// Prelude that exports the most important traits and types.
pub mod prelude {
pub use super::execution::{ast as a, ExecutionContext, Query, Resolver};
Expand All @@ -34,12 +37,13 @@ pub mod prelude {
pub use super::subscription::SubscriptionExecutionOptions;
pub use super::values::MaybeCoercible;

pub use super::metrics::GraphQLMetrics;
pub use super::runner::GraphQlRunner;
pub use graph::prelude::s::ObjectType;
}

#[cfg(debug_assertions)]
pub mod test_support {
pub use super::runner::ResultSizeMetrics;
pub use super::metrics::GraphQLMetrics;
pub use super::runner::INITIAL_DEPLOYMENT_STATE_FOR_TESTS;
}
136 changes: 136 additions & 0 deletions graphql/src/metrics.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
use std::collections::HashMap;
use std::fmt;
use std::sync::Arc;
use std::time::Duration;

use graph::data::query::QueryResults;
use graph::prelude::{DeploymentHash, GraphQLMetrics as GraphQLMetricsTrait, MetricsRegistry};
use graph::prometheus::{Gauge, Histogram, HistogramVec};

/// Prometheus metrics for the GraphQL query pipeline; the concrete
/// implementation of the `GraphQLMetrics` trait from `graph::prelude`.
pub struct GraphQLMetrics {
    // Execution time, labeled by deployment and success/failure status.
    query_execution_time: Box<HistogramVec>,
    // Parsing time, labeled by deployment.
    query_parsing_time: Box<HistogramVec>,
    // Validation time, labeled by deployment.
    query_validation_time: Box<HistogramVec>,
    // Result sizes (in CacheWeight) of successful queries.
    query_result_size: Box<Histogram>,
    // Largest result size (in CacheWeight) observed so far.
    query_result_size_max: Box<Gauge>,
}

impl fmt::Debug for GraphQLMetrics {
    /// The metric handles carry no state worth displaying, so the debug
    /// representation is just an empty struct literal.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("GraphQLMetrics { }")
    }
}

/// Resolve the value of the `deployment` metric label from query results:
/// the deployment hash when it is known, or a marker distinguishing
/// queries against missing deployments from those with no hash at all.
fn deployment_label(results: &QueryResults) -> &str {
    results
        .deployment_hash()
        .map(|h| h.as_str())
        .unwrap_or_else(|| {
            if results.not_found() {
                "notfound"
            } else {
                "unknown"
            }
        })
}

impl GraphQLMetricsTrait for GraphQLMetrics {
    /// Record execution time, labeled by deployment and by whether the
    /// query produced errors.
    fn observe_query_execution(&self, duration: Duration, results: &QueryResults) {
        let status = if results.has_errors() {
            "failed"
        } else {
            "success"
        };
        self.query_execution_time
            .with_label_values(&[deployment_label(results), status])
            .observe(duration.as_secs_f64());
    }

    /// Record parsing time, labeled by deployment.
    fn observe_query_parsing(&self, duration: Duration, results: &QueryResults) {
        self.query_parsing_time
            .with_label_values(&[deployment_label(results)])
            .observe(duration.as_secs_f64());
    }

    /// Record validation time, labeled by the deployment whose schema the
    /// query was validated against.
    fn observe_query_validation(&self, duration: Duration, id: &DeploymentHash) {
        self.query_validation_time
            .with_label_values(&[id.as_str()])
            .observe(duration.as_secs_f64());
    }
}

impl GraphQLMetrics {
    /// Create all GraphQL query metrics and register them with `registry`.
    ///
    /// # Panics
    ///
    /// Panics if any metric cannot be registered; that indicates a metric
    /// name collision and is a programming error.
    pub fn new(registry: Arc<dyn MetricsRegistry>) -> Self {
        let query_execution_time = registry
            .new_histogram_vec(
                "query_execution_time",
                "Execution time for successful GraphQL queries",
                vec![String::from("deployment"), String::from("status")],
                vec![0.1, 0.5, 1.0, 10.0, 100.0],
            )
            .expect("failed to create `query_execution_time` histogram");

        let query_parsing_time = registry
            .new_histogram_vec(
                "query_parsing_time",
                "Parsing time for GraphQL queries",
                vec![String::from("deployment")],
                vec![0.1, 0.5, 1.0, 10.0, 100.0],
            )
            .expect("failed to create `query_parsing_time` histogram");

        let query_validation_time = registry
            .new_histogram_vec(
                "query_validation_time",
                "Validation time for GraphQL queries",
                vec![String::from("deployment")],
                vec![0.1, 0.5, 1.0, 10.0, 100.0],
            )
            .expect("failed to create `query_validation_time` histogram");

        // Divide the histogram into exponentially sized buckets between 1k and 4G
        let bins = (10..32).map(|n| 2u64.pow(n) as f64).collect::<Vec<_>>();
        let query_result_size = registry
            .new_histogram(
                "query_result_size",
                "the size of the result of successful GraphQL queries (in CacheWeight)",
                bins,
            )
            .expect("failed to create `query_result_size` histogram");

        let query_result_size_max = registry
            .new_gauge(
                "query_result_max",
                "the maximum size of a query result (in CacheWeight)",
                HashMap::new(),
            )
            .expect("failed to create `query_result_max` gauge");

        Self {
            query_execution_time,
            query_parsing_time,
            query_validation_time,
            query_result_size,
            query_result_size_max,
        }
    }

    // Tests need to construct one of these, but normal code doesn't
    #[cfg(debug_assertions)]
    pub fn make(registry: Arc<dyn MetricsRegistry>) -> Self {
        Self::new(registry)
    }

    /// Record the size (in CacheWeight) of a successful query result and
    /// raise the running maximum if this result is the largest seen so far.
    pub fn observe_query_result_size(&self, size: usize) {
        let size = size as f64;
        self.query_result_size.observe(size);
        if self.query_result_size_max.get() < size {
            self.query_result_size_max.set(size);
        }
    }
}
74 changes: 17 additions & 57 deletions graphql/src/runner.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Instant;

use crate::metrics::GraphQLMetrics;
use crate::prelude::{QueryExecutionOptions, StoreResolver, SubscriptionExecutionOptions};
use crate::query::execute_query;
use crate::subscription::execute_prepared_subscription;
use graph::prelude::MetricsRegistry;
use graph::prometheus::{Gauge, Histogram};
use graph::{
components::store::SubscriptionManager,
prelude::{
async_trait, o, CheapClone, DeploymentState, GraphQlRunner as GraphQlRunnerTrait, Logger,
Query, QueryExecutionError, Subscription, SubscriptionError, SubscriptionResult, ENV_VARS,
async_trait, o, CheapClone, DeploymentState, GraphQLMetrics as GraphQLMetricsTrait,
GraphQlRunner as GraphQlRunnerTrait, Logger, Query, QueryExecutionError, Subscription,
SubscriptionError, SubscriptionResult, ENV_VARS,
},
};
use graph::{data::graphql::effort::LoadManager, prelude::QueryStoreManager};
Expand All @@ -20,59 +20,13 @@ use graph::{
prelude::QueryStore,
};

pub struct ResultSizeMetrics {
histogram: Box<Histogram>,
max_gauge: Box<Gauge>,
}

impl ResultSizeMetrics {
fn new(registry: Arc<dyn MetricsRegistry>) -> Self {
// Divide the Histogram into exponentially sized buckets between 1k and 4G
let bins = (10..32).map(|n| 2u64.pow(n) as f64).collect::<Vec<_>>();
let histogram = registry
.new_histogram(
"query_result_size",
"the size of the result of successful GraphQL queries (in CacheWeight)",
bins,
)
.unwrap();

let max_gauge = registry
.new_gauge(
"query_result_max",
"the maximum size of a query result (in CacheWeight)",
HashMap::new(),
)
.unwrap();

Self {
histogram,
max_gauge,
}
}

// Tests need to construct one of these, but normal code doesn't
#[cfg(debug_assertions)]
pub fn make(registry: Arc<dyn MetricsRegistry>) -> Self {
Self::new(registry)
}

pub fn observe(&self, size: usize) {
let size = size as f64;
self.histogram.observe(size);
if self.max_gauge.get() < size {
self.max_gauge.set(size);
}
}
}

/// GraphQL runner implementation for The Graph.
pub struct GraphQlRunner<S, SM> {
    // Logger scoped to the "GraphQlRunner" component.
    logger: Logger,
    // Store against which queries are resolved; presumably a
    // `QueryStoreManager` — confirm against the impl bounds.
    store: Arc<S>,
    subscription_manager: Arc<SM>,
    // Consulted (via `decide`) before running a query.
    load_manager: Arc<LoadManager>,
    // Metrics for the parsing/validation/execution phases of queries.
    graphql_metrics: Arc<GraphQLMetrics>,
}

#[cfg(debug_assertions)]
Expand All @@ -95,13 +49,13 @@ where
registry: Arc<dyn MetricsRegistry>,
) -> Self {
let logger = logger.new(o!("component" => "GraphQlRunner"));
let result_size = Arc::new(ResultSizeMetrics::new(registry));
let graphql_metrics = Arc::new(GraphQLMetrics::new(registry));
GraphQlRunner {
logger,
store,
subscription_manager,
load_manager,
result_size,
graphql_metrics,
}
}

Expand Down Expand Up @@ -143,7 +97,7 @@ where
max_depth: Option<u8>,
max_first: Option<u32>,
max_skip: Option<u32>,
result_size: Arc<ResultSizeMetrics>,
metrics: Arc<GraphQLMetrics>,
) -> Result<QueryResults, QueryResults> {
// We need to use the same `QueryStore` for the entire query to ensure
// we have a consistent view of the world, even when replicas, which
Expand Down Expand Up @@ -175,6 +129,7 @@ where
query,
max_complexity,
max_depth,
metrics.cheap_clone(),
)?;
self.load_manager
.decide(
Expand All @@ -197,7 +152,7 @@ where
bc,
error_policy,
query.schema.id().clone(),
result_size.cheap_clone(),
metrics.cheap_clone(),
)
.await?;
max_block = max_block.max(resolver.block_number());
Expand Down Expand Up @@ -259,7 +214,7 @@ where
max_depth,
max_first,
max_skip,
self.result_size.cheap_clone(),
self.graphql_metrics.clone(),
)
.await
.unwrap_or_else(|e| e)
Expand All @@ -281,6 +236,7 @@ where
subscription.query,
ENV_VARS.graphql.max_complexity,
ENV_VARS.graphql.max_depth,
self.graphql_metrics.cheap_clone(),
)?;

if let Err(err) = self
Expand All @@ -306,12 +262,16 @@ where
max_depth: ENV_VARS.graphql.max_depth,
max_first: ENV_VARS.graphql.max_first,
max_skip: ENV_VARS.graphql.max_skip,
result_size: self.result_size.clone(),
graphql_metrics: self.graphql_metrics.clone(),
},
)
}

    /// Hand out the load manager shared by this runner.
    fn load_manager(&self) -> Arc<LoadManager> {
        self.load_manager.clone()
    }

    /// Hand out the runner's metrics as the trait object required by the
    /// `GraphQlRunner` trait.
    fn metrics(&self) -> Arc<dyn GraphQLMetricsTrait> {
        self.graphql_metrics.clone()
    }
}
6 changes: 3 additions & 3 deletions graphql/src/store/prefetch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use graph::{
};

use crate::execution::{ast as a, ExecutionContext, Resolver};
use crate::runner::ResultSizeMetrics;
use crate::metrics::GraphQLMetrics;
use crate::schema::ast as sast;
use crate::store::query::build_query;
use crate::store::StoreResolver;
Expand Down Expand Up @@ -480,10 +480,10 @@ pub fn run(
resolver: &StoreResolver,
ctx: &ExecutionContext<impl Resolver>,
selection_set: &a::SelectionSet,
result_size: &ResultSizeMetrics,
graphql_metrics: &GraphQLMetrics,
) -> Result<r::Value, Vec<QueryExecutionError>> {
execute_root_selection_set(resolver, ctx, selection_set).map(|nodes| {
result_size.observe(nodes.weight());
graphql_metrics.observe_query_result_size(nodes.weight());
let obj = Object::from_iter(
nodes
.into_iter()
Expand Down
Loading