diff --git a/docs/environment-variables.md b/docs/environment-variables.md index 839709aad05..e23676a7d92 100644 --- a/docs/environment-variables.md +++ b/docs/environment-variables.md @@ -200,3 +200,7 @@ those. identified as unused, `graph-node` will wait at least this long before actually deleting the data (value is in minutes, defaults to 360, i.e. 6 hours) +- `GRAPH_STORE_BATCH_TARGET_DURATION`: How long batch operations during + copying or grafting should take. This limits how long transactions for + such long-running operations will be, and therefore helps control bloat + in other tables. Value is in seconds and defaults to 180s. diff --git a/graph/src/env/store.rs b/graph/src/env/store.rs index 077088b5b39..4bfe0d0616f 100644 --- a/graph/src/env/store.rs +++ b/graph/src/env/store.rs @@ -87,10 +87,10 @@ pub struct EnvVarsStore { /// done synchronously. pub write_queue_size: usize, - /// This is just in case new behavior causes issues. This can be removed - /// once the new behavior has run in the hosted service for a few days - /// without issues. - pub disable_error_for_toplevel_parents: bool, + /// How long batch operations during copying or grafting should take. + /// Set by `GRAPH_STORE_BATCH_TARGET_DURATION` (expressed in seconds). + /// The default is 180s.
+ pub batch_target_duration: Duration, } // This does not print any values avoid accidentally leaking any sensitive env vars @@ -127,7 +127,7 @@ impl From for EnvVarsStore { connection_min_idle: x.connection_min_idle, connection_idle_timeout: Duration::from_secs(x.connection_idle_timeout_in_secs), write_queue_size: x.write_queue_size, - disable_error_for_toplevel_parents: x.disable_error_for_toplevel_parents.0, + batch_target_duration: Duration::from_secs(x.batch_target_duration_in_secs), } } } @@ -171,6 +171,6 @@ pub struct InnerStore { connection_idle_timeout_in_secs: u64, #[envconfig(from = "GRAPH_STORE_WRITE_QUEUE", default = "5")] write_queue_size: usize, - #[envconfig(from = "GRAPH_DISABLE_ERROR_FOR_TOPLEVEL_PARENTS", default = "false")] - disable_error_for_toplevel_parents: EnvVarBoolean, + #[envconfig(from = "GRAPH_STORE_BATCH_TARGET_DURATION", default = "180")] + batch_target_duration_in_secs: u64, } diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index d2fddbe8f93..7fea7be9860 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -34,7 +34,7 @@ use diesel::{ use graph::{ components::store::EntityType, constraint_violation, - prelude::{info, o, warn, BlockNumber, BlockPtr, Logger, StoreError}, + prelude::{info, o, warn, BlockNumber, BlockPtr, Logger, StoreError, ENV_VARS}, }; use crate::{ @@ -51,7 +51,7 @@ const INITIAL_BATCH_SIZE: i64 = 10_000; /// arrays can be large and large arrays will slow down copying a lot. 
We /// therefore tread lightly in that case const INITIAL_BATCH_SIZE_LIST: i64 = 100; -const TARGET_DURATION: Duration = Duration::from_secs(5 * 60); + const LOG_INTERVAL: Duration = Duration::from_secs(3 * 60); /// If replicas are lagging by more than this, the copying code will pause @@ -308,8 +308,9 @@ impl AdaptiveBatchSize { pub fn adapt(&mut self, duration: Duration) { // Avoid division by zero let duration = duration.as_millis().max(1); - let new_batch_size = - self.size as f64 * TARGET_DURATION.as_millis() as f64 / duration as f64; + let new_batch_size = self.size as f64 + * ENV_VARS.store.batch_target_duration.as_millis() as f64 + / duration as f64; self.size = (2 * self.size).min(new_batch_size.round() as i64); } } diff --git a/store/postgres/src/relational_queries.rs b/store/postgres/src/relational_queries.rs index 116f61c49ff..05bca666c8f 100644 --- a/store/postgres/src/relational_queries.rs +++ b/store/postgres/src/relational_queries.rs @@ -500,22 +500,12 @@ impl EntityData { if key == "g$parent_id" { match &parent_type { None => { - if ENV_VARS.store.disable_error_for_toplevel_parents { - // Only temporarily in case reporting an - // error causes unexpected trouble. Can - // be removed once it's been working for - // a few days - let value = - T::Value::from_column_value(&ColumnType::String, json)?; - out.insert_entity_data("g$parent_id".to_owned(), value); - } else { - // A query that does not have parents - // somehow returned parent ids. We have no - // idea how to deserialize that - return Err(graph::constraint_violation!( - "query unexpectedly produces parent ids" - )); - } + // A query that does not have parents + // somehow returned parent ids. We have no + // idea how to deserialize that + return Err(graph::constraint_violation!( + "query unexpectedly produces parent ids" + )); } Some(parent_type) => { let value = T::Value::from_column_value(parent_type, json)?;