From 34577eb334aa87dd883a6bfaf013663df278c193 Mon Sep 17 00:00:00 2001 From: Michal Kucharczyk <1728078+michalkucharczyk@users.noreply.github.com> Date: Thu, 5 Jan 2023 15:40:02 +0100 Subject: [PATCH] service: storage monitor added Storage monitor added. It uses `notify` create to get notifications about any changes to monitored path (which is database path). Notifications are consumed in essential task which terminates when available storage space drops below given threshold. Closes: #12399 --- Cargo.lock | 91 ++++++++++++++- bin/node/cli/benches/block_production.rs | 1 + bin/node/cli/benches/transaction_pool.rs | 1 + client/cli/src/config.rs | 11 ++ client/cli/src/params/database_params.rs | 15 ++- client/cli/src/runner.rs | 1 + client/service/Cargo.toml | 2 + client/service/src/builder.rs | 14 ++- client/service/src/config.rs | 3 + client/service/src/lib.rs | 1 + client/service/src/storage_monitor.rs | 143 +++++++++++++++++++++++ client/service/test/src/lib.rs | 1 + 12 files changed, 275 insertions(+), 9 deletions(-) create mode 100644 client/service/src/storage_monitor.rs diff --git a/Cargo.lock b/Cargo.lock index f82343e442a89..e1ac379997c6b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1179,7 +1179,7 @@ dependencies = [ "cfg-if", "crossbeam-utils", "lazy_static", - "memoffset", + "memoffset 0.6.4", "scopeguard", ] @@ -2826,6 +2826,26 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e04e2fd2b8188ea827b32ef11de88377086d690286ab35747ef7f9bf3ccb590" +[[package]] +name = "inotify" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8069d3ec154eb856955c1c0fbffefbf5f3c40a104ec912d4797314c1801abff" +dependencies = [ + "bitflags", + "inotify-sys", + "libc", +] + +[[package]] +name = "inotify-sys" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e05c02b5e89bff3b946cedeca278abc628fe811e604f027c45a8aa3cf793d0eb" +dependencies = [ + "libc", +] + [[package]] name = "instant" version = "0.1.12" @@ -3161,6 +3181,26 @@ dependencies = [ "substrate-wasm-builder", ] +[[package]] +name = "kqueue" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c8fc60ba15bf51257aa9807a48a61013db043fcf3a78cb0d916e8e396dcad98" +dependencies = [ + "kqueue-sys", + "libc", +] + +[[package]] +name = "kqueue-sys" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8367585489f01bc55dd27404dcf56b95e6da061a256a666ab23be9ba96a2e587" +dependencies = [ + "bitflags", + "libc", +] + [[package]] name = "kvdb" version = "0.13.0" @@ -3214,9 +3254,9 @@ checksum = "3576a87f2ba00f6f106fdfcd16db1d698d648a26ad8e0573cad8537c3c362d2a" [[package]] name = "libc" -version = "0.2.126" +version = "0.2.139" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" +checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" [[package]] name = "libgit2-sys" @@ -3848,6 +3888,15 @@ dependencies = [ "autocfg", ] +[[package]] +name = "memoffset" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4" +dependencies = [ + "autocfg", +] + [[package]] name = "memory-db" version = "0.31.0" @@ -4173,7 +4222,7 @@ dependencies = [ "cc", "cfg-if", "libc", - "memoffset", + "memoffset 0.6.4", ] [[package]] @@ -4187,6 +4236,20 @@ dependencies = [ "libc", ] +[[package]] +name = "nix" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46a58d1d356c6597d08cde02c2f09d785b09e28711837b1ed667dc652c08a694" +dependencies = [ + "bitflags", + "cfg-if", + "libc", + "memoffset 0.7.1", + "pin-utils", + "static_assertions", +] + [[package]] name = "node-bench" version = "0.9.0-dev" @@ -4551,6 +4614,22 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" +[[package]] +name = "notify" +version = "5.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2c66da08abae1c024c01d635253e402341b4060a12e99b31c7594063bf490a" +dependencies = [ + "bitflags", + "filetime", + "inotify", + "kqueue", + "libc", + "mio", + "walkdir", + "winapi", +] + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -8182,6 +8261,8 @@ dependencies = [ "futures-timer", "jsonrpsee", "log", + "nix 0.26.1", + "notify", "parity-scale-codec", "parking_lot 0.12.1", "pin-project", @@ -11248,7 +11329,7 @@ dependencies = [ "log", "mach", "memfd", - "memoffset", + "memoffset 0.6.4", "paste", "rand 0.8.5", "rustix", diff --git a/bin/node/cli/benches/block_production.rs b/bin/node/cli/benches/block_production.rs index 4fcebb123d9e3..9576e7a61d59b 100644 --- a/bin/node/cli/benches/block_production.rs +++ b/bin/node/cli/benches/block_production.rs @@ -113,6 +113,7 @@ fn new_node(tokio_handle: Handle) -> node_cli::service::NewFullBase { base_path: Some(base_path), informant_output_format: Default::default(), wasm_runtime_overrides: None, + available_storage_theshold: 1000, }; node_cli::service::new_full_base(config, false, |_, _| ()) diff --git a/bin/node/cli/benches/transaction_pool.rs b/bin/node/cli/benches/transaction_pool.rs index a8839642ddc26..de83ede2f9123 100644 --- a/bin/node/cli/benches/transaction_pool.rs +++ b/bin/node/cli/benches/transaction_pool.rs @@ -106,6 +106,7 @@ fn new_node(tokio_handle: Handle) -> node_cli::service::NewFullBase { base_path: Some(base_path), informant_output_format: Default::default(), wasm_runtime_overrides: None, + available_storage_theshold: 1000, }; node_cli::service::new_full_base(config, false, |_, _| ()).expect("Creates node") diff --git a/client/cli/src/config.rs b/client/cli/src/config.rs index 77689708a231f..8789e6ce29308 100644 --- a/client/cli/src/config.rs +++ b/client/cli/src/config.rs @@ -191,6 +191,16 @@ pub trait CliConfiguration: Sized { .unwrap_or_else(|| Ok((None, KeystoreConfig::InMemory))) } + /// Get the database available storage space threshold. + /// + /// By default this is retrieved from `DatabaseParams` if it is available. + fn database_storage_threshold(&self) -> Result { + Ok(self + .database_params() + .map(|x| x.database_storage_threshold()) + .unwrap_or_default()) + } + /// Get the database cache size. /// /// By default this is retrieved from `DatabaseParams` if it is available. Otherwise its `None`. @@ -562,6 +572,7 @@ pub trait CliConfiguration: Sized { base_path: Some(base_path), informant_output_format: Default::default(), runtime_cache_size, + available_storage_theshold: self.database_storage_threshold()?, }) } diff --git a/client/cli/src/params/database_params.rs b/client/cli/src/params/database_params.rs index fdd3622580a6d..d4e69ce2699cf 100644 --- a/client/cli/src/params/database_params.rs +++ b/client/cli/src/params/database_params.rs @@ -19,7 +19,7 @@ use crate::arg_enums::Database; use clap::Args; -/// Parameters for block import. +/// Parameters for database #[derive(Debug, Clone, PartialEq, Args)] pub struct DatabaseParams { /// Select database backend to use. @@ -29,10 +29,16 @@ pub struct DatabaseParams { /// Limit the memory the database cache can use. #[arg(long = "db-cache", value_name = "MiB")] pub database_cache_size: Option, + + /// Required available space on database storage. If available space for DB storage drops below + /// the given threshold, node will be gracefully terminated. If `0` is given monitoring will be + /// disabled. + #[arg(long = "db-storage-threshold", value_name = "MB", default_value_t = 1000)] + pub database_storage_threshold: u64, } impl DatabaseParams { - /// Limit the memory the database cache can use. + /// Database backend pub fn database(&self) -> Option { self.database } @@ -41,4 +47,9 @@ impl DatabaseParams { pub fn database_cache_size(&self) -> Option { self.database_cache_size } + + /// Available storage space threshold + pub fn database_storage_threshold(&self) -> u64 { + self.database_storage_threshold + } } diff --git a/client/cli/src/runner.rs b/client/cli/src/runner.rs index 1a532b3bbc6fb..049e2b05a0f14 100644 --- a/client/cli/src/runner.rs +++ b/client/cli/src/runner.rs @@ -367,6 +367,7 @@ mod tests { base_path: None, informant_output_format: Default::default(), runtime_cache_size: 2, + available_storage_theshold: 1000, }, runtime, ) diff --git a/client/service/Cargo.toml b/client/service/Cargo.toml index 45b1c02620fb2..28befbf2127fa 100644 --- a/client/service/Cargo.toml +++ b/client/service/Cargo.toml @@ -80,6 +80,8 @@ tokio = { version = "1.22.0", features = ["time", "rt-multi-thread", "parking_lo tempfile = "3.1.0" directories = "4.0.1" static_init = "1.0.3" +nix = { version = "0.26.1", features = ["fs"] } +notify = { version = "5.0.0", default-features = false, feature=["macos_kqueue"] } [dev-dependencies] substrate-test-runtime-client = { version = "2.0.0", path = "../../test-utils/runtime/client" } diff --git a/client/service/src/builder.rs b/client/service/src/builder.rs index 1f94f96fae89e..7461d11d7dc8f 100644 --- a/client/service/src/builder.rs +++ b/client/service/src/builder.rs @@ -22,8 +22,10 @@ use crate::{ config::{Configuration, KeystoreConfig, PrometheusConfig}, error::Error, metrics::MetricsService, - start_rpc_servers, BuildGenesisBlock, GenesisBlockBuilder, RpcHandlers, SpawnTaskHandle, - TaskManager, TransactionPoolAdapter, + start_rpc_servers, + storage_monitor::StorageMonitorService, + BuildGenesisBlock, GenesisBlockBuilder, RpcHandlers, SpawnTaskHandle, TaskManager, + TransactionPoolAdapter, }; use futures::{channel::oneshot, future::ready, FutureExt, StreamExt}; use jsonrpsee::RpcModule; @@ -534,6 +536,14 @@ where metrics_service.run(client.clone(), transaction_pool.clone(), network.clone()), ); + if let Some(storage_monitor_service) = StorageMonitorService::new_for_config(&config)? { + task_manager.spawn_essential_handle().spawn( + "storage-monitor", + None, + storage_monitor_service.run(), + ) + } + let rpc_id_provider = config.rpc_id_provider.take(); // jsonrpsee RPC diff --git a/client/service/src/config.rs b/client/service/src/config.rs index efadb8433f63d..64e31cecf0413 100644 --- a/client/service/src/config.rs +++ b/client/service/src/config.rs @@ -149,6 +149,9 @@ pub struct Configuration { pub informant_output_format: sc_informant::OutputFormat, /// Maximum number of different runtime versions that can be cached. pub runtime_cache_size: u8, + /// Threshold (in megabytes) for available storage space associated with `base_path`. `0` means + /// no storage monitoring. + pub available_storage_theshold: u64, } /// Type for tasks spawned by the executor. diff --git a/client/service/src/lib.rs b/client/service/src/lib.rs index 1529b822ade32..4c74edf287b2a 100644 --- a/client/service/src/lib.rs +++ b/client/service/src/lib.rs @@ -32,6 +32,7 @@ pub mod client; #[cfg(not(feature = "test-helpers"))] mod client; mod metrics; +mod storage_monitor; mod task_manager; use std::{collections::HashMap, net::SocketAddr}; diff --git a/client/service/src/storage_monitor.rs b/client/service/src/storage_monitor.rs new file mode 100644 index 0000000000000..65e2c61752745 --- /dev/null +++ b/client/service/src/storage_monitor.rs @@ -0,0 +1,143 @@ +// This file is part of Substrate. + +// Copyright (C) 2022 Parity Technologies (UK) Ltd. +// SPDX-License-Identifier: GPL-3.0-or-later WITH Classpath-exception-2.0 + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +use crate::{config::Configuration, error::Error}; +use futures::StreamExt; +use nix::{errno::Errno, sys::statvfs::statvfs}; +use notify::{Config, Event, RecommendedWatcher, RecursiveMode, Watcher}; +use sc_utils::mpsc::{tracing_unbounded, TracingUnboundedReceiver}; +use std::path::{Path, PathBuf}; + +const LOG_TARGET: &str = "storage-monitor"; + +pub struct StorageMonitorService { + /// watched path + path: PathBuf, + /// notify's events receiver + stream: TracingUnboundedReceiver>, + /// number of bytes that shall be free and available on the filesystem for watched path + threshold: u64, + /// keeps the ref for file system watcher + _watcher: RecommendedWatcher, +} + +impl StorageMonitorService { + /// creates new StorageMonitorService for given client config + pub fn new_for_config(config: &Configuration) -> Result, Error> { + Ok(match (config.available_storage_theshold, config.database.path()) { + (0, _) => { + log::info!( + target: LOG_TARGET, + "StorageMonitorService: threshold 0 given, storage monitoring disabled", + ); + None + }, + (_, None) => { + log::warn!( + target: LOG_TARGET, + "StorageMonitorService: no database path to observe", + ); + None + }, + (threshold, Some(path)) => { + let (sink, stream) = tracing_unbounded("mpsc_storage_monitor", 1024); + + let mut watcher = RecommendedWatcher::new( + move |res| { + sink.unbounded_send(res).unwrap(); + }, + Config::default(), + ) + .map_err(|e| Error::Other(format!("Could not create fs watcher {e}")))?; + + watcher + .watch(path.as_ref(), RecursiveMode::Recursive) + .map_err(|e| Error::Other(format!("Could not start fs watcher {e}")))?; + + log::debug!( + target: LOG_TARGET, + "Initializing StorageMonitorService for db path: {:?}", + path, + ); + + Self::check_free_space(&path, threshold)?; + + Some(StorageMonitorService { + path: path.to_path_buf(), + stream, + threshold, + _watcher: watcher, + }) + }, + }) + } + + /// main monitoring loop, intended to be spawn as essential task + pub async fn run(mut self) { + while let Some(watch_event) = self.stream.next().await { + match watch_event { + Ok(_) => + if Self::check_free_space(&self.path, self.threshold).is_err() { + break + }, + Err(e) => { + println!("watch error: {:?}", e); + }, + } + } + } + + // returns free space in MB + fn free_space(path: &Path) -> Result { + let fs_stats = statvfs(path); + fs_stats.map(|stats| stats.blocks_available() * stats.block_size() / 1_000_000) + } + + // checks if the free space for given `path` is below given `threshold`. + // If not error is returned. + // System errors are silently ignored. + fn check_free_space(path: &Path, threshold: u64) -> Result<(), Error> { + match StorageMonitorService::free_space(path.as_ref()) { + Ok(available_space) => { + log::trace!( + target: LOG_TARGET, + "free:{:?} , threshold:{:?}", + available_space, + threshold, + ); + + if available_space < threshold { + let msg = format!( + "Available space {:?}MB for path {:?} dropped below threshold:{:?}MB , terminating...", + available_space, + path, + threshold, + ); + log::error!(target: LOG_TARGET, "{}", msg); + Err(Error::Other(msg)) + } else { + Ok(()) + } + }, + Err(e) => { + log::warn!(target: LOG_TARGET, "could not read available space: {:?}", e); + Ok(()) + }, + } + } +} diff --git a/client/service/test/src/lib.rs b/client/service/test/src/lib.rs index 5f75e3521e235..afd14afb3c578 100644 --- a/client/service/test/src/lib.rs +++ b/client/service/test/src/lib.rs @@ -265,6 +265,7 @@ fn node_config< base_path: Some(BasePath::new(root)), informant_output_format: Default::default(), runtime_cache_size: 2, + available_storage_theshold: 1000, } }