-
Notifications
You must be signed in to change notification settings - Fork 2.6k
Warn validators with slow hardware #12620
Changes from all commits
0f8dbeb
f232e0c
41c00db
2a8e8df
adb5ca5
e16a59b
2ea691d
889e355
861ee8b
b3b6333
fb5bef8
befdd84
355e16d
f79582a
da40db9
2b09ebe
b488393
117663d
83c7100
3b28a8a
400c47d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,10 +21,10 @@ use crate::{ExecutionLimit, HwBench}; | |
use sc_telemetry::SysInfo; | ||
use sp_core::{sr25519, Pair}; | ||
use sp_io::crypto::sr25519_verify; | ||
use sp_std::{fmt, prelude::*}; | ||
use sp_std::{fmt, fmt::Formatter, prelude::*}; | ||
|
||
use rand::{seq::SliceRandom, Rng, RngCore}; | ||
use serde::Serializer; | ||
use serde::{de::Visitor, Deserialize, Deserializer, Serialize, Serializer}; | ||
use std::{ | ||
fs::File, | ||
io::{Seek, SeekFrom, Write}, | ||
|
@@ -33,6 +33,43 @@ use std::{ | |
time::{Duration, Instant}, | ||
}; | ||
|
||
/// A single hardware metric. | ||
#[derive(Deserialize, Serialize, Debug, Clone, Copy, PartialEq)] | ||
pub enum Metric { | ||
/// SR25519 signature verification. | ||
Sr25519Verify, | ||
/// Blake2-256 hashing algorithm. | ||
Blake2256, | ||
/// Copying data in RAM. | ||
MemCopy, | ||
/// Disk sequential write. | ||
DiskSeqWrite, | ||
/// Disk random write. | ||
DiskRndWrite, | ||
} | ||
|
||
impl Metric { | ||
/// The category of the metric. | ||
pub fn category(&self) -> &'static str { | ||
match self { | ||
Self::Sr25519Verify | Self::Blake2256 => "CPU", | ||
Self::MemCopy => "Memory", | ||
Self::DiskSeqWrite | Self::DiskRndWrite => "Disk", | ||
} | ||
} | ||
|
||
/// The name of the metric. It is always prefixed by the [`self.category()`]. | ||
pub fn name(&self) -> &'static str { | ||
match self { | ||
Self::Sr25519Verify => "SR25519-Verify", | ||
Self::Blake2256 => "BLAKE2-256", | ||
Self::MemCopy => "Copy", | ||
Self::DiskSeqWrite => "Seq Write", | ||
Self::DiskRndWrite => "Rnd Write", | ||
} | ||
} | ||
} | ||
|
||
/// The unit in which the [`Throughput`] (bytes per second) is denoted. | ||
pub enum Unit { | ||
GiBs, | ||
|
@@ -137,6 +174,54 @@ where | |
serializer.serialize_none() | ||
} | ||
|
||
/// Serializes throughput into MiBs and represents it as `f64`. | ||
fn serialize_throughput_as_f64<S>(throughput: &Throughput, serializer: S) -> Result<S::Ok, S::Error> | ||
where | ||
S: Serializer, | ||
{ | ||
serializer.serialize_f64(throughput.as_mibs()) | ||
} | ||
|
||
struct ThroughputVisitor; | ||
impl<'de> Visitor<'de> for ThroughputVisitor { | ||
type Value = Throughput; | ||
|
||
fn expecting(&self, formatter: &mut Formatter) -> fmt::Result { | ||
formatter.write_str("A value that is a f64.") | ||
} | ||
|
||
fn visit_f64<E>(self, value: f64) -> Result<Self::Value, E> | ||
where | ||
E: serde::de::Error, | ||
{ | ||
Ok(Throughput::from_mibs(value)) | ||
} | ||
} | ||
|
||
fn deserialize_throughput<'de, D>(deserializer: D) -> Result<Throughput, D::Error> | ||
where | ||
D: Deserializer<'de>, | ||
{ | ||
Ok(deserializer.deserialize_f64(ThroughputVisitor))? | ||
} | ||
|
||
/// Multiple requirements for the hardware. | ||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] | ||
pub struct Requirements(pub Vec<Requirement>); | ||
|
||
/// A single requirement for the hardware. | ||
#[derive(Deserialize, Serialize, Debug, Clone, Copy, PartialEq)] | ||
pub struct Requirement { | ||
/// The metric to measure. | ||
pub metric: Metric, | ||
/// The minimal throughput that needs to be archived for this requirement. | ||
#[serde( | ||
serialize_with = "serialize_throughput_as_f64", | ||
deserialize_with = "deserialize_throughput" | ||
)] | ||
pub minimum: Throughput, | ||
} | ||
|
||
#[inline(always)] | ||
pub(crate) fn benchmark<E>( | ||
name: &str, | ||
|
@@ -503,8 +588,14 @@ pub fn benchmark_sr25519_verify(limit: ExecutionLimit) -> Throughput { | |
|
||
/// Benchmarks the hardware and returns the results of those benchmarks. | ||
/// | ||
/// Optionally accepts a path to a `scratch_directory` to use to benchmark the disk. | ||
pub fn gather_hwbench(scratch_directory: Option<&Path>) -> HwBench { | ||
/// Optionally accepts a path to a `scratch_directory` to use to benchmark the | ||
/// disk. Also accepts the `requirements` for the hardware benchmark and a | ||
/// boolean to specify if the node is an authority. | ||
pub fn gather_hwbench( | ||
Szegoo marked this conversation as resolved.
Show resolved
Hide resolved
|
||
scratch_directory: Option<&Path>, | ||
requirements: Requirements, | ||
is_authority: bool, | ||
) -> HwBench { | ||
#[allow(unused_mut)] | ||
let mut hwbench = HwBench { | ||
cpu_hashrate_score: benchmark_cpu(DEFAULT_CPU_EXECUTION_LIMIT), | ||
|
@@ -534,9 +625,45 @@ pub fn gather_hwbench(scratch_directory: Option<&Path>) -> HwBench { | |
}; | ||
} | ||
|
||
if is_authority { | ||
ensure_requirements(hwbench.clone(), requirements); | ||
} | ||
|
||
hwbench | ||
} | ||
|
||
fn ensure_requirements(hwbench: HwBench, requirements: Requirements) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also slightly bad that we have the logic here again, but I think its okay. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This function could just have been made public and return a bool. Everybody could then build its own logic. |
||
let mut failed = 0; | ||
for requirement in requirements.0.iter() { | ||
match requirement.metric { | ||
Metric::Blake2256 => | ||
if requirement.minimum > hwbench.cpu_hashrate_score { | ||
failed += 1; | ||
}, | ||
Metric::MemCopy => | ||
if requirement.minimum > hwbench.memory_memcpy_score { | ||
failed += 1; | ||
}, | ||
Metric::DiskSeqWrite => | ||
if let Some(score) = hwbench.disk_sequential_write_score { | ||
if requirement.minimum > score { | ||
failed += 1; | ||
} | ||
}, | ||
Metric::DiskRndWrite => | ||
if let Some(score) = hwbench.disk_random_write_score { | ||
if requirement.minimum > score { | ||
failed += 1; | ||
} | ||
}, | ||
Metric::Sr25519Verify => {}, | ||
} | ||
} | ||
if failed != 0 { | ||
log::warn!("⚠️ Your hardware performance score was less than expected for role 'Authority'. See https://wiki.polkadot.network/docs/maintain-guides-how-to-validate-polkadot#reference-hardware"); | ||
} | ||
Comment on lines
+662
to
+664
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why was this approved @ggwpez? Substrate != Polkadot! Please remove this link or the message entirely. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Removing seems better. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I mean the general idea of printing the url is fine. It should just not be done by Substrate with a Polkadot url :P There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yea I will move the printing into the CLI side of the code. |
||
} | ||
|
||
#[cfg(test)] | ||
mod tests { | ||
use super::*; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's a bit unfortunate that we have to pass this in twice now; once for the sysinfo and once for the machine benchmarking.
But I don't see how it could be done better.