diff --git a/io-engine/src/bdev/nexus/nexus_persistence.rs b/io-engine/src/bdev/nexus/nexus_persistence.rs index 2f3e6837a..92d49b36c 100644 --- a/io-engine/src/bdev/nexus/nexus_persistence.rs +++ b/io-engine/src/bdev/nexus/nexus_persistence.rs @@ -104,9 +104,12 @@ impl<'n> Nexus<'n> { }; nexus_info.children.push(child_info); }); - // We started with this child because it was healthy in etcd, or isn't there at all. - // Being unhealthy here means it is undergoing a fault/retire before nexus is open. - if nexus_info.children.len() == 1 && !nexus_info.children[0].healthy { + // We started with this child because it was healthy in etcd, or + // isn't there at all. Being unhealthy here + // means it is undergoing a fault/retire before nexus is open. + if nexus_info.children.len() == 1 + && !nexus_info.children[0].healthy + { warn!("{self:?} Not persisting: the only child went unhealthy during nexus creation"); return Err(Error::NexusCreate { name: self.name.clone(), @@ -224,6 +227,7 @@ impl<'n> Nexus<'n> { }; let mut retry = PersistentStore::retries(); + let mut logged = false; loop { let Err(err) = PersistentStore::put(&key, &info.inner).await else { trace!(?key, "{self:?}: the state was saved successfully"); @@ -238,10 +242,13 @@ impl<'n> Nexus<'n> { }); } - error!( - "{self:?}: failed to persist nexus information, \ - will retry ({retry} left): {err}" - ); + if !logged { + error!( + "{self:?}: failed to persist nexus information, \ + will retry ({retry} left): {err}" + ); + logged = true; + } // Allow some time for the connection to the persistent // store to be re-established before retrying the operation. diff --git a/io-engine/src/core/env.rs b/io-engine/src/core/env.rs index e7e7e3f2c..703524656 100644 --- a/io-engine/src/core/env.rs +++ b/io-engine/src/core/env.rs @@ -183,7 +183,7 @@ pub struct MayastorCliArgs { )] /// Persistent store timeout. 
pub ps_timeout: Duration, - #[clap(long = "ps-retries", default_value = "30")] + #[clap(long = "ps-retries", default_value = "255")] /// Persistent store operation retries. pub ps_retries: u8, #[clap(long = "bdev-pool-size", default_value = "65535")]