diff --git a/hstream-store/cbits/logdevice/ld_health_checker.cpp b/hstream-store/cbits/logdevice/ld_health_checker.cpp index c3f9e22a3..3500d7189 100644 --- a/hstream-store/cbits/logdevice/ld_health_checker.cpp +++ b/hstream-store/cbits/logdevice/ld_health_checker.cpp @@ -45,7 +45,7 @@ class LdChecker { auto duration = std::chrono::duration_cast(end - start) .count(); - ld_warning("GetClusterState took %ld ms", duration); + ld_debug("GetClusterState took %ld ms", duration); // getClusterState(*client_impl, *nodes_configuration); @@ -69,7 +69,7 @@ class LdChecker { if (!unhealthy_nodes_set.empty()) { ld_warning("Cluster has %lu unhealthy nodes:", unhealthy_nodes_set.size()); - // printUnhealthyNodes(*nodes_configuration, unhealthy_nodes_set); + printUnhealthyNodes(*nodes_configuration, unhealthy_nodes_set); } return unhealthy_nodes_set.size() <= unhealthy_node_limit; @@ -103,7 +103,7 @@ class LdChecker { res.node_id, res.addr.c_str(), st, deadNodes.c_str(), unhealthyNodes.c_str()); } - ld_warning("Check return unhealthy nodes: [%s]", + ld_warning("Check return unhealthy nodes index: [%s]", folly::join(',', sets).c_str()); } diff --git a/hstream/src/HStream/Server/CacheStore.hs b/hstream/src/HStream/Server/CacheStore.hs index 68984e3ce..49349d544 100644 --- a/hstream/src/HStream/Server/CacheStore.hs +++ b/hstream/src/HStream/Server/CacheStore.hs @@ -313,7 +313,7 @@ appendHStoreWithRetry ldClient shardId payload cmpStrategy dumpState = do loop cnt' _ -> loop exitNum | cnt == exitNum = do - Log.warning $ "Dump to shardId " <> Log.build shardId <> " failed because cache store is not dumping, will retry later." + Log.warning $ "Dump to shardId " <> Log.build shardId <> " failed because cache store is not in dumping state, will retry later." return Nothing | otherwise = do Log.fatal $ "Dump to shardId " <> Log.build shardId <> " failed after exausting the retry attempts, drop the record." diff --git a/hstream/src/HStream/Server/HealthMonitor.hs b/hstream/src/HStream/Server/HealthMonitor.hs index 5e8649271..e87461257 100644 --- a/hstream/src/HStream/Server/HealthMonitor.hs +++ b/hstream/src/HStream/Server/HealthMonitor.hs @@ -40,7 +40,7 @@ startMonitor :: ServerContext -> HealthMonitor -> Int -> IO () startMonitor sc hm delaySecond = forever $ do threadDelay $ delaySecond * 1000 * 1000 start <- getCurrentTime - Log.debug $ "========== docheck start..." <> " in " <> Log.build (show start) + -- Log.debug $ "========== docheck start..." <> " in " <> Log.build (show start) res <- try @SomeException $ docheck sc hm end <- getCurrentTime case res of @@ -49,9 +49,9 @@ startMonitor sc hm delaySecond = forever $ do let diff = nominalDiffTimeToSeconds $ diffUTCTime end start when (diff > 1) $ Log.warning $ "Monitor check return slow, total use " <> Log.build (show diff) <> "s" - Log.debug $ "========== docheck end..." <> " in " <> Log.build (show end) - <> ", with start time: " <> Log.build (show start) - <> ", duration: " <> Log.build (show diff) + Log.debug $ "Health monitor finish check in " <> Log.build (show end) + <> ", with start time: " <> Log.build (show start) + <> ", duration: " <> Log.build (show diff) docheck :: ServerContext -> HealthMonitor -> IO () docheck sc@ServerContext{..} hm = do @@ -80,10 +80,9 @@ checkLdCluster HealthMonitor{..} = do start <- getTime Monotonic res <- S.isLdClusterHealthy ldChecker ldUnhealthyNodesLimit end <- getTime Monotonic - let sDuration = toNanoSecs (diffTimeSpec end start) `div` 1000000 - if sDuration > 1000 - then Log.warning $ "CheckLdCluster slow, total time " <> Log.build sDuration <> "ms" - else Log.debug $ "Finish checkLdClusster, total time " <> Log.build sDuration <> "ms" + let msDuration = toNanoSecs (diffTimeSpec end start) `div` 1000000 + when (msDuration > 1000) $ + Log.warning $ "CheckLdCluster return slow, total time " <> Log.build msDuration <> "ms" return res checkMeta :: HealthMonitor -> IO Bool @@ -91,10 +90,9 @@ checkMeta HealthMonitor{..} | ZKHandle c <- metaHandle = do start <- getTime Monotonic res <- checkRecoverable =<< unsafeGetZHandle c end <- getTime Monotonic - let sDuration = toNanoSecs (diffTimeSpec end start) `div` 1000000 - if sDuration > 1000 - then Log.warning $ "CheckMeta slow, total time " <> Log.build sDuration <> "ms" - else Log.debug $ "Finish checkMeta, total time " <> Log.build sDuration <> "ms" + let msDuration = toNanoSecs (diffTimeSpec end start) `div` 1000000 + when (msDuration > 1000) $ + Log.warning $ "CheckMeta return slow, total time " <> Log.build msDuration <> "ms" return res checkMeta HealthMonitor{..} | _ <- metaHandle = do return True