From 828b925fc1a149c4de0e36ad471ee870d8c36065 Mon Sep 17 00:00:00 2001
From: Gaius <gaius.qi@gmail.com>
Date: Wed, 16 Oct 2024 22:44:36 +0800
Subject: [PATCH] feat: add health check for manager addr (#783)

Signed-off-by: Gaius <gaius.qi@gmail.com>
---
 dragonfly-client-config/src/dfdaemon.rs |  3 +++
 dragonfly-client/src/grpc/manager.rs    | 19 ++++++++++++++++---
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/dragonfly-client-config/src/dfdaemon.rs b/dragonfly-client-config/src/dfdaemon.rs
index f96d1cd8827..5810a0de123 100644
--- a/dragonfly-client-config/src/dfdaemon.rs
+++ b/dragonfly-client-config/src/dfdaemon.rs
@@ -514,6 +514,7 @@ impl UploadClient {
             let ca_cert = fs::read(&ca_cert_path).await?;
             let ca_cert = TonicCertificate::from_pem(ca_cert);
 
+            // TODO(gaius): Use trust_anchor to skip hostname verification.
             return Ok(Some(
                 ClientTlsConfig::new()
                     .domain_name(domain_name)
@@ -593,6 +594,7 @@ impl Manager {
             let ca_cert = fs::read(&ca_cert_path).await?;
             let ca_cert = TonicCertificate::from_pem(ca_cert);
 
+            // TODO(gaius): Use trust_anchor to skip hostname verification.
             return Ok(Some(
                 ClientTlsConfig::new()
                     .domain_name(domain_name)
@@ -675,6 +677,7 @@ impl Scheduler {
             let ca_cert = fs::read(&ca_cert_path).await?;
             let ca_cert = TonicCertificate::from_pem(ca_cert);
 
+            // TODO(gaius): Use trust_anchor to skip hostname verification.
             return Ok(Some(
                 ClientTlsConfig::new()
                     .domain_name(domain_name)
diff --git a/dragonfly-client/src/grpc/manager.rs b/dragonfly-client/src/grpc/manager.rs
index e867b853607..1bfb2be34ea 100644
--- a/dragonfly-client/src/grpc/manager.rs
+++ b/dragonfly-client/src/grpc/manager.rs
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+use crate::grpc::health::HealthClient;
 use dragonfly_api::manager::v2::{
     manager_client::ManagerClient as ManagerGRPCClient, DeleteSeedPeerRequest,
     ListSchedulersRequest, ListSchedulersResponse, SeedPeer, UpdateSeedPeerRequest,
@@ -25,6 +26,7 @@ use dragonfly_client_core::{
 };
 use std::sync::Arc;
 use tonic::transport::Channel;
+use tonic_health::pb::health_check_response::ServingStatus;
 use tracing::{error, instrument, warn};
 use url::Url;
 
@@ -48,11 +50,22 @@ impl ManagerClient {
             })?
             .to_string();
 
-        let channel = match config
+        let client_tls_config = config
             .manager
             .load_client_tls_config(domain_name.as_str())
-            .await?
-        {
+            .await?;
+
+        let health_client = HealthClient::new(addr.as_str(), client_tls_config.clone()).await?;
+        match health_client.check().await {
+            Ok(resp) => {
+                if resp.status != ServingStatus::Serving as i32 {
+                    return Err(Error::AvailableManagerNotFound);
+                }
+            }
+            Err(err) => return Err(err),
+        }
+
+        let channel = match client_tls_config {
             Some(client_tls_config) => Channel::from_shared(addr.clone())
                 .map_err(|_| Error::InvalidURI(addr.clone()))?
                 .tls_config(client_tls_config)?