triton-inference-server · rmccorm4 · Apr 29, 2023 · Apr 27, 2023 · Apr 27, 2023 · Apr 27, 2023
diff --git a/src/c++/library/http_client.cc b/src/c++/library/http_client.cc
@@ -1468,7 +1468,12 @@ InferenceServerHttpClient::Infer(
   if (curl_status == CURLE_OPERATION_TIMEDOUT) {
     sync_request->http_code_ = 499;
   } else if (curl_status != CURLE_OK) {
-    sync_request->http_code_ = 400;
+    if (verbose_) {
+      std::cout << "Curl failed with return code: " << curl_status << std::endl;
+    }
+    return Error(
+        "HTTP client failed [400]: " +
+        std::string(curl_easy_strerror(curl_status)));
   } else {
     curl_easy_getinfo(
         easy_handle_, CURLINFO_RESPONSE_CODE, &sync_request->http_code_);
@@ -1488,7 +1493,6 @@ InferenceServerHttpClient::Infer(
   return err;
 }
 
-
 Error
 InferenceServerHttpClient::AsyncInfer(
     OnCompleteFn callback, const InferOptions& options,

diff --git a/src/c++/tests/memory_leak_test.cc b/src/c++/tests/memory_leak_test.cc
@@ -76,7 +76,7 @@ ValidateShapeAndDatatype(
 void
 ValidateResult(
     const std::shared_ptr<tc::InferResult> result,
-    std::vector<int32_t>& input0_data)
+    const std::vector<int32_t>& input0_data)
 {
   // Validate the results...
   ValidateShapeAndDatatype("OUTPUT0", result);
@@ -105,70 +105,93 @@ ValidateResult(
   std::cout << result->DebugString() << std::endl;
 }
 
+// Checks the request status of 'results_ptr'. A failed request is reported on
+// stderr and ends the process; otherwise ValidateResult() checks the outputs.
+void
+ValidateResponse(
+    std::shared_ptr<tc::InferResult> results_ptr,
+    const std::vector<int32_t>& input0_data)
+{
+  if (!results_ptr->RequestStatus().IsOk()) {
+    std::cerr << "error: Inference failed: " << results_ptr->RequestStatus()
+              << std::endl;
+    exit(1);
+  }
+  ValidateResult(results_ptr, input0_data);
+}
 
+// Runs a synchronous inference through 'client', retrying when it fails.
+//
+// If the host runs out of available sockets due to TIME_WAIT, sleeping between
+// attempts gives time for sockets to become available. After the first
+// attempt, up to 'max_retries' further attempts are made; if all of them fail
+// the last error is printed and the process exits with status 1.
+template <typename Client>
+void
+InferWithRetries(
+    const std::unique_ptr<Client>& client, tc::InferResult** results,
+    tc::InferOptions& options, std::vector<tc::InferInput*>& inputs,
+    std::vector<const tc::InferRequestedOutput*>& outputs)
+{
+  constexpr int max_retries = 5;
+  constexpr int sleep_secs = 60;
+
+  // The loop body only runs when the previous attempt failed, so a request
+  // that succeeds on the first try returns without sleeping.
+  auto err = client->Infer(results, options, inputs, outputs);
+  for (int attempt = 1; !err.IsOk() && attempt <= max_retries; ++attempt) {
+    std::cerr << "Error: " << err << std::endl;
+    std::cerr << "Sleeping for " << sleep_secs
+              << " seconds and retrying. [Attempt: " << attempt << "/"
+              << max_retries << "]" << std::endl;
+    sleep(sleep_secs);
+    err = client->Infer(results, options, inputs, outputs);
+  }
+
+  if (err.IsOk()) {
+    return;
+  }
+
+  // Surface the error from the final attempt too, so the cause of the last
+  // failure is not lost when giving up.
+  std::cerr << "Error: " << err << std::endl;
+  std::cerr << "error: Exceeded max tries [" << max_retries
+            << "] on inference without success" << std::endl;
+  exit(1);
+}
+
+// Runs 'repetitions' sync inferences and validates each response. Client is
+// tc::InferenceServerHttpClient or tc::InferenceServerGrpcClient.
+template <typename Client>
+void
+RunSyncInfer(
     std::vector<tc::InferInput*>& inputs,
     std::vector<const tc::InferRequestedOutput*>& outputs,
     tc::InferOptions& options, std::vector<int32_t>& input0_data, bool reuse,
-    std::string url, bool verbose, std::string protocol, uint32_t repetitions)
+    std::string url, bool verbose, uint32_t repetitions)
 {
   // If re-use is enabled then use these client objects else use new objects for
   // each inference request.
-  std::unique_ptr<tc::InferenceServerGrpcClient> grpc_client_reuse;
-  std::unique_ptr<tc::InferenceServerHttpClient> http_client_reuse;
+  std::unique_ptr<Client> client_reuse;
+  if (reuse) {
+    FAIL_IF_ERR(
+        Client::Create(&client_reuse, url, verbose), "unable to create client");
+  }
 
   for (size_t i = 0; i < repetitions; ++i) {
     tc::InferResult* results;
-    if (!reuse) {
-      if (protocol == "grpc") {
-        std::unique_ptr<tc::InferenceServerGrpcClient> grpc_client;
-        FAIL_IF_ERR(
-            tc::InferenceServerGrpcClient::Create(&grpc_client, url, verbose),
-            "unable to create grpc client");
-        FAIL_IF_ERR(
-            grpc_client->Infer(&results, options, inputs, outputs),
-            "unable to run model");
-      } else {
-        std::unique_ptr<tc::InferenceServerHttpClient> http_client;
-        FAIL_IF_ERR(
-            tc::InferenceServerHttpClient::Create(&http_client, url, verbose),
-            "unable to create http client");
-        FAIL_IF_ERR(
-            http_client->Infer(&results, options, inputs, outputs),
-            "unable to run model");
-      }
+    if (reuse) {  // shared client created above; no retry wrapper
+      FAIL_IF_ERR(
+          client_reuse->Infer(&results, options, inputs, outputs),
+          "unable to run model");
     } else {
-      if (protocol == "grpc") {
-        FAIL_IF_ERR(
-            tc::InferenceServerGrpcClient::Create(
-                &grpc_client_reuse, url, verbose),
-            "unable to create grpc client");
-        FAIL_IF_ERR(
-            grpc_client_reuse->Infer(&results, options, inputs, outputs),
-            "unable to run model");
-      } else {
-        FAIL_IF_ERR(
-            tc::InferenceServerHttpClient::Create(
-                &http_client_reuse, url, verbose),
-            "unable to create http client");
-        FAIL_IF_ERR(
-            http_client_reuse->Infer(&results, options, inputs, outputs),
-            "unable to run model");
-      }
+      std::unique_ptr<Client> client;  // new client for this repetition
+      FAIL_IF_ERR(
+          Client::Create(&client, url, verbose), "unable to create client");
+      InferWithRetries<Client>(client, &results, options, inputs, outputs);
     }
 
-    std::shared_ptr<tc::InferResult> results_ptr;
-    results_ptr.reset(results);
-
-    // Validate results
-    if (results_ptr->RequestStatus().IsOk()) {
-      ValidateResult(results_ptr, input0_data);
-    } else {
-      std::cerr << "error: Inference failed: " << results_ptr->RequestStatus()
-                << std::endl;
-      exit(1);
-    }
+    std::shared_ptr<tc::InferResult> results_ptr(results);
+    ValidateResponse(results_ptr, input0_data);
   }
 }
 
@@ -186,6 +209,10 @@ Usage(char** argv, const std::string& msg = std::string())
   std::cerr << "\t-t <client timeout in microseconds>" << std::endl;
   std::cerr << "\t-r <number of repetitions for inference> default is 100."
             << std::endl;
+  std::cerr
+      << "\t-R Re-use the same client for each repetition. Without "
+         "this flag, the default is to create a new client on each repetition."
+      << std::endl;
   std::cerr << std::endl;
 
   exit(1);
@@ -293,9 +320,18 @@ main(int argc, char** argv)
   std::vector<const tc::InferRequestedOutput*> outputs = {output0_ptr.get()};
 
   // Send 'repetitions' number of inference requests to the inference server.
-  RunSynchronousInference(
-      inputs, outputs, options, input0_data, reuse, url, verbose, protocol,
-      repetitions);
+  if (protocol == "http") {
+    RunSyncInfer<tc::InferenceServerHttpClient>(
+        inputs, outputs, options, input0_data, reuse, url, verbose,
+        repetitions);
+  } else if (protocol == "grpc") {
+    RunSyncInfer<tc::InferenceServerGrpcClient>(
+        inputs, outputs, options, input0_data, reuse, url, verbose,
+        repetitions);
+  } else {
+    std::cerr << "Invalid protocol: " << protocol << std::endl;
+    return 1;
+  }
 
   return 0;
 }