From 655904ee8ce51851644bbd956092bfe8f4735ced Mon Sep 17 00:00:00 2001 From: Ryan McCormick Date: Wed, 26 Apr 2023 18:29:36 -0700 Subject: [PATCH 1/4] (1) Refactor memory_leak_test: Fix re-use behavior, template helper functions. (2) Update http_client to propagate error on client-side failure --- src/c++/library/http_client.cc | 8 +- src/c++/tests/memory_leak_test.cc | 142 ++++++++++++++++++------------ 2 files changed, 94 insertions(+), 56 deletions(-) diff --git a/src/c++/library/http_client.cc b/src/c++/library/http_client.cc index 6c5c7367e..cc6306f17 100644 --- a/src/c++/library/http_client.cc +++ b/src/c++/library/http_client.cc @@ -1468,7 +1468,12 @@ InferenceServerHttpClient::Infer( if (curl_status == CURLE_OPERATION_TIMEDOUT) { sync_request->http_code_ = 499; } else if (curl_status != CURLE_OK) { - sync_request->http_code_ = 400; + if (verbose_) { + std::cout << "Curl failed with return code: " << curl_status << std::endl; + } + return Error( + "HTTP client failed [400]: " + + std::string(curl_easy_strerror(curl_status))); } else { curl_easy_getinfo( easy_handle_, CURLINFO_RESPONSE_CODE, &sync_request->http_code_); @@ -1488,7 +1493,6 @@ InferenceServerHttpClient::Infer( return err; } - Error InferenceServerHttpClient::AsyncInfer( OnCompleteFn callback, const InferOptions& options, diff --git a/src/c++/tests/memory_leak_test.cc b/src/c++/tests/memory_leak_test.cc index 042d2299b..f49f8a38f 100644 --- a/src/c++/tests/memory_leak_test.cc +++ b/src/c++/tests/memory_leak_test.cc @@ -76,7 +76,7 @@ ValidateShapeAndDatatype( void ValidateResult( const std::shared_ptr result, - std::vector& input0_data) + const std::vector& input0_data) { // Validate the results... 
ValidateShapeAndDatatype("OUTPUT0", result); @@ -105,70 +105,91 @@ ValidateResult( std::cout << result->DebugString() << std::endl; } +void +ValidateResponse( + std::shared_ptr results_ptr, + const std::vector& input0_data) +{ + // Validate results + if (results_ptr->RequestStatus().IsOk()) { + ValidateResult(results_ptr, input0_data); + } else { + std::cerr << "error: Inference failed: " << results_ptr->RequestStatus() + << std::endl; + exit(1); + } +} +template void -RunSynchronousInference( +InferWithRetries( + const std::unique_ptr& client, tc::InferResult** results, + tc::InferOptions& options, std::vector& inputs, + std::vector& outputs) +{ + // Exit early if we succeed first try + auto err = client->Infer(results, options, inputs, outputs); + if (err.IsOk()) { + return; + } + + // If the host runs out of available sockets due to TIME_WAIT, sleep and + // retry on failure to give time for sockets to become available. + int max_retries = 5; + int sleep_secs = 60; + bool success = false; + for (int i = 0; i < max_retries; i++) { + std::cerr << "Error: " << err << std::endl; + std::cerr << "Sleeping for " << sleep_secs + << " seconds and retrying. [Attempt: " << i + 1 << "/" + << max_retries << "]" << std::endl; + sleep(sleep_secs); + + err = client->Infer(results, options, inputs, outputs); + if (err.IsOk()) { + success = true; + break; + } + } + + if (!success) { + std::cerr << "error: Exceeded max tries [" << max_retries + << "] on inference without success" << std::endl; + exit(1); + } +} + +// T should be tc::InferenceServerHttpClient or tc::InferenceServerGrpcClient +template +void +RunSyncInfer( std::vector& inputs, std::vector& outputs, tc::InferOptions& options, std::vector& input0_data, bool reuse, - std::string url, bool verbose, std::string protocol, uint32_t repetitions) + std::string url, bool verbose, uint32_t repetitions) { // If re-use is enabled then use these client objects else use new objects for // each inference request. 
- std::unique_ptr grpc_client_reuse; - std::unique_ptr http_client_reuse; + std::unique_ptr client_reuse; + if (reuse) { + FAIL_IF_ERR( + T::Create(&client_reuse, url, verbose), "unable to create client"); + } for (size_t i = 0; i < repetitions; ++i) { tc::InferResult* results; - if (!reuse) { - if (protocol == "grpc") { - std::unique_ptr grpc_client; - FAIL_IF_ERR( - tc::InferenceServerGrpcClient::Create(&grpc_client, url, verbose), - "unable to create grpc client"); - FAIL_IF_ERR( - grpc_client->Infer(&results, options, inputs, outputs), - "unable to run model"); - } else { - std::unique_ptr http_client; - FAIL_IF_ERR( - tc::InferenceServerHttpClient::Create(&http_client, url, verbose), - "unable to create http client"); - FAIL_IF_ERR( - http_client->Infer(&results, options, inputs, outputs), - "unable to run model"); - } + if (reuse) { + FAIL_IF_ERR( + client_reuse->Infer(&results, options, inputs, outputs), + "unable to run model"); } else { - if (protocol == "grpc") { - FAIL_IF_ERR( - tc::InferenceServerGrpcClient::Create( - &grpc_client_reuse, url, verbose), - "unable to create grpc client"); - FAIL_IF_ERR( - grpc_client_reuse->Infer(&results, options, inputs, outputs), - "unable to run model"); - } else { - FAIL_IF_ERR( - tc::InferenceServerHttpClient::Create( - &http_client_reuse, url, verbose), - "unable to create http client"); - FAIL_IF_ERR( - http_client_reuse->Infer(&results, options, inputs, outputs), - "unable to run model"); - } + std::unique_ptr client; + FAIL_IF_ERR(T::Create(&client, url, verbose), "unable to create client"); + InferWithRetries(client, &results, options, inputs, outputs); } - std::shared_ptr results_ptr; - results_ptr.reset(results); - - // Validate results - if (results_ptr->RequestStatus().IsOk()) { - ValidateResult(results_ptr, input0_data); - } else { - std::cerr << "error: Inference failed: " << results_ptr->RequestStatus() - << std::endl; - exit(1); - } + std::shared_ptr results_ptr(results); + ValidateResponse(results_ptr, 
input0_data); } } @@ -186,6 +207,10 @@ Usage(char** argv, const std::string& msg = std::string()) std::cerr << "\t-t " << std::endl; std::cerr << "\t-r default is 100." << std::endl; + std::cerr + << "\t-R Re-use the same client for each repetition. Without " + "this flag, the default is to create a new client on each repetition." + << std::endl; std::cerr << std::endl; exit(1); @@ -293,9 +318,18 @@ main(int argc, char** argv) std::vector outputs = {output0_ptr.get()}; // Send 'repetitions' number of inference requests to the inference server. - RunSynchronousInference( - inputs, outputs, options, input0_data, reuse, url, verbose, protocol, - repetitions); + if (protocol == "http") { + RunSyncInfer( + inputs, outputs, options, input0_data, reuse, url, verbose, + repetitions); + } else if (protocol == "grpc") { + RunSyncInfer( + inputs, outputs, options, input0_data, reuse, url, verbose, + repetitions); + } else { + std::cerr << "Invalid protocol: " << protocol << std::endl; + return 1; + } return 0; } From 1e6edd4e55421b74d1a8e36bd571d51917241206 Mon Sep 17 00:00:00 2001 From: Ryan McCormick Date: Wed, 26 Apr 2023 19:58:47 -0700 Subject: [PATCH 2/4] template T -> Client --- src/c++/tests/memory_leak_test.cc | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/c++/tests/memory_leak_test.cc b/src/c++/tests/memory_leak_test.cc index f49f8a38f..03e1d7d74 100644 --- a/src/c++/tests/memory_leak_test.cc +++ b/src/c++/tests/memory_leak_test.cc @@ -120,10 +120,10 @@ ValidateResponse( } } -template +template void InferWithRetries( - const std::unique_ptr& client, tc::InferResult** results, + const std::unique_ptr& client, tc::InferResult** results, tc::InferOptions& options, std::vector& inputs, std::vector& outputs) { @@ -159,8 +159,9 @@ InferWithRetries( } } -// T should be tc::InferenceServerHttpClient or tc::InferenceServerGrpcClient -template +// Client should be tc::InferenceServerHttpClient or +// tc::InferenceServerGrpcClient 
+template void RunSyncInfer( std::vector& inputs, @@ -170,10 +171,10 @@ RunSyncInfer( { // If re-use is enabled then use these client objects else use new objects for // each inference request. - std::unique_ptr client_reuse; + std::unique_ptr client_reuse; if (reuse) { FAIL_IF_ERR( - T::Create(&client_reuse, url, verbose), "unable to create client"); + Client::Create(&client_reuse, url, verbose), "unable to create client"); } for (size_t i = 0; i < repetitions; ++i) { @@ -183,9 +184,10 @@ RunSyncInfer( client_reuse->Infer(&results, options, inputs, outputs), "unable to run model"); } else { - std::unique_ptr client; - FAIL_IF_ERR(T::Create(&client, url, verbose), "unable to create client"); - InferWithRetries(client, &results, options, inputs, outputs); + std::unique_ptr client; + FAIL_IF_ERR( + Client::Create(&client, url, verbose), "unable to create client"); + InferWithRetries(client, &results, options, inputs, outputs); } std::shared_ptr results_ptr(results); From bd31a941f4ba7ea72649ed93e43bd145ef061f31 Mon Sep 17 00:00:00 2001 From: Ryan McCormick Date: Thu, 27 Apr 2023 15:26:43 -0700 Subject: [PATCH 3/4] Review feedback: Return error early on timeout, std::cerr errors, simplify reuse logic, clarify HTTP millisecond timeout limitation --- src/c++/library/common.h | 8 ++++--- src/c++/library/http_client.cc | 14 ++++++------ src/c++/tests/memory_leak_test.cc | 37 +++++++++---------------------- 3 files changed, 23 insertions(+), 36 deletions(-) diff --git a/src/c++/library/common.h b/src/c++/library/common.h index 2aa3a7879..5127ad281 100644 --- a/src/c++/library/common.h +++ b/src/c++/library/common.h @@ -209,13 +209,15 @@ struct InferOptions { /// https://github.com/triton-inference-server/server/blob/main/docs/user_guide/model_configuration.md#dynamic-batcher uint64_t server_timeout_; // The maximum end-to-end time, in microseconds, the request is allowed - // to take. Note the HTTP library only offer the precision upto - // milliseconds. 
The client will abort request when the specified time - // elapses. The request will return error with message "Deadline Exceeded". + // to take. The client will abort request when the specified time elapses. + // The request will return error with message "Deadline Exceeded". // The default value is 0 which means client will wait for the // response from the server. This option is not supported for streaming // requests. Instead see 'stream_timeout' argument in // InferenceServerGrpcClient::StartStream(). + // NOTE: the HTTP client library only offers millisecond precision, so a + // timeout < 1000 microseconds will be rounded down to 0 milliseconds and have + // no effect. uint64_t client_timeout_; }; diff --git a/src/c++/library/http_client.cc b/src/c++/library/http_client.cc index cc6306f17..c87c24d0b 100644 --- a/src/c++/library/http_client.cc +++ b/src/c++/library/http_client.cc @@ -1466,15 +1466,15 @@ InferenceServerHttpClient::Infer( // RECV_END will be set. auto curl_status = curl_easy_perform(easy_handle_); if (curl_status == CURLE_OPERATION_TIMEDOUT) { - sync_request->http_code_ = 499; - } else if (curl_status != CURLE_OK) { - if (verbose_) { - std::cout << "Curl failed with return code: " << curl_status << std::endl; - } + std::cerr << "Curl failed with return code: " << curl_status << std::endl; return Error( - "HTTP client failed [400]: " + + "HTTP client failed (Deadline Exceeded): " + std::string(curl_easy_strerror(curl_status))); - } else { + } else if (curl_status != CURLE_OK) { + std::cerr << "Curl failed with return code: " << curl_status << std::endl; + return Error( + "HTTP client failed: " + std::string(curl_easy_strerror(curl_status))); + } else { // Success curl_easy_getinfo( easy_handle_, CURLINFO_RESPONSE_CODE, &sync_request->http_code_); } diff --git a/src/c++/tests/memory_leak_test.cc b/src/c++/tests/memory_leak_test.cc index 03e1d7d74..f70487b64 100644 --- a/src/c++/tests/memory_leak_test.cc +++ b/src/c++/tests/memory_leak_test.cc @@ 
-50,7 +50,7 @@ namespace { void ValidateShapeAndDatatype( - const std::string& name, std::shared_ptr result) + const std::string& name, const std::shared_ptr result) { std::vector shape; FAIL_IF_ERR( @@ -107,7 +107,7 @@ ValidateResult( void ValidateResponse( - std::shared_ptr results_ptr, + const std::shared_ptr results_ptr, const std::vector& input0_data) { // Validate results @@ -127,32 +127,24 @@ InferWithRetries( tc::InferOptions& options, std::vector& inputs, std::vector& outputs) { - // Exit early if we succeed first try auto err = client->Infer(results, options, inputs, outputs); - if (err.IsOk()) { - return; - } // If the host runs out of available sockets due to TIME_WAIT, sleep and // retry on failure to give time for sockets to become available. int max_retries = 5; int sleep_secs = 60; - bool success = false; - for (int i = 0; i < max_retries; i++) { + for (int i = 0; !err.IsOk() && i < max_retries; i++) { std::cerr << "Error: " << err << std::endl; std::cerr << "Sleeping for " << sleep_secs << " seconds and retrying. [Attempt: " << i + 1 << "/" << max_retries << "]" << std::endl; sleep(sleep_secs); + // Retry and break from loop on success err = client->Infer(results, options, inputs, outputs); - if (err.IsOk()) { - success = true; - break; - } } - if (!success) { + if (!err.IsOk()) { std::cerr << "error: Exceeded max tries [" << max_retries << "] on inference without success" << std::endl; exit(1); @@ -171,25 +163,18 @@ RunSyncInfer( { // If re-use is enabled then use these client objects else use new objects for // each inference request. 
- std::unique_ptr client_reuse; - if (reuse) { - FAIL_IF_ERR( - Client::Create(&client_reuse, url, verbose), "unable to create client"); - } + std::unique_ptr client; + FAIL_IF_ERR(Client::Create(&client, url, verbose), "unable to create client"); for (size_t i = 0; i < repetitions; ++i) { - tc::InferResult* results; - if (reuse) { - FAIL_IF_ERR( - client_reuse->Infer(&results, options, inputs, outputs), - "unable to run model"); - } else { - std::unique_ptr client; + if (!reuse) { + // Create new client connection on every request if reuse flag not set FAIL_IF_ERR( Client::Create(&client, url, verbose), "unable to create client"); - InferWithRetries(client, &results, options, inputs, outputs); } + tc::InferResult* results; + InferWithRetries(client, &results, options, inputs, outputs); std::shared_ptr results_ptr(results); ValidateResponse(results_ptr, input0_data); } From 6d34b76634f30780068a020f52ada0727d4bfd57 Mon Sep 17 00:00:00 2001 From: Ryan McCormick Date: Fri, 28 Apr 2023 10:12:19 -0700 Subject: [PATCH 4/4] Remove cerrs from feedback, update copyright --- src/c++/library/common.h | 2 +- src/c++/library/http_client.cc | 4 +--- src/c++/tests/memory_leak_test.cc | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/c++/library/common.h b/src/c++/library/common.h index 5127ad281..df36a2bdb 100644 --- a/src/c++/library/common.h +++ b/src/c++/library/common.h @@ -1,4 +1,4 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions diff --git a/src/c++/library/http_client.cc b/src/c++/library/http_client.cc index c87c24d0b..a254a4a84 100644 --- a/src/c++/library/http_client.cc +++ b/src/c++/library/http_client.cc @@ -1,4 +1,4 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. +// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -1466,12 +1466,10 @@ InferenceServerHttpClient::Infer( // RECV_END will be set. auto curl_status = curl_easy_perform(easy_handle_); if (curl_status == CURLE_OPERATION_TIMEDOUT) { - std::cerr << "Curl failed with return code: " << curl_status << std::endl; return Error( "HTTP client failed (Deadline Exceeded): " + std::string(curl_easy_strerror(curl_status))); } else if (curl_status != CURLE_OK) { - std::cerr << "Curl failed with return code: " << curl_status << std::endl; return Error( "HTTP client failed: " + std::string(curl_easy_strerror(curl_status))); } else { // Success diff --git a/src/c++/tests/memory_leak_test.cc b/src/c++/tests/memory_leak_test.cc index f70487b64..829d07360 100644 --- a/src/c++/tests/memory_leak_test.cc +++ b/src/c++/tests/memory_leak_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +// Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions