From 655904ee8ce51851644bbd956092bfe8f4735ced Mon Sep 17 00:00:00 2001 From: Ryan McCormick Date: Wed, 26 Apr 2023 18:29:36 -0700 Subject: [PATCH 1/4] (1) Refactor memory_leak_test: Fix re-use behavior, template helper functions. (2) Update http_client to propagate error on client-side failure --- src/c++/library/http_client.cc | 8 +- src/c++/tests/memory_leak_test.cc | 142 ++++++++++++++++++------------ 2 files changed, 94 insertions(+), 56 deletions(-) diff --git a/src/c++/library/http_client.cc b/src/c++/library/http_client.cc index 6c5c7367e..cc6306f17 100644 --- a/src/c++/library/http_client.cc +++ b/src/c++/library/http_client.cc @@ -1468,7 +1468,12 @@ InferenceServerHttpClient::Infer( if (curl_status == CURLE_OPERATION_TIMEDOUT) { sync_request->http_code_ = 499; } else if (curl_status != CURLE_OK) { - sync_request->http_code_ = 400; + if (verbose_) { + std::cout << "Curl failed with return code: " << curl_status << std::endl; + } + return Error( + "HTTP client failed [400]: " + + std::string(curl_easy_strerror(curl_status))); } else { curl_easy_getinfo( easy_handle_, CURLINFO_RESPONSE_CODE, &sync_request->http_code_); @@ -1488,7 +1493,6 @@ InferenceServerHttpClient::Infer( return err; } - Error InferenceServerHttpClient::AsyncInfer( OnCompleteFn callback, const InferOptions& options, diff --git a/src/c++/tests/memory_leak_test.cc b/src/c++/tests/memory_leak_test.cc index 042d2299b..f49f8a38f 100644 --- a/src/c++/tests/memory_leak_test.cc +++ b/src/c++/tests/memory_leak_test.cc @@ -76,7 +76,7 @@ ValidateShapeAndDatatype( void ValidateResult( const std::shared_ptr result, - std::vector& input0_data) + const std::vector& input0_data) { // Validate the results... 
ValidateShapeAndDatatype("OUTPUT0", result); @@ -105,70 +105,91 @@ ValidateResult( std::cout << result->DebugString() << std::endl; } +void +ValidateResponse( + std::shared_ptr results_ptr, + const std::vector& input0_data) +{ + // Validate results + if (results_ptr->RequestStatus().IsOk()) { + ValidateResult(results_ptr, input0_data); + } else { + std::cerr << "error: Inference failed: " << results_ptr->RequestStatus() + << std::endl; + exit(1); + } +} +template void -RunSynchronousInference( +InferWithRetries( + const std::unique_ptr& client, tc::InferResult** results, + tc::InferOptions& options, std::vector& inputs, + std::vector& outputs) +{ + // Exit early if we succeed first try + auto err = client->Infer(results, options, inputs, outputs); + if (err.IsOk()) { + return; + } + + // If the host runs out of available sockets due to TIME_WAIT, sleep and + // retry on failure to give time for sockets to become available. + int max_retries = 5; + int sleep_secs = 60; + bool success = false; + for (int i = 0; i < max_retries; i++) { + std::cerr << "Error: " << err << std::endl; + std::cerr << "Sleeping for " << sleep_secs + << " seconds and retrying. [Attempt: " << i + 1 << "/" + << max_retries << "]" << std::endl; + sleep(sleep_secs); + + err = client->Infer(results, options, inputs, outputs); + if (err.IsOk()) { + success = true; + break; + } + } + + if (!success) { + std::cerr << "error: Exceeded max tries [" << max_retries + << "] on inference without success" << std::endl; + exit(1); + } +} + +// T should be tc::InferenceServerHttpClient or tc::InferenceServerGrpcClient +template +void +RunSyncInfer( std::vector& inputs, std::vector& outputs, tc::InferOptions& options, std::vector& input0_data, bool reuse, - std::string url, bool verbose, std::string protocol, uint32_t repetitions) + std::string url, bool verbose, uint32_t repetitions) { // If re-use is enabled then use these client objects else use new objects for // each inference request. 
- std::unique_ptr grpc_client_reuse; - std::unique_ptr http_client_reuse; + std::unique_ptr client_reuse; + if (reuse) { + FAIL_IF_ERR( + T::Create(&client_reuse, url, verbose), "unable to create client"); + } for (size_t i = 0; i < repetitions; ++i) { tc::InferResult* results; - if (!reuse) { - if (protocol == "grpc") { - std::unique_ptr grpc_client; - FAIL_IF_ERR( - tc::InferenceServerGrpcClient::Create(&grpc_client, url, verbose), - "unable to create grpc client"); - FAIL_IF_ERR( - grpc_client->Infer(&results, options, inputs, outputs), - "unable to run model"); - } else { - std::unique_ptr http_client; - FAIL_IF_ERR( - tc::InferenceServerHttpClient::Create(&http_client, url, verbose), - "unable to create http client"); - FAIL_IF_ERR( - http_client->Infer(&results, options, inputs, outputs), - "unable to run model"); - } + if (reuse) { + FAIL_IF_ERR( + client_reuse->Infer(&results, options, inputs, outputs), + "unable to run model"); } else { - if (protocol == "grpc") { - FAIL_IF_ERR( - tc::InferenceServerGrpcClient::Create( - &grpc_client_reuse, url, verbose), - "unable to create grpc client"); - FAIL_IF_ERR( - grpc_client_reuse->Infer(&results, options, inputs, outputs), - "unable to run model"); - } else { - FAIL_IF_ERR( - tc::InferenceServerHttpClient::Create( - &http_client_reuse, url, verbose), - "unable to create http client"); - FAIL_IF_ERR( - http_client_reuse->Infer(&results, options, inputs, outputs), - "unable to run model"); - } + std::unique_ptr client; + FAIL_IF_ERR(T::Create(&client, url, verbose), "unable to create client"); + InferWithRetries(client, &results, options, inputs, outputs); } - std::shared_ptr results_ptr; - results_ptr.reset(results); - - // Validate results - if (results_ptr->RequestStatus().IsOk()) { - ValidateResult(results_ptr, input0_data); - } else { - std::cerr << "error: Inference failed: " << results_ptr->RequestStatus() - << std::endl; - exit(1); - } + std::shared_ptr results_ptr(results); + ValidateResponse(results_ptr, 
input0_data); } } @@ -186,6 +207,10 @@ Usage(char** argv, const std::string& msg = std::string()) std::cerr << "\t-t " << std::endl; std::cerr << "\t-r default is 100." << std::endl; + std::cerr + << "\t-R Re-use the same client for each repetition. Without " + "this flag, the default is to create a new client on each repetition." + << std::endl; std::cerr << std::endl; exit(1); @@ -293,9 +318,18 @@ main(int argc, char** argv) std::vector outputs = {output0_ptr.get()}; // Send 'repetitions' number of inference requests to the inference server. - RunSynchronousInference( - inputs, outputs, options, input0_data, reuse, url, verbose, protocol, - repetitions); + if (protocol == "http") { + RunSyncInfer( + inputs, outputs, options, input0_data, reuse, url, verbose, + repetitions); + } else if (protocol == "grpc") { + RunSyncInfer( + inputs, outputs, options, input0_data, reuse, url, verbose, + repetitions); + } else { + std::cerr << "Invalid protocol: " << protocol << std::endl; + return 1; + } return 0; } From 1e6edd4e55421b74d1a8e36bd571d51917241206 Mon Sep 17 00:00:00 2001 From: Ryan McCormick Date: Wed, 26 Apr 2023 19:58:47 -0700 Subject: [PATCH 2/4] template T -> Client --- src/c++/tests/memory_leak_test.cc | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/c++/tests/memory_leak_test.cc b/src/c++/tests/memory_leak_test.cc index f49f8a38f..03e1d7d74 100644 --- a/src/c++/tests/memory_leak_test.cc +++ b/src/c++/tests/memory_leak_test.cc @@ -120,10 +120,10 @@ ValidateResponse( } } -template +template void InferWithRetries( - const std::unique_ptr& client, tc::InferResult** results, + const std::unique_ptr& client, tc::InferResult** results, tc::InferOptions& options, std::vector& inputs, std::vector& outputs) { @@ -159,8 +159,9 @@ InferWithRetries( } } -// T should be tc::InferenceServerHttpClient or tc::InferenceServerGrpcClient -template +// Client should be tc::InferenceServerHttpClient or +// tc::InferenceServerGrpcClient 
+template void RunSyncInfer( std::vector& inputs, @@ -170,10 +171,10 @@ RunSyncInfer( { // If re-use is enabled then use these client objects else use new objects for // each inference request. - std::unique_ptr client_reuse; + std::unique_ptr client_reuse; if (reuse) { FAIL_IF_ERR( - T::Create(&client_reuse, url, verbose), "unable to create client"); + Client::Create(&client_reuse, url, verbose), "unable to create client"); } for (size_t i = 0; i < repetitions; ++i) { @@ -183,9 +184,10 @@ RunSyncInfer( client_reuse->Infer(&results, options, inputs, outputs), "unable to run model"); } else { - std::unique_ptr client; - FAIL_IF_ERR(T::Create(&client, url, verbose), "unable to create client"); - InferWithRetries(client, &results, options, inputs, outputs); + std::unique_ptr client; + FAIL_IF_ERR( + Client::Create(&client, url, verbose), "unable to create client"); + InferWithRetries(client, &results, options, inputs, outputs); } std::shared_ptr results_ptr(results); From bd31a941f4ba7ea72649ed93e43bd145ef061f31 Mon Sep 17 00:00:00 2001 From: Ryan McCormick Date: Thu, 27 Apr 2023 15:26:43 -0700 Subject: [PATCH 3/4] Review feedback: Return error early on timeout, std::cerr errors, simplify reuse logic, clarify HTTP millisecond timeout limitation --- src/c++/library/common.h | 8 ++++--- src/c++/library/http_client.cc | 14 ++++++------ src/c++/tests/memory_leak_test.cc | 37 +++++++++---------------------- 3 files changed, 23 insertions(+), 36 deletions(-) diff --git a/src/c++/library/common.h b/src/c++/library/common.h index 2aa3a7879..5127ad281 100644 --- a/src/c++/library/common.h +++ b/src/c++/library/common.h @@ -209,13 +209,15 @@ struct InferOptions { /// https://github.com/triton-inference-server/server/blob/main/docs/user_guide/model_configuration.md#dynamic-batcher uint64_t server_timeout_; // The maximum end-to-end time, in microseconds, the request is allowed - // to take. Note the HTTP library only offer the precision upto - // milliseconds. 
The client will abort request when the specified time - // elapses. The request will return error with message "Deadline Exceeded". + // to take. The client will abort request when the specified time elapses. + // The request will return error with message "Deadline Exceeded". // The default value is 0 which means client will wait for the // response from the server. This option is not supported for streaming // requests. Instead see 'stream_timeout' argument in // InferenceServerGrpcClient::StartStream(). + // NOTE: the HTTP client library only offers millisecond precision, so a + // timeout < 1000 microseconds will be rounded down to 0 milliseconds and have + // no effect. uint64_t client_timeout_; }; diff --git a/src/c++/library/http_client.cc b/src/c++/library/http_client.cc index cc6306f17..c87c24d0b 100644 --- a/src/c++/library/http_client.cc +++ b/src/c++/library/http_client.cc @@ -1466,15 +1466,15 @@ InferenceServerHttpClient::Infer( // RECV_END will be set. auto curl_status = curl_easy_perform(easy_handle_); if (curl_status == CURLE_OPERATION_TIMEDOUT) { - sync_request->http_code_ = 499; - } else if (curl_status != CURLE_OK) { - if (verbose_) { - std::cout << "Curl failed with return code: " << curl_status << std::endl; - } + std::cerr << "Curl failed with return code: " << curl_status << std::endl; return Error( - "HTTP client failed [400]: " + + "HTTP client failed (Deadline Exceeded): " + std::string(curl_easy_strerror(curl_status))); - } else { + } else if (curl_status != CURLE_OK) { + std::cerr << "Curl failed with return code: " << curl_status << std::endl; + return Error( + "HTTP client failed: " + std::string(curl_easy_strerror(curl_status))); + } else { // Success curl_easy_getinfo( easy_handle_, CURLINFO_RESPONSE_CODE, &sync_request->http_code_); } diff --git a/src/c++/tests/memory_leak_test.cc b/src/c++/tests/memory_leak_test.cc index 03e1d7d74..f70487b64 100644 --- a/src/c++/tests/memory_leak_test.cc +++ b/src/c++/tests/memory_leak_test.cc @@ 
-50,7 +50,7 @@ namespace { void ValidateShapeAndDatatype( - const std::string& name, std::shared_ptr result) + const std::string& name, const std::shared_ptr result) { std::vector shape; FAIL_IF_ERR( @@ -107,7 +107,7 @@ ValidateResult( void ValidateResponse( - std::shared_ptr results_ptr, + const std::shared_ptr results_ptr, const std::vector& input0_data) { // Validate results @@ -127,32 +127,24 @@ InferWithRetries( tc::InferOptions& options, std::vector& inputs, std::vector& outputs) { - // Exit early if we succeed first try auto err = client->Infer(results, options, inputs, outputs); - if (err.IsOk()) { - return; - } // If the host runs out of available sockets due to TIME_WAIT, sleep and // retry on failure to give time for sockets to become available. int max_retries = 5; int sleep_secs = 60; - bool success = false; - for (int i = 0; i < max_retries; i++) { + for (int i = 0; !err.IsOk() && i < max_retries; i++) { std::cerr << "Error: " << err << std::endl; std::cerr << "Sleeping for " << sleep_secs << " seconds and retrying. [Attempt: " << i + 1 << "/" << max_retries << "]" << std::endl; sleep(sleep_secs); + // Retry and break from loop on success err = client->Infer(results, options, inputs, outputs); - if (err.IsOk()) { - success = true; - break; - } } - if (!success) { + if (!err.IsOk()) { std::cerr << "error: Exceeded max tries [" << max_retries << "] on inference without success" << std::endl; exit(1); @@ -171,25 +163,18 @@ RunSyncInfer( { // If re-use is enabled then use these client objects else use new objects for // each inference request. 
- std::unique_ptr client_reuse; - if (reuse) { - FAIL_IF_ERR( - Client::Create(&client_reuse, url, verbose), "unable to create client"); - } + std::unique_ptr client; + FAIL_IF_ERR(Client::Create(&client, url, verbose), "unable to create client"); for (size_t i = 0; i < repetitions; ++i) { - tc::InferResult* results; - if (reuse) { - FAIL_IF_ERR( - client_reuse->Infer(&results, options, inputs, outputs), - "unable to run model"); - } else { - std::unique_ptr client; + if (!reuse) { + // Create new client connection on every request if reuse flag not set FAIL_IF_ERR( Client::Create(&client, url, verbose), "unable to create client"); - InferWithRetries(client, &results, options, inputs, outputs); } + tc::InferResult* results; + InferWithRetries(client, &results, options, inputs, outputs); std::shared_ptr results_ptr(results); ValidateResponse(results_ptr, input0_data); } From 6d34b76634f30780068a020f52ada0727d4bfd57 Mon Sep 17 00:00:00 2001 From: Ryan McCormick Date: Fri, 28 Apr 2023 10:12:19 -0700 Subject: [PATCH 4/4] Remove cerrs from feedback, update copyright --- src/c++/library/common.h | 2 +- src/c++/library/http_client.cc | 4 +--- src/c++/tests/memory_leak_test.cc | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/c++/library/common.h b/src/c++/library/common.h index 5127ad281..df36a2bdb 100644 --- a/src/c++/library/common.h +++ b/src/c++/library/common.h @@ -1,4 +1,4 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions diff --git a/src/c++/library/http_client.cc b/src/c++/library/http_client.cc index c87c24d0b..a254a4a84 100644 --- a/src/c++/library/http_client.cc +++ b/src/c++/library/http_client.cc @@ -1,4 +1,4 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. +// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -1466,12 +1466,10 @@ InferenceServerHttpClient::Infer( // RECV_END will be set. auto curl_status = curl_easy_perform(easy_handle_); if (curl_status == CURLE_OPERATION_TIMEDOUT) { - std::cerr << "Curl failed with return code: " << curl_status << std::endl; return Error( "HTTP client failed (Deadline Exceeded): " + std::string(curl_easy_strerror(curl_status))); } else if (curl_status != CURLE_OK) { - std::cerr << "Curl failed with return code: " << curl_status << std::endl; return Error( "HTTP client failed: " + std::string(curl_easy_strerror(curl_status))); } else { // Success diff --git a/src/c++/tests/memory_leak_test.cc b/src/c++/tests/memory_leak_test.cc index f70487b64..829d07360 100644 --- a/src/c++/tests/memory_leak_test.cc +++ b/src/c++/tests/memory_leak_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +// Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions