Improve L0_client_memory_growth test #303

Merged (4 commits, Apr 29, 2023)
Changes from 3 commits
8 changes: 5 additions & 3 deletions src/c++/library/common.h
@@ -209,13 +209,15 @@ struct InferOptions {
   /// https://github.com/triton-inference-server/server/blob/main/docs/user_guide/model_configuration.md#dynamic-batcher
   uint64_t server_timeout_;
   // The maximum end-to-end time, in microseconds, the request is allowed
-  // to take. Note the HTTP library only offer the precision upto
-  // milliseconds. The client will abort request when the specified time
-  // elapses. The request will return error with message "Deadline Exceeded".
+  // to take. The client will abort request when the specified time elapses.
+  // The request will return error with message "Deadline Exceeded".
   // The default value is 0 which means client will wait for the
   // response from the server. This option is not supported for streaming
   // requests. Instead see 'stream_timeout' argument in
   // InferenceServerGrpcClient::StartStream().
+  // NOTE: the HTTP client library only offers millisecond precision, so a
+  // timeout < 1000 microseconds will be rounded down to 0 milliseconds and have
+  // no effect.
   uint64_t client_timeout_;
 };

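As context for the new comment, here is a minimal sketch of how the microsecond-based option interacts with the HTTP client's millisecond granularity. The model name and timeout values are placeholders, not taken from this PR:

  tc::InferOptions options("my_model");  // hypothetical model name
  options.client_timeout_ = 500;   // 500 us rounds down to 0 ms over HTTP: no timeout is applied
  options.client_timeout_ = 2500;  // 2500 us is applied as 2 ms by the HTTP client
  // The gRPC client consumes the value at microsecond granularity, so no such rounding applies there.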
12 changes: 8 additions & 4 deletions src/c++/library/http_client.cc
@@ -1466,10 +1466,15 @@ InferenceServerHttpClient::Infer(
   // RECV_END will be set.
   auto curl_status = curl_easy_perform(easy_handle_);
   if (curl_status == CURLE_OPERATION_TIMEDOUT) {
-    sync_request->http_code_ = 499;
+    std::cerr << "Curl failed with return code: " << curl_status << std::endl;
+    return Error(
+        "HTTP client failed (Deadline Exceeded): " +
+        std::string(curl_easy_strerror(curl_status)));
   } else if (curl_status != CURLE_OK) {
-    sync_request->http_code_ = 400;
-  } else {
+    std::cerr << "Curl failed with return code: " << curl_status << std::endl;
+    return Error(
+        "HTTP client failed: " + std::string(curl_easy_strerror(curl_status)));
+  } else {  // Success
     curl_easy_getinfo(
         easy_handle_, CURLINFO_RESPONSE_CODE, &sync_request->http_code_);
   }
@@ -1488,7 +1493,6 @@ InferenceServerHttpClient::Infer(
   return err;
 }
 
-
 Error
 InferenceServerHttpClient::AsyncInfer(
     OnCompleteFn callback, const InferOptions& options,
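A hedged sketch of what this change means on the caller side; the variable names are illustrative and the client, inputs, and outputs are assumed to be set up as elsewhere in this test:

  // Before this PR a timed-out request surfaced only as an HTTP 499 status code;
  // now Infer() itself returns a non-OK tc::Error mentioning "Deadline Exceeded".
  tc::InferResult* results = nullptr;
  tc::Error err = http_client->Infer(&results, options, inputs, outputs);
  if (!err.IsOk()) {
    // e.g. "HTTP client failed (Deadline Exceeded): Timeout was reached"
    std::cerr << "request failed: " << err << std::endl;
  }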
133 changes: 77 additions & 56 deletions src/c++/tests/memory_leak_test.cc
@@ -50,7 +50,7 @@ namespace {

 void
 ValidateShapeAndDatatype(
-    const std::string& name, std::shared_ptr<tc::InferResult> result)
+    const std::string& name, const std::shared_ptr<tc::InferResult> result)
 {
   std::vector<int64_t> shape;
   FAIL_IF_ERR(
@@ -76,7 +76,7 @@ ValidateShapeAndDatatype(
 void
 ValidateResult(
     const std::shared_ptr<tc::InferResult> result,
-    std::vector<int32_t>& input0_data)
+    const std::vector<int32_t>& input0_data)
 {
   // Validate the results...
   ValidateShapeAndDatatype("OUTPUT0", result);
@@ -105,70 +105,78 @@ ValidateResult(
   std::cout << result->DebugString() << std::endl;
 }
 
+void
+ValidateResponse(
+    const std::shared_ptr<tc::InferResult> results_ptr,
+    const std::vector<int32_t>& input0_data)
+{
+  // Validate results
+  if (results_ptr->RequestStatus().IsOk()) {
+    ValidateResult(results_ptr, input0_data);
+  } else {
+    std::cerr << "error: Inference failed: " << results_ptr->RequestStatus()
+              << std::endl;
+    exit(1);
+  }
+}
+
+template <typename Client>
+void
+InferWithRetries(
+    const std::unique_ptr<Client>& client, tc::InferResult** results,
+    tc::InferOptions& options, std::vector<tc::InferInput*>& inputs,
+    std::vector<const tc::InferRequestedOutput*>& outputs)
+{
+  auto err = client->Infer(results, options, inputs, outputs);
+
+  // If the host runs out of available sockets due to TIME_WAIT, sleep and
+  // retry on failure to give time for sockets to become available.
+  int max_retries = 5;
+  int sleep_secs = 60;
+  for (int i = 0; !err.IsOk() && i < max_retries; i++) {
+    std::cerr << "Error: " << err << std::endl;
+    std::cerr << "Sleeping for " << sleep_secs
+              << " seconds and retrying. [Attempt: " << i + 1 << "/"
+              << max_retries << "]" << std::endl;
+    sleep(sleep_secs);
+
+    // Retry and break from loop on success
+    err = client->Infer(results, options, inputs, outputs);
+  }
+
+  if (!err.IsOk()) {
+    std::cerr << "error: Exceeded max tries [" << max_retries
+              << "] on inference without success" << std::endl;
+    exit(1);
+  }
+}
+
+// Client should be tc::InferenceServerHttpClient or
+// tc::InferenceServerGrpcClient
+template <typename Client>
 void
-RunSynchronousInference(
+RunSyncInfer(
     std::vector<tc::InferInput*>& inputs,
     std::vector<const tc::InferRequestedOutput*>& outputs,
     tc::InferOptions& options, std::vector<int32_t>& input0_data, bool reuse,
-    std::string url, bool verbose, std::string protocol, uint32_t repetitions)
+    std::string url, bool verbose, uint32_t repetitions)
 {
-  // If re-use is enabled then use these client objects else use new objects for
-  // each inference request.
-  std::unique_ptr<tc::InferenceServerGrpcClient> grpc_client_reuse;
-  std::unique_ptr<tc::InferenceServerHttpClient> http_client_reuse;
+  std::unique_ptr<Client> client;
+  FAIL_IF_ERR(Client::Create(&client, url, verbose), "unable to create client");
 
   for (size_t i = 0; i < repetitions; ++i) {
-    tc::InferResult* results;
     if (!reuse) {
-      if (protocol == "grpc") {
-        std::unique_ptr<tc::InferenceServerGrpcClient> grpc_client;
-        FAIL_IF_ERR(
-            tc::InferenceServerGrpcClient::Create(&grpc_client, url, verbose),
-            "unable to create grpc client");
-        FAIL_IF_ERR(
-            grpc_client->Infer(&results, options, inputs, outputs),
-            "unable to run model");
-      } else {
-        std::unique_ptr<tc::InferenceServerHttpClient> http_client;
-        FAIL_IF_ERR(
-            tc::InferenceServerHttpClient::Create(&http_client, url, verbose),
-            "unable to create http client");
-        FAIL_IF_ERR(
-            http_client->Infer(&results, options, inputs, outputs),
-            "unable to run model");
-      }
-    } else {
-      if (protocol == "grpc") {
-        FAIL_IF_ERR(
-            tc::InferenceServerGrpcClient::Create(
-                &grpc_client_reuse, url, verbose),
-            "unable to create grpc client");
-        FAIL_IF_ERR(
-            grpc_client_reuse->Infer(&results, options, inputs, outputs),
-            "unable to run model");
-      } else {
-        FAIL_IF_ERR(
-            tc::InferenceServerHttpClient::Create(
-                &http_client_reuse, url, verbose),
-            "unable to create http client");
-        FAIL_IF_ERR(
-            http_client_reuse->Infer(&results, options, inputs, outputs),
-            "unable to run model");
-      }
+      // Create new client connection on every request if reuse flag not set
+      FAIL_IF_ERR(
+          Client::Create(&client, url, verbose), "unable to create client");
     }
 
-    std::shared_ptr<tc::InferResult> results_ptr;
-    results_ptr.reset(results);
-
-    // Validate results
-    if (results_ptr->RequestStatus().IsOk()) {
-      ValidateResult(results_ptr, input0_data);
-    } else {
-      std::cerr << "error: Inference failed: " << results_ptr->RequestStatus()
-                << std::endl;
-      exit(1);
-    }
+    tc::InferResult* results;
+    InferWithRetries<Client>(client, &results, options, inputs, outputs);
+    std::shared_ptr<tc::InferResult> results_ptr(results);
+    ValidateResponse(results_ptr, input0_data);
   }
 }
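The 60-second sleep in InferWithRetries lines up with the kernel's TIME_WAIT behavior. A rough, hedged calculation of why port exhaustion shows up in this test; the figures are standard Linux defaults, not something stated in this PR:

  // Each non-reuse repetition opens and closes a TCP connection; the closed
  // socket lingers in TIME_WAIT for 60 s before its port can be reused.
  constexpr int kTimeWaitSecs = 60;
  constexpr int kEphemeralPorts = 60999 - 32768 + 1;                   // 28232 ports by default
  constexpr int kMaxNewConnsPerSec = kEphemeralPorts / kTimeWaitSecs;  // ~470 connections/s

Sustained connection rates above that ceiling exhaust the ephemeral ports, so sleeping a full TIME_WAIT interval before retrying gives the kernel time to reclaim them.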

@@ -186,6 +194,10 @@ Usage(char** argv, const std::string& msg = std::string())
std::cerr << "\t-t <client timeout in microseconds>" << std::endl;
std::cerr << "\t-r <number of repetitions for inference> default is 100."
<< std::endl;
std::cerr
<< "\t-R Re-use the same client for each repetition. Without "
"this flag, the default is to create a new client on each repetition."
<< std::endl;
std::cerr << std::endl;

exit(1);
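A hypothetical sketch of how the new flag could be wired into the test's option handling; the real getopt string and cases are outside the lines shown in this diff:

  #include <unistd.h>  // getopt

  // Returns true when -R is passed, i.e. one client is kept alive across all
  // repetitions instead of creating a new one per repetition (the default).
  static bool ParseReuseFlag(int argc, char** argv)
  {
    bool reuse = false;
    int opt;
    while ((opt = getopt(argc, argv, "R")) != -1) {
      if (opt == 'R') {
        reuse = true;
      }
    }
    return reuse;
  }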
@@ -293,9 +305,18 @@ main(int argc, char** argv)
   std::vector<const tc::InferRequestedOutput*> outputs = {output0_ptr.get()};
 
   // Send 'repetitions' number of inference requests to the inference server.
-  RunSynchronousInference(
-      inputs, outputs, options, input0_data, reuse, url, verbose, protocol,
-      repetitions);
+  if (protocol == "http") {
+    RunSyncInfer<tc::InferenceServerHttpClient>(
+        inputs, outputs, options, input0_data, reuse, url, verbose,
+        repetitions);
+  } else if (protocol == "grpc") {
+    RunSyncInfer<tc::InferenceServerGrpcClient>(
+        inputs, outputs, options, input0_data, reuse, url, verbose,
+        repetitions);
+  } else {
+    std::cerr << "Invalid protocol: " << protocol << std::endl;
+    return 1;
+  }
 
   return 0;
 }