Add response statistics #6869

Merged: 15 commits, Feb 17, 2024

This pull request adds per-response statistics for decoupled models: the gRPC frontend converts the new response_stats section of the model statistics JSON into the InferResponseStatistics protobuf message, and a new L0 test verifies per-response counts and durations over both HTTP and gRPC.
185 changes: 185 additions & 0 deletions qa/L0_response_statistics/response_statistics_test.py
@@ -0,0 +1,185 @@
#!/usr/bin/env python3

# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import time
import unittest

import numpy as np
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient


class TestResponseStatistics(unittest.TestCase):
def setUp(self):
self._model_name = "square_int32"
self._min_infer_delay_ns = 400000000
self._min_output_delay_ns = 200000000
self._number_of_fail_responses = 2
self._number_of_empty_responses = 1
self._statistics_counts = []
self._grpc_client = grpcclient.InferenceServerClient(
"localhost:8001", verbose=True
)
self._http_client = httpclient.InferenceServerClient("localhost:8000")

def _generate_streaming_callback_and_response_pair(self):
response = [] # [{"result": result, "error": error}, ...]

def callback(result, error):
response.append({"result": result, "error": error})

return callback, response

def _stream_infer(self, number_of_responses):
callback, responses = self._generate_streaming_callback_and_response_pair()
self._grpc_client.start_stream(callback)
input_data = np.array([number_of_responses], dtype=np.int32)
inputs = [grpcclient.InferInput("IN", input_data.shape, "INT32")]
inputs[0].set_data_from_numpy(input_data)
outputs = [grpcclient.InferRequestedOutput("OUT")]
self._grpc_client.async_stream_infer(
model_name=self._model_name, inputs=inputs, outputs=outputs
)
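# Empty (final-flag-only) responses do not invoke the callback, so only
# number_of_responses - number_of_empty_responses callbacks are expected.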
while len(responses) < (number_of_responses - self._number_of_empty_responses):
time.sleep(0.1) # poll until all expected responses are received
self._grpc_client.stop_stream()
return responses

def _update_statistics_counts(self, current_index, number_of_responses):
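# Responses arrive in a fixed order: successful responses first, then
# self._number_of_fail_responses failing responses, then
# self._number_of_empty_responses empty responses. Classify the response
# at current_index accordingly and bump the expected counters.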
if current_index >= len(self._statistics_counts):
self._statistics_counts.append(
{
"compute_infer": 0,
"compute_output": 0,
"success": 0,
"fail": 0,
"empty_response": 0,
}
)
if (
current_index
+ self._number_of_fail_responses
+ self._number_of_empty_responses
< number_of_responses
):
# success
self._statistics_counts[current_index]["compute_infer"] += 1
self._statistics_counts[current_index]["compute_output"] += 1
self._statistics_counts[current_index]["success"] += 1
elif current_index + self._number_of_empty_responses < number_of_responses:
# fail
self._statistics_counts[current_index]["compute_infer"] += 1
self._statistics_counts[current_index]["compute_output"] += 1
self._statistics_counts[current_index]["fail"] += 1
else:
# empty
self._statistics_counts[current_index]["compute_infer"] += 1
self._statistics_counts[current_index]["empty_response"] += 1

def _check_statistics_count_and_duration(
self, response_stats, current_index, stats_name
):
expected_count = self._statistics_counts[current_index][stats_name]
if stats_name == "compute_infer" or stats_name == "empty_response":
delay_ns = self._min_infer_delay_ns
elif stats_name == "compute_output":
delay_ns = self._min_output_delay_ns
else: # success or fail
delay_ns = self._min_infer_delay_ns + self._min_output_delay_ns
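# The backend adds the configured delay once per counted response, so the
# accumulated duration must land within +/-10% of delay_ns * expected_count.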
upper_bound_ns = 1.1 * delay_ns * expected_count
lower_bound_ns = 0.9 * delay_ns * expected_count
stats = response_stats[str(current_index)][stats_name]
self.assertEqual(stats["count"], expected_count)
self.assertLessEqual(stats["ns"], upper_bound_ns)
self.assertGreaterEqual(stats["ns"], lower_bound_ns)

def _get_response_statistics(self):
# http response statistics
statistics_http = self._http_client.get_inference_statistics(
model_name=self._model_name
)
model_stats_http = statistics_http["model_stats"][0]
self.assertEqual(model_stats_http["name"], self._model_name)
response_stats_http = model_stats_http["response_stats"]
# grpc response statistics
statistics_grpc = self._grpc_client.get_inference_statistics(
model_name=self._model_name, as_json=True
)
model_stats_grpc = statistics_grpc["model_stats"][0]
self.assertEqual(model_stats_grpc["name"], self._model_name)
response_stats_grpc = model_stats_grpc["response_stats"]
# check equivalence between the HTTP and gRPC statistics
self.assertEqual(len(response_stats_http), len(response_stats_grpc))
for idx, statistics_http in response_stats_http.items():
self.assertIn(idx, response_stats_grpc)
statistics_grpc = response_stats_grpc[idx]
for name, stats_http in statistics_http.items():
self.assertIn(name, statistics_grpc)
stats_grpc = statistics_grpc[name]
# normalize gRPC statistics to the HTTP representation (the gRPC
# JSON encodes uint64 values as strings and omits zero-valued fields)
stats_grpc["count"] = (
int(stats_grpc["count"]) if ("count" in stats_grpc) else 0
)
stats_grpc["ns"] = int(stats_grpc["ns"]) if ("ns" in stats_grpc) else 0
# check equal
self.assertEqual(stats_http, stats_grpc)
return response_stats_http

def _check_response_stats(self, responses, number_of_responses):
response_stats = self._get_response_statistics()
self.assertGreaterEqual(len(response_stats), number_of_responses)
for i in range(number_of_responses):
self._update_statistics_counts(i, number_of_responses)
self._check_statistics_count_and_duration(
response_stats, i, "compute_infer"
)
self._check_statistics_count_and_duration(
response_stats, i, "compute_output"
)
self._check_statistics_count_and_duration(response_stats, i, "success")
self._check_statistics_count_and_duration(response_stats, i, "fail")
self._check_statistics_count_and_duration(
response_stats, i, "empty_response"
)

def test_response_statistics(self):
number_of_responses = 4
responses = self._stream_infer(number_of_responses)
self._check_response_stats(responses, number_of_responses)

number_of_responses = 6
responses = self._stream_infer(number_of_responses)
self._check_response_stats(responses, number_of_responses)

number_of_responses = 3
responses = self._stream_infer(number_of_responses)
self._check_response_stats(responses, number_of_responses)


if __name__ == "__main__":
unittest.main()
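
As a usage sketch (not part of the test file above): assuming a server is running on localhost:8000 with the square_int32 model loaded, as test.sh below sets up, the per-response statistics can be fetched and printed like this (the values depend on prior inferences):

import tritonclient.http as httpclient

# Fetch model statistics over HTTP; "response_stats" is keyed by response
# index ("0", "1", ...), and each entry holds count/ns pairs for
# compute_infer, compute_output, success, fail and empty_response.
client = httpclient.InferenceServerClient("localhost:8000")
stats = client.get_inference_statistics(model_name="square_int32")
response_stats = stats["model_stats"][0]["response_stats"]
for index, entry in sorted(response_stats.items(), key=lambda kv: int(kv[0])):
    for name, stat in entry.items():
        print(f"response {index} {name}: count={stat['count']} ns={stat['ns']}")
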
89 changes: 89 additions & 0 deletions qa/L0_response_statistics/test.sh
@@ -0,0 +1,89 @@
#!/bin/bash
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
echo -e "Repository version must be specified"
echo -e "\n***\n*** Test Failed\n***"
exit 1
fi
if [ ! -z "$TEST_REPO_ARCH" ]; then
REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

export CUDA_VISIBLE_DEVICES=0

SERVER=/opt/tritonserver/bin/tritonserver
source ../common/util.sh

RET=0

rm -rf models && mkdir models
mkdir -p models/square_int32/1 && (cd models/square_int32 && \
echo 'name: "square_int32"' >> config.pbtxt && \
echo 'backend: "square"' >> config.pbtxt && \
echo 'max_batch_size: 0' >> config.pbtxt && \
echo 'model_transaction_policy { decoupled: True }' >> config.pbtxt && \
echo -e 'input [{ name: "IN" \n data_type: TYPE_INT32 \n dims: [ 1 ] }]' >> config.pbtxt && \
echo -e 'output [{ name: "OUT" \n data_type: TYPE_INT32 \n dims: [ 1 ] }]' >> config.pbtxt && \
echo -e 'parameters [{ key: "CUSTOM_INFER_DELAY_NS" \n value: { string_value: "400000000" } }]' >> config.pbtxt && \
echo -e 'parameters [{ key: "CUSTOM_OUTPUT_DELAY_NS" \n value: { string_value: "200000000" } }]' >> config.pbtxt && \
echo -e 'parameters [{ key: "CUSTOM_FAIL_COUNT" \n value: { string_value: "2" } }]' >> config.pbtxt && \
echo -e 'parameters [{ key: "CUSTOM_EMPTY_COUNT" \n value: { string_value: "1" } }]' >> config.pbtxt)

TEST_LOG="response_statistics_test.log"
SERVER_LOG="./response_statistics_test.server.log"

SERVER_ARGS="--model-repository=`pwd`/models"
run_server
if [ "$SERVER_PID" == "0" ]; then
echo -e "\n***\n*** Failed to start $SERVER\n***"
cat $SERVER_LOG
exit 1
fi

set +e
python response_statistics_test.py > $TEST_LOG 2>&1
if [ $? -ne 0 ]; then
echo -e "\n***\n*** Failed response statistics test\n***"
cat $TEST_LOG
RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

if [ $RET -eq 0 ]; then
echo -e "\n***\n*** Test Passed\n***"
else
echo -e "\n***\n*** Test FAILED\n***"
fi
exit $RET
92 changes: 91 additions & 1 deletion src/grpc/grpc_server.cc
@@ -1,4 +1,4 @@
- // Copyright 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ // Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
@@ -1100,6 +1100,96 @@ CommonHandler::RegisterModelStatistics()
ucnt);
}

{
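// Convert the per-response statistics from the core's JSON report into
// the InferResponseStatistics protobuf map, keyed by response index.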
triton::common::TritonJson::Value responses_json;
err = model_stat.MemberAsObject("response_stats", &responses_json);
GOTO_IF_ERR(err, earlyexit);

std::vector<std::string> keys;
err = responses_json.Members(&keys);
GOTO_IF_ERR(err, earlyexit);

for (const auto& key : keys) {
triton::common::TritonJson::Value res_json;
err = responses_json.MemberAsObject(key.c_str(), &res_json);
GOTO_IF_ERR(err, earlyexit);

inference::InferResponseStatistics res;

{
triton::common::TritonJson::Value stat_json;
err = res_json.MemberAsObject("compute_infer", &stat_json);
GOTO_IF_ERR(err, earlyexit);

uint64_t val;
err = stat_json.MemberAsUInt("count", &val);
GOTO_IF_ERR(err, earlyexit);
res.mutable_compute_infer()->set_count(val);
err = stat_json.MemberAsUInt("ns", &val);
GOTO_IF_ERR(err, earlyexit);
res.mutable_compute_infer()->set_ns(val);
}

{
triton::common::TritonJson::Value stat_json;
err = res_json.MemberAsObject("compute_output", &stat_json);
GOTO_IF_ERR(err, earlyexit);

uint64_t val;
err = stat_json.MemberAsUInt("count", &val);
GOTO_IF_ERR(err, earlyexit);
res.mutable_compute_output()->set_count(val);
err = stat_json.MemberAsUInt("ns", &val);
GOTO_IF_ERR(err, earlyexit);
res.mutable_compute_output()->set_ns(val);
}

{
triton::common::TritonJson::Value stat_json;
err = res_json.MemberAsObject("success", &stat_json);
GOTO_IF_ERR(err, earlyexit);

uint64_t val;
err = stat_json.MemberAsUInt("count", &val);
GOTO_IF_ERR(err, earlyexit);
res.mutable_success()->set_count(val);
err = stat_json.MemberAsUInt("ns", &val);
GOTO_IF_ERR(err, earlyexit);
res.mutable_success()->set_ns(val);
}

{
triton::common::TritonJson::Value stat_json;
err = res_json.MemberAsObject("fail", &stat_json);
GOTO_IF_ERR(err, earlyexit);

uint64_t val;
err = stat_json.MemberAsUInt("count", &val);
GOTO_IF_ERR(err, earlyexit);
res.mutable_fail()->set_count(val);
err = stat_json.MemberAsUInt("ns", &val);
GOTO_IF_ERR(err, earlyexit);
res.mutable_fail()->set_ns(val);
}

{
triton::common::TritonJson::Value stat_json;
err = res_json.MemberAsObject("empty_response", &stat_json);
GOTO_IF_ERR(err, earlyexit);

uint64_t val;
err = stat_json.MemberAsUInt("count", &val);
GOTO_IF_ERR(err, earlyexit);
res.mutable_empty_response()->set_count(val);
err = stat_json.MemberAsUInt("ns", &val);
GOTO_IF_ERR(err, earlyexit);
res.mutable_empty_response()->set_ns(val);
}

(*statistics->mutable_response_stats())[key] = std::move(res);
}
}

triton::common::TritonJson::Value batches_json;
err = model_stat.MemberAsArray("batch_stats", &batches_json);
GOTO_IF_ERR(err, earlyexit);