diff --git a/docs/protocol/extension_statistics.md b/docs/protocol/extension_statistics.md
index 4da4140657..4ff956b60a 100644
--- a/docs/protocol/extension_statistics.md
+++ b/docs/protocol/extension_statistics.md
@@ -195,7 +195,8 @@ $response_stats =
   "compute_output" : $duration_stat,
   "success" : $duration_stat,
   "fail" : $duration_stat,
-  "empty_response" : $duration_stat
+  "empty_response" : $duration_stat,
+  "cancel" : $duration_stat
 }
 ```
@@ -208,6 +209,8 @@ $response_stats =
   is the sum of infer and output durations.
 - "empty_response" : The count and cumulative duration of an inference with an
   empty / no response. The duration is infer durations.
+- "cancel" : The count and cumulative duration of an inference cancellation. The
+  duration is for cleaning up resources held by cancelled inference requests.
 ```
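The new "cancel" bucket is a `$duration_stat` like the others, so existing statistics tooling picks it up unchanged. As a quick illustration (not part of this change; the server address, the loaded model name, and the use of the `tritonclient` HTTP API are assumptions), a client could read it like this:

```python
# Illustrative sketch only. Assumes a Triton server at localhost:8000 with a
# decoupled model named "square_int32" already loaded.
import tritonclient.http as httpclient

client = httpclient.InferenceServerClient("localhost:8000")
stats = client.get_inference_statistics(model_name="square_int32")

for model_stat in stats["model_stats"]:
    # "response_stats" maps a response index (as a string) to $response_stats.
    for index, buckets in model_stat.get("response_stats", {}).items():
        cancel = buckets["cancel"]  # a $duration_stat: {"count": ..., "ns": ...}
        print(f"response {index}: {cancel['count']} cancellation(s), {cancel['ns']} ns")
```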
diff --git a/qa/L0_response_statistics/response_statistics_test.py b/qa/L0_response_statistics/response_statistics_test.py
index b04403bfb3..64f2d4fb68 100755
--- a/qa/L0_response_statistics/response_statistics_test.py
+++ b/qa/L0_response_statistics/response_statistics_test.py
@@ -36,11 +36,12 @@ class TestResponseStatistics(unittest.TestCase):
     def setUp(self):
-        self._model_name = "square_int32"
-        self._min_infer_delay_ns = 400000000
-        self._min_output_delay_ns = 200000000
-        self._number_of_fail_responses = 2
-        self._number_of_empty_responses = 1
+        self._model_name = "set_by_test_case"
+        self._min_infer_delay_ns = 0
+        self._min_output_delay_ns = 0
+        self._min_cancel_delay_ns = 0
+        self._number_of_fail_responses = 0
+        self._number_of_empty_responses = 0
         self._statistics_counts = []
         self._grpc_client = grpcclient.InferenceServerClient(
             "localhost:8001", verbose=True
@@ -59,8 +60,10 @@ def callback(result, error):
     # Send an infer request and return its responses. 'number_of_responses' is the sum
     # of success, fail and empty responses the model should return for this request.
-    # This function waits until all success and fail responses are received.
-    def _stream_infer(self, number_of_responses):
+    # 'cancel_at_response_size' cancels the stream once this many responses have been
+    # received; set it to None if cancellation is not required. This function waits
+    # until all success and fail responses are received, or until cancellation.
+    def _stream_infer(self, number_of_responses, cancel_at_response_size=None):
         callback, responses = self._generate_streaming_callback_and_response_pair()
         self._grpc_client.start_stream(callback)
         input_data = np.array([number_of_responses], dtype=np.int32)
@@ -70,15 +73,27 @@ def _stream_infer(self, number_of_responses):
         self._grpc_client.async_stream_infer(
             model_name=self._model_name, inputs=inputs, outputs=outputs
         )
-        while len(responses) < (number_of_responses - self._number_of_empty_responses):
-            time.sleep(0.1)  # poll until all expected responses are received
-        self._grpc_client.stop_stream()
+        if cancel_at_response_size is None:
+            # poll until all expected responses are received
+            while len(responses) < (
+                number_of_responses - self._number_of_empty_responses
+            ):
+                time.sleep(0.1)
+            self._grpc_client.stop_stream(cancel_requests=False)
+        else:
+            # poll until the cancellation response size is reached
+            while len(responses) < cancel_at_response_size:
+                time.sleep(0.1)
+            self._grpc_client.stop_stream(cancel_requests=True)
         return responses
 
     # Update expected statistics counts for the response at 'current_index'.
     # 'number_of_responses' is the sum of success, fail and empty responses expected
-    # from this inference request.
-    def _update_statistics_counts(self, current_index, number_of_responses):
+    # from this inference request. 'cancel_at_index' is the response index at which
+    # the cancellation is expected to be recorded.
+    def _update_statistics_counts(
+        self, current_index, number_of_responses, cancel_at_index
+    ):
         if current_index >= len(self._statistics_counts):
             self._statistics_counts.append(
                 {
@@ -87,9 +102,13 @@ def _update_statistics_counts(self, current_index, number_of_responses):
                     "compute_infer": 0,
                     "compute_output": 0,
                     "success": 0,
                     "fail": 0,
                     "empty_response": 0,
+                    "cancel": 0,
                 }
             )
-        if (
+        if current_index == cancel_at_index:
+            # cancel
+            self._statistics_counts[current_index]["cancel"] += 1
+        elif (
             current_index
             + self._number_of_fail_responses
             + self._number_of_empty_responses
@@ -118,10 +137,16 @@ def _check_statistics_count_and_duration(
             delay_ns = self._min_infer_delay_ns
         elif stats_name == "compute_output":
             delay_ns = self._min_output_delay_ns
+        elif stats_name == "cancel":
+            delay_ns = self._min_cancel_delay_ns
         else:  # success or fail
             delay_ns = self._min_infer_delay_ns + self._min_output_delay_ns
-        upper_bound_ns = 1.1 * delay_ns * expected_count
-        lower_bound_ns = 0.9 * delay_ns * expected_count
+        if delay_ns == 0:
+            upper_bound_ns = 10000000 * expected_count
+            lower_bound_ns = 0
+        else:
+            upper_bound_ns = 1.1 * delay_ns * expected_count
+            lower_bound_ns = 0.9 * delay_ns * expected_count
         stats = response_stats[str(current_index)][stats_name]
         self.assertEqual(stats["count"], expected_count)
         self.assertLessEqual(stats["ns"], upper_bound_ns)
@@ -162,12 +187,14 @@ def _get_response_statistics(self):
         return response_stats_http
 
     # Check the response statistics is valid for a given infer request, providing its
-    # 'responses' and 'number_of_responses'.
-    def _check_response_stats(self, responses, number_of_responses):
+    # 'responses', expected 'number_of_responses', and 'cancel_at_index'.
+    def _check_response_stats(
+        self, responses, number_of_responses, cancel_at_index=None
+    ):
         response_stats = self._get_response_statistics()
         self.assertGreaterEqual(len(response_stats), number_of_responses)
         for i in range(number_of_responses):
-            self._update_statistics_counts(i, number_of_responses)
+            self._update_statistics_counts(i, number_of_responses, cancel_at_index)
             self._check_statistics_count_and_duration(
                 response_stats, i, "compute_infer"
             )
@@ -179,24 +206,57 @@ def _check_response_stats(self, responses, number_of_responses):
             self._check_statistics_count_and_duration(
                 response_stats, i, "empty_response"
             )
+            self._check_statistics_count_and_duration(response_stats, i, "cancel")
 
     # Test response statistics. The statistics must be valid over two or more infers.
     def test_response_statistics(self):
+        self._model_name = "square_int32"
+        self._min_infer_delay_ns = 400000000
+        self._min_output_delay_ns = 200000000
+        self._number_of_fail_responses = 2
+        self._number_of_empty_responses = 1
         # Send a request that generates 4 responses.
         number_of_responses = 4
         responses = self._stream_infer(number_of_responses)
         self._check_response_stats(responses, number_of_responses)
-        # Send a request that generates 6 responses, and make sure the
-        # statistics are aggregated with the previous request.
+        # Send a request that generates 6 responses, and make sure the statistics are
+        # aggregated with the previous request.
         number_of_responses = 6
         responses = self._stream_infer(number_of_responses)
         self._check_response_stats(responses, number_of_responses)
-        # Send a request that generates 3 responses, and make sure the
-        # statistics are aggregated with the previous requests.
+        # Send a request that generates 3 responses, and make sure the statistics are
+        # aggregated with the previous requests.
         number_of_responses = 3
         responses = self._stream_infer(number_of_responses)
         self._check_response_stats(responses, number_of_responses)
 
+    # Test response statistics with cancellation.
+    def test_response_statistics_cancel(self):
+        self._model_name = "square_int32_slow"
+        self._min_infer_delay_ns = 1200000000
+        self._min_output_delay_ns = 800000000
+        self._min_cancel_delay_ns = 400000000
+
+        # Send a request that generates 4 responses.
+        number_of_responses = 4
+        responses = self._stream_infer(number_of_responses)
+        self._check_response_stats(responses, number_of_responses)
+
+        # Send a request that generates 4 responses, and cancel it after its 1st
+        # response is received; the statistics must aggregate with the previous request.
+        responses = self._stream_infer(number_of_responses=4, cancel_at_response_size=1)
+        # There is an infer and output delay on the 1st and 2nd response, and a cancel
+        # delay on the 3rd response.
+        min_total_delay_ns = (
+            self._min_infer_delay_ns + self._min_output_delay_ns
+        ) * 2 + self._min_cancel_delay_ns
+        # Make sure the inference and the cancellation are completed before checking.
+        time.sleep(min_total_delay_ns * 1.5 / 1000000000)
+        # The request is cancelled while the 2nd response is computing, so the
+        # cancellation should be received at the 3rd response (index 2), making a total
+        # of 3 responses on the statistics.
+        self._check_response_stats(responses, number_of_responses=3, cancel_at_index=2)
+
 
 if __name__ == "__main__":
     unittest.main()
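To make the expected bookkeeping concrete, here is a hand-worked sketch (illustrative only, not part of the test) of the per-index counters that `test_response_statistics_cancel` should accumulate after its two requests: four successful responses, then a request whose cancellation is recorded at index 2. Only indices 0-2 are verified on the second check; the dict layout mirrors `self._statistics_counts`.

```python
# Expected per-response-index counters after both requests in
# test_response_statistics_cancel (fail/empty counts are 0 for this model).
expected_counts = [
    # index 0: succeeded in both requests
    {"compute_infer": 2, "compute_output": 2, "success": 2, "fail": 0, "empty_response": 0, "cancel": 0},
    # index 1: succeeded in both requests
    {"compute_infer": 2, "compute_output": 2, "success": 2, "fail": 0, "empty_response": 0, "cancel": 0},
    # index 2: succeeded in the first request, cancelled in the second
    {"compute_infer": 1, "compute_output": 1, "success": 1, "fail": 0, "empty_response": 0, "cancel": 1},
]
```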
diff --git a/qa/L0_response_statistics/test.sh b/qa/L0_response_statistics/test.sh
index eae900a9e9..b91e3bbde1 100755
--- a/qa/L0_response_statistics/test.sh
+++ b/qa/L0_response_statistics/test.sh
@@ -56,6 +56,15 @@ mkdir -p models/square_int32/1 && (cd models/square_int32 && \
     echo -e 'parameters [{ key: "CUSTOM_OUTPUT_DELAY_NS" \n value: { string_value: "200000000" } }]' >> config.pbtxt && \
     echo -e 'parameters [{ key: "CUSTOM_FAIL_COUNT" \n value: { string_value: "2" } }]' >> config.pbtxt && \
     echo -e 'parameters [{ key: "CUSTOM_EMPTY_COUNT" \n value: { string_value: "1" } }]' >> config.pbtxt)
+mkdir -p models/square_int32_slow/1 && (cd models/square_int32_slow && \
+    echo 'backend: "square"' >> config.pbtxt && \
+    echo 'max_batch_size: 0' >> config.pbtxt && \
+    echo 'model_transaction_policy { decoupled: True }' >> config.pbtxt && \
+    echo -e 'input [{ name: "IN" \n data_type: TYPE_INT32 \n dims: [ 1 ] }]' >> config.pbtxt && \
+    echo -e 'output [{ name: "OUT" \n data_type: TYPE_INT32 \n dims: [ 1 ] }]' >> config.pbtxt && \
+    echo -e 'parameters [{ key: "CUSTOM_INFER_DELAY_NS" \n value: { string_value: "1200000000" } }]' >> config.pbtxt && \
+    echo -e 'parameters [{ key: "CUSTOM_OUTPUT_DELAY_NS" \n value: { string_value: "800000000" } }]' >> config.pbtxt && \
+    echo -e 'parameters [{ key: "CUSTOM_CANCEL_DELAY_NS" \n value: { string_value: "400000000" } }]' >> config.pbtxt)
 
 TEST_LOG="response_statistics_test.log"
 SERVER_LOG="./response_statistics_test.server.log"
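For reference, the echo chain above generates approximately this `config.pbtxt` for `square_int32_slow` (the `\n` escapes expand to real newlines, so exact whitespace differs slightly). The `CUSTOM_*_DELAY_NS` parameters are what give the test deterministic lower bounds on the reported durations, including the new cancel duration:

```
backend: "square"
max_batch_size: 0
model_transaction_policy { decoupled: True }
input [{ name: "IN"
 data_type: TYPE_INT32
 dims: [ 1 ] }]
output [{ name: "OUT"
 data_type: TYPE_INT32
 dims: [ 1 ] }]
parameters [{ key: "CUSTOM_INFER_DELAY_NS"
 value: { string_value: "1200000000" } }]
parameters [{ key: "CUSTOM_OUTPUT_DELAY_NS"
 value: { string_value: "800000000" } }]
parameters [{ key: "CUSTOM_CANCEL_DELAY_NS"
 value: { string_value: "400000000" } }]
```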
diff --git a/src/grpc/grpc_server.cc b/src/grpc/grpc_server.cc
index 187272217d..63e2854208 100644
--- a/src/grpc/grpc_server.cc
+++ b/src/grpc/grpc_server.cc
@@ -300,6 +300,13 @@ class CommonHandler : public HandlerBase {
   void RegisterRepositoryModelLoad();
   void RegisterRepositoryModelUnload();
 
+  // Set count and cumulative duration for 'RegisterModelStatistics()'
+  template <typename PBTYPE>
+  TRITONSERVER_Error* SetStatisticsDuration(
+      triton::common::TritonJson::Value& statistics_json,
+      const std::string& statistics_name,
+      PBTYPE* mutable_statistics_duration_protobuf) const;
+
   const std::string name_;
   std::shared_ptr<TRITONSERVER_Server> tritonserver_;
 
@@ -968,136 +975,43 @@ CommonHandler::RegisterModelStatistics()
           GOTO_IF_ERR(err, earlyexit);
           statistics->set_execution_count(ucnt);
 
-          triton::common::TritonJson::Value infer_stats_json;
-          err = model_stat.MemberAsObject("inference_stats", &infer_stats_json);
-          GOTO_IF_ERR(err, earlyexit);
-
-          {
-            triton::common::TritonJson::Value success_json;
-            err = infer_stats_json.MemberAsObject("success", &success_json);
-            GOTO_IF_ERR(err, earlyexit);
-
-            err = success_json.MemberAsUInt("count", &ucnt);
-            GOTO_IF_ERR(err, earlyexit);
-            statistics->mutable_inference_stats()->mutable_success()->set_count(
-                ucnt);
-            err = success_json.MemberAsUInt("ns", &ucnt);
-            GOTO_IF_ERR(err, earlyexit);
-            statistics->mutable_inference_stats()->mutable_success()->set_ns(
-                ucnt);
-          }
-
-          {
-            triton::common::TritonJson::Value fail_json;
-            err = infer_stats_json.MemberAsObject("fail", &fail_json);
-            GOTO_IF_ERR(err, earlyexit);
-
-            err = fail_json.MemberAsUInt("count", &ucnt);
-            GOTO_IF_ERR(err, earlyexit);
-            statistics->mutable_inference_stats()->mutable_fail()->set_count(
-                ucnt);
-            err = fail_json.MemberAsUInt("ns", &ucnt);
-            GOTO_IF_ERR(err, earlyexit);
-            statistics->mutable_inference_stats()->mutable_fail()->set_ns(ucnt);
-          }
-
-          {
-            triton::common::TritonJson::Value queue_json;
-            err = infer_stats_json.MemberAsObject("queue", &queue_json);
-            GOTO_IF_ERR(err, earlyexit);
-
-            err = queue_json.MemberAsUInt("count", &ucnt);
-            GOTO_IF_ERR(err, earlyexit);
-            statistics->mutable_inference_stats()->mutable_queue()->set_count(
-                ucnt);
-            err = queue_json.MemberAsUInt("ns", &ucnt);
-            GOTO_IF_ERR(err, earlyexit);
-            statistics->mutable_inference_stats()->mutable_queue()->set_ns(ucnt);
-          }
-
           {
-            triton::common::TritonJson::Value compute_input_json;
-            err = infer_stats_json.MemberAsObject(
-                "compute_input", &compute_input_json);
+            triton::common::TritonJson::Value infer_stats_json;
+            err = model_stat.MemberAsObject("inference_stats", &infer_stats_json);
             GOTO_IF_ERR(err, earlyexit);
 
-            err = compute_input_json.MemberAsUInt("count", &ucnt);
+            err = SetStatisticsDuration(
+                infer_stats_json, "success",
+                statistics->mutable_inference_stats()->mutable_success());
             GOTO_IF_ERR(err, earlyexit);
-            statistics->mutable_inference_stats()
-                ->mutable_compute_input()
-                ->set_count(ucnt);
-            err = compute_input_json.MemberAsUInt("ns", &ucnt);
+            err = SetStatisticsDuration(
+                infer_stats_json, "fail",
+                statistics->mutable_inference_stats()->mutable_fail());
             GOTO_IF_ERR(err, earlyexit);
-            statistics->mutable_inference_stats()
-                ->mutable_compute_input()
-                ->set_ns(ucnt);
-          }
-
-          {
-            triton::common::TritonJson::Value compute_infer_json;
-            err = infer_stats_json.MemberAsObject(
-                "compute_infer", &compute_infer_json);
+            err = SetStatisticsDuration(
+                infer_stats_json, "queue",
+                statistics->mutable_inference_stats()->mutable_queue());
             GOTO_IF_ERR(err, earlyexit);
-
-            err = compute_infer_json.MemberAsUInt("count", &ucnt);
+            err = SetStatisticsDuration(
+                infer_stats_json, "compute_input",
+                statistics->mutable_inference_stats()->mutable_compute_input());
             GOTO_IF_ERR(err, earlyexit);
-            statistics->mutable_inference_stats()
-                ->mutable_compute_infer()
-                ->set_count(ucnt);
-            err = compute_infer_json.MemberAsUInt("ns", &ucnt);
+            err = SetStatisticsDuration(
+                infer_stats_json, "compute_infer",
+                statistics->mutable_inference_stats()->mutable_compute_infer());
             GOTO_IF_ERR(err, earlyexit);
-            statistics->mutable_inference_stats()
-                ->mutable_compute_infer()
-                ->set_ns(ucnt);
-          }
-
-          {
-            triton::common::TritonJson::Value compute_output_json;
-            err = infer_stats_json.MemberAsObject(
-                "compute_output", &compute_output_json);
+            err = SetStatisticsDuration(
+                infer_stats_json, "compute_output",
+                statistics->mutable_inference_stats()->mutable_compute_output());
             GOTO_IF_ERR(err, earlyexit);
-
-            err = compute_output_json.MemberAsUInt("count", &ucnt);
+            err = SetStatisticsDuration(
+                infer_stats_json, "cache_hit",
+                statistics->mutable_inference_stats()->mutable_cache_hit());
             GOTO_IF_ERR(err, earlyexit);
-            statistics->mutable_inference_stats()
-                ->mutable_compute_output()
-                ->set_count(ucnt);
-            err = compute_output_json.MemberAsUInt("ns", &ucnt);
+            err = SetStatisticsDuration(
+                infer_stats_json, "cache_miss",
+                statistics->mutable_inference_stats()->mutable_cache_miss());
             GOTO_IF_ERR(err, earlyexit);
-            statistics->mutable_inference_stats()
-                ->mutable_compute_output()
-                ->set_ns(ucnt);
-          }
-
-          {
-            triton::common::TritonJson::Value cache_hit_json;
-            err = infer_stats_json.MemberAsObject("cache_hit", &cache_hit_json);
-            GOTO_IF_ERR(err, earlyexit);
-
-            err = cache_hit_json.MemberAsUInt("count", &ucnt);
-            GOTO_IF_ERR(err, earlyexit);
-            statistics->mutable_inference_stats()->mutable_cache_hit()->set_count(
-                ucnt);
-            err = cache_hit_json.MemberAsUInt("ns", &ucnt);
-            GOTO_IF_ERR(err, earlyexit);
-            statistics->mutable_inference_stats()->mutable_cache_hit()->set_ns(
-                ucnt);
-          }
-
-          {
-            triton::common::TritonJson::Value cache_miss_json;
-            err = infer_stats_json.MemberAsObject("cache_miss", &cache_miss_json);
-            GOTO_IF_ERR(err, earlyexit);
-
-            err = cache_miss_json.MemberAsUInt("count", &ucnt);
-            GOTO_IF_ERR(err, earlyexit);
-            statistics->mutable_inference_stats()
-                ->mutable_cache_miss()
-                ->set_count(ucnt);
-            err = cache_miss_json.MemberAsUInt("ns", &ucnt);
-            GOTO_IF_ERR(err, earlyexit);
-            statistics->mutable_inference_stats()->mutable_cache_miss()->set_ns(
-                ucnt);
           }
 
           {
@@ -1116,167 +1030,90 @@ CommonHandler::RegisterModelStatistics()
             inference::InferResponseStatistics res;
 
-            {
-              triton::common::TritonJson::Value stat_json;
-              err = res_json.MemberAsObject("compute_infer", &stat_json);
-              GOTO_IF_ERR(err, earlyexit);
-
-              uint64_t val;
-              err = stat_json.MemberAsUInt("count", &val);
-              GOTO_IF_ERR(err, earlyexit);
-              res.mutable_compute_infer()->set_count(val);
-              err = stat_json.MemberAsUInt("ns", &val);
-              GOTO_IF_ERR(err, earlyexit);
-              res.mutable_compute_infer()->set_ns(val);
-            }
-
-            {
-              triton::common::TritonJson::Value stat_json;
-              err = res_json.MemberAsObject("compute_output", &stat_json);
-              GOTO_IF_ERR(err, earlyexit);
-
-              uint64_t val;
-              err = stat_json.MemberAsUInt("count", &val);
-              GOTO_IF_ERR(err, earlyexit);
-              res.mutable_compute_output()->set_count(val);
-              err = stat_json.MemberAsUInt("ns", &val);
-              GOTO_IF_ERR(err, earlyexit);
-              res.mutable_compute_output()->set_ns(val);
-            }
-
-            {
-              triton::common::TritonJson::Value stat_json;
-              err = res_json.MemberAsObject("success", &stat_json);
-              GOTO_IF_ERR(err, earlyexit);
-
-              uint64_t val;
-              err = stat_json.MemberAsUInt("count", &val);
-              GOTO_IF_ERR(err, earlyexit);
-              res.mutable_success()->set_count(val);
-              err = stat_json.MemberAsUInt("ns", &val);
-              GOTO_IF_ERR(err, earlyexit);
-              res.mutable_success()->set_ns(val);
-            }
-
-            {
-              triton::common::TritonJson::Value stat_json;
-              err = res_json.MemberAsObject("fail", &stat_json);
-              GOTO_IF_ERR(err, earlyexit);
-
-              uint64_t val;
-              err = stat_json.MemberAsUInt("count", &val);
-              GOTO_IF_ERR(err, earlyexit);
-              res.mutable_fail()->set_count(val);
-              err = stat_json.MemberAsUInt("ns", &val);
-              GOTO_IF_ERR(err, earlyexit);
-              res.mutable_fail()->set_ns(val);
-            }
-
-            {
-              triton::common::TritonJson::Value stat_json;
-              err = res_json.MemberAsObject("empty_response", &stat_json);
-              GOTO_IF_ERR(err, earlyexit);
-
-              uint64_t val;
-              err = stat_json.MemberAsUInt("count", &val);
-              GOTO_IF_ERR(err, earlyexit);
-              res.mutable_empty_response()->set_count(val);
-              err = stat_json.MemberAsUInt("ns", &val);
-              GOTO_IF_ERR(err, earlyexit);
-              res.mutable_empty_response()->set_ns(val);
-            }
+            err = SetStatisticsDuration(
+                res_json, "compute_infer", res.mutable_compute_infer());
+            GOTO_IF_ERR(err, earlyexit);
+            err = SetStatisticsDuration(
+                res_json, "compute_output", res.mutable_compute_output());
+            GOTO_IF_ERR(err, earlyexit);
+            err = SetStatisticsDuration(
+                res_json, "success", res.mutable_success());
+            GOTO_IF_ERR(err, earlyexit);
+            err = SetStatisticsDuration(res_json, "fail", res.mutable_fail());
+            GOTO_IF_ERR(err, earlyexit);
+            err = SetStatisticsDuration(
+                res_json, "empty_response", res.mutable_empty_response());
+            GOTO_IF_ERR(err, earlyexit);
+            err =
+                SetStatisticsDuration(res_json, "cancel", res.mutable_cancel());
+            GOTO_IF_ERR(err, earlyexit);
 
             (*statistics->mutable_response_stats())[key] = std::move(res);
           }
         }
 
-        triton::common::TritonJson::Value batches_json;
-        err = model_stat.MemberAsArray("batch_stats", &batches_json);
-        GOTO_IF_ERR(err, earlyexit);
-
-        for (size_t idx = 0; idx < batches_json.ArraySize(); ++idx) {
-          triton::common::TritonJson::Value batch_stat;
-          err = batches_json.IndexAsObject(idx, &batch_stat);
-          GOTO_IF_ERR(err, earlyexit);
-
-          auto batch_statistics = statistics->add_batch_stats();
-
-          uint64_t ucnt;
-          err = batch_stat.MemberAsUInt("batch_size", &ucnt);
+        {
+          triton::common::TritonJson::Value batches_json;
+          err = model_stat.MemberAsArray("batch_stats", &batches_json);
           GOTO_IF_ERR(err, earlyexit);
-          batch_statistics->set_batch_size(ucnt);
 
-          {
-            triton::common::TritonJson::Value compute_input_json;
-            err =
-                batch_stat.MemberAsObject("compute_input", &compute_input_json);
+          for (size_t idx = 0; idx < batches_json.ArraySize(); ++idx) {
+            triton::common::TritonJson::Value batch_stat;
+            err = batches_json.IndexAsObject(idx, &batch_stat);
             GOTO_IF_ERR(err, earlyexit);
 
-            err = compute_input_json.MemberAsUInt("count", &ucnt);
-            GOTO_IF_ERR(err, earlyexit);
-            batch_statistics->mutable_compute_input()->set_count(ucnt);
-            err = compute_input_json.MemberAsUInt("ns", &ucnt);
-            GOTO_IF_ERR(err, earlyexit);
-            batch_statistics->mutable_compute_input()->set_ns(ucnt);
-          }
-
-          {
-            triton::common::TritonJson::Value compute_infer_json;
-            err =
-                batch_stat.MemberAsObject("compute_infer", &compute_infer_json);
-            GOTO_IF_ERR(err, earlyexit);
+            auto batch_statistics = statistics->add_batch_stats();
 
-            err = compute_infer_json.MemberAsUInt("count", &ucnt);
-            GOTO_IF_ERR(err, earlyexit);
-            batch_statistics->mutable_compute_infer()->set_count(ucnt);
-            err = compute_infer_json.MemberAsUInt("ns", &ucnt);
+            uint64_t ucnt;
+            err = batch_stat.MemberAsUInt("batch_size", &ucnt);
             GOTO_IF_ERR(err, earlyexit);
-            batch_statistics->mutable_compute_infer()->set_ns(ucnt);
-          }
+            batch_statistics->set_batch_size(ucnt);
 
-          {
-            triton::common::TritonJson::Value compute_output_json;
-            err = batch_stat.MemberAsObject(
-                "compute_output", &compute_output_json);
+            err = SetStatisticsDuration(
+                batch_stat, "compute_input",
+                batch_statistics->mutable_compute_input());
             GOTO_IF_ERR(err, earlyexit);
-
-            err = compute_output_json.MemberAsUInt("count", &ucnt);
+            err = SetStatisticsDuration(
+                batch_stat, "compute_infer",
+                batch_statistics->mutable_compute_infer());
             GOTO_IF_ERR(err, earlyexit);
-            batch_statistics->mutable_compute_output()->set_count(ucnt);
-            err = compute_output_json.MemberAsUInt("ns", &ucnt);
+            err = SetStatisticsDuration(
+                batch_stat, "compute_output",
+                batch_statistics->mutable_compute_output());
             GOTO_IF_ERR(err, earlyexit);
-            batch_statistics->mutable_compute_output()->set_ns(ucnt);
           }
         }
 
-        triton::common::TritonJson::Value memory_usage_json;
-        err = model_stat.MemberAsArray("memory_usage", &memory_usage_json);
-        GOTO_IF_ERR(err, earlyexit);
-
-        for (size_t idx = 0; idx < memory_usage_json.ArraySize(); ++idx) {
-          triton::common::TritonJson::Value usage;
-          err = memory_usage_json.IndexAsObject(idx, &usage);
+        {
+          triton::common::TritonJson::Value memory_usage_json;
+          err = model_stat.MemberAsArray("memory_usage", &memory_usage_json);
          GOTO_IF_ERR(err, earlyexit);
 
-          auto memory_usage = statistics->add_memory_usage();
-          {
-            const char* type;
-            size_t type_len;
-            err = usage.MemberAsString("type", &type, &type_len);
-            GOTO_IF_ERR(err, earlyexit);
-            memory_usage->set_type(std::string(type, type_len));
-          }
-          {
-            int64_t id;
-            err = usage.MemberAsInt("id", &id);
-            GOTO_IF_ERR(err, earlyexit);
-            memory_usage->set_id(id);
-          }
-          {
-            uint64_t byte_size;
-            err = usage.MemberAsUInt("byte_size", &byte_size);
+          for (size_t idx = 0; idx < memory_usage_json.ArraySize(); ++idx) {
+            triton::common::TritonJson::Value usage;
+            err = memory_usage_json.IndexAsObject(idx, &usage);
             GOTO_IF_ERR(err, earlyexit);
-            memory_usage->set_byte_size(byte_size);
+
+            auto memory_usage = statistics->add_memory_usage();
+            {
+              const char* type;
+              size_t type_len;
+              err = usage.MemberAsString("type", &type, &type_len);
+              GOTO_IF_ERR(err, earlyexit);
+              memory_usage->set_type(std::string(type, type_len));
+            }
+            {
+              int64_t id;
+              err = usage.MemberAsInt("id", &id);
+              GOTO_IF_ERR(err, earlyexit);
+              memory_usage->set_id(id);
+            }
+            {
+              uint64_t byte_size;
+              err = usage.MemberAsUInt("byte_size", &byte_size);
+              GOTO_IF_ERR(err, earlyexit);
+              memory_usage->set_byte_size(byte_size);
+            }
           }
         }
       }
@@ -1303,6 +1140,26 @@ CommonHandler::RegisterModelStatistics()
       false /* async */, cq_, restricted_kv, response_delay_);
 }
 
+template <typename PBTYPE>
+TRITONSERVER_Error*
+CommonHandler::SetStatisticsDuration(
+    triton::common::TritonJson::Value& statistics_json,
+    const std::string& statistics_name,
+    PBTYPE* mutable_statistics_duration_protobuf) const
+{
+  triton::common::TritonJson::Value statistics_duration_json;
+  RETURN_IF_ERR(statistics_json.MemberAsObject(
+      statistics_name.c_str(), &statistics_duration_json));
+
+  uint64_t value;
+  RETURN_IF_ERR(statistics_duration_json.MemberAsUInt("count", &value));
+  mutable_statistics_duration_protobuf->set_count(value);
+  RETURN_IF_ERR(statistics_duration_json.MemberAsUInt("ns", &value));
+  mutable_statistics_duration_protobuf->set_ns(value);
+
+  return nullptr;
+}
+
 void
 CommonHandler::RegisterTrace()
 {
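The same `response_stats` are also exposed over gRPC, populated through the `SetStatisticsDuration()` helper added above. Below is a minimal sketch of reading them from the gRPC endpoint; the server address and model name are assumptions, and `as_json=True` is used purely to convert the statistics protobuf into a dict for readability:

```python
# Illustrative sketch only. Fetches model statistics over gRPC; the returned
# protobuf is filled in by RegisterModelStatistics() via SetStatisticsDuration().
import tritonclient.grpc as grpcclient

client = grpcclient.InferenceServerClient("localhost:8001")
stats = client.get_inference_statistics(model_name="square_int32_slow", as_json=True)

response_stats = stats["model_stats"][0].get("response_stats", {})
for index, buckets in response_stats.items():
    print(index, buckets.get("cancel"))
```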