diff --git a/docs/protocol/extension_statistics.md b/docs/protocol/extension_statistics.md
index 4da4140657..4ff956b60a 100644
--- a/docs/protocol/extension_statistics.md
+++ b/docs/protocol/extension_statistics.md
@@ -195,7 +195,8 @@ $response_stats =
   "compute_output" : $duration_stat,
   "success" : $duration_stat,
   "fail" : $duration_stat,
-  "empty_response" : $duration_stat
+  "empty_response" : $duration_stat,
+  "cancel" : $duration_stat
 }
 ```
@@ -208,6 +209,8 @@ $response_stats =
   is the sum of infer and output durations.
 - "empty_response" : The count and cumulative duration of an inference with an
   empty / no response. The duration is infer durations.
+- "cancel" : The count and cumulative duration of an inference cancellation. The
+  duration is for cleaning up resources held by cancelled inference requests.
 ```
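The new "cancel" bucket is a `$duration_stat` like the others, so existing statistics tooling picks it up unchanged. As a quick illustration (not part of this change; the server address, the loaded model name, and the use of the `tritonclient` HTTP API are assumptions), a client could read it like this:

```python
# Illustrative sketch only. Assumes a Triton server at localhost:8000 with a
# decoupled model named "square_int32" already loaded.
import tritonclient.http as httpclient

client = httpclient.InferenceServerClient("localhost:8000")
stats = client.get_inference_statistics(model_name="square_int32")

for model_stat in stats["model_stats"]:
    # "response_stats" maps a response index (as a string) to $response_stats.
    for index, buckets in model_stat.get("response_stats", {}).items():
        cancel = buckets["cancel"]  # a $duration_stat: {"count": ..., "ns": ...}
        print(f"response {index}: {cancel['count']} cancellation(s), {cancel['ns']} ns")
```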
diff --git a/qa/L0_response_statistics/response_statistics_test.py b/qa/L0_response_statistics/response_statistics_test.py
index b04403bfb3..64f2d4fb68 100755
--- a/qa/L0_response_statistics/response_statistics_test.py
+++ b/qa/L0_response_statistics/response_statistics_test.py
@@ -36,11 +36,12 @@ class TestResponseStatistics(unittest.TestCase):
     def setUp(self):
-        self._model_name = "square_int32"
-        self._min_infer_delay_ns = 400000000
-        self._min_output_delay_ns = 200000000
-        self._number_of_fail_responses = 2
-        self._number_of_empty_responses = 1
+        self._model_name = "set_by_test_case"
+        self._min_infer_delay_ns = 0
+        self._min_output_delay_ns = 0
+        self._min_cancel_delay_ns = 0
+        self._number_of_fail_responses = 0
+        self._number_of_empty_responses = 0
         self._statistics_counts = []
         self._grpc_client = grpcclient.InferenceServerClient(
             "localhost:8001", verbose=True
@@ -59,8 +60,10 @@ def callback(result, error):
     # Send an infer request and return its responses. 'number_of_responses' is the sum
     # of success, fail and empty responses the model should return for this request.
-    # This function waits until all success and fail responses are received.
-    def _stream_infer(self, number_of_responses):
+    # 'cancel_at_response_size' cancels the stream once this many responses have been
+    # received; set it to None if cancellation is not required. This function waits
+    # until all success and fail responses are received, or until cancellation.
+    def _stream_infer(self, number_of_responses, cancel_at_response_size=None):
         callback, responses = self._generate_streaming_callback_and_response_pair()
         self._grpc_client.start_stream(callback)
         input_data = np.array([number_of_responses], dtype=np.int32)
@@ -70,15 +73,27 @@ def _stream_infer(self, number_of_responses):
         self._grpc_client.async_stream_infer(
             model_name=self._model_name, inputs=inputs, outputs=outputs
         )
-        while len(responses) < (number_of_responses - self._number_of_empty_responses):
-            time.sleep(0.1)  # poll until all expected responses are received
-        self._grpc_client.stop_stream()
+        if cancel_at_response_size is None:
+            # poll until all expected responses are received
+            while len(responses) < (
+                number_of_responses - self._number_of_empty_responses
+            ):
+                time.sleep(0.1)
+            self._grpc_client.stop_stream(cancel_requests=False)
+        else:
+            # poll until the cancellation response size is reached
+            while len(responses) < cancel_at_response_size:
+                time.sleep(0.1)
+            self._grpc_client.stop_stream(cancel_requests=True)
         return responses
 
     # Update expected statistics counts for the response at 'current_index'.
     # 'number_of_responses' is the sum of success, fail and empty responses expected
-    # from this inference request.
-    def _update_statistics_counts(self, current_index, number_of_responses):
+    # from this inference request. 'cancel_at_index' is the response index at which
+    # the cancellation is expected to be recorded.
+    def _update_statistics_counts(
+        self, current_index, number_of_responses, cancel_at_index
+    ):
         if current_index >= len(self._statistics_counts):
             self._statistics_counts.append(
                 {
@@ -87,9 +102,13 @@ def _update_statistics_counts(self, current_index, number_of_responses):
                     "compute_infer": 0,
                     "compute_output": 0,
                     "success": 0,
                     "fail": 0,
                     "empty_response": 0,
+                    "cancel": 0,
                 }
             )
-        if (
+        if current_index == cancel_at_index:
+            # cancel
+            self._statistics_counts[current_index]["cancel"] += 1
+        elif (
             current_index
             + self._number_of_fail_responses
             + self._number_of_empty_responses
@@ -118,10 +137,16 @@ def _check_statistics_count_and_duration(
             delay_ns = self._min_infer_delay_ns
         elif stats_name == "compute_output":
             delay_ns = self._min_output_delay_ns
+        elif stats_name == "cancel":
+            delay_ns = self._min_cancel_delay_ns
         else:  # success or fail
             delay_ns = self._min_infer_delay_ns + self._min_output_delay_ns
-        upper_bound_ns = 1.1 * delay_ns * expected_count
-        lower_bound_ns = 0.9 * delay_ns * expected_count
+        if delay_ns == 0:
+            upper_bound_ns = 10000000 * expected_count
+            lower_bound_ns = 0
+        else:
+            upper_bound_ns = 1.1 * delay_ns * expected_count
+            lower_bound_ns = 0.9 * delay_ns * expected_count
         stats = response_stats[str(current_index)][stats_name]
         self.assertEqual(stats["count"], expected_count)
         self.assertLessEqual(stats["ns"], upper_bound_ns)
@@ -162,12 +187,14 @@ def _get_response_statistics(self):
         return response_stats_http
 
     # Check the response statistics is valid for a given infer request, providing its
-    # 'responses' and 'number_of_responses'.
-    def _check_response_stats(self, responses, number_of_responses):
+    # 'responses', expected 'number_of_responses', and 'cancel_at_index'.
+    def _check_response_stats(
+        self, responses, number_of_responses, cancel_at_index=None
+    ):
         response_stats = self._get_response_statistics()
         self.assertGreaterEqual(len(response_stats), number_of_responses)
         for i in range(number_of_responses):
-            self._update_statistics_counts(i, number_of_responses)
+            self._update_statistics_counts(i, number_of_responses, cancel_at_index)
             self._check_statistics_count_and_duration(
                 response_stats, i, "compute_infer"
             )
@@ -179,24 +206,57 @@ def _check_response_stats(self, responses, number_of_responses):
             self._check_statistics_count_and_duration(
                 response_stats, i, "empty_response"
             )
+            self._check_statistics_count_and_duration(response_stats, i, "cancel")
 
     # Test response statistics. The statistics must be valid over two or more infers.
     def test_response_statistics(self):
+        self._model_name = "square_int32"
+        self._min_infer_delay_ns = 400000000
+        self._min_output_delay_ns = 200000000
+        self._number_of_fail_responses = 2
+        self._number_of_empty_responses = 1
         # Send a request that generates 4 responses.
         number_of_responses = 4
         responses = self._stream_infer(number_of_responses)
         self._check_response_stats(responses, number_of_responses)
-        # Send a request that generates 6 responses, and make sure the
-        # statistics are aggregated with the previous request.
+        # Send a request that generates 6 responses, and make sure the statistics are
+        # aggregated with the previous request.
         number_of_responses = 6
         responses = self._stream_infer(number_of_responses)
         self._check_response_stats(responses, number_of_responses)
-        # Send a request that generates 3 responses, and make sure the
-        # statistics are aggregated with the previous requests.
+        # Send a request that generates 3 responses, and make sure the statistics are
+        # aggregated with the previous requests.
         number_of_responses = 3
         responses = self._stream_infer(number_of_responses)
         self._check_response_stats(responses, number_of_responses)
 
+    # Test response statistics with cancellation.
+    def test_response_statistics_cancel(self):
+        self._model_name = "square_int32_slow"
+        self._min_infer_delay_ns = 1200000000
+        self._min_output_delay_ns = 800000000
+        self._min_cancel_delay_ns = 400000000
+
+        # Send a request that generates 4 responses.
+        number_of_responses = 4
+        responses = self._stream_infer(number_of_responses)
+        self._check_response_stats(responses, number_of_responses)
+
+        # Send a request that generates 4 responses, and cancel it after its 1st
+        # response is received; the statistics must aggregate with the previous request.
+        responses = self._stream_infer(number_of_responses=4, cancel_at_response_size=1)
+        # There is an infer and output delay on the 1st and 2nd response, and a cancel
+        # delay on the 3rd response.
+        min_total_delay_ns = (
+            self._min_infer_delay_ns + self._min_output_delay_ns
+        ) * 2 + self._min_cancel_delay_ns
+        # Make sure the inference and the cancellation are completed before checking.
+        time.sleep(min_total_delay_ns * 1.5 / 1000000000)
+        # The request is cancelled while the 2nd response is computing, so the
+        # cancellation should be received at the 3rd response (index 2), making a total
+        # of 3 responses on the statistics.
+        self._check_response_stats(responses, number_of_responses=3, cancel_at_index=2)
+
 
 if __name__ == "__main__":
     unittest.main()
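To make the expected bookkeeping concrete, here is a hand-worked sketch (illustrative only, not part of the test) of the per-index counters that `test_response_statistics_cancel` should accumulate after its two requests: four successful responses, then a request whose cancellation is recorded at index 2. Only indices 0-2 are verified on the second check; the dict layout mirrors `self._statistics_counts`.

```python
# Expected per-response-index counters after both requests in
# test_response_statistics_cancel (fail/empty counts are 0 for this model).
expected_counts = [
    # index 0: succeeded in both requests
    {"compute_infer": 2, "compute_output": 2, "success": 2, "fail": 0, "empty_response": 0, "cancel": 0},
    # index 1: succeeded in both requests
    {"compute_infer": 2, "compute_output": 2, "success": 2, "fail": 0, "empty_response": 0, "cancel": 0},
    # index 2: succeeded in the first request, cancelled in the second
    {"compute_infer": 1, "compute_output": 1, "success": 1, "fail": 0, "empty_response": 0, "cancel": 1},
]
```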
diff --git a/qa/L0_response_statistics/test.sh b/qa/L0_response_statistics/test.sh
index eae900a9e9..b91e3bbde1 100755
--- a/qa/L0_response_statistics/test.sh
+++ b/qa/L0_response_statistics/test.sh
@@ -56,6 +56,15 @@ mkdir -p models/square_int32/1 && (cd models/square_int32 && \
     echo -e 'parameters [{ key: "CUSTOM_OUTPUT_DELAY_NS" \n value: { string_value: "200000000" } }]' >> config.pbtxt && \
     echo -e 'parameters [{ key: "CUSTOM_FAIL_COUNT" \n value: { string_value: "2" } }]' >> config.pbtxt && \
     echo -e 'parameters [{ key: "CUSTOM_EMPTY_COUNT" \n value: { string_value: "1" } }]' >> config.pbtxt)
+mkdir -p models/square_int32_slow/1 && (cd models/square_int32_slow && \
+    echo 'backend: "square"' >> config.pbtxt && \
+    echo 'max_batch_size: 0' >> config.pbtxt && \
+    echo 'model_transaction_policy { decoupled: True }' >> config.pbtxt && \
+    echo -e 'input [{ name: "IN" \n data_type: TYPE_INT32 \n dims: [ 1 ] }]' >> config.pbtxt && \
+    echo -e 'output [{ name: "OUT" \n data_type: TYPE_INT32 \n dims: [ 1 ] }]' >> config.pbtxt && \
+    echo -e 'parameters [{ key: "CUSTOM_INFER_DELAY_NS" \n value: { string_value: "1200000000" } }]' >> config.pbtxt && \
+    echo -e 'parameters [{ key: "CUSTOM_OUTPUT_DELAY_NS" \n value: { string_value: "800000000" } }]' >> config.pbtxt && \
+    echo -e 'parameters [{ key: "CUSTOM_CANCEL_DELAY_NS" \n value: { string_value: "400000000" } }]' >> config.pbtxt)
 
 TEST_LOG="response_statistics_test.log"
 SERVER_LOG="./response_statistics_test.server.log"
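For reference, the echo chain above generates approximately this `config.pbtxt` for `square_int32_slow` (the `\n` escapes expand to real newlines, so exact whitespace differs slightly). The `CUSTOM_*_DELAY_NS` parameters are what give the test deterministic lower bounds on the reported durations, including the new cancel duration:

```
backend: "square"
max_batch_size: 0
model_transaction_policy { decoupled: True }
input [{ name: "IN"
 data_type: TYPE_INT32
 dims: [ 1 ] }]
output [{ name: "OUT"
 data_type: TYPE_INT32
 dims: [ 1 ] }]
parameters [{ key: "CUSTOM_INFER_DELAY_NS"
 value: { string_value: "1200000000" } }]
parameters [{ key: "CUSTOM_OUTPUT_DELAY_NS"
 value: { string_value: "800000000" } }]
parameters [{ key: "CUSTOM_CANCEL_DELAY_NS"
 value: { string_value: "400000000" } }]
```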
diff --git a/src/grpc/grpc_server.cc b/src/grpc/grpc_server.cc
index 187272217d..63e2854208 100644
--- a/src/grpc/grpc_server.cc
+++ b/src/grpc/grpc_server.cc
@@ -300,6 +300,13 @@ class CommonHandler : public HandlerBase {
   void RegisterRepositoryModelLoad();
   void RegisterRepositoryModelUnload();
 
+  // Set count and cumulative duration for 'RegisterModelStatistics()'
+  template <typename PBTYPE>
+  TRITONSERVER_Error* SetStatisticsDuration(
+      triton::common::TritonJson::Value& statistics_json,
+      const std::string& statistics_name,
+      PBTYPE* mutable_statistics_duration_protobuf) const;
+
   const std::string name_;
   std::shared_ptr<TRITONSERVER_Server> tritonserver_;
 
@@ -968,136 +975,43 @@ CommonHandler::RegisterModelStatistics()
           GOTO_IF_ERR(err, earlyexit);
           statistics->set_execution_count(ucnt);
 
-          triton::common::TritonJson::Value infer_stats_json;
-          err = model_stat.MemberAsObject("inference_stats", &infer_stats_json);
-          GOTO_IF_ERR(err, earlyexit);
-
-          {
-            triton::common::TritonJson::Value success_json;
-            err = infer_stats_json.MemberAsObject("success", &success_json);
-            GOTO_IF_ERR(err, earlyexit);
-
-            err = success_json.MemberAsUInt("count", &ucnt);
-            GOTO_IF_ERR(err, earlyexit);
-            statistics->mutable_inference_stats()->mutable_success()->set_count(
-                ucnt);
-            err = success_json.MemberAsUInt("ns", &ucnt);
-            GOTO_IF_ERR(err, earlyexit);
-            statistics->mutable_inference_stats()->mutable_success()->set_ns(
-                ucnt);
-          }
-
-          {
-            triton::common::TritonJson::Value fail_json;
-            err = infer_stats_json.MemberAsObject("fail", &fail_json);
-            GOTO_IF_ERR(err, earlyexit);
-
-            err = fail_json.MemberAsUInt("count", &ucnt);
-            GOTO_IF_ERR(err, earlyexit);
-            statistics->mutable_inference_stats()->mutable_fail()->set_count(
-                ucnt);
-            err = fail_json.MemberAsUInt("ns", &ucnt);
-            GOTO_IF_ERR(err, earlyexit);
-            statistics->mutable_inference_stats()->mutable_fail()->set_ns(ucnt);
-          }
-
-          {
-            triton::common::TritonJson::Value queue_json;
-            err = infer_stats_json.MemberAsObject("queue", &queue_json);
-            GOTO_IF_ERR(err, earlyexit);
-
-            err = queue_json.MemberAsUInt("count", &ucnt);
-            GOTO_IF_ERR(err, earlyexit);
-            statistics->mutable_inference_stats()->mutable_queue()->set_count(
-                ucnt);
-            err = queue_json.MemberAsUInt("ns", &ucnt);
-            GOTO_IF_ERR(err, earlyexit);
-            statistics->mutable_inference_stats()->mutable_queue()->set_ns(ucnt);
-          }
-
           {
-            triton::common::TritonJson::Value compute_input_json;
-            err = infer_stats_json.MemberAsObject(
-                "compute_input", &compute_input_json);
+            triton::common::TritonJson::Value infer_stats_json;
+            err = model_stat.MemberAsObject("inference_stats", &infer_stats_json);
             GOTO_IF_ERR(err, earlyexit);
 
-            err = compute_input_json.MemberAsUInt("count", &ucnt);
+            err = SetStatisticsDuration(
+                infer_stats_json, "success",
+                statistics->mutable_inference_stats()->mutable_success());
             GOTO_IF_ERR(err, earlyexit);
-            statistics->mutable_inference_stats()
-                ->mutable_compute_input()
-                ->set_count(ucnt);
-            err = compute_input_json.MemberAsUInt("ns", &ucnt);
+            err = SetStatisticsDuration(
+                infer_stats_json, "fail",
+                statistics->mutable_inference_stats()->mutable_fail());
             GOTO_IF_ERR(err, earlyexit);
-            statistics->mutable_inference_stats()
-                ->mutable_compute_input()
-                ->set_ns(ucnt);
-          }
-
-          {
-            triton::common::TritonJson::Value compute_infer_json;
-            err = infer_stats_json.MemberAsObject(
-                "compute_infer", &compute_infer_json);
+            err = SetStatisticsDuration(
+                infer_stats_json, "queue",
+                statistics->mutable_inference_stats()->mutable_queue());
             GOTO_IF_ERR(err, earlyexit);
-
-            err = compute_infer_json.MemberAsUInt("count", &ucnt);
+            err = SetStatisticsDuration(
+                infer_stats_json, "compute_input",
+                statistics->mutable_inference_stats()->mutable_compute_input());
             GOTO_IF_ERR(err, earlyexit);
-            statistics->mutable_inference_stats()
-                ->mutable_compute_infer()
-                ->set_count(ucnt);
-            err = compute_infer_json.MemberAsUInt("ns", &ucnt);
+            err = SetStatisticsDuration(
+                infer_stats_json, "compute_infer",
+                statistics->mutable_inference_stats()->mutable_compute_infer());
             GOTO_IF_ERR(err, earlyexit);
-            statistics->mutable_inference_stats()
-                ->mutable_compute_infer()
-                ->set_ns(ucnt);
-          }
-
-          {
-            triton::common::TritonJson::Value compute_output_json;
-            err = infer_stats_json.MemberAsObject(
-                "compute_output", &compute_output_json);
+            err = SetStatisticsDuration(
+                infer_stats_json, "compute_output",
+                statistics->mutable_inference_stats()->mutable_compute_output());
             GOTO_IF_ERR(err, earlyexit);
-
-            err = compute_output_json.MemberAsUInt("count", &ucnt);
+            err = SetStatisticsDuration(
+                infer_stats_json, "cache_hit",
+                statistics->mutable_inference_stats()->mutable_cache_hit());
             GOTO_IF_ERR(err, earlyexit);
-            statistics->mutable_inference_stats()
-                ->mutable_compute_output()
-                ->set_count(ucnt);
-            err = compute_output_json.MemberAsUInt("ns", &ucnt);
+            err = SetStatisticsDuration(
+                infer_stats_json, "cache_miss",
+                statistics->mutable_inference_stats()->mutable_cache_miss());
             GOTO_IF_ERR(err, earlyexit);
-            statistics->mutable_inference_stats()
-                ->mutable_compute_output()
-                ->set_ns(ucnt);
-          }
-
-          {
-            triton::common::TritonJson::Value cache_hit_json;
-            err = infer_stats_json.MemberAsObject("cache_hit", &cache_hit_json);
-            GOTO_IF_ERR(err, earlyexit);
-
-            err = cache_hit_json.MemberAsUInt("count", &ucnt);
-            GOTO_IF_ERR(err, earlyexit);
-            statistics->mutable_inference_stats()->mutable_cache_hit()->set_count(
-                ucnt);
-            err = cache_hit_json.MemberAsUInt("ns", &ucnt);
-            GOTO_IF_ERR(err, earlyexit);
-            statistics->mutable_inference_stats()->mutable_cache_hit()->set_ns(
-                ucnt);
-          }
-
-          {
-            triton::common::TritonJson::Value cache_miss_json;
-            err = infer_stats_json.MemberAsObject("cache_miss", &cache_miss_json);
-            GOTO_IF_ERR(err, earlyexit);
-
-            err = cache_miss_json.MemberAsUInt("count", &ucnt);
-            GOTO_IF_ERR(err, earlyexit);
-            statistics->mutable_inference_stats()
-                ->mutable_cache_miss()
-                ->set_count(ucnt);
-            err = cache_miss_json.MemberAsUInt("ns", &ucnt);
-            GOTO_IF_ERR(err, earlyexit);
-            statistics->mutable_inference_stats()->mutable_cache_miss()->set_ns(
-                ucnt);
           }
 
           {
@@ -1116,167 +1030,90 @@ CommonHandler::RegisterModelStatistics()
             inference::InferResponseStatistics res;
 
-            {
-              triton::common::TritonJson::Value stat_json;
-              err = res_json.MemberAsObject("compute_infer", &stat_json);
-              GOTO_IF_ERR(err, earlyexit);
-
-              uint64_t val;
-              err = stat_json.MemberAsUInt("count", &val);
-              GOTO_IF_ERR(err, earlyexit);
-              res.mutable_compute_infer()->set_count(val);
-              err = stat_json.MemberAsUInt("ns", &val);
-              GOTO_IF_ERR(err, earlyexit);
-              res.mutable_compute_infer()->set_ns(val);
-            }
-
-            {
-              triton::common::TritonJson::Value stat_json;
-              err = res_json.MemberAsObject("compute_output", &stat_json);
-              GOTO_IF_ERR(err, earlyexit);
-
-              uint64_t val;
-              err = stat_json.MemberAsUInt("count", &val);
-              GOTO_IF_ERR(err, earlyexit);
-              res.mutable_compute_output()->set_count(val);
-              err = stat_json.MemberAsUInt("ns", &val);
-              GOTO_IF_ERR(err, earlyexit);
-              res.mutable_compute_output()->set_ns(val);
-            }
-
-            {
-              triton::common::TritonJson::Value stat_json;
-              err = res_json.MemberAsObject("success", &stat_json);
-              GOTO_IF_ERR(err, earlyexit);
-
-              uint64_t val;
-              err = stat_json.MemberAsUInt("count", &val);
-              GOTO_IF_ERR(err, earlyexit);
-              res.mutable_success()->set_count(val);
-              err = stat_json.MemberAsUInt("ns", &val);
-              GOTO_IF_ERR(err, earlyexit);
-              res.mutable_success()->set_ns(val);
-            }
-
-            {
-              triton::common::TritonJson::Value stat_json;
-              err = res_json.MemberAsObject("fail", &stat_json);
-              GOTO_IF_ERR(err, earlyexit);
-
-              uint64_t val;
-              err = stat_json.MemberAsUInt("count", &val);
-              GOTO_IF_ERR(err, earlyexit);
-              res.mutable_fail()->set_count(val);
-              err = stat_json.MemberAsUInt("ns", &val);
-              GOTO_IF_ERR(err, earlyexit);
-              res.mutable_fail()->set_ns(val);
-            }
-
-            {
-              triton::common::TritonJson::Value stat_json;
-              err = res_json.MemberAsObject("empty_response", &stat_json);
-              GOTO_IF_ERR(err, earlyexit);
-
-              uint64_t val;
-              err = stat_json.MemberAsUInt("count", &val);
-              GOTO_IF_ERR(err, earlyexit);
-              res.mutable_empty_response()->set_count(val);
-              err = stat_json.MemberAsUInt("ns", &val);
-              GOTO_IF_ERR(err, earlyexit);
-              res.mutable_empty_response()->set_ns(val);
-            }
+            err = SetStatisticsDuration(
+                res_json, "compute_infer", res.mutable_compute_infer());
+            GOTO_IF_ERR(err, earlyexit);
+            err = SetStatisticsDuration(
+                res_json, "compute_output", res.mutable_compute_output());
+            GOTO_IF_ERR(err, earlyexit);
+            err = SetStatisticsDuration(
+                res_json, "success", res.mutable_success());
+            GOTO_IF_ERR(err, earlyexit);
+            err = SetStatisticsDuration(res_json, "fail", res.mutable_fail());
+            GOTO_IF_ERR(err, earlyexit);
+            err = SetStatisticsDuration(
+                res_json, "empty_response", res.mutable_empty_response());
+            GOTO_IF_ERR(err, earlyexit);
+            err =
+                SetStatisticsDuration(res_json, "cancel", res.mutable_cancel());
+            GOTO_IF_ERR(err, earlyexit);
 
             (*statistics->mutable_response_stats())[key] = std::move(res);
           }
         }
 
-        triton::common::TritonJson::Value batches_json;
-        err = model_stat.MemberAsArray("batch_stats", &batches_json);
-        GOTO_IF_ERR(err, earlyexit);
-
-        for (size_t idx = 0; idx < batches_json.ArraySize(); ++idx) {
-          triton::common::TritonJson::Value batch_stat;
-          err = batches_json.IndexAsObject(idx, &batch_stat);
-          GOTO_IF_ERR(err, earlyexit);
-
-          auto batch_statistics = statistics->add_batch_stats();
-
-          uint64_t ucnt;
-          err = batch_stat.MemberAsUInt("batch_size", &ucnt);
+        {
+          triton::common::TritonJson::Value batches_json;
+          err = model_stat.MemberAsArray("batch_stats", &batches_json);
           GOTO_IF_ERR(err, earlyexit);
-          batch_statistics->set_batch_size(ucnt);
 
-          {
-            triton::common::TritonJson::Value compute_input_json;
-            err =
-                batch_stat.MemberAsObject("compute_input", &compute_input_json);
+          for (size_t idx = 0; idx < batches_json.ArraySize(); ++idx) {
+            triton::common::TritonJson::Value batch_stat;
+            err = batches_json.IndexAsObject(idx, &batch_stat);
             GOTO_IF_ERR(err, earlyexit);
 
-            err = compute_input_json.MemberAsUInt("count", &ucnt);
-            GOTO_IF_ERR(err, earlyexit);
-            batch_statistics->mutable_compute_input()->set_count(ucnt);
-            err = compute_input_json.MemberAsUInt("ns", &ucnt);
-            GOTO_IF_ERR(err, earlyexit);
-            batch_statistics->mutable_compute_input()->set_ns(ucnt);
-          }
-
-          {
-            triton::common::TritonJson::Value compute_infer_json;
-            err =
-                batch_stat.MemberAsObject("compute_infer", &compute_infer_json);
-            GOTO_IF_ERR(err, earlyexit);
+            auto batch_statistics = statistics->add_batch_stats();
 
-            err = compute_infer_json.MemberAsUInt("count", &ucnt);
-            GOTO_IF_ERR(err, earlyexit);
-            batch_statistics->mutable_compute_infer()->set_count(ucnt);
-            err = compute_infer_json.MemberAsUInt("ns", &ucnt);
+            uint64_t ucnt;
+            err = batch_stat.MemberAsUInt("batch_size", &ucnt);
             GOTO_IF_ERR(err, earlyexit);
-            batch_statistics->mutable_compute_infer()->set_ns(ucnt);
-          }
+            batch_statistics->set_batch_size(ucnt);
 
-          {
-            triton::common::TritonJson::Value compute_output_json;
-            err = batch_stat.MemberAsObject(
-                "compute_output", &compute_output_json);
+            err = SetStatisticsDuration(
+                batch_stat, "compute_input",
+                batch_statistics->mutable_compute_input());
             GOTO_IF_ERR(err, earlyexit);
-
-            err = compute_output_json.MemberAsUInt("count", &ucnt);
+            err = SetStatisticsDuration(
+                batch_stat, "compute_infer",
+                batch_statistics->mutable_compute_infer());
             GOTO_IF_ERR(err, earlyexit);
-            batch_statistics->mutable_compute_output()->set_count(ucnt);
-            err = compute_output_json.MemberAsUInt("ns", &ucnt);
+            err = SetStatisticsDuration(
+                batch_stat, "compute_output",
+                batch_statistics->mutable_compute_output());
             GOTO_IF_ERR(err, earlyexit);
-            batch_statistics->mutable_compute_output()->set_ns(ucnt);
           }
         }
 
-        triton::common::TritonJson::Value memory_usage_json;
-        err = model_stat.MemberAsArray("memory_usage", &memory_usage_json);
-        GOTO_IF_ERR(err, earlyexit);
-
-        for (size_t idx = 0; idx < memory_usage_json.ArraySize(); ++idx) {
-          triton::common::TritonJson::Value usage;
-          err = memory_usage_json.IndexAsObject(idx, &usage);
+        {
+          triton::common::TritonJson::Value memory_usage_json;
+          err = model_stat.MemberAsArray("memory_usage", &memory_usage_json);
          GOTO_IF_ERR(err, earlyexit);
 
-          auto memory_usage = statistics->add_memory_usage();
-          {
-            const char* type;
-            size_t type_len;
-            err = usage.MemberAsString("type", &type, &type_len);
-            GOTO_IF_ERR(err, earlyexit);
-            memory_usage->set_type(std::string(type, type_len));
-          }
-          {
-            int64_t id;
-            err = usage.MemberAsInt("id", &id);
-            GOTO_IF_ERR(err, earlyexit);
-            memory_usage->set_id(id);
-          }
-          {
-            uint64_t byte_size;
-            err = usage.MemberAsUInt("byte_size", &byte_size);
+          for (size_t idx = 0; idx < memory_usage_json.ArraySize(); ++idx) {
+            triton::common::TritonJson::Value usage;
+            err = memory_usage_json.IndexAsObject(idx, &usage);
             GOTO_IF_ERR(err, earlyexit);
-            memory_usage->set_byte_size(byte_size);
+
+            auto memory_usage = statistics->add_memory_usage();
+            {
+              const char* type;
+              size_t type_len;
+              err = usage.MemberAsString("type", &type, &type_len);
+              GOTO_IF_ERR(err, earlyexit);
+              memory_usage->set_type(std::string(type, type_len));
+            }
+            {
+              int64_t id;
+              err = usage.MemberAsInt("id", &id);
+              GOTO_IF_ERR(err, earlyexit);
+              memory_usage->set_id(id);
+            }
+            {
+              uint64_t byte_size;
+              err = usage.MemberAsUInt("byte_size", &byte_size);
+              GOTO_IF_ERR(err, earlyexit);
+              memory_usage->set_byte_size(byte_size);
+            }
           }
         }
       }
@@ -1303,6 +1140,26 @@ CommonHandler::RegisterModelStatistics()
       false /* async */, cq_, restricted_kv, response_delay_);
 }
 
+template <typename PBTYPE>
+TRITONSERVER_Error*
+CommonHandler::SetStatisticsDuration(
+    triton::common::TritonJson::Value& statistics_json,
+    const std::string& statistics_name,
+    PBTYPE* mutable_statistics_duration_protobuf) const
+{
+  triton::common::TritonJson::Value statistics_duration_json;
+  RETURN_IF_ERR(statistics_json.MemberAsObject(
+      statistics_name.c_str(), &statistics_duration_json));
+
+  uint64_t value;
+  RETURN_IF_ERR(statistics_duration_json.MemberAsUInt("count", &value));
+  mutable_statistics_duration_protobuf->set_count(value);
+  RETURN_IF_ERR(statistics_duration_json.MemberAsUInt("ns", &value));
+  mutable_statistics_duration_protobuf->set_ns(value);
+
+  return nullptr;
+}
+
 void
 CommonHandler::RegisterTrace()
 {
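The same `response_stats` are also exposed over gRPC, populated through the `SetStatisticsDuration()` helper added above. Below is a minimal sketch of reading them from the gRPC endpoint; the server address and model name are assumptions, and `as_json=True` is used purely to convert the statistics protobuf into a dict for readability:

```python
# Illustrative sketch only. Fetches model statistics over gRPC; the returned
# protobuf is filled in by RegisterModelStatistics() via SetStatisticsDuration().
import tritonclient.grpc as grpcclient

client = grpcclient.InferenceServerClient("localhost:8001")
stats = client.get_inference_statistics(model_name="square_int32_slow", as_json=True)

response_stats = stats["model_stats"][0].get("response_stats", {})
for index, buckets in response_stats.items():
    print(index, buckets.get("cancel"))
```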