diff --git a/include/triton/core/tritonbackend.h b/include/triton/core/tritonbackend.h
index ef3af71b5..ad04d57f8 100644
--- a/include/triton/core/tritonbackend.h
+++ b/include/triton/core/tritonbackend.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -65,6 +65,7 @@ struct TRITONBACKEND_Response;
 struct TRITONBACKEND_Backend;
 struct TRITONBACKEND_Model;
 struct TRITONBACKEND_ModelInstance;
+struct TRITONBACKEND_ModelInstanceResponseStatistics;
 struct TRITONBACKEND_BackendAttribute;
 struct TRITONBACKEND_Batcher;
 
@@ -94,7 +95,7 @@ struct TRITONBACKEND_Batcher;
 ///   }
 ///
 #define TRITONBACKEND_API_VERSION_MAJOR 1
-#define TRITONBACKEND_API_VERSION_MINOR 18
+#define TRITONBACKEND_API_VERSION_MINOR 19
 
 /// Get the TRITONBACKEND API version supported by Triton. This value
 /// can be compared against the TRITONBACKEND_API_VERSION_MAJOR and
@@ -761,8 +762,9 @@ TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ResponseOutput(
 /// \param send_flags Flags associated with the response. \see
 ///   TRITONSERVER_ResponseCompleteFlag. \see
 ///   TRITONSERVER_InferenceResponseCompleteFn_t.
-/// \param error The TRITONSERVER_Error to send if the response is an
-/// error, or nullptr if the response is successful.
+/// \param error The TRITONSERVER_Error to send if the response is an error, or
+/// nullptr if the response is successful. The caller retains ownership of the
+/// error object and must free it with TRITONSERVER_ErrorDelete.
 /// \return a TRITONSERVER_Error indicating success or failure.
 TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ResponseSend(
     TRITONBACKEND_Response* response, const uint32_t send_flags,
@@ -1319,16 +1321,16 @@ TRITONBACKEND_ModelInstanceReportMemoryUsage(
 /// TRITONBACKEND_ModelInstanceExecute.
 ///
 ///   TRITONBACKEND_ModelInstanceExecute()
-///     CAPTURE TIMESPACE (exec_start_ns)
+///     CAPTURE TIMESTAMP (exec_start_ns)
 ///     < process input tensors to prepare them for inference
 ///       execution, including copying the tensors to/from GPU if
 ///       necessary>
-///     CAPTURE TIMESPACE (compute_start_ns)
+///     CAPTURE TIMESTAMP (compute_start_ns)
 ///     < perform inference computations to produce outputs >
-///     CAPTURE TIMESPACE (compute_end_ns)
+///     CAPTURE TIMESTAMP (compute_end_ns)
 ///     < allocate output buffers and extract output tensors, including
 ///       copying the tensors to/from GPU if necessary>
-///     CAPTURE TIMESPACE (exec_end_ns)
+///     CAPTURE TIMESTAMP (exec_end_ns)
 ///     return
 ///
 /// Note that these statistics are associated with a valid
@@ -1356,6 +1358,156 @@ TRITONBACKEND_ModelInstanceReportStatistics(
     const uint64_t compute_start_ns, const uint64_t compute_end_ns,
     const uint64_t exec_end_ns);
 
+/// Create a new inference response statistics object.
+///
+/// \param response_statistics The new response statistics object to be created.
+/// \return a TRITONSERVER_Error indicating success or failure.
+TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
+TRITONBACKEND_ModelInstanceResponseStatisticsNew(
+    TRITONBACKEND_ModelInstanceResponseStatistics** response_statistics);
+
+/// Delete an inference response statistics object.
+///
+/// The caller retains ownership of the objects set on the deleted response
+/// statistics object and must free them separately.
+///
+/// \param response_statistics The response statistics object to be deleted.
+/// \return a TRITONSERVER_Error indicating success or failure.
+TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
+TRITONBACKEND_ModelInstanceResponseStatisticsDelete(
+    TRITONBACKEND_ModelInstanceResponseStatistics* response_statistics);
+
+/// Set the model instance on an inference response statistics object.
+///
+/// \param response_statistics The response statistics object.
+/// \param model_instance The model instance.
+/// \return a TRITONSERVER_Error indicating success or failure.
+TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
+TRITONBACKEND_ModelInstanceResponseStatisticsSetModelInstance(
+    TRITONBACKEND_ModelInstanceResponseStatistics* response_statistics,
+    TRITONBACKEND_ModelInstance* model_instance);
+
+/// Set the response factory on an inference response statistics object.
+///
+/// \param response_statistics The response statistics object.
+/// \param response_factory The response factory.
+/// \return a TRITONSERVER_Error indicating success or failure.
+TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
+TRITONBACKEND_ModelInstanceResponseStatisticsSetResponseFactory(
+    TRITONBACKEND_ModelInstanceResponseStatistics* response_statistics,
+    TRITONBACKEND_ResponseFactory* response_factory);
+
+/// Set the response start time on an inference response statistics object.
+///
+/// All timestamps should be reported in nanoseconds and collected using
+/// std::chrono::steady_clock::now().time_since_epoch() or the equivalent.
+///
+/// For consistency of measurement across different backends, the timestamps
+/// should be collected at the following points during
+/// TRITONBACKEND_ModelInstanceExecute.
+///
+///   TRITONBACKEND_ModelInstanceExecute()
+///     < start of this response >
+///     CAPTURE TIMESTAMP (response_start)
+///     < generate this response >
+///     CAPTURE TIMESTAMP (compute_output_start)
+///     < allocate output buffers and extract output tensors, including copying
+///       the tensors to/from GPU if necessary >
+///     CAPTURE TIMESTAMP (response_end)
+///     < end of this response >
+///     return
+///
+/// \param response_statistics The response statistics object.
+/// \param response_start The response start time.
+/// \return a TRITONSERVER_Error indicating success or failure.
+TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
+TRITONBACKEND_ModelInstanceResponseStatisticsSetResponseStart(
+    TRITONBACKEND_ModelInstanceResponseStatistics* response_statistics,
+    uint64_t response_start);
+
+/// Set the compute output start time on an inference response statistics
+/// object.
+///
+/// Do NOT set this compute output start time (or set it to 0) if reporting an
+/// empty response.
+///
+/// All timestamps should be reported in nanoseconds and collected using
+/// std::chrono::steady_clock::now().time_since_epoch() or the equivalent.
+///
+/// For consistency of measurement across different backends, the timestamps
+/// should be collected at the following points during
+/// TRITONBACKEND_ModelInstanceExecute.
+///
+///   TRITONBACKEND_ModelInstanceExecute()
+///     < start of this response >
+///     CAPTURE TIMESTAMP (response_start)
+///     < generate this response >
+///     CAPTURE TIMESTAMP (compute_output_start)
+///     < allocate output buffers and extract output tensors, including copying
+///       the tensors to/from GPU if necessary >
+///     CAPTURE TIMESTAMP (response_end)
+///     < end of this response >
+///     return
+///
+/// \param response_statistics The response statistics object.
+/// \param compute_output_start The compute output start time.
+/// \return a TRITONSERVER_Error indicating success or failure.
+TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
+TRITONBACKEND_ModelInstanceResponseStatisticsSetComputeOutputStart(
+    TRITONBACKEND_ModelInstanceResponseStatistics* response_statistics,
+    uint64_t compute_output_start);
+
+/// Set the response end time on an inference response statistics object.
+///
+/// All timestamps should be reported in nanoseconds and collected using
+/// std::chrono::steady_clock::now().time_since_epoch() or the equivalent.
+///
+/// For consistency of measurement across different backends, the timestamps
+/// should be collected at the following points during
+/// TRITONBACKEND_ModelInstanceExecute.
+///
+///   TRITONBACKEND_ModelInstanceExecute()
+///     < start of this response >
+///     CAPTURE TIMESTAMP (response_start)
+///     < generate this response >
+///     CAPTURE TIMESTAMP (compute_output_start)
+///     < allocate output buffers and extract output tensors, including copying
+///       the tensors to/from GPU if necessary >
+///     CAPTURE TIMESTAMP (response_end)
+///     < end of this response >
+///     return
+///
+/// \param response_statistics The response statistics object.
+/// \param response_end The response end time.
+/// \return a TRITONSERVER_Error indicating success or failure.
+TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
+TRITONBACKEND_ModelInstanceResponseStatisticsSetResponseEnd(
+    TRITONBACKEND_ModelInstanceResponseStatistics* response_statistics,
+    uint64_t response_end);
+
+/// Set the error on an inference response statistics object.
+///
+/// Use the same error object passed to TRITONBACKEND_ResponseSend. \see
+/// TRITONBACKEND_ResponseSend.
+///
+/// \param response_statistics The response statistics object.
+/// \param error The error object.
+/// \return a TRITONSERVER_Error indicating success or failure.
+TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
+TRITONBACKEND_ModelInstanceResponseStatisticsSetError(
+    TRITONBACKEND_ModelInstanceResponseStatistics* response_statistics,
+    TRITONSERVER_Error* error);
+
+/// Record statistics for an inference response.
+///
+/// The caller retains ownership of the response statistics and must free it
+/// after this function returns.
+///
+/// \param response_statistics The statistics to be recorded.
+/// \return a TRITONSERVER_Error indicating success or failure.
+TRITONBACKEND_DECLSPEC TRITONSERVER_Error*
+TRITONBACKEND_ModelInstanceReportResponseStatistics(
+    TRITONBACKEND_ModelInstanceResponseStatistics* response_statistics);
+
 /// Record statistics for the execution of an entire batch of
 /// inference requests.
 ///
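For illustration, here is a minimal sketch of how a backend might drive the API declared above from within TRITONBACKEND_ModelInstanceExecute. The helper name ReportOneResponse and the surrounding control flow are hypothetical, and every returned TRITONSERVER_Error is ignored for brevity; real backends should check them. The error argument is intended to be the same TRITONSERVER_Error passed to TRITONBACKEND_ResponseSend, which (per the documentation change above) the backend still frees with TRITONSERVER_ErrorDelete.

```cpp
// Illustrative sketch only; not part of this diff. Assumes the backend links
// against the Triton backend API and captures the three per-response
// timestamps at the points shown in the documentation above.
#include <chrono>
#include <cstdint>

#include "triton/core/tritonbackend.h"

// Capture a steady-clock timestamp in nanoseconds, as the docs recommend.
static uint64_t
CaptureTimeNs()
{
  return std::chrono::duration_cast<std::chrono::nanoseconds>(
             std::chrono::steady_clock::now().time_since_epoch())
      .count();
}

// Hypothetical helper: report statistics for one response that was produced
// through 'factory' on 'instance'. 'error' is the error (or nullptr) that was
// sent with TRITONBACKEND_ResponseSend for this response.
static void
ReportOneResponse(
    TRITONBACKEND_ModelInstance* instance,
    TRITONBACKEND_ResponseFactory* factory, uint64_t response_start,
    uint64_t compute_output_start, uint64_t response_end,
    TRITONSERVER_Error* error)
{
  TRITONBACKEND_ModelInstanceResponseStatistics* stats = nullptr;
  TRITONBACKEND_ModelInstanceResponseStatisticsNew(&stats);
  TRITONBACKEND_ModelInstanceResponseStatisticsSetModelInstance(stats, instance);
  TRITONBACKEND_ModelInstanceResponseStatisticsSetResponseFactory(stats, factory);
  TRITONBACKEND_ModelInstanceResponseStatisticsSetResponseStart(stats, response_start);
  // For an empty response, leave compute_output_start at 0 (or skip this call).
  TRITONBACKEND_ModelInstanceResponseStatisticsSetComputeOutputStart(
      stats, compute_output_start);
  TRITONBACKEND_ModelInstanceResponseStatisticsSetResponseEnd(stats, response_end);
  if (error != nullptr) {
    TRITONBACKEND_ModelInstanceResponseStatisticsSetError(stats, error);
  }
  TRITONBACKEND_ModelInstanceReportResponseStatistics(stats);
  // Reporting does not take ownership; the backend still deletes the object.
  TRITONBACKEND_ModelInstanceResponseStatisticsDelete(stats);
}
```

In practice the backend would call CaptureTimeNs() for response_start before generating a response, again for compute_output_start before extracting and copying that response's output tensors, and again for response_end once the response is finished, then invoke the helper once per response it sends.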
diff --git a/src/backend_model_instance.cc b/src/backend_model_instance.cc
index a4a9bba2e..5e43e093d 100644
--- a/src/backend_model_instance.cc
+++ b/src/backend_model_instance.cc
@@ -1,4 +1,4 @@
-// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -795,6 +795,23 @@ TritonModelInstance::TritonBackendThread::BackendThread()
   LOG_VERBOSE(1) << "Stopping backend thread for " << name_ << "...";
 }
 
+// Opaque object for the response statistics C-API
+struct ModelInstanceResponseStatistics {
+#ifdef TRITON_ENABLE_STATS
+  ModelInstanceResponseStatistics()
+      : model_instance(nullptr), response_factory(nullptr), response_start(0),
+        compute_output_start(0), response_end(0), error(nullptr)
+  {
+  }
+  TritonModelInstance* model_instance;
+  std::shared_ptr<InferenceResponseFactory>* response_factory;
+  uint64_t response_start;
+  uint64_t compute_output_start;
+  uint64_t response_end;
+  TRITONSERVER_Error* error;
+#endif  // TRITON_ENABLE_STATS
+};
+
 extern "C" {
 
 TRITONAPI_DECLSPEC TRITONSERVER_Error*
@@ -953,6 +970,148 @@ TRITONBACKEND_ModelInstanceReportStatistics(
   return nullptr;  // success
 }
 
+TRITONAPI_DECLSPEC TRITONSERVER_Error*
+TRITONBACKEND_ModelInstanceResponseStatisticsNew(
+    TRITONBACKEND_ModelInstanceResponseStatistics** response_statistics)
+{
+#ifdef TRITON_ENABLE_STATS
+  *response_statistics =
+      reinterpret_cast<TRITONBACKEND_ModelInstanceResponseStatistics*>(
+          new ModelInstanceResponseStatistics());
+#endif  // TRITON_ENABLE_STATS
+
+  return nullptr;  // success
+}
+
+TRITONAPI_DECLSPEC TRITONSERVER_Error*
+TRITONBACKEND_ModelInstanceResponseStatisticsDelete(
+    TRITONBACKEND_ModelInstanceResponseStatistics* response_statistics)
+{
+#ifdef TRITON_ENABLE_STATS
+  delete reinterpret_cast<ModelInstanceResponseStatistics*>(
+      response_statistics);
+#endif  // TRITON_ENABLE_STATS
+
+  return nullptr;  // success
+}
+
+TRITONAPI_DECLSPEC TRITONSERVER_Error*
+TRITONBACKEND_ModelInstanceResponseStatisticsSetModelInstance(
+    TRITONBACKEND_ModelInstanceResponseStatistics* response_statistics,
+    TRITONBACKEND_ModelInstance* model_instance)
+{
+#ifdef TRITON_ENABLE_STATS
+  ModelInstanceResponseStatistics* rs =
+      reinterpret_cast<ModelInstanceResponseStatistics*>(response_statistics);
+  rs->model_instance = reinterpret_cast<TritonModelInstance*>(model_instance);
+#endif  // TRITON_ENABLE_STATS
+
+  return nullptr;  // success
+}
+
+TRITONAPI_DECLSPEC TRITONSERVER_Error*
+TRITONBACKEND_ModelInstanceResponseStatisticsSetResponseFactory(
+    TRITONBACKEND_ModelInstanceResponseStatistics* response_statistics,
+    TRITONBACKEND_ResponseFactory* response_factory)
+{
+#ifdef TRITON_ENABLE_STATS
+  ModelInstanceResponseStatistics* rs =
+      reinterpret_cast<ModelInstanceResponseStatistics*>(response_statistics);
+  rs->response_factory =
+      reinterpret_cast<std::shared_ptr<InferenceResponseFactory>*>(
+          response_factory);
+  ;
+#endif  // TRITON_ENABLE_STATS
+
+  return nullptr;  // success
+}
+
+TRITONAPI_DECLSPEC TRITONSERVER_Error*
+TRITONBACKEND_ModelInstanceResponseStatisticsSetResponseStart(
+    TRITONBACKEND_ModelInstanceResponseStatistics* response_statistics,
+    uint64_t response_start)
+{
+#ifdef TRITON_ENABLE_STATS
+  ModelInstanceResponseStatistics* rs =
+      reinterpret_cast<ModelInstanceResponseStatistics*>(response_statistics);
+  rs->response_start = response_start;
+#endif  // TRITON_ENABLE_STATS
+
+  return nullptr;  // success
+}
+
+TRITONAPI_DECLSPEC TRITONSERVER_Error*
+TRITONBACKEND_ModelInstanceResponseStatisticsSetComputeOutputStart(
+    TRITONBACKEND_ModelInstanceResponseStatistics* response_statistics,
+    uint64_t compute_output_start)
+{
+#ifdef TRITON_ENABLE_STATS
+  ModelInstanceResponseStatistics* rs =
+      reinterpret_cast<ModelInstanceResponseStatistics*>(response_statistics);
+  rs->compute_output_start = compute_output_start;
+#endif  // TRITON_ENABLE_STATS
+
+  return nullptr;  // success
+}
+
+TRITONAPI_DECLSPEC TRITONSERVER_Error*
+TRITONBACKEND_ModelInstanceResponseStatisticsSetResponseEnd(
+    TRITONBACKEND_ModelInstanceResponseStatistics* response_statistics,
+    uint64_t response_end)
+{
+#ifdef TRITON_ENABLE_STATS
+  ModelInstanceResponseStatistics* rs =
+      reinterpret_cast<ModelInstanceResponseStatistics*>(response_statistics);
+  rs->response_end = response_end;
+#endif  // TRITON_ENABLE_STATS
+
+  return nullptr;  // success
+}
+
+TRITONAPI_DECLSPEC TRITONSERVER_Error*
+TRITONBACKEND_ModelInstanceResponseStatisticsSetError(
+    TRITONBACKEND_ModelInstanceResponseStatistics* response_statistics,
+    TRITONSERVER_Error* error)
+{
+#ifdef TRITON_ENABLE_STATS
+  ModelInstanceResponseStatistics* rs =
+      reinterpret_cast<ModelInstanceResponseStatistics*>(response_statistics);
+  rs->error = error;
+#endif  // TRITON_ENABLE_STATS
+
+  return nullptr;  // success
+}
+
+TRITONAPI_DECLSPEC TRITONSERVER_Error*
+TRITONBACKEND_ModelInstanceReportResponseStatistics(
+    TRITONBACKEND_ModelInstanceResponseStatistics* response_statistics)
+{
+#ifdef TRITON_ENABLE_STATS
+  ModelInstanceResponseStatistics* rs =
+      reinterpret_cast<ModelInstanceResponseStatistics*>(response_statistics);
+
+  InferenceStatsAggregator* sa =
+      rs->model_instance->Model()->MutableStatsAggregator();
+  std::string key =
+      std::to_string((*rs->response_factory)->GetAndIncrementResponseIndex());
+
+  if (rs->error == nullptr) {
+    if (rs->compute_output_start > 0) {
+      RETURN_TRITONSERVER_ERROR_IF_ERROR(sa->UpdateResponseSuccess(
+          key, rs->response_start, rs->compute_output_start, rs->response_end));
+    } else {
+      RETURN_TRITONSERVER_ERROR_IF_ERROR(
+          sa->UpdateResponseEmpty(key, rs->response_start, rs->response_end));
+    }
+  } else {
+    RETURN_TRITONSERVER_ERROR_IF_ERROR(sa->UpdateResponseFail(
+        key, rs->response_start, rs->compute_output_start, rs->response_end));
+  }
+#endif  // TRITON_ENABLE_STATS
+
+  return nullptr;  // success
+}
+
 TRITONAPI_DECLSPEC TRITONSERVER_Error*
 TRITONBACKEND_ModelInstanceReportBatchStatistics(
     TRITONBACKEND_ModelInstance* instance, const uint64_t batch_size,
diff --git a/src/infer_response.h b/src/infer_response.h
index 75d5f9d48..612f8c1fe 100644
--- a/src/infer_response.h
+++ b/src/infer_response.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -61,6 +61,10 @@ class InferenceResponseFactory {
         alloc_userp_(alloc_userp), response_fn_(response_fn),
         response_userp_(response_userp), response_delegator_(delegator),
         is_cancelled_(false)
+#ifdef TRITON_ENABLE_STATS
+        ,
+        response_stats_index_(0)
+#endif  // TRITON_ENABLE_STATS
   {
   }
 
@@ -94,6 +98,11 @@ class InferenceResponseFactory {
   void ReleaseTrace() { trace_ = nullptr; }
 #endif  // TRITON_ENABLE_TRACING
 
+#ifdef TRITON_ENABLE_STATS
+  // Return the current response statistics index and increment it.
+  uint64_t GetAndIncrementResponseIndex() { return response_stats_index_++; };
+#endif  // TRITON_ENABLE_STATS
+
  private:
   // The model associated with this factory. For normal
   // requests/responses this will always be defined and acts to keep
@@ -129,6 +138,11 @@ class InferenceResponseFactory {
   // Inference trace associated with this response.
   std::shared_ptr<InferenceTraceProxy> trace_;
 #endif  // TRITON_ENABLE_TRACING
+
+#ifdef TRITON_ENABLE_STATS
+  // Number of response statistics reported.
+  std::atomic<uint64_t> response_stats_index_;
+#endif  // TRITON_ENABLE_STATS
 };
 
 //
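The factory's new atomic counter is what keys the per-response aggregation that follows: each response reported through a factory gets the next index, and that index (as a string) becomes the map key, so responses at the same position across requests aggregate under the same key. A small self-contained sketch of the intended behavior (ResponseIndexDemo is illustrative, mirroring the GetAndIncrementResponseIndex member added above):

```cpp
#include <atomic>
#include <cstdint>
#include <string>

// Mirrors InferenceResponseFactory::GetAndIncrementResponseIndex(): a
// fetch-and-increment on an atomic counter, so concurrently reported
// responses still receive distinct, ordered keys ("0", "1", "2", ...).
struct ResponseIndexDemo {
  std::atomic<uint64_t> index{0};
  std::string NextKey() { return std::to_string(index++); }
};

// Usage: the first response reported through a factory is keyed "0", the
// second "1", and so on.
//   ResponseIndexDemo demo;
//   demo.NextKey();  // "0"
//   demo.NextKey();  // "1"
```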
diff --git a/src/infer_stats.cc b/src/infer_stats.cc
index a1af3b9f2..0f47485c2 100644
--- a/src/infer_stats.cc
+++ b/src/infer_stats.cc
@@ -1,4 +1,4 @@
-// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -210,6 +210,132 @@ InferenceStatsAggregator::UpdateSuccessCacheMiss(
 #endif  // TRITON_ENABLE_METRICS
 }
 
+Status
+InferenceStatsAggregator::UpdateResponseSuccess(
+    const std::string& key, const uint64_t response_start_ns,
+    const uint64_t compute_output_start_ns, const uint64_t response_end_ns)
+{
+  if (response_start_ns > compute_output_start_ns) {
+    return Status(
+        Status::Code::INVALID_ARG,
+        "Response start cannot happen after compute output start");
+  }
+  if (compute_output_start_ns > response_end_ns) {
+    return Status(
+        Status::Code::INVALID_ARG,
+        "Compute output start cannot happen after response end");
+  }
+  const uint64_t compute_infer_duration_ns =
+      compute_output_start_ns - response_start_ns;
+  const uint64_t compute_output_duration_ns =
+      response_end_ns - compute_output_start_ns;
+  const uint64_t total_duration_ns = response_end_ns - response_start_ns;
+
+  {
+    std::lock_guard<std::mutex> lock(mu_);
+
+    auto it = response_stats_.find(key);
+    if (it == response_stats_.end()) {
+      it = response_stats_.emplace(key, InferResponseStats()).first;
+    }
+
+    it->second.compute_infer_count++;
+    it->second.compute_infer_duration_ns += compute_infer_duration_ns;
+    it->second.compute_output_count++;
+    it->second.compute_output_duration_ns += compute_output_duration_ns;
+    it->second.success_count++;
+    it->second.success_duration_ns += total_duration_ns;
+  }
+
+  return Status::Success;
+}
+
+Status
+InferenceStatsAggregator::UpdateResponseFail(
+    const std::string& key, const uint64_t response_start_ns,
+    const uint64_t compute_output_start_ns, const uint64_t response_end_ns)
+{
+  uint64_t compute_infer_duration_ns, compute_output_duration_ns,
+      total_duration_ns;
+  if (compute_output_start_ns > 0) {
+    // output tensors copied
+    if (response_start_ns > compute_output_start_ns) {
+      return Status(
+          Status::Code::INVALID_ARG,
+          "Response start cannot happen after compute output start");
+    }
+    if (compute_output_start_ns > response_end_ns) {
+      return Status(
+          Status::Code::INVALID_ARG,
+          "Compute output start cannot happen after response end");
+    }
+    compute_infer_duration_ns = compute_output_start_ns - response_start_ns;
+    compute_output_duration_ns = response_end_ns - compute_output_start_ns;
+    total_duration_ns = response_end_ns - response_start_ns;
+  } else {
+    // no output tensors copied
+    if (response_start_ns > response_end_ns) {
+      return Status(
+          Status::Code::INVALID_ARG,
+          "Response start cannot happen after response end");
+    }
+    compute_infer_duration_ns = response_end_ns - response_start_ns;
+    compute_output_duration_ns = 0;
+    total_duration_ns = response_end_ns - response_start_ns;
+  }
+
+  {
+    std::lock_guard<std::mutex> lock(mu_);
+
+    auto it = response_stats_.find(key);
+    if (it == response_stats_.end()) {
+      it = response_stats_.emplace(key, InferResponseStats()).first;
+    }
+
+    it->second.compute_infer_count++;
+    it->second.compute_infer_duration_ns += compute_infer_duration_ns;
+    if (compute_output_duration_ns > 0) {
+      it->second.compute_output_count++;
+      it->second.compute_output_duration_ns += compute_output_duration_ns;
+    }
+    it->second.fail_count++;
+    it->second.fail_duration_ns += total_duration_ns;
+  }
+
+  return Status::Success;
+}
+
+Status
+InferenceStatsAggregator::UpdateResponseEmpty(
+    const std::string& key, const uint64_t response_start_ns,
+    const uint64_t response_end_ns)
+{
+  if (response_start_ns > response_end_ns) {
+    return Status(
+        Status::Code::INVALID_ARG,
+        "Response start cannot happen after response end");
+  }
+  const uint64_t compute_infer_duration_ns =
+      response_end_ns - response_start_ns;
+  const uint64_t total_duration_ns = response_end_ns - response_start_ns;
+
+  {
+    std::lock_guard<std::mutex> lock(mu_);
+
+    auto it = response_stats_.find(key);
+    if (it == response_stats_.end()) {
+      it = response_stats_.emplace(key, InferResponseStats()).first;
+    }
+
+    it->second.compute_infer_count++;
+    it->second.compute_infer_duration_ns += compute_infer_duration_ns;
+    it->second.empty_response_count++;
+    it->second.empty_response_duration_ns += total_duration_ns;
+  }
+
+  return Status::Success;
+}
+
 void
 InferenceStatsAggregator::UpdateInferBatchStats(
     MetricModelReporter* metric_reporter, const size_t batch_size,
diff --git a/src/infer_stats.h b/src/infer_stats.h
index 86ad9a654..2ae2bc226 100644
--- a/src/infer_stats.h
+++ b/src/infer_stats.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -78,6 +78,27 @@ class InferenceStatsAggregator {
     uint64_t cache_miss_duration_ns_;
   };
 
+  struct InferResponseStats {
+    InferResponseStats()
+        : compute_infer_count(0), compute_infer_duration_ns(0),
+          compute_output_count(0), compute_output_duration_ns(0),
+          success_count(0), success_duration_ns(0), fail_count(0),
+          fail_duration_ns(0), empty_response_count(0),
+          empty_response_duration_ns(0)
+    {
+    }
+    uint64_t compute_infer_count;
+    uint64_t compute_infer_duration_ns;
+    uint64_t compute_output_count;
+    uint64_t compute_output_duration_ns;
+    uint64_t success_count;
+    uint64_t success_duration_ns;
+    uint64_t fail_count;
+    uint64_t fail_duration_ns;
+    uint64_t empty_response_count;
+    uint64_t empty_response_duration_ns;
+  };
+
   struct InferBatchStats {
     InferBatchStats()
         : count_(0), compute_input_duration_ns_(0),
@@ -100,6 +121,11 @@ class InferenceStatsAggregator {
   uint64_t InferenceCount() const { return inference_count_; }
   uint64_t ExecutionCount() const { return execution_count_; }
   const InferStats& ImmutableInferStats() const { return infer_stats_; }
+  const std::map<std::string, InferResponseStats>& ImmutableInferResponseStats()
+      const
+  {
+    return response_stats_;
+  }
   const std::map<std::string, InferBatchStats>& ImmutableInferBatchStats() const
   {
     return batch_stats_;
@@ -140,6 +166,21 @@ class InferenceStatsAggregator {
       MetricModelReporter* metric_reporter,
       const uint64_t cache_miss_duration_ns);
 
+  // Add durations to response stats for a successful response.
+  Status UpdateResponseSuccess(
+      const std::string& key, const uint64_t response_start_ns,
+      const uint64_t compute_output_start_ns, const uint64_t response_end_ns);
+
+  // Add durations to response stats for a failed response.
+  Status UpdateResponseFail(
+      const std::string& key, const uint64_t response_start_ns,
+      const uint64_t compute_output_start_ns, const uint64_t response_end_ns);
+
+  // Add durations to response stats for an empty response.
+  Status UpdateResponseEmpty(
+      const std::string& key, const uint64_t response_start_ns,
+      const uint64_t response_end_ns);
+
   // Add durations to batch infer stats for a batch execution.
   // 'success_request_count' is the number of success requests in the
   // batch that have infer_stats attached.
@@ -163,6 +204,7 @@ class InferenceStatsAggregator {
   uint64_t inference_count_;
   uint64_t execution_count_;
   InferStats infer_stats_;
+  std::map<std::string, InferResponseStats> response_stats_;
   std::map<std::string, InferBatchStats> batch_stats_;
 #endif  // TRITON_ENABLE_STATS
 };
diff --git a/src/tritonserver.cc b/src/tritonserver.cc
index 70bdddb75..f2d3c2624 100644
--- a/src/tritonserver.cc
+++ b/src/tritonserver.cc
@@ -1,4 +1,4 @@
-// Copyright 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -2897,6 +2897,8 @@ TRITONSERVER_ServerModelStatistics(
 
     // Add infer statistic
     const auto& infer_stats = model->StatsAggregator().ImmutableInferStats();
+    const auto& infer_response_stats =
+        model->StatsAggregator().ImmutableInferResponseStats();
     const auto& infer_batch_stats =
         model->StatsAggregator().ImmutableInferBatchStats();
 
@@ -2933,6 +2935,35 @@ TRITONSERVER_ServerModelStatistics(
         metadata, inference_stats, "cache_miss", infer_stats.cache_miss_count_,
         infer_stats.cache_miss_duration_ns_);
 
+    // Add response statistics
+    triton::common::TritonJson::Value response_stats(
+        metadata, triton::common::TritonJson::ValueType::OBJECT);
+    for (const auto& res_pair : infer_response_stats) {
+      triton::common::TritonJson::Value res_stat(
+          metadata, triton::common::TritonJson::ValueType::OBJECT);
+      SetDurationStat(
+          metadata, res_stat, "compute_infer",
+          res_pair.second.compute_infer_count,
+          res_pair.second.compute_infer_duration_ns);
+      SetDurationStat(
+          metadata, res_stat, "compute_output",
+          res_pair.second.compute_output_count,
+          res_pair.second.compute_output_duration_ns);
+      SetDurationStat(
+          metadata, res_stat, "success", res_pair.second.success_count,
+          res_pair.second.success_duration_ns);
+      SetDurationStat(
+          metadata, res_stat, "fail", res_pair.second.fail_count,
+          res_pair.second.fail_duration_ns);
+      SetDurationStat(
+          metadata, res_stat, "empty_response",
+          res_pair.second.empty_response_count,
+          res_pair.second.empty_response_duration_ns);
+      RETURN_IF_STATUS_ERROR(
+          response_stats.Add(res_pair.first.c_str(), std::move(res_stat)));
+    }
+
+    // Add batch statistics
     triton::common::TritonJson::Value batch_stats(
         metadata, triton::common::TritonJson::ValueType::ARRAY);
     for (const auto& batch : infer_batch_stats) {
@@ -2967,6 +2998,8 @@ TRITONSERVER_ServerModelStatistics(
 
     RETURN_IF_STATUS_ERROR(
         model_stat.Add("inference_stats", std::move(inference_stats)));
+    RETURN_IF_STATUS_ERROR(
+        model_stat.Add("response_stats", std::move(response_stats)));
     RETURN_IF_STATUS_ERROR(
         model_stat.Add("batch_stats", std::move(batch_stats)));
     RETURN_IF_STATUS_ERROR(
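With the aggregation above wired into TRITONSERVER_ServerModelStatistics, the per-response numbers surface through the existing model statistics C API. Below is a hedged sketch of reading them in-process; the model name "my_model" is illustrative, and the JSON layout shown in the comment is what the new code suggests, assuming SetDurationStat serializes each duration stat as a count/ns pair as elsewhere in the statistics protocol.

```cpp
// Illustrative sketch only; not part of this diff. Error checks omitted.
#include <cstddef>
#include <iostream>
#include <string>

#include "triton/core/tritonserver.h"

void
PrintModelStats(TRITONSERVER_Server* server)
{
  TRITONSERVER_Message* stats = nullptr;
  // -1 requests statistics for all versions of the model.
  TRITONSERVER_ServerModelStatistics(server, "my_model", -1, &stats);

  const char* base = nullptr;
  size_t byte_size = 0;
  TRITONSERVER_MessageSerializeToJson(stats, &base, &byte_size);
  std::cout << std::string(base, byte_size) << std::endl;
  TRITONSERVER_MessageDelete(stats);

  // Expected (illustrative) shape of the new per-model section, keyed by
  // response index:
  //   "response_stats": {
  //     "0": { "compute_infer":  {"count": ..., "ns": ...},
  //            "compute_output": {"count": ..., "ns": ...},
  //            "success":        {"count": ..., "ns": ...},
  //            "fail":           {"count": ..., "ns": ...},
  //            "empty_response": {"count": ..., "ns": ...} },
  //     "1": { ... }
  //   }
}
```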
diff --git a/src/tritonserver_stub.cc b/src/tritonserver_stub.cc
index e6efcda12..dc9bcca52 100644
--- a/src/tritonserver_stub.cc
+++ b/src/tritonserver_stub.cc
@@ -1,4 +1,4 @@
-// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -947,6 +947,42 @@ TRITONBACKEND_ModelInstanceReportStatistics()
 {
 }
 TRITONAPI_DECLSPEC void
+TRITONBACKEND_ModelInstanceResponseStatisticsNew()
+{
+}
+TRITONAPI_DECLSPEC void
+TRITONBACKEND_ModelInstanceResponseStatisticsSetModelInstance()
+{
+}
+TRITONAPI_DECLSPEC void
+TRITONBACKEND_ModelInstanceResponseStatisticsSetResponseFactory()
+{
+}
+TRITONAPI_DECLSPEC void
+TRITONBACKEND_ModelInstanceResponseStatisticsSetResponseStart()
+{
+}
+TRITONAPI_DECLSPEC void
+TRITONBACKEND_ModelInstanceResponseStatisticsSetComputeOutputStart()
+{
+}
+TRITONAPI_DECLSPEC void
+TRITONBACKEND_ModelInstanceResponseStatisticsSetResponseEnd()
+{
+}
+TRITONAPI_DECLSPEC void
+TRITONBACKEND_ModelInstanceResponseStatisticsSetError()
+{
+}
+TRITONAPI_DECLSPEC void
+TRITONBACKEND_ModelInstanceResponseStatisticsDelete()
+{
+}
+TRITONAPI_DECLSPEC void
+TRITONBACKEND_ModelInstanceReportResponseStatistics()
+{
+}
+TRITONAPI_DECLSPEC void
 TRITONBACKEND_ModelInstanceReportBatchStatistics()
 {
 }
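As a quick sanity check of the aggregator arithmetic introduced in infer_stats.cc, suppose a backend reports one successful response with response_start = 100, compute_output_start = 150 and response_end = 180 (nanosecond timestamps). The sketch below only restates the additions UpdateResponseSuccess performs; the numbers are made up.

```cpp
#include <cassert>
#include <cstdint>

int
main()
{
  const uint64_t response_start = 100, compute_output_start = 150,
                 response_end = 180;

  // UpdateResponseSuccess splits the response into two phases plus a total.
  const uint64_t compute_infer_ns = compute_output_start - response_start;  // 50
  const uint64_t compute_output_ns = response_end - compute_output_start;   // 30
  const uint64_t total_ns = response_end - response_start;                  // 80

  assert(compute_infer_ns == 50 && compute_output_ns == 30 && total_ns == 80);

  // The aggregator then increments compute_infer_count, compute_output_count
  // and success_count by one, and adds these durations to the matching
  // *_duration_ns fields under this response's key. A failed response updates
  // fail_* instead, and an empty response (compute_output_start == 0) updates
  // only compute_infer_* and empty_response_*.
  return 0;
}
```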