From d905aeb8f84767e20d7f1871dc16afa7c69c06ea Mon Sep 17 00:00:00 2001 From: GuanLuo Date: Thu, 7 Mar 2024 14:48:51 -0800 Subject: [PATCH] Delay time-point of response complete timestamp in gRPC and SageMaker endpoint --- src/grpc/infer_handler.cc | 10 +++++----- src/sagemaker_server.cc | 13 ++++++------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/src/grpc/infer_handler.cc b/src/grpc/infer_handler.cc index f977543896..cde705cf6e 100644 --- a/src/grpc/infer_handler.cc +++ b/src/grpc/infer_handler.cc @@ -992,11 +992,6 @@ ModelInferHandler::InferResponseComplete( state->context_->EraseInflightState(state); } -#ifdef TRITON_ENABLE_TRACING - state->trace_timestamps_.emplace_back(std::make_pair( - "INFER_RESPONSE_COMPLETE", TraceManager::CaptureTimestamp())); -#endif // TRITON_ENABLE_TRACING - // If gRPC Stream is cancelled then no need of forming and returning // a response. if (state->IsGrpcContextCancelled()) { @@ -1064,6 +1059,11 @@ ModelInferHandler::InferResponseComplete( return; } +#ifdef TRITON_ENABLE_TRACING + state->trace_timestamps_.emplace_back(std::make_pair( + "INFER_RESPONSE_COMPLETE", TraceManager::CaptureTimestamp())); +#endif // TRITON_ENABLE_TRACING + #ifdef TRITON_ENABLE_TRACING state->trace_timestamps_.emplace_back( std::make_pair("GRPC_SEND_START", TraceManager::CaptureTimestamp())); diff --git a/src/sagemaker_server.cc b/src/sagemaker_server.cc index a214ff99b6..daedce4f4f 100644 --- a/src/sagemaker_server.cc +++ b/src/sagemaker_server.cc @@ -394,13 +394,6 @@ SagemakerAPIServer::SagemakeInferRequestClass::InferResponseComplete( err = infer_request->FinalizeResponse(response); } -#ifdef TRITON_ENABLE_TRACING - if (infer_request->trace_ != nullptr) { - infer_request->trace_->CaptureTimestamp( - "INFER_RESPONSE_COMPLETE", TraceManager::CaptureTimestamp()); - } -#endif // TRITON_ENABLE_TRACING - LOG_TRITONSERVER_ERROR( TRITONSERVER_InferenceResponseDelete(response), @@ -426,6 +419,12 @@ 
SagemakerAPIServer::SagemakeInferRequestClass::InferResponseComplete( if ((flags & TRITONSERVER_RESPONSE_COMPLETE_FINAL) == 0) { return; } +#ifdef TRITON_ENABLE_TRACING + if (infer_request->trace_ != nullptr) { + infer_request->trace_->CaptureTimestamp( + "INFER_RESPONSE_COMPLETE", TraceManager::CaptureTimestamp()); + } +#endif // TRITON_ENABLE_TRACING evthr_defer(infer_request->thread_, ReplyCallback, infer_request); }