diff --git a/CMakeLists.txt b/CMakeLists.txt
index 92b785bc..ee209b5b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -155,6 +155,8 @@ set(
   src/infer_response.h
   src/infer_request.cc
   src/infer_request.h
+  src/infer_trace.cc
+  src/infer_trace.h
   src/message_queue.h
   src/ipc_message.cc
   src/ipc_message.h
diff --git a/src/infer_request.cc b/src/infer_request.cc
index 0c33e515..31182281 100644
--- a/src/infer_request.cc
+++ b/src/infer_request.cc
@@ -170,7 +170,7 @@ InferRequest::GetPreferredMemory()
 }
 
 InferenceTrace&
-InferRequest::Trace()
+InferRequest::GetTrace()
 {
   return trace_;
 }
@@ -210,7 +210,6 @@ InferRequest::SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool)
   infer_request_shm_ptr_->is_decoupled = is_decoupled_;
   infer_request_shm_ptr_->timeout = timeout_;
   infer_request_shm_ptr_->preferred_memory = preferred_memory_;
-  infer_request_shm_ptr_->trace = trace_;
   infer_request_shm_ptr_->request_release_flags = request_release_flags_;
 
   output_names_handle_shm_ptr_ =
@@ -258,6 +257,9 @@ InferRequest::SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool)
       PbString::Create(shm_pool, Parameters());
   infer_request_shm_ptr_->parameters_shm_handle = parameters_shm->ShmHandle();
 
+  trace_.SaveToSharedMemory(shm_pool);
+  infer_request_shm_ptr_->trace_shm_handle = trace_.ShmHandle();
+
   // Save the references to shared memory.
   infer_request_shm_ = std::move(infer_request_shm);
   request_id_shm_ = std::move(request_id_shm);
@@ -312,6 +314,10 @@ InferRequest::LoadFromSharedMemory(
       CorrelationId::LoadFromSharedMemory(
           shm_pool, infer_request_shm_ptr->correlation_id_shm_handle);
 
+  std::unique_ptr<InferenceTrace> infer_trace_shm =
+      InferenceTrace::LoadFromSharedMemory(
+          shm_pool, infer_request_shm_ptr->trace_shm_handle);
+
   std::unique_ptr<PbString> model_name_shm = PbString::LoadFromSharedMemory(
       shm_pool, infer_request_shm_ptr->model_name_shm_handle);
   std::unique_ptr<PbString> request_id_shm = PbString::LoadFromSharedMemory(
@@ -321,8 +327,8 @@ InferRequest::LoadFromSharedMemory(
 
   return std::unique_ptr<InferRequest>(new InferRequest(
       infer_request_shm, request_id_shm, correlation_id_shm,
-      requested_output_names_shm, model_name_shm, input_tensors,
-      parameters_shm));
+      requested_output_names_shm, model_name_shm, input_tensors, parameters_shm,
+      infer_trace_shm));
 }
 
 InferRequest::InferRequest(
@@ -332,7 +338,8 @@ InferRequest::InferRequest(
     std::vector<std::unique_ptr<PbString>>& requested_output_names_shm,
     std::unique_ptr<PbString>& model_name_shm,
     std::vector<std::shared_ptr<PbTensor>>& input_tensors,
-    std::unique_ptr<PbString>& parameters_shm)
+    std::unique_ptr<PbString>& parameters_shm,
+    std::unique_ptr<InferenceTrace>& infer_trace_shm)
     : infer_request_shm_(std::move(infer_request_shm)),
       request_id_shm_(std::move(request_id_shm)),
       requested_output_names_shm_(std::move(requested_output_names_shm)),
@@ -373,7 +380,7 @@ InferRequest::InferRequest(
   is_decoupled_ = infer_request_shm_ptr_->is_decoupled;
   timeout_ = infer_request_shm_ptr_->timeout;
   preferred_memory_ = infer_request_shm_ptr_->preferred_memory;
-  trace_ = infer_request_shm_ptr_->trace;
+  trace_ = InferenceTrace(infer_trace_shm);
   request_release_flags_ = infer_request_shm_ptr_->request_release_flags;
 
 #ifdef TRITON_PB_STUB
diff --git a/src/infer_request.h b/src/infer_request.h
index 3d0379eb..e0887624 100644
--- a/src/infer_request.h
+++ b/src/infer_request.h
@@ -31,6 +31,7 @@
 
 #include "correlation_id.h"
 #include "infer_response.h"
+#include "infer_trace.h"
 #include "pb_preferred_memory.h"
 #include "pb_tensor.h"
 
@@ -43,22 +44,6 @@ namespace triton { namespace backend { namespace python {
 
 class Stub;
 
-//
-// Inference Trace
-//
-struct InferenceTrace {
-#ifndef TRITON_PB_STUB
-  TRITONSERVER_InferenceTrace* triton_trace_;
-  InferenceTrace(TRITONSERVER_InferenceTrace* triton_trace)
-      : triton_trace_(triton_trace)
-  {
-  }
-#else
-  void* triton_trace_;
-#endif
-  InferenceTrace() : triton_trace_(nullptr) {}
-};
-
 //
 // Inference Request
 //
@@ -72,7 +57,7 @@ struct InferRequestShm {
   bool is_decoupled;
   uint64_t timeout;
   PreferredMemory preferred_memory;
-  InferenceTrace trace;
+  bi::managed_external_buffer::handle_t trace_shm_handle;
   uint32_t request_release_flags;
   bi::managed_external_buffer::handle_t correlation_id_shm_handle;
   bi::managed_external_buffer::handle_t model_name_shm_handle;
@@ -108,7 +93,7 @@ class InferRequest {
   bool IsDecoupled();
   void SetIsDecoupled(const bool is_decoupled);
   PreferredMemory& GetPreferredMemory();
-  InferenceTrace& Trace();
+  InferenceTrace& GetTrace();
   uint32_t ReleaseFlags();
   void SetReleaseFlags(const uint32_t& flags);
 
@@ -149,7 +134,8 @@ class InferRequest {
       std::vector<std::unique_ptr<PbString>>& requested_output_names_shm,
       std::unique_ptr<PbString>& model_name_shm,
       std::vector<std::shared_ptr<PbTensor>>& input_tensors,
-      std::unique_ptr<PbString>& parameters_shm);
+      std::unique_ptr<PbString>& parameters_shm,
+      std::unique_ptr<InferenceTrace>& infer_trace_shm);
 
   std::string request_id_;
   CorrelationId correlation_id_;
diff --git a/src/infer_trace.cc b/src/infer_trace.cc
new file mode 100644
index 00000000..50645dcc
--- /dev/null
+++ b/src/infer_trace.cc
@@ -0,0 +1,101 @@
+// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+#include "infer_trace.h"
+
+namespace triton { namespace backend { namespace python {
+
+InferenceTrace::InferenceTrace(const InferenceTrace& rhs)
+{
+  triton_trace_ = rhs.triton_trace_;
+  trace_context_ = rhs.trace_context_;
+}
+
+InferenceTrace&
+InferenceTrace::operator=(const InferenceTrace& rhs)
+{
+  triton_trace_ = rhs.triton_trace_;
+  trace_context_ = rhs.trace_context_;
+  return *this;
+}
+
+InferenceTrace::InferenceTrace(std::unique_ptr<InferenceTrace>& trace_shm)
+{
+  triton_trace_ = trace_shm->triton_trace_;
+  trace_context_ = trace_shm->trace_context_;
+}
+
+void
+InferenceTrace::SaveToSharedMemory(
+    std::unique_ptr<SharedMemoryManager>& shm_pool)
+{
+  AllocatedSharedMemory<InferenceTraceShm> infer_trace_shm =
+      shm_pool->Construct<InferenceTraceShm>();
+  infer_trace_shm_ptr_ = infer_trace_shm.data_.get();
+
+  infer_trace_shm_ptr_->triton_trace = triton_trace_;
+
+  std::unique_ptr<PbString> trace_context_shm =
+      PbString::Create(shm_pool, trace_context_);
+
+  infer_trace_shm_ptr_->trace_context_shm_handle =
+      trace_context_shm->ShmHandle();
+
+  // Save the references to shared memory.
+  trace_context_shm_ = std::move(trace_context_shm);
+  infer_trace_shm_ = std::move(infer_trace_shm);
+  shm_handle_ = infer_trace_shm_.handle_;
+}
+
+std::unique_ptr<InferenceTrace>
+InferenceTrace::LoadFromSharedMemory(
+    std::unique_ptr<SharedMemoryManager>& shm_pool,
+    bi::managed_external_buffer::handle_t handle)
+{
+  AllocatedSharedMemory<InferenceTraceShm> infer_trace_shm =
+      shm_pool->Load<InferenceTraceShm>(handle);
+  InferenceTraceShm* infer_trace_shm_ptr = infer_trace_shm.data_.get();
+
+  std::unique_ptr<PbString> trace_context_shm = PbString::LoadFromSharedMemory(
+      shm_pool, infer_trace_shm_ptr->trace_context_shm_handle);
+
+  return std::unique_ptr<InferenceTrace>(
+      new InferenceTrace(infer_trace_shm, trace_context_shm));
+}
+
+InferenceTrace::InferenceTrace(
+    AllocatedSharedMemory<InferenceTraceShm>& infer_trace_shm,
+    std::unique_ptr<PbString>& trace_context_shm)
+    : infer_trace_shm_(std::move(infer_trace_shm)),
+      trace_context_shm_(std::move(trace_context_shm))
+{
+  infer_trace_shm_ptr_ = infer_trace_shm_.data_.get();
+  shm_handle_ = infer_trace_shm_.handle_;
+  triton_trace_ = infer_trace_shm_ptr_->triton_trace;
+  trace_context_ = trace_context_shm_->String();
+}
+
+}}};  // namespace triton::backend::python
diff --git a/src/infer_trace.h b/src/infer_trace.h
new file mode 100644
index 00000000..aac9137f
--- /dev/null
+++ b/src/infer_trace.h
@@ -0,0 +1,90 @@
+// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#pragma once
+
+#include <string>
+
+#include "pb_string.h"
+#include "pb_utils.h"
+
+namespace triton { namespace backend { namespace python {
+
+struct InferenceTraceShm {
+  bi::managed_external_buffer::handle_t trace_context_shm_handle;
+  // The address of the 'TRITONSERVER_InferTrace' object.
+  void* triton_trace;
+};
+
+//
+// Inference Trace
+//
+class InferenceTrace {
+ public:
+  InferenceTrace(void* triton_trace, const std::string& ctxt)
+      : triton_trace_(triton_trace), trace_context_(ctxt)
+  {
+  }
+  InferenceTrace() : triton_trace_(nullptr), trace_context_("") {}
+  InferenceTrace(const InferenceTrace& rhs);
+  InferenceTrace(std::unique_ptr<InferenceTrace>& trace_shm);
+  InferenceTrace& operator=(const InferenceTrace& rhs);
+  /// Save InferenceTrace object to shared memory.
+  /// \param shm_pool Shared memory pool to save the InferenceTrace object.
+  void SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool);
+
+  /// Create a InferenceTrace object from shared memory.
+  /// \param shm_pool Shared memory pool
+  /// \param handle Shared memory handle of the InferenceTrace.
+  /// \return Returns the InferenceTrace in the specified handle
+  /// location.
+  static std::unique_ptr<InferenceTrace> LoadFromSharedMemory(
+      std::unique_ptr<SharedMemoryManager>& shm_pool,
+      bi::managed_external_buffer::handle_t handle);
+
+  void* TritonTrace() { return triton_trace_; }
+  const std::string& Context() const { return trace_context_; }
+
+  bi::managed_external_buffer::handle_t ShmHandle() { return shm_handle_; }
+
+ private:
+  // The private constructor for creating a InferenceTrace object from shared
+  // memory.
+  InferenceTrace(
+      AllocatedSharedMemory<InferenceTraceShm>& infer_trace_shm,
+      std::unique_ptr<PbString>& trace_context_shm);
+
+  void* triton_trace_;
+  std::string trace_context_;
+
+  // Shared Memory Data Structures
+  AllocatedSharedMemory<InferenceTraceShm> infer_trace_shm_;
+  InferenceTraceShm* infer_trace_shm_ptr_;
+  bi::managed_external_buffer::handle_t shm_handle_;
+  std::unique_ptr<PbString> trace_context_shm_;
+};
+
+}}};  // namespace triton::backend::python
diff --git a/src/pb_stub.cc b/src/pb_stub.cc
index 6735b123..a9a910a1 100644
--- a/src/pb_stub.cc
+++ b/src/pb_stub.cc
@@ -1611,7 +1611,14 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
       .export_values();
 
   py::class_<InferenceTrace, std::shared_ptr<InferenceTrace>>(
-      module, "InferenceTrace");
+      module, "InferenceTrace")
+      .def("get_context", [](InferenceTrace& self) -> py::object {
+        auto context = self.Context();
+        if (context != "") {
+          return py::str(context);
+        }
+        return py::none();
+      });
 
   py::class_<InferRequest, std::shared_ptr<InferRequest>>(
       module, "InferenceRequest")
@@ -1698,7 +1705,7 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
       .def("set_flags", &InferRequest::SetFlags)
       .def("timeout", &InferRequest::Timeout)
      .def("parameters", &InferRequest::Parameters)
-      .def("trace", &InferRequest::Trace)
+      .def("trace", &InferRequest::GetTrace)
       .def(
           "exec",
           [](std::shared_ptr<InferRequest>& infer_request,
diff --git a/src/python_be.cc b/src/python_be.cc
index 3e2a816e..57e6cffd 100644
--- a/src/python_be.cc
+++ b/src/python_be.cc
@@ -383,14 +383,25 @@ ModelInstanceState::SaveRequestsToSharedMemory(
     // Do not return if error in this case, because Triton core
     // will return an error if tracing is disabled (see PYBE PR#295).
+    // For the same reason, we do not log the error message, otherwise
+    // when Triton is compiled without tracing, it'll constantly log
+    // this error.
     TRITONSERVER_InferenceTrace* triton_trace;
     auto err = TRITONBACKEND_RequestTrace(request, &triton_trace);
     if (err != nullptr) {
       triton_trace = nullptr;
       TRITONSERVER_ErrorDelete(err);
     }
+    const char* val = nullptr;
+    if (triton_trace != nullptr) {
+      LOG_IF_ERROR(
+          TRITONSERVER_InferenceTraceContext(triton_trace, &val),
+          "failed to retrieve trace context");
+    }
+    std::string context = (val != nullptr) ? std::string(val) : "";
 
-    InferenceTrace trace = InferenceTrace(triton_trace);
+    InferenceTrace trace =
+        InferenceTrace(reinterpret_cast<void*>(triton_trace), context);
 
     uint64_t request_timeout;
     RETURN_IF_ERROR(TRITONBACKEND_InferenceRequestTimeoutMicroseconds(
@@ -415,7 +426,6 @@ ModelInstanceState::SaveRequestsToSharedMemory(
           reinterpret_cast<intptr_t>(request),
           PreferredMemory(PreferredMemory::kDefault, 0), trace);
     }
-
     RETURN_IF_EXCEPTION(infer_request->SaveToSharedMemory(Stub()->ShmPool()));
     requests_shm[r] = infer_request->ShmHandle();
     pb_infer_requests.emplace_back(std::move(infer_request));
diff --git a/src/request_executor.cc b/src/request_executor.cc
index f89a0b30..78fa6ea2 100644
--- a/src/request_executor.cc
+++ b/src/request_executor.cc
@@ -374,9 +374,11 @@ RequestExecutor::Infer(
         irequest, InferRequestComplete, nullptr /* request_release_userp */));
 
     TRITONSERVER_InferenceTrace* trace = nullptr;
-    if (infer_request->Trace().triton_trace_ != nullptr) {
+    if (infer_request->GetTrace().TritonTrace() != nullptr) {
       THROW_IF_TRITON_ERROR(TRITONSERVER_InferenceTraceSpawnChildTrace(
-          infer_request->Trace().triton_trace_, &trace));
+          reinterpret_cast<TRITONSERVER_InferenceTrace*>(
+              infer_request->GetTrace().TritonTrace()),
+          &trace));
     }
 
     const std::string& param_str =
infer_request->Parameters();