Skip to content

Commit

Permalink
Add async decoupled execute
Browse files Browse the repository at this point in the history
  • Loading branch information
kthui committed Apr 2, 2024
1 parent b64b6da commit 4159726
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 6 deletions.
83 changes: 78 additions & 5 deletions src/pb_stub.cc
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,8 @@ Stub::Initialize(bi::managed_external_buffer::handle_t map_handle)
c_python_backend_utils.attr("InferenceResponse"));
c_python_backend_utils.attr("shared_memory") = py::cast(shm_pool_.get());

async_event_loop_ = py::none();

py::object TritonPythonModel = sys.attr("TritonPythonModel");
deserialize_bytes_ = python_backend_utils.attr("deserialize_bytes_tensor");
serialize_bytes_ = python_backend_utils.attr("serialize_byte_tensor");
Expand Down Expand Up @@ -690,11 +692,18 @@ Stub::ProcessRequestsDecoupled(RequestBatch* request_batch_shm_ptr)

py::object execute_return =
model_instance_.attr("execute")(py_request_list);
if (!py::isinstance<py::none>(execute_return)) {
throw PythonBackendException(
"Python model '" + name_ +
"' is using the decoupled mode and the execute function must "
"return None.");
bool is_coroutine = py::module::import("asyncio")
.attr("iscoroutine")(execute_return)
.cast<bool>();
if (is_coroutine) {
RunCoroutine(execute_return);
} else {
if (!py::isinstance<py::none>(execute_return)) {
throw PythonBackendException(
"Python model '" + name_ +
"' is using the decoupled mode and the execute function must "
"return None.");
}
}
}
}
Expand Down Expand Up @@ -870,6 +879,60 @@ Stub::ProcessRequests(RequestBatch* request_batch_shm_ptr)
}
}

py::object
Stub::GetAsyncEventLoop()
{
  // Lazily create the asyncio event loop the first time a coroutine needs
  // one, and keep it running forever on a dedicated background thread.
  // Subsequent calls return the already-running loop.
  if (py::isinstance<py::none>(async_event_loop_)) {
    py::module_ asyncio = py::module_::import("asyncio");
    async_event_loop_ = asyncio.attr("new_event_loop")();
    // Daemon thread so an idle loop never blocks interpreter shutdown.
    py::module_::import("threading")
        .attr("Thread")(
            "target"_a = async_event_loop_.attr("run_forever"),
            "daemon"_a = true)
        .attr("start")();
  }
  return async_event_loop_;
}

py::object
Stub::RunCoroutine(py::object coroutine)
{
  // Schedule `coroutine` onto the stub's asyncio event-loop thread and return
  // immediately (fire-and-forget); the caller always receives py::none().
  // Completion is tracked in `async_event_futures_` so Finalize() can report
  // how many coroutines are still in flight.
  py::object loop = GetAsyncEventLoop();
  py::object py_future = py::module_::import("asyncio").attr(
      "run_coroutine_threadsafe")(coroutine, loop);

  {
    // Hold the futures mutex across the std::async launch AND the set
    // insertion below. The waiter task takes the same mutex before erasing
    // itself, so it cannot run its erase before this future is registered —
    // this ordering is load-bearing; do not shrink the critical section.
    std::lock_guard<std::mutex> lock(async_event_futures_mu_);

    // Two-step future setup: the shared_ptr must exist before the task is
    // launched (so the lambda can capture it), and the std::future it wraps
    // is only available after std::async returns — hence the placeholder
    // future assigned via *shared_future further down.
    std::shared_ptr<std::future<void>> shared_future(new std::future<void>());
    std::future<void> c_future = std::async(
        std::launch::async, [this, shared_future, py_future]() mutable {
          {
            // All Python-object access in this worker thread requires the GIL.
            py::gil_scoped_acquire gil_acquire;
            try {
              // Blocks until the coroutine finishes on the event-loop thread;
              // re-raises any exception the coroutine threw.
              py_future.attr("result")();
            }
            catch (const PythonBackendException& pb_exception) {
              LOG_ERROR << pb_exception.what();
            }
            catch (const py::error_already_set& error) {
              LOG_ERROR << error.what();
            }
            // Drop the Python future reference here, while the GIL is still
            // held — the closure may be destroyed later on a thread that does
            // not hold the GIL, so decref'ing there would be unsafe.
            py_future = py::none();
          }
          {
            // Deregister this task. Safe because the closure's own
            // `shared_future` copy keeps the std::future alive past the erase.
            std::lock_guard<std::mutex> lock(async_event_futures_mu_);
            async_event_futures_.erase(shared_future);
          }
        });
    *shared_future = std::move(c_future);
    async_event_futures_.emplace(std::move(shared_future));
  }

  return py::none();
}

void
Stub::UpdateHealth()
{
Expand All @@ -881,6 +944,14 @@ void
Stub::Finalize()
{
finalizing_ = true;
// Stop async event loop if created.
if (!py::isinstance<py::none>(async_event_loop_)) {
if (!async_event_futures_.empty()) {
LOG_ERROR << "Finalizing stub with " << async_event_futures_.size()
<< " ongoing coroutines";
}
async_event_loop_.attr("stop")();
}
// Call finalize if exists.
if (initialized_ && py::hasattr(model_instance_, "finalize")) {
try {
Expand Down Expand Up @@ -943,6 +1014,8 @@ Stub::~Stub()

{
py::gil_scoped_acquire acquire;
async_event_futures_.clear();
async_event_loop_ = py::none();
model_instance_ = py::none();
}
stub_instance_.reset();
Expand Down
12 changes: 11 additions & 1 deletion src/pb_stub.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -31,6 +31,9 @@
#include <pybind11/stl.h>

#include <filesystem>
#include <future>
#include <memory>
#include <unordered_set>

#include "infer_request.h"
#include "infer_response.h"
Expand Down Expand Up @@ -255,6 +258,10 @@ class Stub {

void ProcessRequestsDecoupled(RequestBatch* request_batch_shm_ptr);

py::object GetAsyncEventLoop();

py::object RunCoroutine(py::object coroutine);

/// Get the memory manager message queue
std::unique_ptr<MessageQueue<uint64_t>>& MemoryManagerQueue();

Expand Down Expand Up @@ -363,6 +370,9 @@ class Stub {
py::object model_instance_;
py::object deserialize_bytes_;
py::object serialize_bytes_;
py::object async_event_loop_;
std::unordered_set<std::shared_ptr<std::future<void>>> async_event_futures_;
std::mutex async_event_futures_mu_;
std::unique_ptr<MessageQueue<bi::managed_external_buffer::handle_t>>
stub_message_queue_;
std::unique_ptr<MessageQueue<bi::managed_external_buffer::handle_t>>
Expand Down

0 comments on commit 4159726

Please sign in to comment.