
Add support for response sender in the default mode (#7311)
kthui authored Jun 6, 2024
1 parent 797d296 · commit 1f68c0d
Showing 10 changed files with 1,052 additions and 46 deletions.
qa/L0_backend_python/bls/test.sh (3 changes: 2 additions & 1 deletion)
@@ -100,7 +100,8 @@ if [[ ${TEST_WINDOWS} == 0 ]]; then
echo "instance_group [ { kind: KIND_CPU} ]" >> models/libtorch_cpu/config.pbtxt

# Test with different sizes of CUDA memory pool
for CUDA_MEMORY_POOL_SIZE_MB in 64 128 ; do
# TODO: Why 256 worked in place of 128, on decoupled data pipeline?
for CUDA_MEMORY_POOL_SIZE_MB in 64 256 ; do
CUDA_MEMORY_POOL_SIZE_BYTES=$((CUDA_MEMORY_POOL_SIZE_MB * 1024 * 1024))
SERVER_ARGS="--model-repository=${MODELDIR}/bls/models --backend-directory=${BACKEND_DIR} --log-verbose=1 --cuda-memory-pool-byte-size=0:${CUDA_MEMORY_POOL_SIZE_BYTES}"
for TRIAL in non_decoupled decoupled ; do
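For reference, the loop above converts each pool size from megabytes to bytes before handing it to the server's `--cuda-memory-pool-byte-size` flag. A minimal sketch of the same arithmetic (the values mirror the 64 MB and 256 MB sizes used in the loop and are purely illustrative):

```python
# Sketch of the MB-to-bytes conversion behind --cuda-memory-pool-byte-size=0:<bytes>.
for pool_size_mb in (64, 256):
    pool_size_bytes = pool_size_mb * 1024 * 1024
    # 64 MB -> 67108864 bytes, 256 MB -> 268435456 bytes
    print(f"--cuda-memory-pool-byte-size=0:{pool_size_bytes}")
```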
qa/L0_backend_python/decoupled/decoupled_test.py (6 changes: 3 additions & 3 deletions)
@@ -243,12 +243,12 @@ def test_decoupled_return_response_error(self):
                 client.async_stream_infer(model_name=model_name, inputs=inputs)
                 data_item = user_data._completed_requests.get()
                 if type(data_item) == InferenceServerException:
-                    self.assertEqual(
-                        data_item.message(),
+                    self.assertIn(
                         "Python model 'decoupled_return_response_error_0_0' is using "
                         "the decoupled mode and the execute function must return "
                         "None.",
-                        "Exception message didn't match.",
+                        data_item.message(),
+                        "Exception message didn't show up.",
                     )

     def test_decoupled_send_after_close_error(self):
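The change above swaps `assertEqual` for `assertIn`, turning an exact comparison into a substring check, so the assertion still passes if the server adds detail (such as an instance name) around the expected text. A minimal, self-contained sketch of the pattern; the message below is hypothetical and stands in for `data_item.message()`:

```python
import unittest


class ErrorMessageCheckSketch(unittest.TestCase):
    def test_substring_match(self):
        # Hypothetical server error text; in the real test this comes from
        # data_item.message().
        full_message = (
            "Failed to process the request(s) for model 'example_model_0_0': "
            "the execute function must return None."
        )
        expected = "the execute function must return None."
        # assertIn(member, container, msg): passes when `expected` appears
        # anywhere inside `full_message`, unlike assertEqual's exact match.
        self.assertIn(expected, full_message, "Exception message didn't show up.")


if __name__ == "__main__":
    unittest.main()
```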
qa/L0_backend_python/lifecycle/lifecycle_test.py (41 changes: 1 addition & 40 deletions)
@@ -199,7 +199,7 @@ def test_infer_pymodel_error(self):
                     print(e.message())
                     self.assertTrue(
                         e.message().startswith(
-                            "Failed to process the request(s) for model instance"
+                            "Failed to process the request(s) for model "
                         ),
                         "Exception message is not correct",
                     )
@@ -208,45 +208,6 @@ def test_infer_pymodel_error(self):
False, "Wrong exception raised or did not raise an exception"
)

def test_incorrect_execute_return(self):
model_name = "execute_return_error"
shape = [1, 1]
with self._shm_leak_detector.Probe() as shm_probe:
with httpclient.InferenceServerClient(
f"{_tritonserver_ipaddr}:8000"
) as client:
input_data = (5 * np.random.randn(*shape)).astype(np.float32)
inputs = [
httpclient.InferInput(
"INPUT", input_data.shape, np_to_triton_dtype(input_data.dtype)
)
]
inputs[0].set_data_from_numpy(input_data)

# The first request to this model will return None.
with self.assertRaises(InferenceServerException) as e:
client.infer(model_name, inputs)

self.assertTrue(
"Failed to process the request(s) for model instance "
"'execute_return_error_0_0', message: Expected a list in the "
"execute return" in str(e.exception),
"Exception message is not correct.",
)

# The second inference request will return a list of None object
# instead of Python InferenceResponse objects.
with self.assertRaises(InferenceServerException) as e:
client.infer(model_name, inputs)

self.assertTrue(
"Failed to process the request(s) for model instance "
"'execute_return_error_0_0', message: Expected an "
"'InferenceResponse' object in the execute function return"
" list" in str(e.exception),
"Exception message is not correct.",
)


if __name__ == "__main__":
unittest.main()
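The deleted `test_incorrect_execute_return` enforced that a default-mode model's `execute` must return a list of `InferenceResponse` objects. Given the commit title, a default-mode model can apparently now reply through a response sender and return `None` instead. The sketch below illustrates that pattern as an assumption drawn from the commit title, not code taken from this change; the tensor names are illustrative.

```python
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Sketch of a default-mode (non-decoupled) model that answers each request
    through a response sender rather than returning a list of responses."""

    def execute(self, requests):
        for request in requests:
            # Echo the input back as the output; tensor names are illustrative.
            in_tensor = pb_utils.get_input_tensor_by_name(request, "INPUT")
            out_tensor = pb_utils.Tensor("OUTPUT", in_tensor.as_numpy())
            response = pb_utils.InferenceResponse(output_tensors=[out_tensor])

            # Send exactly one response per request and mark it final.
            sender = request.get_response_sender()
            sender.send(response, flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)

        # When a response sender is used, execute returns None instead of a
        # list of InferenceResponse objects.
        return None
```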