test: Fix requested output deleting extra outputs (#7866)
kthui authored Dec 11, 2024
1 parent f336fa6 commit 17c88a8
Showing 4 changed files with 227 additions and 0 deletions.
27 changes: 27 additions & 0 deletions qa/L0_backend_python/io/io_test.py
@@ -259,6 +259,33 @@ def test_requested_output_decoupled(self):
        self.assertTrue(np.allclose(gpu_output_data[1:], next_gpu_output_data))
        self.assertTrue(user_data._completed_requests.empty())

    # Assert a prior crash is fixed regarding requested output on a decoupled model.
    def test_requested_output_decoupled_prior_crash(self):
        model_name = "llm"
        prompt = "test"

        text_input_data = np.array([[prompt]]).astype(object)
        inputs = [grpcclient.InferInput("text_input", text_input_data.shape, "BYTES")]
        inputs[-1].set_data_from_numpy(text_input_data)

        requested_outputs = [grpcclient.InferRequestedOutput("text_output")]

        user_data = UserData()
        with grpcclient.InferenceServerClient(f"{_tritonserver_ipaddr}:8001") as client:
            client.start_stream(callback=partial(callback, user_data))
            client.async_stream_infer(
                model_name=model_name, inputs=inputs, outputs=requested_outputs
            )
            client.stop_stream()

        outputs = ""
        while not user_data._completed_requests.empty():
            result = user_data._completed_requests.get(block=False)
            if isinstance(result, InferenceServerException):
                raise result
            outputs += str(result.as_numpy("text_output")[0], encoding="utf-8")
        self.assertGreater(len(outputs), 0, "text_output is empty")


if __name__ == "__main__":
    unittest.main()
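The helpers UserData, callback, partial, and _tritonserver_ipaddr used by the new test are defined earlier in io_test.py and sit outside this diff. As a readability aid only (an assumption, not part of the commit), the stream helpers in Triton QA tests typically look like this minimal sketch:

import queue


class UserData:
    def __init__(self):
        # Streamed responses (or errors) are collected here for the test to drain.
        self._completed_requests = queue.Queue()


def callback(user_data, result, error):
    # Called by the gRPC stream once per response; errors are queued as-is so
    # the drain loop in the test can re-raise them.
    if error:
        user_data._completed_requests.put(error)
    else:
        user_data._completed_requests.put(result)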
64 changes: 64 additions & 0 deletions qa/L0_backend_python/io/requested_output_model/config.pbtxt
@@ -0,0 +1,64 @@
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#
# This test case was added based on a prior crash. DO NOT MODIFY!
#

name: "llm"
backend: "python"
max_batch_size: 128

model_transaction_policy {
  decoupled: True
}

input [
  {
    name: "text_input"
    data_type: TYPE_STRING
    dims: [ 1 ]
  }
]
output [
  {
    name: "text_output"
    data_type: TYPE_STRING
    dims: [ -1 ]
  },
  {
    name: "sequence_index"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]

instance_group [
  {
    count: 1
    kind : KIND_CPU
  }
]
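Both text_input and text_output are declared TYPE_STRING, which the gRPC client addresses as "BYTES" and which travels as a numpy object array of UTF-8 bytes. A minimal round-trip sketch using the same values as the test and model (illustrative only, not part of the commit):

import numpy as np

# Client side: a (1, 1) object array holding the prompt, as in io_test.py.
prompt = "test"
text_input_data = np.array([[prompt]]).astype(object)

# Model side: one UTF-8 encoded string per response, as in model.py.
np_output = np.array(["bla0".encode("utf8")]).astype(object)

# Client side again: decode the returned tensor's first element, mirroring
# the drain loop in the test.
decoded = str(np_output[0], encoding="utf-8")
assert decoded == "bla0"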
110 changes: 110 additions & 0 deletions qa/L0_backend_python/io/requested_output_model/model.py
@@ -0,0 +1,110 @@
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#
# This test case was added based on a prior crash. DO NOT MODIFY!
#

import json
import traceback

import numpy as np
import triton_python_backend_utils as pb_utils


def get_valid_param_value(param, default_value=""):
    value = param.get("string_value", "")
    return default_value if value.startswith("${") or value == "" else value


class TritonPythonModel:
    def initialize(self, args):
        model_config = json.loads(args["model_config"])
        self.output_config = pb_utils.get_output_config_by_name(
            model_config, "text_output"
        )
        self.output_dtype = pb_utils.triton_string_to_numpy(
            self.output_config["data_type"]
        )
        self.decoupled = pb_utils.using_decoupled_model_transaction_policy(model_config)
        self.logger = pb_utils.Logger

    def create_triton_tensors(self, index):
        x = "bla" + str(index)
        output = [x.encode("utf8")]
        np_output = np.array(output).astype(self.output_dtype)
        seq_idx = np.array([[0]]).astype(np.int32)

        t1 = pb_utils.Tensor("text_output", np_output)
        t2 = pb_utils.Tensor("sequence_index", seq_idx)
        tensors = [t1, t2]
        return tensors

    def create_triton_response(self, index):
        tensors = self.create_triton_tensors(index)
        return pb_utils.InferenceResponse(output_tensors=tensors)

    def execute(self, requests):
        responses = []
        for request in requests:
            if self.decoupled:
                response_sender = request.get_response_sender()
            try:
                for index in range(0, 1):
                    triton_response = self.create_triton_response(index)
                    if self.decoupled:
                        response_sender.send(triton_response)
                    else:
                        responses.append(triton_response)

                if self.decoupled:
                    response_sender.send(
                        flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL
                    )

            except Exception:
                self.logger.log_error(traceback.format_exc())
                error_response = pb_utils.InferenceResponse(
                    output_tensors=[],
                    error=pb_utils.TritonError(traceback.format_exc()),
                )

                if self.decoupled:
                    response_sender.send(error_response)
                    response_sender.send(
                        flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL
                    )
                else:
                    responses.append(error_response)

        if self.decoupled:
            return None
        else:
            assert len(responses) == len(requests)
            return responses

    def finalize(self):
        self.logger.log_info("Cleaning up...")
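The model above always emits both text_output and sequence_index; dropping the output the client did not request is left to the server/backend, which is exactly the path the original crash exercised. For contrast only, a model that prefers to skip building unrequested tensors itself could consult the request. A sketch, assuming the requested_output_names() accessor on pb_utils.InferenceRequest; it is not part of this commit:

import numpy as np
import triton_python_backend_utils as pb_utils


def build_requested_tensors(request, output_dtype, index):
    # Sketch only: build just the tensors the client asked for. The committed
    # model deliberately does NOT do this, so the backend-side filtering of
    # unrequested outputs is what the test exercises.
    requested = set(request.requested_output_names())
    tensors = []
    if not requested or "text_output" in requested:
        text = np.array([("bla" + str(index)).encode("utf8")]).astype(output_dtype)
        tensors.append(pb_utils.Tensor("text_output", text))
    if not requested or "sequence_index" in requested:
        seq_idx = np.array([[0]]).astype(np.int32)
        tensors.append(pb_utils.Tensor("sequence_index", seq_idx))
    return tensors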
26 changes: 26 additions & 0 deletions qa/L0_backend_python/io/test.sh
@@ -176,6 +176,32 @@ done
kill $SERVER_PID
wait $SERVER_PID

# IOTest.test_requested_output_decoupled_prior_crash
rm -rf models && mkdir models
mkdir -p models/llm/1/
cp requested_output_model/config.pbtxt models/llm/
cp requested_output_model/model.py models/llm/1/

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    RET=1
fi

SUBTEST="test_requested_output_decoupled_prior_crash"
set +e
python3 -m pytest --junitxml=${SUBTEST}.report.xml ${UNITTEST_PY}::IOTest::${SUBTEST} > ${CLIENT_LOG}.${SUBTEST}
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** IOTest.${SUBTEST} FAILED. \n***"
    cat $CLIENT_LOG.${SUBTEST}
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID
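Between run_server and the pytest invocation above, the freshly staged llm model can optionally be checked for readiness. A minimal sketch using the gRPC client, assuming the server listens on localhost:8001 as in the test; this check is not part of the commit:

import tritonclient.grpc as grpcclient

# Optional sanity check before running the subtest: the server is up and the
# staged "llm" model is ready to serve.
client = grpcclient.InferenceServerClient("localhost:8001")
assert client.is_server_ready()
assert client.is_model_ready("llm")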

if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** IO test PASSED.\n***"
else