Skip to content

Commit

Permalink
Add L0_generative_sequence test (#6475)
Browse files Browse the repository at this point in the history
* Add testing backend and test

* Add test to build / CI. Minor fix on L0_http

* Format. Update backend documentation

* Fix up

* Address comment

* Add negative testing

* Fix up
  • Loading branch information
GuanLuo authored Nov 1, 2023
1 parent 638fcf9 commit f49493f
Show file tree
Hide file tree
Showing 11 changed files with 1,088 additions and 2 deletions.
2 changes: 2 additions & 0 deletions Dockerfile.QA
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,8 @@ RUN mkdir -p qa/common && \
mkdir qa/L0_query/models/query/1 && \
cp tritonbuild/tritonserver/backends/query/libtriton_query.so qa/L0_query/models/query/1/. && \
cp bin/query_test qa/L0_query/. && \
mkdir qa/L0_generative_sequence/models/generative_sequence/1 && \
cp tritonbuild/tritonserver/backends/generative_sequence/libtriton_generative_sequence.so qa/L0_generative_sequence/models/generative_sequence/1/. && \
cp bin/register_api_test qa/L0_register/. && \
cp bin/async_work_queue_test qa/L0_async_work_queue/. && \
cp tritonbuild/tritonserver/backends/implicit_state/libtriton_implicit_state.so \
Expand Down
1 change: 1 addition & 0 deletions build.py
Original file line number Diff line number Diff line change
Expand Up @@ -2037,6 +2037,7 @@ def cibase_build(
"sequence",
"dyna_sequence",
"distributed_addsub",
"generative_sequence",
):
be_install_dir = os.path.join(repo_install_dir, "backends", be)
if target_platform() == "windows":
Expand Down
175 changes: 175 additions & 0 deletions qa/L0_generative_sequence/generative_sequence_e2e.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
#!/usr/bin/env python
# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import json

# GRPC streaming helpers..
import queue
import unittest
from functools import partial

import numpy as np
import requests
import sseclient
import test_util as tu
import tritonclient.grpc as grpcclient
from tritonclient.utils import InferenceServerException

# JSON model-configuration template passed as the `config` override when the
# tests reload the model. It is rendered with str.format(): the single "{}"
# placeholder is replaced by a scheduler setting (e.g. a "sequence_batching"
# or "dynamic_batching" entry), and every literal JSON brace is doubled
# ("{{" / "}}") so that format() emits it unchanged.
MODEL_CONFIG_BASE = """
{{
"backend": "generative_sequence",
"max_batch_size": 4,
"input" : [
{{
"name": "INPUT",
"data_type": "TYPE_INT32",
"dims": [ 1 ]
}}
],
"output" : [
{{
"name": "OUTPUT",
"data_type": "TYPE_INT32",
"dims": [ 1 ]
}}
],
"model_transaction_policy" : {{
"decoupled": true
}},
{},
"instance_group" : [{{ "kind": "KIND_CPU" }}]
}}
"""


class UserData:
    """Mailbox shared between a test and the gRPC stream callback.

    The callback pushes every received result (or error) onto
    ``_completed_requests``; the test pops them in arrival order.
    """

    def __init__(self):
        # Unbounded FIFO queue; queue.Queue handles the locking needed for
        # hand-off between the callback and the test.
        self._completed_requests = queue.Queue()


def callback(user_data, result, error):
    """Record the outcome of one streamed response.

    Enqueues ``error`` on ``user_data._completed_requests`` when it is
    truthy, otherwise enqueues ``result``, so the consumer receives exactly
    one item per invocation.
    """
    outcome = error if error else result
    user_data._completed_requests.put(outcome)


class GenerativeSequenceTest(tu.TestResultCollector):
    """End-to-end tests for the "generative_sequence" test model.

    The tests talk to a server on localhost (HTTP on port 8000, gRPC on
    port 8001) and check that a request with INPUT 2 produces two responses
    whose OUTPUT values are 1 and then 0. Negative tests reload the model
    with scheduler settings under which the request is expected to fail.
    """

    def setUp(self):
        """Reload the model without a config override before every test."""
        # Always make sure the original config is used
        with grpcclient.InferenceServerClient("localhost:8001") as triton_client:
            triton_client.load_model("generative_sequence")

    def test_generate_stream(self):
        """Check the HTTP generate_stream endpoint returns OUTPUT 1 then 0."""
        headers = {"Accept": "text/event-stream"}
        url = "http://localhost:8000/v2/models/generative_sequence/generate_stream"
        inputs = {"INPUT": 2}
        res = requests.post(url, data=json.dumps(inputs), headers=headers)
        res.raise_for_status()
        client = sseclient.SSEClient(res)
        res_count = 2
        for event in client.events():
            # Decrement first: each event's OUTPUT must equal the number of
            # responses still outstanding after it.
            res_count -= 1
            data = json.loads(event.data)
            self.assertIn("OUTPUT", data)
            self.assertEqual(res_count, data["OUTPUT"])
        # Exactly two events must have been received.
        self.assertEqual(0, res_count)

    def test_grpc_stream(self, sequence_id=0, sequence_start=False):
        """Check the gRPC stream returns OUTPUT 1 then 0 for INPUT 2.

        Also used as a helper by the negative tests, which pass explicit
        sequence parameters and expect the error delivered through the
        stream callback to be re-raised here.

        Args:
            sequence_id: Sequence ID forwarded to ``async_stream_infer``.
            sequence_start: Sequence-start flag forwarded to
                ``async_stream_infer``.

        Raises:
            InferenceServerException: If the stream callback delivered an
                error instead of a result.
        """
        user_data = UserData()
        with grpcclient.InferenceServerClient("localhost:8001") as triton_client:
            triton_client.start_stream(callback=partial(callback, user_data))
            infer_input = grpcclient.InferInput("INPUT", [1, 1], "INT32")
            infer_input.set_data_from_numpy(np.array([[2]], dtype=np.int32))

            triton_client.async_stream_infer(
                model_name="generative_sequence",
                inputs=[infer_input],
                sequence_id=sequence_id,
                sequence_start=sequence_start,
            )
            res_count = 2
            while res_count > 0:
                data_item = user_data._completed_requests.get()
                res_count -= 1
                # isinstance (rather than an exact type comparison) so that
                # subclasses of InferenceServerException are re-raised too.
                if isinstance(data_item, InferenceServerException):
                    raise data_item
                self.assertEqual(res_count, data_item.as_numpy("OUTPUT")[0][0])
            self.assertEqual(0, res_count)

    def test_unsupported_sequence_scheduler(self):
        """Check sequence schedulers without generative mode reject the request."""
        # Override model config with scheduler settings that do not support
        # request rescheduling.
        configs = [
            r'"sequence_batching" : { "direct" : {}, "generative_sequence" : false }',
            r'"sequence_batching" : { "oldest" : {}, "generative_sequence" : false }',
        ]
        expected_error = (
            "Request is released with TRITONSERVER_REQUEST_RELEASE_RESCHEDULE"
        )
        # Each configuration gets its own sequence ID, starting from 1.
        for sid, sc in enumerate(configs, start=1):
            with grpcclient.InferenceServerClient("localhost:8001") as triton_client:
                triton_client.load_model(
                    "generative_sequence", config=MODEL_CONFIG_BASE.format(sc)
                )
            with self.assertRaises(InferenceServerException) as context:
                # Without specifying 'generative_sequence : true', the sequence
                # batcher expects sequence parameters to be provided explicitly
                self.test_grpc_stream(sequence_id=sid, sequence_start=True)
            message = str(context.exception)
            print(message)
            self.assertIn(expected_error, message)

    def test_unsupported_dynamic_scheduler(self):
        """Check the dynamic batcher rejects the request."""
        # Override model config with scheduler settings that do not support
        # request rescheduling.
        configs = [
            r'"dynamic_batching" : {}',
        ]
        expected_error = (
            "Request is released with TRITONSERVER_REQUEST_RELEASE_RESCHEDULE"
        )
        for sc in configs:
            with grpcclient.InferenceServerClient("localhost:8001") as triton_client:
                triton_client.load_model(
                    "generative_sequence", config=MODEL_CONFIG_BASE.format(sc)
                )
            with self.assertRaises(InferenceServerException) as context:
                self.test_grpc_stream()
            message = str(context.exception)
            print(message)
            self.assertIn(expected_error, message)


# Run every test in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
48 changes: 48 additions & 0 deletions qa/L0_generative_sequence/models/generative_sequence/config.pbtxt
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Default configuration of the "generative_sequence" test model: one INT32
# input and one INT32 output, CPU instances only.
backend: "generative_sequence"
max_batch_size: 4
input [
  {
    name: "INPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
output [
  {
    name: "OUTPUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  }
]
# NOTE(review): decoupled presumably lets the backend return several responses
# for one request (the e2e test reads two per request) - confirm.
model_transaction_policy {
  decoupled: True
}
# Generative mode of the sequence batcher. The e2e negative tests override
# this section with "generative_sequence : false" or with dynamic batching.
sequence_batching {
  generative_sequence : true
}
instance_group [{ kind: KIND_CPU }]
92 changes: 92 additions & 0 deletions qa/L0_generative_sequence/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#!/bin/bash
# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Resolve the repository version: the first command-line argument, when
# given, overrides the NVIDIA_TRITON_SERVER_VERSION environment variable.
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi
if [ -n "$TEST_REPO_ARCH" ]; then
    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
fi

# Shared QA helpers; run_server and check_test_results used below are
# expected to come from this file.
source ../common/util.sh

RET=0

CLIENT_LOG="./generative_sequence_client.log"
TEST_PY=./generative_sequence_e2e.py
# Must match the number of test_* methods in $TEST_PY.
EXPECTED_NUM_TESTS="4"
TEST_RESULT_FILE='test_results.txt'


export CUDA_VISIBLE_DEVICES=0

rm -fr *.log

# The Python test imports 'sseclient' to parse the streamed HTTP events.
pip install sseclient-py

SERVER=/opt/tritonserver/bin/tritonserver
# The Python test loads and reloads the model itself via load_model().
SERVER_ARGS="--model-repository=$(pwd)/models --model-control-mode=EXPLICIT"
SERVER_LOG="./inference_server.log"
run_server
# SERVER_PID of "0" is treated as "server failed to start".
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

# Do not abort on a failing test; failures are recorded in RET so the server
# can still be shut down and the logs printed.
set +e
python "$TEST_PY" >>"$CLIENT_LOG" 2>&1
if [ $? -ne 0 ]; then
    RET=1
else
    check_test_results "$TEST_RESULT_FILE" "$EXPECTED_NUM_TESTS"
    if [ $? -ne 0 ]; then
        # The client log is printed once, by the failure branch at the end
        # of the script.
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    cat "$CLIENT_LOG"
    cat "$SERVER_LOG"
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET
2 changes: 1 addition & 1 deletion qa/L0_http/generate_endpoint_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def check_sse_responses(self, res, expected_res):
self.assertIn(key, data)
self.assertEqual(value, data[key])
res_count += 1
self.assertTrue(len(expected_res), res_count)
self.assertEqual(len(expected_res), res_count)
# Make sure there is no message in the wrong form
for remaining in client._read():
self.assertTrue(
Expand Down
3 changes: 2 additions & 1 deletion src/test/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -101,6 +101,7 @@ add_subdirectory(repoagent/relocation_repoagent repoagent/relocation_repoagent)

add_subdirectory(distributed_addsub distributed_addsub)
add_subdirectory(dyna_sequence dyna_sequence)
add_subdirectory(generative_sequence generative_sequence)
add_subdirectory(implicit_state implicit_state)
add_subdirectory(query_backend query_backend)

Expand Down
Loading

0 comments on commit f49493f

Please sign in to comment.