
Commit fc8cd18: ... into oandreeva_vllm_ci
dyastremsky committed Oct 6, 2023
2 parents 59b4de9 + 2bf543b
Showing 15 changed files with 2,051 additions and 295 deletions.
33 changes: 33 additions & 0 deletions qa/L0_backend_python/decoupled/decoupled_test.py
@@ -256,6 +256,39 @@ def test_decoupled_send_after_close_error(self):
            "The completed request size must be zero.",
        )

    def test_decoupled_execute_cancel(self):
        model_name = "execute_cancel"
        log_path = "decoupled_server.log"
        execute_delay = 4.0  # seconds
        shape = [1, 1]

        user_data = UserData()
        with grpcclient.InferenceServerClient("localhost:8001") as client:
            client.start_stream(callback=partial(callback, user_data))
            input_data = np.array([[execute_delay]], dtype=np.float32)
            inputs = [
                grpcclient.InferInput(
                    "EXECUTE_DELAY", shape, np_to_triton_dtype(input_data.dtype)
                )
            ]
            inputs[0].set_data_from_numpy(input_data)
            client.async_stream_infer(model_name, inputs)
            time.sleep(2)  # model delay for decoupling request and response sender
            time.sleep(2)  # ensure the request is executing
            client.stop_stream(cancel_requests=True)
            time.sleep(2)  # ensure the cancellation is delivered

        self.assertFalse(user_data._completed_requests.empty())
        while not user_data._completed_requests.empty():
            data_item = user_data._completed_requests.get()
            self.assertIsInstance(data_item, InferenceServerException)
            self.assertEqual(data_item.status(), "StatusCode.CANCELLED")

        with open(log_path, mode="r", encoding="utf-8", errors="strict") as f:
            log_text = f.read()
            self.assertIn("[execute_cancel] Request not cancelled at 1.0 s", log_text)
            self.assertIn("[execute_cancel] Request cancelled at ", log_text)


if __name__ == "__main__":
    unittest.main()
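The two `assertIn` checks above key on log lines written by the `execute_cancel` model itself; its `model.py` (under `qa/python_models/execute_cancel/`) is among the files not shown in this view. As a hedged sketch of the decoupled path this test exercises, assuming the python_backend cancellation API (`InferenceRequest.is_cancelled()`) and the `pb_utils.TritonError` code constants that the lifecycle tests below rely on, the model could look roughly like this; the input name `EXECUTE_DELAY` and the log strings come from the test, everything else is an assumption:

```python
# Hypothetical sketch only; the real qa/python_models/execute_cancel/model.py
# is not shown in this diff. Assumes InferenceRequest.is_cancelled() and
# pb_utils.TritonError.CANCELLED are available.
import time

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    def execute(self, requests):
        for request in requests:
            # EXECUTE_DELAY is a [1, 1] FP32 tensor holding the delay in seconds.
            delay = float(
                pb_utils.get_input_tensor_by_name(
                    request, "EXECUTE_DELAY"
                ).as_numpy()[0][0]
            )
            sender = request.get_response_sender()
            elapsed = 0.0
            cancelled = False
            while elapsed < delay:
                time.sleep(1.0)
                elapsed += 1.0
                if request.is_cancelled():
                    # Produces the "[execute_cancel] Request cancelled at " line.
                    pb_utils.Logger.log_info(
                        f"[execute_cancel] Request cancelled at {elapsed} s"
                    )
                    cancelled = True
                    break
                # Produces the "... not cancelled at 1.0 s" line on the first pass.
                pb_utils.Logger.log_info(
                    f"[execute_cancel] Request not cancelled at {elapsed} s"
                )
            if cancelled:
                error = pb_utils.TritonError(
                    "request cancelled", pb_utils.TritonError.CANCELLED
                )
                sender.send(
                    pb_utils.InferenceResponse(error=error),
                    flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL,
                )
            else:
                sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)
        return None
```

A response carrying a CANCELLED-coded error is what the client callback receives as an `InferenceServerException` whose `status()` is `"StatusCode.CANCELLED"`, matching the assertions in the test.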
7 changes: 6 additions & 1 deletion qa/L0_backend_python/decoupled/test.sh
@@ -27,7 +27,7 @@

CLIENT_PY=./decoupled_test.py
CLIENT_LOG="./decoupled_client.log"
-EXPECTED_NUM_TESTS="5"
+EXPECTED_NUM_TESTS="6"
TEST_RESULT_FILE='test_results.txt'
TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
SERVER=${TRITON_DIR}/bin/tritonserver
@@ -50,6 +50,11 @@ mkdir -p models/dlpack_add_sub/1/
cp ../../python_models/dlpack_add_sub/model.py models/dlpack_add_sub/1/
cp ../../python_models/dlpack_add_sub/config.pbtxt models/dlpack_add_sub/

mkdir -p models/execute_cancel/1/
cp ../../python_models/execute_cancel/model.py ./models/execute_cancel/1/
cp ../../python_models/execute_cancel/config.pbtxt ./models/execute_cancel/
echo "model_transaction_policy { decoupled: True }" >> ./models/execute_cancel/config.pbtxt

git clone https://github.com/triton-inference-server/python_backend -b $PYTHON_BACKEND_REPO_TAG
mkdir -p models/square_int32/1/
cp python_backend/examples/decoupled/square_model.py models/square_int32/1/model.py
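The `echo` line above switches `execute_cancel` into decoupled mode for this suite only; the lifecycle suite below copies the same `model.py` without it, so the model has to detect its transaction policy at load time. A minimal sketch of that branch, assuming the model uses the `pb_utils.using_decoupled_model_transaction_policy` helper (the helper is part of python_backend; its use in this particular model is an assumption):

```python
import json

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    def initialize(self, args):
        model_config = json.loads(args["model_config"])
        # True when config.pbtxt ends with the appended
        # model_transaction_policy { decoupled: True }; False otherwise.
        self._decoupled = pb_utils.using_decoupled_model_transaction_policy(
            model_config
        )
```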
38 changes: 38 additions & 0 deletions qa/L0_backend_python/lifecycle/lifecycle_test.py
@@ -31,6 +31,7 @@
sys.path.append("../../common")

import queue
import time
import unittest
from functools import partial

@@ -70,6 +71,7 @@ def test_error_code(self):
            ("UNAVAILABLE", "[StatusCode.UNAVAILABLE]"),
            ("UNSUPPORTED", "[StatusCode.UNIMPLEMENTED]"),
            ("ALREADY_EXISTS", "[StatusCode.ALREADY_EXISTS]"),
            ("CANCELLED", "[StatusCode.CANCELLED]"),
            ("(default)", "[StatusCode.INTERNAL] unrecognized"),
        ]
        with self._shm_leak_detector.Probe() as shm_probe:
@@ -91,6 +93,42 @@ def test_error_code(self):
                    expected_grpc_error_start + " error code: " + error,
                )

    def test_execute_cancel(self):
        model_name = "execute_cancel"
        log_path = "lifecycle_server.log"
        execute_delay = 4.0  # seconds
        shape = [1, 1]
        response = {"responded": False, "result": None, "error": None}

        def callback(result, error):
            response["responded"] = True
            response["result"] = result
            response["error"] = error

        with self._shm_leak_detector.Probe() as shm_probe:
            with grpcclient.InferenceServerClient("localhost:8001") as client:
                input_data = np.array([[execute_delay]], dtype=np.float32)
                inputs = [
                    grpcclient.InferInput(
                        "EXECUTE_DELAY", shape, np_to_triton_dtype(input_data.dtype)
                    )
                ]
                inputs[0].set_data_from_numpy(input_data)
                exec_future = client.async_infer(model_name, inputs, callback)
                time.sleep(2)  # ensure the request is executing
                self.assertFalse(response["responded"])
                exec_future.cancel()
                time.sleep(2)  # ensure the cancellation is delivered
                self.assertTrue(response["responded"])

        self.assertEqual(response["result"], None)
        self.assertIsInstance(response["error"], InferenceServerException)
        self.assertEqual(response["error"].status(), "StatusCode.CANCELLED")
        with open(log_path, mode="r", encoding="utf-8", errors="strict") as f:
            log_text = f.read()
            self.assertIn("[execute_cancel] Request not cancelled at 1.0 s", log_text)
            self.assertIn("[execute_cancel] Request cancelled at ", log_text)

    def test_batch_error(self):
        # The execute_error model returns an error for the first and third
        # request and successfully processes the second request. This is making
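`test_execute_cancel` runs the same `execute_cancel` model in its default, non-decoupled mode, and the new `CANCELLED` row in `test_error_code` pins down how the gRPC frontend maps the error code. In the non-decoupled case the model returns one response per request instead of using a response sender; a minimal sketch of the cancelled branch, under the same assumptions as the decoupled sketch earlier:

```python
import triton_python_backend_utils as pb_utils


def cancelled_response():
    # A CANCELLED-coded error response, appended to the list that a
    # non-decoupled execute() returns (one entry per request). The gRPC
    # frontend surfaces it to the client as an InferenceServerException
    # with status "StatusCode.CANCELLED", which both new tests assert.
    error = pb_utils.TritonError(
        "request cancelled", pb_utils.TritonError.CANCELLED
    )
    return pb_utils.InferenceResponse(error=error)
```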
6 changes: 5 additions & 1 deletion qa/L0_backend_python/lifecycle/test.sh
@@ -26,7 +26,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

CLIENT_LOG="./lifecycle_client.log"
-EXPECTED_NUM_TESTS="4"
+EXPECTED_NUM_TESTS="5"
TEST_RESULT_FILE='test_results.txt'
source ../common.sh
source ../../common/util.sh
@@ -44,6 +44,10 @@ mkdir -p models/error_code/1/
cp ../../python_models/error_code/model.py ./models/error_code/1/
cp ../../python_models/error_code/config.pbtxt ./models/error_code/

mkdir -p models/execute_cancel/1/
cp ../../python_models/execute_cancel/model.py ./models/execute_cancel/1/
cp ../../python_models/execute_cancel/config.pbtxt ./models/execute_cancel/

mkdir -p models/execute_error/1/
cp ../../python_models/execute_error/model.py ./models/execute_error/1/
cp ../../python_models/execute_error/config.pbtxt ./models/execute_error/
(The remaining 11 changed files are not shown here.)
