
Commit fc8cd18: ... into oandreeva_vllm_ci
dyastremsky committed Oct 6, 2023
2 parents 59b4de9 + 2bf543b
Showing 15 changed files with 2,051 additions and 295 deletions.
33 changes: 33 additions & 0 deletions qa/L0_backend_python/decoupled/decoupled_test.py
@@ -256,6 +256,39 @@ def test_decoupled_send_after_close_error(self):
            "The completed request size must be zero.",
        )

    def test_decoupled_execute_cancel(self):
        model_name = "execute_cancel"
        log_path = "decoupled_server.log"
        execute_delay = 4.0  # seconds
        shape = [1, 1]

        user_data = UserData()
        with grpcclient.InferenceServerClient("localhost:8001") as client:
            client.start_stream(callback=partial(callback, user_data))
            input_data = np.array([[execute_delay]], dtype=np.float32)
            inputs = [
                grpcclient.InferInput(
                    "EXECUTE_DELAY", shape, np_to_triton_dtype(input_data.dtype)
                )
            ]
            inputs[0].set_data_from_numpy(input_data)
            client.async_stream_infer(model_name, inputs)
            time.sleep(2)  # model delay for decoupling request and response sender
            time.sleep(2)  # ensure the request is executing
            client.stop_stream(cancel_requests=True)
            time.sleep(2)  # ensure the cancellation is delivered

        self.assertFalse(user_data._completed_requests.empty())
        while not user_data._completed_requests.empty():
            data_item = user_data._completed_requests.get()
            self.assertIsInstance(data_item, InferenceServerException)
            self.assertEqual(data_item.status(), "StatusCode.CANCELLED")

        with open(log_path, mode="r", encoding="utf-8", errors="strict") as f:
            log_text = f.read()
            self.assertIn("[execute_cancel] Request not cancelled at 1.0 s", log_text)
            self.assertIn("[execute_cancel] Request cancelled at ", log_text)


if __name__ == "__main__":
    unittest.main()
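The two `assertIn` checks above key on log lines written by the `execute_cancel` model itself; its `model.py` (under `qa/python_models/execute_cancel/`) is among the files not shown in this view. As a hedged sketch of the decoupled path this test exercises, assuming the python_backend cancellation API (`InferenceRequest.is_cancelled()`) and the `pb_utils.TritonError` code constants that the lifecycle tests below rely on, the model could look roughly like this; the input name `EXECUTE_DELAY` and the log strings come from the test, everything else is an assumption:

```python
# Hypothetical sketch only; the real qa/python_models/execute_cancel/model.py
# is not shown in this diff. Assumes InferenceRequest.is_cancelled() and
# pb_utils.TritonError.CANCELLED are available.
import time

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    def execute(self, requests):
        for request in requests:
            # EXECUTE_DELAY is a [1, 1] FP32 tensor holding the delay in seconds.
            delay = float(
                pb_utils.get_input_tensor_by_name(
                    request, "EXECUTE_DELAY"
                ).as_numpy()[0][0]
            )
            sender = request.get_response_sender()
            elapsed = 0.0
            cancelled = False
            while elapsed < delay:
                time.sleep(1.0)
                elapsed += 1.0
                if request.is_cancelled():
                    # Produces the "[execute_cancel] Request cancelled at " line.
                    pb_utils.Logger.log_info(
                        f"[execute_cancel] Request cancelled at {elapsed} s"
                    )
                    cancelled = True
                    break
                # Produces the "... not cancelled at 1.0 s" line on the first pass.
                pb_utils.Logger.log_info(
                    f"[execute_cancel] Request not cancelled at {elapsed} s"
                )
            if cancelled:
                error = pb_utils.TritonError(
                    "request cancelled", pb_utils.TritonError.CANCELLED
                )
                sender.send(
                    pb_utils.InferenceResponse(error=error),
                    flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL,
                )
            else:
                sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)
        return None
```

A response carrying a CANCELLED-coded error is what the client callback receives as an `InferenceServerException` whose `status()` is `"StatusCode.CANCELLED"`, matching the assertions in the test.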
7 changes: 6 additions & 1 deletion qa/L0_backend_python/decoupled/test.sh
@@ -27,7 +27,7 @@

CLIENT_PY=./decoupled_test.py
CLIENT_LOG="./decoupled_client.log"
-EXPECTED_NUM_TESTS="5"
+EXPECTED_NUM_TESTS="6"
TEST_RESULT_FILE='test_results.txt'
TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
SERVER=${TRITON_DIR}/bin/tritonserver
@@ -50,6 +50,11 @@ mkdir -p models/dlpack_add_sub/1/
cp ../../python_models/dlpack_add_sub/model.py models/dlpack_add_sub/1/
cp ../../python_models/dlpack_add_sub/config.pbtxt models/dlpack_add_sub/

mkdir -p models/execute_cancel/1/
cp ../../python_models/execute_cancel/model.py ./models/execute_cancel/1/
cp ../../python_models/execute_cancel/config.pbtxt ./models/execute_cancel/
echo "model_transaction_policy { decoupled: True }" >> ./models/execute_cancel/config.pbtxt

git clone https://github.com/triton-inference-server/python_backend -b $PYTHON_BACKEND_REPO_TAG
mkdir -p models/square_int32/1/
cp python_backend/examples/decoupled/square_model.py models/square_int32/1/model.py
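The `echo` line above switches `execute_cancel` into decoupled mode for this suite only; the lifecycle suite below copies the same `model.py` without it, so the model has to detect its transaction policy at load time. A minimal sketch of that branch, assuming the model uses the `pb_utils.using_decoupled_model_transaction_policy` helper (the helper is part of python_backend; its use in this particular model is an assumption):

```python
import json

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    def initialize(self, args):
        model_config = json.loads(args["model_config"])
        # True when config.pbtxt ends with the appended
        # model_transaction_policy { decoupled: True }; False otherwise.
        self._decoupled = pb_utils.using_decoupled_model_transaction_policy(
            model_config
        )
```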
38 changes: 38 additions & 0 deletions qa/L0_backend_python/lifecycle/lifecycle_test.py
@@ -31,6 +31,7 @@
sys.path.append("../../common")

import queue
import time
import unittest
from functools import partial

@@ -70,6 +71,7 @@ def test_error_code(self):
            ("UNAVAILABLE", "[StatusCode.UNAVAILABLE]"),
            ("UNSUPPORTED", "[StatusCode.UNIMPLEMENTED]"),
            ("ALREADY_EXISTS", "[StatusCode.ALREADY_EXISTS]"),
            ("CANCELLED", "[StatusCode.CANCELLED]"),
            ("(default)", "[StatusCode.INTERNAL] unrecognized"),
        ]
        with self._shm_leak_detector.Probe() as shm_probe:
@@ -91,6 +93,42 @@ def test_error_code(self):
                    expected_grpc_error_start + " error code: " + error,
                )

    def test_execute_cancel(self):
        model_name = "execute_cancel"
        log_path = "lifecycle_server.log"
        execute_delay = 4.0  # seconds
        shape = [1, 1]
        response = {"responded": False, "result": None, "error": None}

        def callback(result, error):
            response["responded"] = True
            response["result"] = result
            response["error"] = error

        with self._shm_leak_detector.Probe() as shm_probe:
            with grpcclient.InferenceServerClient("localhost:8001") as client:
                input_data = np.array([[execute_delay]], dtype=np.float32)
                inputs = [
                    grpcclient.InferInput(
                        "EXECUTE_DELAY", shape, np_to_triton_dtype(input_data.dtype)
                    )
                ]
                inputs[0].set_data_from_numpy(input_data)
                exec_future = client.async_infer(model_name, inputs, callback)
                time.sleep(2)  # ensure the request is executing
                self.assertFalse(response["responded"])
                exec_future.cancel()
                time.sleep(2)  # ensure the cancellation is delivered
                self.assertTrue(response["responded"])

        self.assertEqual(response["result"], None)
        self.assertIsInstance(response["error"], InferenceServerException)
        self.assertEqual(response["error"].status(), "StatusCode.CANCELLED")
        with open(log_path, mode="r", encoding="utf-8", errors="strict") as f:
            log_text = f.read()
            self.assertIn("[execute_cancel] Request not cancelled at 1.0 s", log_text)
            self.assertIn("[execute_cancel] Request cancelled at ", log_text)

    def test_batch_error(self):
        # The execute_error model returns an error for the first and third
        # request and successfully processes the second request. This is making
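`test_execute_cancel` runs the same `execute_cancel` model in its default, non-decoupled mode, and the new `CANCELLED` row in `test_error_code` pins down how the gRPC frontend maps the error code. In the non-decoupled case the model returns one response per request instead of using a response sender; a minimal sketch of the cancelled branch, under the same assumptions as the decoupled sketch earlier:

```python
import triton_python_backend_utils as pb_utils


def cancelled_response():
    # A CANCELLED-coded error response, appended to the list that a
    # non-decoupled execute() returns (one entry per request). The gRPC
    # frontend surfaces it to the client as an InferenceServerException
    # with status "StatusCode.CANCELLED", which both new tests assert.
    error = pb_utils.TritonError(
        "request cancelled", pb_utils.TritonError.CANCELLED
    )
    return pb_utils.InferenceResponse(error=error)
```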
6 changes: 5 additions & 1 deletion qa/L0_backend_python/lifecycle/test.sh
@@ -26,7 +26,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

CLIENT_LOG="./lifecycle_client.log"
-EXPECTED_NUM_TESTS="4"
+EXPECTED_NUM_TESTS="5"
TEST_RESULT_FILE='test_results.txt'
source ../common.sh
source ../../common/util.sh
@@ -44,6 +44,10 @@ mkdir -p models/error_code/1/
cp ../../python_models/error_code/model.py ./models/error_code/1/
cp ../../python_models/error_code/config.pbtxt ./models/error_code/

mkdir -p models/execute_cancel/1/
cp ../../python_models/execute_cancel/model.py ./models/execute_cancel/1/
cp ../../python_models/execute_cancel/config.pbtxt ./models/execute_cancel/

mkdir -p models/execute_error/1/
cp ../../python_models/execute_error/model.py ./models/execute_error/1/
cp ../../python_models/execute_error/config.pbtxt ./models/execute_error/
(The remaining 11 changed files are not shown here.)
