triton-inference-server · Tabrizian · Jul 7, 2023 · Jul 3, 2023 · Jul 4, 2023 · Jul 4, 2023
diff --git a/docs/user_guide/architecture.md b/docs/user_guide/architecture.md
@@ -1,5 +1,5 @@
 <!--
-# Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
+# Copyright 2018-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -298,7 +298,8 @@ additional tensors that have to be transferred.
 
 Implicit state management requires backend support. Currently, only
-[onnxruntime_backend](https://github.com/triton-inference-server/onnxruntime_backend)
-and [tensorrt_backend](https://github.com/triton-inference-server/tensorrt_backend)
+[onnxruntime_backend](https://github.com/triton-inference-server/onnxruntime_backend),
+[tensorrt_backend](https://github.com/triton-inference-server/tensorrt_backend),
+and [pytorch_backend](https://github.com/triton-inference-server/pytorch_backend)
 support implicit state.
 
 ##### State Initialization

diff --git a/qa/L0_implicit_state/implicit_state.py b/qa/L0_implicit_state/implicit_state.py
@@ -37,7 +37,7 @@
 import unittest
 import test_util as tu
 
-BACKENDS = os.environ.get('BACKENDS', "onnx plan")
+BACKENDS = os.environ.get('BACKENDS', "onnx plan libtorch")
 
 
 class ImplicitStateTest(tu.TestResultCollector):
@@ -112,14 +112,25 @@ def test_no_update(self):
 
     def test_request_output_not_allowed(self):
         triton_client = tritonhttpclient.InferenceServerClient("localhost:8000")
-        inputs = []
-        inputs.append(tritonhttpclient.InferInput('INPUT', [1], 'INT32'))
-        inputs[0].set_data_from_numpy(np.asarray([1], dtype=np.int32))
-
-        outputs = []
-        outputs.append(tritonhttpclient.InferRequestedOutput('OUTPUT_STATE'))
 
         for backend in BACKENDS.split(" "):
+            inputs = []
+            if backend.strip() == 'libtorch':
+                inputs.append(
+                    tritonhttpclient.InferInput('INPUT__0', [1], 'INT32'))
+            else:
+                inputs.append(tritonhttpclient.InferInput(
+                    'INPUT', [1], 'INT32'))
+            inputs[0].set_data_from_numpy(np.asarray([1], dtype=np.int32))
+
+            outputs = []
+            if backend.strip() == 'libtorch':
+                outputs.append(
+                    tritonhttpclient.InferRequestedOutput('OUTPUT_STATE__1'))
+            else:
+                outputs.append(
+                    tritonhttpclient.InferRequestedOutput('OUTPUT_STATE'))
+
             with self.assertRaises(InferenceServerException) as e:
                 triton_client.infer(
                     model_name=f"{backend}_nobatch_sequence_int32",
@@ -128,30 +139,54 @@ def test_request_output_not_allowed(self):
                     sequence_id=1,
                     sequence_start=True,
                     sequence_end=True)
-            self.assertIn(
-                "unexpected inference output 'OUTPUT_STATE' for model",
-                str(e.exception))
+            if backend.strip() == 'libtorch':
+                self.assertIn(
+                    "unexpected inference output 'OUTPUT_STATE__1' for model",
+                    str(e.exception))
+            else:
+                self.assertIn(
+                    "unexpected inference output 'OUTPUT_STATE' for model",
+                    str(e.exception))
 
     def test_request_output(self):
         triton_client = tritonhttpclient.InferenceServerClient("localhost:8000")
-        inputs = []
-        inputs.append(tritonhttpclient.InferInput('INPUT', [1], 'INT32'))
-        inputs[0].set_data_from_numpy(np.asarray([1], dtype=np.int32))
-
-        outputs = []
-        outputs.append(tritonhttpclient.InferRequestedOutput('OUTPUT_STATE'))
-        outputs.append(tritonhttpclient.InferRequestedOutput('OUTPUT'))
-
         for backend in BACKENDS.split(" "):
             inputs = []
             if backend.strip() == 'libtorch':
                 inputs.append(
                     tritonhttpclient.InferInput('INPUT__0', [1], 'INT32'))
+            else:
+                inputs.append(tritonhttpclient.InferInput(
+                    'INPUT', [1], 'INT32'))
             inputs[0].set_data_from_numpy(np.asarray([1], dtype=np.int32))
 
             outputs = []
             if backend.strip() == 'libtorch':
                 outputs.append(
                     tritonhttpclient.InferRequestedOutput('OUTPUT_STATE__1'))
                 outputs.append(
                     tritonhttpclient.InferRequestedOutput('OUTPUT__0'))
             else:
                 outputs.append(
                     tritonhttpclient.InferRequestedOutput('OUTPUT_STATE'))
                 outputs.append(tritonhttpclient.InferRequestedOutput('OUTPUT'))
 
             result = triton_client.infer(
                 model_name=f"{backend}_nobatch_sequence_int32_output",
                 inputs=inputs,
                 outputs=outputs,
                 sequence_id=1,
                 sequence_start=True,
                 sequence_end=True)
-            self.assertTrue(result.as_numpy('OUTPUT_STATE')[0], 1)
-            self.assertTrue(result.as_numpy('OUTPUT')[0], 1)
+            # NOTE(review): assertTrue(x, 1) treats 1 as the failure message,
+            # so these only check that the value is truthy. assertEqual is
+            # probably intended -- confirm the expected value first.
+            if backend.strip() == 'libtorch':
+                self.assertTrue(result.as_numpy('OUTPUT_STATE__1')[0], 1)
+                self.assertTrue(result.as_numpy('OUTPUT__0')[0], 1)
+            else:
+                self.assertTrue(result.as_numpy('OUTPUT_STATE')[0], 1)
+                self.assertTrue(result.as_numpy('OUTPUT')[0], 1)
 
 
 if __name__ == '__main__':

diff --git a/qa/L0_implicit_state/test.sh b/qa/L0_implicit_state/test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -41,7 +41,7 @@ DATADIR=${DATADIR:="/data/inferenceserver/${REPO_VERSION}"}
 TEST_RESULT_FILE='test_results.txt'
 
 export ENSEMBLES=0
-BACKENDS=${BACKENDS:="onnx plan"}
+BACKENDS=${BACKENDS:="libtorch onnx plan"}
 export BACKENDS
 export IMPLICIT_STATE=1
 INITIAL_STATE_ZERO=${INITIAL_STATE_ZERO:="0"}
@@ -78,9 +78,15 @@ for BACKEND in $BACKENDS; do
     rm -rf models/$model_name_allow_output
     cp -r $DATADIR/qa_sequence_implicit_model_repository/$model_name models/$model_name_allow_output
 
+    # The libtorch model exposes its state output under an indexed name.
+    if [ "$BACKEND" == "libtorch" ]; then
+        output_state_name="OUTPUT_STATE__1"
+    else
+        output_state_name="OUTPUT_STATE"
+    fi
     (cd models/$model_name_allow_output && \
         sed -i "s/^name:.*/name: \"$model_name_allow_output\"/" config.pbtxt && \
-        echo -e "output [{ name: \"OUTPUT_STATE\" \n data_type: TYPE_INT32 \n dims: [ 1 ] }]" >> config.pbtxt)
+        echo -e "output [{ name: \"$output_state_name\" \n data_type: TYPE_INT32 \n dims: [ 1 ] }]" >> config.pbtxt)
 done
 
 CLIENT_LOG=`pwd`/client.log

diff --git a/qa/L0_sequence_batcher/sequence_batcher_test.py b/qa/L0_sequence_batcher/sequence_batcher_test.py
@@ -68,7 +68,7 @@
 _trials = ()
 if NO_BATCHING:
     for backend in BACKENDS.split(' '):
-        if (backend != "libtorch") and (backend != 'custom'):
+        if (backend != 'custom'):
             _trials += (backend + "_nobatch",)
 elif os.environ['BATCHER_TYPE'] == "VARIABLE":
     for backend in BACKENDS.split(' '):
@@ -130,10 +130,13 @@ def get_datatype(self, trial):
         if ("graphdef" in trial):
             return (np.dtype(object), np.bool_)
 
-        # Only test the string data type for ONNX models in implicit state
+        # Only test the string data type for ONNX and libtorch models in implicit state
         if IMPLICIT_STATE:
             if ("onnx" in trial):
                 return (np.dtype(object), np.int32, np.bool_)
+            if NO_BATCHING:
+                if ("libtorch" in trial):
+                    return (np.dtype(object), np.int32, np.bool_)
 
         return (np.int32, np.bool_)
 
@@ -156,7 +159,7 @@ def get_expected_result_implicit(self,
                                      trial,
                                      flag_str=None,
                                      dtype=None):
-        if dtype == np.dtype(object):
+        if dtype == np.dtype(object) and trial.startswith('onnx'):
             return value
 
         if INITIAL_STATE_FILE:

diff --git a/qa/L0_sequence_batcher/test.sh b/qa/L0_sequence_batcher/test.sh
@@ -191,6 +191,9 @@ function get_datatype () {
     if [[ $1 == "onnx" ]]; then
         dtype="object int32 bool"
     fi
+    if [[ $1 == "libtorch" ]]; then
+        dtype="object int32 bool"
+    fi
   fi
   echo $dtype
 }
@@ -268,6 +271,10 @@ fi
 
 for MODEL in $MODELS; do
   if [[ ! "$TEST_VALGRIND" -eq 1 ]]; then
+    # Skip libtorch string models
+    if [[ "$MODEL" =~ .*"libtorch".*"object".* ]]; then
+        continue
+    fi
     if [[ "$MODEL" =~ .*"python".* ]]; then
       generate_python_models "$MODEL" "models1"
     else
@@ -278,6 +285,11 @@ for MODEL in $MODELS; do
         sed -i "s/kind: KIND_GPU/kind: KIND_GPU\\ncount: 1/" config.pbtxt && \
         sed -i "s/kind: KIND_CPU/kind: KIND_CPU\\ncount: 1/" config.pbtxt)
 
+    # Skip libtorch string models
+    if [[ "$MODEL" =~ .*"libtorch".*"object".* ]]; then
+        continue
+    fi
+
     if [[ "$MODEL" =~ .*"python".* ]]; then
       generate_python_models "$MODEL" "models2"
     else
@@ -443,6 +455,10 @@ for BACKEND in $BACKENDS; do
 done
 
 for MODEL in $MODELS; do
+  # Skip libtorch string models
+  if [[ "$MODEL" =~ .*"libtorch".*"object".* ]]; then
+      continue
+  fi
   if [[ "$MODEL" =~ .*"python".* ]]; then
       generate_python_models "$MODEL" "modelsv"
   else