From 5732163a234127ad97833b4f65e14676443f1b1e Mon Sep 17 00:00:00 2001
From: Kris Hung
Date: Wed, 21 Feb 2024 17:29:15 -0800
Subject: [PATCH] Fix busyop test for L0_memory_growth (#6900)

* Switch to Python model for busyop test

* Clean up

* Address comment

* Remove unused import
---
 qa/L0_memory_growth/busy_op_test.py   | 52 +++++++++------------------
 qa/L0_memory_growth/test.sh           | 21 ++++++-----
 qa/python_models/busy_op/config.pbtxt | 52 +++++++++++++++++++++++++++
 qa/python_models/busy_op/model.py     | 49 +++++++++++++++++++++++++
 4 files changed, 128 insertions(+), 46 deletions(-)
 create mode 100644 qa/python_models/busy_op/config.pbtxt
 create mode 100644 qa/python_models/busy_op/model.py

diff --git a/qa/L0_memory_growth/busy_op_test.py b/qa/L0_memory_growth/busy_op_test.py
index 2814f38d8c..b7916090fa 100755
--- a/qa/L0_memory_growth/busy_op_test.py
+++ b/qa/L0_memory_growth/busy_op_test.py
@@ -1,6 +1,6 @@
 #!/usr/bin/python

-# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -30,9 +30,8 @@
 from builtins import range

 import numpy as np
-import tritongrpcclient as grpcclient
-import tritonhttpclient as httpclient
-from tritonclientutils import np_to_triton_dtype
+import tritonclient.http as httpclient
+from tritonclient.utils import np_to_triton_dtype

 FLAGS = None

@@ -54,15 +53,6 @@
         default="localhost:8000",
         help="Inference server URL. Default is localhost:8000.",
     )
-    parser.add_argument(
-        "-i",
-        "--protocol",
-        type=str,
-        required=False,
-        default="http",
-        help='Protocol ("http"/"grpc") used to '
-        + 'communicate with inference service. Default is "http".',
-    )
     parser.add_argument("-m", "--model", type=str, required=True, help="Name of model.")
     parser.add_argument(
         "-n",
@@ -71,38 +61,30 @@
         required=True,
         help="Number of asynchronous requests to launch.",
     )
-    parser.add_argument(
-        "-d",
-        "--delay",
-        type=int,
-        required=True,
-        help="Number of delay cycles to use as input to model.",
-    )

     FLAGS = parser.parse_args()
-    if (FLAGS.protocol != "http") and (FLAGS.protocol != "grpc"):
-        print(
-            'unexpected protocol "{}", expects "http" or "grpc"'.format(FLAGS.protocol)
-        )
-        exit(1)
-
-    client_util = httpclient if FLAGS.protocol == "http" else grpcclient

     # Run the busyop model which takes a delay as input.
    model_name = FLAGS.model

-    # Create the inference context for the model.
-    client = client_util.InferenceServerClient(FLAGS.url, verbose=FLAGS.verbose)
+    # Create the inference context for the model. The concurrency needs to
+    # be set based on the number of requests so that the delivery of the
+    # async requests is not blocked.
+    # See the comment for more details: https://github.com/triton-inference-server/client/blob/r24.02/src/python/library/tritonclient/http/_client.py#L1501
+    client = httpclient.InferenceServerClient(
+        FLAGS.url, verbose=FLAGS.verbose, concurrency=FLAGS.num_requests
+    )

     # Collect async requests here
     requests = []

-    # Create the data for the one input tensor
-    input_data = np.array([FLAGS.delay], dtype=np.int32)
+    # Create the data for the input tensor: 5M (5 * 1024 * 1024) FP32 elements, ~20 MB per request.
+    tensor_size = [1, 5 * 1024 * 1024]
+    input_data = np.random.randn(*tensor_size).astype(np.float32)
     inputs = [
-        client_util.InferInput(
-            "in", input_data.shape, np_to_triton_dtype(input_data.dtype)
+        httpclient.InferInput(
+            "INPUT0", input_data.shape, np_to_triton_dtype(input_data.dtype)
         )
     ]
     inputs[0].set_data_from_numpy(input_data)
@@ -110,8 +92,8 @@
     # Send requests
     for i in range(FLAGS.num_requests):
         requests.append(client.async_infer(model_name, inputs))
-        print("Sent request %d" % i)
+        print("Sent request %d" % i, flush=True)

     # wait for requests to finish
     for i in range(len(requests)):
         requests[i].get_result()
-        print("Received result %d" % i)
+        print("Received result %d" % i, flush=True)
diff --git a/qa/L0_memory_growth/test.sh b/qa/L0_memory_growth/test.sh
index 64277e6b6e..25f670f532 100755
--- a/qa/L0_memory_growth/test.sh
+++ b/qa/L0_memory_growth/test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -213,20 +213,18 @@ for MODEL in $(ls models); do
     set -e
 done

-# Next perform a test that has unbound memory growth. Use the busy op model
-# with a high delay in order to force requests to sit in the queue, and result
+# Next perform a test that has unbounded memory growth. Use the busy op Python model
+# with a sleep function in order to force requests to sit in the queue, resulting
 # in memory growth.
 BUSY_OP_TEST=busy_op_test.py
-DELAY_CYCLES=2100000000
 NUM_REQUESTS=100

 rm -rf test_repo && mkdir test_repo
-cp -r ${DATADIR}/qa_custom_ops/tf_custom_ops/graphdef_busyop test_repo/
+mkdir -p test_repo/busy_op/1/
+cp ../python_models/busy_op/model.py test_repo/busy_op/1/
+cp ../python_models/busy_op/config.pbtxt test_repo/busy_op

-# Explicitly set library path so custom ops can find TF
-LD_LIBRARY_PATH=/opt/tritonserver/backends/tensorflow:$LD_LIBRARY_PATH
 SERVER_ARGS="--model-repository=`pwd`/test_repo"
-SERVER_LD_PRELOAD="${DATADIR}/qa_custom_ops/tf_custom_ops/libbusyop.so"

 LEAKCHECK_LOG="test_busyop.valgrind.log"
 MASSIF_LOG="test_busyop.massif"
@@ -254,12 +252,12 @@ set +e
 # Run the busy_op test if no PTX issue was observed when launching server
 if [ $SKIP_BUSYOP -ne 1 ]; then
     SECONDS=0
-    python $BUSY_OP_TEST -v -m graphdef_busyop -d $DELAY_CYCLES -n $NUM_REQUESTS > $CLIENT_LOG 2>&1
+    python $BUSY_OP_TEST -v -m busy_op -n $NUM_REQUESTS > $CLIENT_LOG 2>&1
     TEST_RETCODE=$?
     TEST_DURATION=$SECONDS
     if [ ${TEST_RETCODE} -ne 0 ]; then
         cat $CLIENT_LOG
-        echo -e "\n***\n*** Test graphdef_busyop Failed\n***"
+        echo -e "\n***\n*** busy_op_test.py Failed\n***"
         RET=1
     fi
     set -e
@@ -291,12 +289,13 @@ if [ $SKIP_BUSYOP -ne 1 ]; then
     # Always output memory usage for easier triage of MAX_ALLOWED_ALLOC settings in the future
     grep -i "Change in memory allocation" "${CLIENT_LOG}" || true
 fi
+set -e

 if [ $RET -eq 0 ]; then
     echo -e "\n***\n*** Test Passed\n***"
 else
-    echo -e "\n***\n*** Test FAILED\n***"
+    echo -e "\n***\n*** Test Failed\n***"
 fi

 # Run only if both TRITON_FROM and TRITON_TO_DL are set
diff --git a/qa/python_models/busy_op/config.pbtxt b/qa/python_models/busy_op/config.pbtxt
new file mode 100644
index 0000000000..27f9003ab7
--- /dev/null
+++ b/qa/python_models/busy_op/config.pbtxt
@@ -0,0 +1,52 @@
+# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+name: "busy_op"
+backend: "python"
+max_batch_size: 1
+
+input [
+  {
+    name: "INPUT0"
+    data_type: TYPE_FP32
+    dims: [ -1 ]
+  }
+]
+
+output [
+  {
+    name: "OUTPUT0"
+    data_type: TYPE_FP32
+    dims: [ -1 ]
+  }
+]
+
+instance_group [
+  {
+    count: 1
+    kind: KIND_CPU
+  }
+]
diff --git a/qa/python_models/busy_op/model.py b/qa/python_models/busy_op/model.py
new file mode 100644
index 0000000000..a68343881b
--- /dev/null
+++ b/qa/python_models/busy_op/model.py
@@ -0,0 +1,49 @@
+# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+import time
+
+import triton_python_backend_utils as pb_utils
+
+
+class TritonPythonModel:
+    """
+    This model sleeps on the first request in order to force subsequent
+    requests to sit in the queue, resulting in memory growth.
+    """
+
+    def initialize(self, args):
+        self.sleep = True
+
+    def execute(self, requests):
+        if self.sleep:
+            time.sleep(50)
+            self.sleep = False
+        responses = []
+        for request in requests:
+            input_tensor = pb_utils.get_input_tensor_by_name(request, "INPUT0")
+            out_tensor = pb_utils.Tensor("OUTPUT0", input_tensor.as_numpy())
+            responses.append(pb_utils.InferenceResponse([out_tensor]))
+        return responses
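A note on why the client-side change works, based on the tritonclient comment linked in the patch: each async_infer() call in tritonclient.http occupies one connection from a pool whose size is the InferenceServerClient `concurrency` argument (default 1). Once every pooled connection has a request in flight, the next async_infer() call blocks until a response frees a connection, which is why the test sets concurrency equal to the number of requests. Below is a minimal sketch of that pattern, not part of the patch; it assumes a Triton server at localhost:8000 serving the busy_op model above, and uses an illustrative request count of 8 and a small input shape (the real test drives 100 requests via -n and a [1, 5 * 1024 * 1024] tensor):

    import numpy as np
    import tritonclient.http as httpclient
    from tritonclient.utils import np_to_triton_dtype

    NUM_REQUESTS = 8  # illustrative; busy_op_test.py is run with -n 100

    # One pooled connection per in-flight request, so async_infer() never
    # blocks waiting for a free connection.
    client = httpclient.InferenceServerClient(
        "localhost:8000", concurrency=NUM_REQUESTS
    )

    # A small FP32 tensor is enough to show the pattern; the test itself
    # sends 5 * 1024 * 1024 elements so every queued request pins ~20 MB.
    data = np.random.randn(1, 16).astype(np.float32)
    inputs = [
        httpclient.InferInput("INPUT0", data.shape, np_to_triton_dtype(data.dtype))
    ]
    inputs[0].set_data_from_numpy(data)

    # While the model sleeps on its first request, the remaining requests
    # accumulate in the server queue, which is the growth this test provokes.
    futures = [client.async_infer("busy_op", inputs) for _ in range(NUM_REQUESTS)]
    for f in futures:
        f.get_result()

Since the model sleeps only once, every request after the first drains quickly once the sleep ends, so the measured growth comes from queued requests rather than from a persistently slow model.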