From 99a3f44077d7f8f3934a08054c63c506ddfd1f32 Mon Sep 17 00:00:00 2001 From: Indrajit Bhosale Date: Mon, 3 Jun 2024 09:08:07 -0700 Subject: [PATCH] Added new flag for GPU peer access API control (#7261) Co-authored-by: Iman Tabrizian --- qa/L0_metrics/test.sh | 28 ++++++++++++++++++++++++++++ qa/L0_trace/test.sh | 1 + src/command_line_parser.cc | 18 +++++++++++++++++- src/command_line_parser.h | 1 + 4 files changed, 47 insertions(+), 1 deletion(-) diff --git a/qa/L0_metrics/test.sh b/qa/L0_metrics/test.sh index 61d8bbea3b..4b244f1ba2 100755 --- a/qa/L0_metrics/test.sh +++ b/qa/L0_metrics/test.sh @@ -140,6 +140,33 @@ kill $SERVER_PID wait $SERVER_PID set -e +# Peer access GPU memory utilization Test +# Custom Pinned memory pool size +export CUSTOM_PINNED_MEMORY_POOL_SIZE=0 # bytes +export CUDA_VISIBLE_DEVICES=0 +SERVER_LOG="gpu_peer_memory_test_server.log" +CLIENT_LOG="gpu_peer_memory_test_client.log" + +SERVER_ARGS="$BASE_SERVER_ARGS --model-control-mode=explicit --log-verbose=1 --pinned-memory-pool-byte-size=$CUSTOM_PINNED_MEMORY_POOL_SIZE --enable-peer-access=FALSE --cuda-memory-pool-byte-size 0:0 --log-verbose=1" +run_and_check_server +# Grep the GPU memory usage reported by nvidia-smi for the tritonserver process +memory_size_without_peering=$(nvidia-smi --query-compute-apps=pid,process_name,used_memory --format=csv,noheader,nounits | grep $(pgrep tritonserver) | awk '{print $3}') + +# nvidia-smi only lists processes that use GPU memory, so with --enable-peer-access=FALSE it may not list tritonserver at all +if [ -z $memory_size_without_peering ]; then + memory_size_without_peering=0 +fi + +kill $SERVER_PID +wait $SERVER_PID + +# Check that GPU memory usage was reduced to 0 when running with --enable-peer-access=FALSE +if [ $memory_size_without_peering -ne 0 ]; then + # GPU memory is still in use with peer access disabled: report the failure + echo "Disabling PEERING does not reduce GPU memory usage to ZERO" + echo -e "\n***\n*** GPU Peer enable failed. 
\n***" + RET=1 +fi ### GPU Metrics set +e @@ -411,3 +438,4 @@ else fi exit $RET + diff --git a/qa/L0_trace/test.sh b/qa/L0_trace/test.sh index 8a9172b02f..7d67afb3ba 100755 --- a/qa/L0_trace/test.sh +++ b/qa/L0_trace/test.sh @@ -777,6 +777,7 @@ SERVER_ARGS="--allow-sagemaker=true --model-control-mode=explicit \ --load-model=simple --load-model=ensemble_add_sub_int32_int32_int32 \ --load-model=repeat_int32 \ --load-model=input_all_required \ + --load-model=dynamic_batch \ --load-model=bls_simple --trace-config=level=TIMESTAMPS \ --load-model=trace_context --trace-config=rate=1 \ --trace-config=count=-1 --trace-config=mode=opentelemetry \ diff --git a/src/command_line_parser.cc b/src/command_line_parser.cc index 608d0bab03..53a103d33b 100644 --- a/src/command_line_parser.cc +++ b/src/command_line_parser.cc @@ -373,7 +373,8 @@ enum TritonOptionId { OPTION_BACKEND_CONFIG, OPTION_HOST_POLICY, OPTION_MODEL_LOAD_GPU_LIMIT, - OPTION_MODEL_NAMESPACING + OPTION_MODEL_NAMESPACING, + OPTION_ENABLE_PEER_ACCESS }; void @@ -461,6 +462,13 @@ TritonParser::SetupOptions() {OPTION_MODEL_NAMESPACING, "model-namespacing", Option::ArgBool, "Whether model namespacing is enable or not. If true, models with the " "same name can be served if they are in different namespace."}); + model_repo_options_.push_back( + {OPTION_ENABLE_PEER_ACCESS, "enable-peer-access", Option::ArgBool, + "Whether the server tries to enable peer access or not. Even when this " + "options is set to true, " + "peer access could still be not enabled because the underlying system " + "doesn't support it." + " The server will log a warning in this case. 
Default is true."}); #if defined(TRITON_ENABLE_HTTP) http_options_.push_back( @@ -1100,6 +1108,11 @@ TritonServerParameters::BuildTritonServerOptions() TRITONSERVER_ServerOptionsSetModelNamespacing( loptions, enable_model_namespacing_), "setting model namespacing"); + THROW_IF_ERR( + ParseException, + TRITONSERVER_ServerOptionsSetEnablePeerAccess( + loptions, enable_peer_access_), + "setting peer access"); #ifdef TRITON_ENABLE_LOGGING TRITONSERVER_ServerOptionsSetLogFile(loptions, log_file_.c_str()); @@ -1722,6 +1735,9 @@ TritonParser::Parse(int argc, char** argv) case OPTION_MODEL_NAMESPACING: lparams.enable_model_namespacing_ = ParseOption(optarg); break; + case OPTION_ENABLE_PEER_ACCESS: + lparams.enable_peer_access_ = ParseOption(optarg); + break; } } catch (const ParseException& pe) { diff --git a/src/command_line_parser.h b/src/command_line_parser.h index bf0cb72d3e..762ee87b6d 100644 --- a/src/command_line_parser.h +++ b/src/command_line_parser.h @@ -125,6 +125,7 @@ struct TritonServerParameters { // Model repository manager configuration bool enable_model_namespacing_{false}; + bool enable_peer_access_{true}; std::set model_repository_paths_{}; TRITONSERVER_ModelControlMode control_mode_{TRITONSERVER_MODEL_CONTROL_NONE}; std::set startup_models_{};