Skip to content

Commit

Permalink
Added new flag for GPU peer access API control (#7261)
Browse files Browse the repository at this point in the history
Co-authored-by: Iman Tabrizian <[email protected]>
  • Loading branch information
indrajit96 and Tabrizian authored Jun 3, 2024
1 parent 0df1fe7 commit 99a3f44
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 1 deletion.
28 changes: 28 additions & 0 deletions qa/L0_metrics/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,33 @@ kill $SERVER_PID
wait $SERVER_PID
set -e

# Peer access GPU memory utilization test.
#
# Starts the server with --enable-peer-access=FALSE, a zero-byte pinned memory
# pool and a zero-byte CUDA memory pool on device 0, then checks through
# nvidia-smi that the tritonserver process holds no GPU memory.
export CUSTOM_PINNED_MEMORY_POOL_SIZE=0 # bytes
# NOTE(review): this export stays in effect for the sections that follow unless
# they set CUDA_VISIBLE_DEVICES themselves - confirm that is intended.
export CUDA_VISIBLE_DEVICES=0
SERVER_LOG="gpu_peer_memory_test_server.log"
CLIENT_LOG="gpu_peer_memory_test_client.log"

SERVER_ARGS="$BASE_SERVER_ARGS --model-control-mode=explicit --log-verbose=1 --pinned-memory-pool-byte-size=$CUSTOM_PINNED_MEMORY_POOL_SIZE --enable-peer-access=FALSE --cuda-memory-pool-byte-size 0:0"
run_and_check_server

# Collect the tritonserver PID(s) as a comma-separated list so they can be
# matched exactly against the pid column reported by nvidia-smi.
triton_pids=$(pgrep tritonserver | paste -sd, -)

# Sum the GPU memory reported by nvidia-smi for the tritonserver PID(s).
# nvidia-smi only lists processes that use GPU memory, so with
# --enable-peer-access=FALSE tritonserver may not be listed at all; the awk
# program then prints 0. Matching on the exact pid field (rather than grepping
# for the PID as a substring) avoids false hits on other PIDs or on the memory
# column, and summing keeps the result a single integer even when more than one
# row matches.
memory_size_without_peering=$(nvidia-smi --query-compute-apps=pid,used_memory --format=csv,noheader,nounits | awk -F', *' -v pids="$triton_pids" '
    BEGIN { count = split(pids, list, ","); for (i = 1; i <= count; i++) wanted[list[i]] = 1 }
    ($1 in wanted) { total += $2 }
    END { print total + 0 }')

# Defensive default in case the query produced no output at all.
memory_size_without_peering=${memory_size_without_peering:-0}

kill $SERVER_PID
wait $SERVER_PID

# With peer access disabled (and both memory pools set to 0) the server is
# expected to use no GPU memory; any non-zero value fails the test.
if [ "$memory_size_without_peering" -ne 0 ]; then
    echo "Disabling PEERING does not reduce GPU memory usage to ZERO"
    echo -e "\n***\n*** GPU Peer enable failed. \n***"
    RET=1
fi

### GPU Metrics
set +e
Expand Down Expand Up @@ -411,3 +438,4 @@ else
fi

exit $RET

1 change: 1 addition & 0 deletions qa/L0_trace/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -777,6 +777,7 @@ SERVER_ARGS="--allow-sagemaker=true --model-control-mode=explicit \
--load-model=simple --load-model=ensemble_add_sub_int32_int32_int32 \
--load-model=repeat_int32 \
--load-model=input_all_required \
--load-model=dynamic_batch \
--load-model=bls_simple --trace-config=level=TIMESTAMPS \
--load-model=trace_context --trace-config=rate=1 \
--trace-config=count=-1 --trace-config=mode=opentelemetry \
Expand Down
18 changes: 17 additions & 1 deletion src/command_line_parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,8 @@ enum TritonOptionId {
OPTION_BACKEND_CONFIG,
OPTION_HOST_POLICY,
OPTION_MODEL_LOAD_GPU_LIMIT,
OPTION_MODEL_NAMESPACING
OPTION_MODEL_NAMESPACING,
OPTION_ENABLE_PEER_ACCESS
};

void
Expand Down Expand Up @@ -461,6 +462,13 @@ TritonParser::SetupOptions()
{OPTION_MODEL_NAMESPACING, "model-namespacing", Option::ArgBool,
"Whether model namespacing is enable or not. If true, models with the "
"same name can be served if they are in different namespace."});
model_repo_options_.push_back(
{OPTION_ENABLE_PEER_ACCESS, "enable-peer-access", Option::ArgBool,
"Whether the server tries to enable peer access or not. Even when this "
"options is set to true, "
"peer access could still be not enabled because the underlying system "
"doesn't support it."
" The server will log a warning in this case. Default is true."});

#if defined(TRITON_ENABLE_HTTP)
http_options_.push_back(
Expand Down Expand Up @@ -1100,6 +1108,11 @@ TritonServerParameters::BuildTritonServerOptions()
TRITONSERVER_ServerOptionsSetModelNamespacing(
loptions, enable_model_namespacing_),
"setting model namespacing");
THROW_IF_ERR(
ParseException,
TRITONSERVER_ServerOptionsSetEnablePeerAccess(
loptions, enable_peer_access_),
"setting peer access");

#ifdef TRITON_ENABLE_LOGGING
TRITONSERVER_ServerOptionsSetLogFile(loptions, log_file_.c_str());
Expand Down Expand Up @@ -1722,6 +1735,9 @@ TritonParser::Parse(int argc, char** argv)
case OPTION_MODEL_NAMESPACING:
lparams.enable_model_namespacing_ = ParseOption<bool>(optarg);
break;
case OPTION_ENABLE_PEER_ACCESS:
lparams.enable_peer_access_ = ParseOption<bool>(optarg);
break;
}
}
catch (const ParseException& pe) {
Expand Down
1 change: 1 addition & 0 deletions src/command_line_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ struct TritonServerParameters {

// Model repository manager configuration
bool enable_model_namespacing_{false};
bool enable_peer_access_{true};
std::set<std::string> model_repository_paths_{};
TRITONSERVER_ModelControlMode control_mode_{TRITONSERVER_MODEL_CONTROL_NONE};
std::set<std::string> startup_models_{};
Expand Down

0 comments on commit 99a3f44

Please sign in to comment.