Added new flag for GPU peer access API control #7261

Merged: 11 commits, Jun 3, 2024
28 changes: 28 additions & 0 deletions qa/L0_metrics/test.sh
@@ -140,6 +140,33 @@ kill $SERVER_PID
wait $SERVER_PID
set -e

# GPU peer access memory utilization test
# Custom pinned memory pool size
export CUSTOM_PINNED_MEMORY_POOL_SIZE=0 # bytes
export CUDA_VISIBLE_DEVICES=0
SERVER_LOG="gpu_peer_memory_test_server.log"
CLIENT_LOG="gpu_peer_memory_test_client.log"

SERVER_ARGS="$BASE_SERVER_ARGS --model-control-mode=explicit --log-verbose=1 --pinned-memory-pool-byte-size=$CUSTOM_PINNED_MEMORY_POOL_SIZE --enable-peer-access=FALSE --cuda-memory-pool-byte-size 0:0 --log-verbose=1"
run_and_check_server
# Grab Triton server GPU memory usage stats from nvidia-smi
memory_size_without_peering=$(nvidia-smi --query-compute-apps=pid,process_name,used_memory --format=csv,noheader,nounits | grep $(pgrep tritonserver) | awk '{print $3}')

# nvidia-smi only lists processes that use GPU memory; with --enable-peer-access=FALSE
# it may not list tritonserver at all, so treat an empty result as zero usage.
if [ -z "$memory_size_without_peering" ]; then
    memory_size_without_peering=0
fi

kill $SERVER_PID
wait $SERVER_PID

# Check that GPU memory usage has dropped to 0 with --enable-peer-access=FALSE
if [ "$memory_size_without_peering" -ne 0 ]; then
    # Report the unexpected GPU memory usage
    echo "Disabling peer access did not reduce GPU memory usage to zero"
    echo -e "\n***\n*** GPU peer access test FAILED. \n***"
    RET=1
fi

### GPU Metrics
set +e
@@ -411,3 +438,4 @@ else
fi

exit $RET
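For context on the peer-access test above: "peer access" refers to CUDA GPU-to-GPU memory access. Enabling it initializes CUDA state on the participating devices, which is why a server started with --enable-peer-access=FALSE can show no GPU memory usage in nvidia-smi. The following is a minimal, illustrative C++ sketch of the underlying CUDA runtime calls; it is not the code path Triton core uses, and the device indices 0 and 1 are assumptions:

```cpp
#include <cuda_runtime.h>
#include <cstdio>

int main() {
  // Assumed topology: at least two GPUs, devices 0 and 1.
  int can_access = 0;
  cudaError_t err =
      cudaDeviceCanAccessPeer(&can_access, /*device=*/0, /*peerDevice=*/1);
  if (err != cudaSuccess || !can_access) {
    std::printf("Peer access 0 -> 1 not supported on this system\n");
    return 0;
  }

  // Enabling peer access initializes a CUDA context and peer mappings on the
  // devices involved; skipping this step is what keeps the reported GPU
  // memory usage at (or near) zero in the test above.
  cudaSetDevice(0);
  err = cudaDeviceEnablePeerAccess(/*peerDevice=*/1, /*flags=*/0);
  std::printf("Enable peer access 0 -> 1: %s\n", cudaGetErrorString(err));
  return 0;
}
```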

1 change: 1 addition & 0 deletions qa/L0_trace/test.sh
@@ -777,6 +777,7 @@ SERVER_ARGS="--allow-sagemaker=true --model-control-mode=explicit \
--load-model=simple --load-model=ensemble_add_sub_int32_int32_int32 \
--load-model=repeat_int32 \
--load-model=input_all_required \
--load-model=dynamic_batch \
Member:
Were we missing this model before? Is it correctly added?

Contributor Author:
Yes, this is a breakage caused by one of my changes in L0_trace. I tested the pipeline and this change fixes the currently failing L0_trace.

--load-model=bls_simple --trace-config=level=TIMESTAMPS \
--load-model=trace_context --trace-config=rate=1 \
--trace-config=count=-1 --trace-config=mode=opentelemetry \
18 changes: 17 additions & 1 deletion src/command_line_parser.cc
@@ -373,7 +373,8 @@ enum TritonOptionId {
OPTION_BACKEND_CONFIG,
OPTION_HOST_POLICY,
OPTION_MODEL_LOAD_GPU_LIMIT,
OPTION_MODEL_NAMESPACING
OPTION_MODEL_NAMESPACING,
OPTION_ENABLE_PEER_ACCESS
};

void
@@ -461,6 +462,13 @@ TritonParser::SetupOptions()
{OPTION_MODEL_NAMESPACING, "model-namespacing", Option::ArgBool,
"Whether model namespacing is enable or not. If true, models with the "
"same name can be served if they are in different namespace."});
model_repo_options_.push_back(
{OPTION_ENABLE_PEER_ACCESS, "enable-peer-access", Option::ArgBool,
"Whether the server tries to enable peer access or not. Even when this "
"option is set to true, peer access may still not be enabled because the "
"underlying system doesn't support it. The server will log a warning in "
"this case. Default is true."});

#if defined(TRITON_ENABLE_HTTP)
http_options_.push_back(
@@ -1100,6 +1108,11 @@ TritonServerParameters::BuildTritonServerOptions()
TRITONSERVER_ServerOptionsSetModelNamespacing(
loptions, enable_model_namespacing_),
"setting model namespacing");
THROW_IF_ERR(
ParseException,
TRITONSERVER_ServerOptionsSetEnablePeerAccess(
loptions, enable_peer_access_),
"setting peer access");

#ifdef TRITON_ENABLE_LOGGING
TRITONSERVER_ServerOptionsSetLogFile(loptions, log_file_.c_str());
@@ -1722,6 +1735,9 @@ TritonParser::Parse(int argc, char** argv)
case OPTION_MODEL_NAMESPACING:
lparams.enable_model_namespacing_ = ParseOption<bool>(optarg);
break;
case OPTION_ENABLE_PEER_ACCESS:
lparams.enable_peer_access_ = ParseOption<bool>(optarg);
break;
}
}
catch (const ParseException& pe) {
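Beyond the command-line flag, the same setting flows into the in-process C API through TRITONSERVER_ServerOptionsSetEnablePeerAccess, as the BuildTritonServerOptions change above shows. Below is a minimal sketch of how an embedding application might disable peer access when building server options; the model repository path is a placeholder and error handling is abbreviated:

```cpp
#include "triton/core/tritonserver.h"

#include <iostream>

int main() {
  TRITONSERVER_ServerOptions* options = nullptr;
  TRITONSERVER_Error* err = TRITONSERVER_ServerOptionsNew(&options);

  if (err == nullptr) {
    // Placeholder path; point this at a real model repository.
    err = TRITONSERVER_ServerOptionsSetModelRepositoryPath(options, "/models");
  }
  if (err == nullptr) {
    // Equivalent to --enable-peer-access=false on the command line: the
    // server will not attempt to enable GPU peer access at startup.
    err = TRITONSERVER_ServerOptionsSetEnablePeerAccess(options, false);
  }

  TRITONSERVER_Server* server = nullptr;
  if (err == nullptr) {
    err = TRITONSERVER_ServerNew(&server, options);
  }
  TRITONSERVER_ServerOptionsDelete(options);

  if (err != nullptr) {
    std::cerr << "error: " << TRITONSERVER_ErrorMessage(err) << std::endl;
    TRITONSERVER_ErrorDelete(err);
    return 1;
  }

  // ... serve requests, then shut down ...
  TRITONSERVER_ServerDelete(server);
  return 0;
}
```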
1 change: 1 addition & 0 deletions src/command_line_parser.h
@@ -125,6 +125,7 @@ struct TritonServerParameters {

// Model repository manager configuration
bool enable_model_namespacing_{false};
bool enable_peer_access_{true};
std::set<std::string> model_repository_paths_{};
TRITONSERVER_ModelControlMode control_mode_{TRITONSERVER_MODEL_CONTROL_NONE};
std::set<std::string> startup_models_{};