Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add trace mode and trace config entries in trace settings API #7050

Merged
merged 19 commits into from
Apr 11, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions qa/L0_trace/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,9 @@ fi
if [ `grep -c "\"trace_file\":\"trace_off_to_min.log\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"trace_mode\":\"TRITON\"" ./curl.out` != "1" ]; then
rmccorm4 marked this conversation as resolved.
Show resolved Hide resolved
RET=1
fi

send_inference_requests "client_min.log" 10

Expand Down Expand Up @@ -244,6 +247,9 @@ fi
if [ `grep -c "\"trace_file\":\"global_trace.log\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"trace_mode\":\"TRITON\"" ./curl.out` != "1" ]; then
RET=1
fi

# Use a different name
update_trace_setting "simple" '{"trace_file":"simple_trace.log","log_frequency":"2"}'
Expand All @@ -265,6 +271,9 @@ fi
if [ `grep -c "\"trace_file\":\"simple_trace.log\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"trace_mode\":\"TRITON\"" ./curl.out` != "1" ]; then
RET=1
fi

send_inference_requests "client_simple.log" 10

Expand Down Expand Up @@ -345,6 +354,9 @@ fi
if [ `grep -c "\"trace_file\":\"update_trace.log\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"trace_mode\":\"TRITON\"" ./curl.out` != "1" ]; then
RET=1
fi

# Send requests to simple where trace is explicitly disabled
send_inference_requests "client_update.log" 10
Expand Down Expand Up @@ -372,6 +384,9 @@ fi
if [ `grep -c "\"trace_file\":\"global_trace.log\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"trace_mode\":\"TRITON\"" ./curl.out` != "1" ]; then
RET=1
fi

# Send requests to simple where now uses global setting
send_inference_requests "client_clear.log" 5
Expand Down Expand Up @@ -440,6 +455,9 @@ fi
if [ `grep -c "\"trace_file\":\"global_count.log\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"trace_mode\":\"TRITON\"" ./curl.out` != "1" ]; then
RET=1
fi

# Set trace count
update_global_trace_setting '{"trace_count":"5"}'
Expand All @@ -461,6 +479,9 @@ fi
if [ `grep -c "\"trace_file\":\"global_count.log\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"trace_mode\":\"TRITON\"" ./curl.out` != "1" ]; then
RET=1
fi

# Send requests to simple where trace is explicitly disabled
send_inference_requests "client_update.log" 10
Expand All @@ -484,6 +505,9 @@ fi
if [ `grep -c "\"trace_file\":\"global_count.log\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"trace_mode\":\"TRITON\"" ./curl.out` != "1" ]; then
RET=1
fi

# Check if the indexed file has been generated when trace count reaches 0
if [ -f ./global_trace.log.0 ]; then
Expand Down Expand Up @@ -600,6 +624,9 @@ fi
if [ `grep -c "\"trace_file\":\"bls_trace.log\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"trace_mode\":\"TRITON\"" ./curl.out` != "1" ]; then
RET=1
fi

set +e
# Send bls requests to make sure simple model is traced
Expand Down Expand Up @@ -806,6 +833,18 @@ fi
if [ `grep -c "\"trace_count\":\"-1\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"trace_mode\":\"OPENTELEMETRY\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"bsp_max_export_batch_size\":\"512\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"bsp_schedule_delay\":\"5000\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"bsp_max_queue_size\":\"2048\"" ./curl.out` != "1" ]; then
RET=1
fi
indrajit96 marked this conversation as resolved.
Show resolved Hide resolved

set +e
# Send bls requests to make sure bls_simple model is NOT traced
Expand Down Expand Up @@ -869,6 +908,18 @@ fi
if [ `grep -c "\"trace_count\":\"1\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"trace_mode\":\"OPENTELEMETRY\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"bsp_max_export_batch_size\":\"512\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"bsp_schedule_delay\":\"5000\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"bsp_max_queue_size\":\"2048\"" ./curl.out` != "1" ]; then
RET=1
fi

set +e
# Send bls requests to make sure bls_simple model is NOT traced
Expand Down
28 changes: 26 additions & 2 deletions src/grpc/grpc_server.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1184,6 +1184,8 @@ CommonHandler::RegisterTrace()
int32_t count;
uint32_t log_frequency;
std::string filepath;
InferenceTraceMode trace_mode;
TraceConfigMap config_map;

if (!request.model_name().empty()) {
bool ready = false;
Expand Down Expand Up @@ -1391,7 +1393,8 @@ CommonHandler::RegisterTrace()
// Get current trace setting, this is needed even if the setting
// has been updated above as some values may not be provided in the request.
trace_manager_->GetTraceSetting(
request.model_name(), &level, &rate, &count, &log_frequency, &filepath);
request.model_name(), &level, &rate, &count, &log_frequency, &filepath,
&trace_mode, &config_map);
Comment on lines +1396 to +1397
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function is probably due for some cleanup in the near future, where you can encapsulate this growing list of parameters into a struct/class or something if makes sense like:

GetTraceSetting(model_name, &trace_settings)

rather than all the individual args.

However, I think this can be a follow-up cleanup rather than a blocker for this change. Feel free to add a JIRA ticket if you agree.

I think some other sections could probably be condensed or cleaned up as well if encapsulating the fields in an object.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes I will take this up in the Update PR for
https://jirasw.nvidia.com/browse/DLIS-4795

// level
{
inference::TraceSettingResponse::SettingValue level_setting;
Expand All @@ -1414,7 +1417,28 @@ CommonHandler::RegisterTrace()
(*response->mutable_settings())["log_frequency"].add_value(
std::to_string(log_frequency));
(*response->mutable_settings())["trace_file"].add_value(filepath);

(*response->mutable_settings())["trace_mode"].add_value(
trace_manager_->InferenceTraceModeString(trace_mode));
{
auto mode_key = std::to_string(trace_mode);
auto trace_options_it = config_map.find(mode_key);
if (trace_options_it != config_map.end()) {
for (const auto& [key, value] : trace_options_it->second) {
if ((key == "file") || (key == "log-frequency")) {
continue;
}
std::string valueAsString;
if (std::holds_alternative<std::string>(value)) {
valueAsString = std::get<std::string>(value);
} else if (std::holds_alternative<int>(value)) {
valueAsString = std::to_string(std::get<int>(value));
} else if (std::holds_alternative<uint32_t>(value)) {
valueAsString = std::to_string(std::get<uint32_t>(value));
}
(*response->mutable_settings())[key].add_value(valueAsString);
}
}
}
earlyexit:
GrpcStatusUtil::Create(status, err);
TRITONSERVER_ErrorDelete(err);
Expand Down
31 changes: 29 additions & 2 deletions src/http_server.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1772,6 +1772,9 @@ HTTPAPIServer::HandleTrace(evhtp_request_t* req, const std::string& model_name)
int32_t count;
uint32_t log_frequency;
std::string filepath;
InferenceTraceMode trace_mode;
TraceConfigMap config_map;

if (!model_name.empty()) {
bool ready = false;
RETURN_AND_RESPOND_IF_ERR(
Expand Down Expand Up @@ -1967,7 +1970,8 @@ HTTPAPIServer::HandleTrace(evhtp_request_t* req, const std::string& model_name)
// Get current trace setting, this is needed even if the setting
// has been updated above as some values may not be provided in the request.
trace_manager_->GetTraceSetting(
model_name, &level, &rate, &count, &log_frequency, &filepath);
model_name, &level, &rate, &count, &log_frequency, &filepath, &trace_mode,
&config_map);
triton::common::TritonJson::Value trace_response(
triton::common::TritonJson::ValueType::OBJECT);
// level
Expand Down Expand Up @@ -1996,7 +2000,29 @@ HTTPAPIServer::HandleTrace(evhtp_request_t* req, const std::string& model_name)
trace_response.AddString("log_frequency", std::to_string(log_frequency)));
RETURN_AND_RESPOND_IF_ERR(
req, trace_response.AddString("trace_file", filepath));

RETURN_AND_RESPOND_IF_ERR(
req,
trace_response.AddString(
"trace_mode", trace_manager_->InferenceTraceModeString(trace_mode)));
auto mode_key = std::to_string(trace_mode);
auto trace_options_it = config_map.find(mode_key);
Comment on lines +2051 to +2052
Copy link
Contributor

@rmccorm4 rmccorm4 Apr 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Food for thought in the future with Olga, but I found this code misleading to read.

Upon reading it, I naively thought std::to_string(trace_mode) would be something like "TRITON" or "OPENTELEMETRY", but it's actually "0" or "1" based on order in the enum. InferenceTraceModeString is the one that actually converts to a string.

TLDR; I found it confusing that the keys in this map are "0" and "1" rather than the actual string versions of the modes. The keys in the map could probably just be the enums themselves (TRACE_MODE_TRITON or TRACE_MODE_OPENTELEMETRY [interpreted as integer 0 or 1] rather than string 0 or 1), or the actual string representations ("triton" and "opentelemtry").

No action here, just mentioning something I found misleading to think about down the line.

if (trace_options_it != config_map.end()) {
for (const auto& [key, value] : trace_options_it->second) {
if ((key == "file") || (key == "log-frequency")) {
continue;
}
std::string valueAsString;
if (std::holds_alternative<std::string>(value)) {
valueAsString = std::get<std::string>(value);
} else if (std::holds_alternative<int>(value)) {
valueAsString = std::to_string(std::get<int>(value));
} else if (std::holds_alternative<uint32_t>(value)) {
valueAsString = std::to_string(std::get<uint32_t>(value));
}
RETURN_AND_RESPOND_IF_ERR(
req, trace_response.AddString(key.c_str(), valueAsString));
}
}
triton::common::TritonJson::WriteBuffer buffer;
RETURN_AND_RESPOND_IF_ERR(req, trace_response.Write(&buffer));
evbuffer_add(req->buffer_out, buffer.Base(), buffer.Size());
Expand Down Expand Up @@ -4570,6 +4596,7 @@ HTTPAPIServer::Handle(evhtp_request_t* req)
} else if (kind == "") {
// model metadata
HandleModelMetadata(req, model_name, version);

indrajit96 marked this conversation as resolved.
Show resolved Hide resolved
return;
}
}
Expand Down
5 changes: 4 additions & 1 deletion src/tracer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,8 @@ void
TraceManager::GetTraceSetting(
const std::string& model_name, TRITONSERVER_InferenceTraceLevel* level,
uint32_t* rate, int32_t* count, uint32_t* log_frequency,
std::string* filepath)
std::string* filepath, InferenceTraceMode* trace_mode,
TraceConfigMap* config_map)
{
std::shared_ptr<TraceSetting> trace_setting;
{
Expand All @@ -282,6 +283,8 @@ TraceManager::GetTraceSetting(
*count = trace_setting->count_;
*log_frequency = trace_setting->log_frequency_;
*filepath = trace_setting->file_->FileName();
*trace_mode = trace_setting->mode_;
*config_map = trace_setting->config_map_;
}

void
Expand Down
4 changes: 3 additions & 1 deletion src/tracer.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ namespace triton { namespace server {

using TraceConfig = std::vector<
std::pair<std::string, std::variant<std::string, int, uint32_t>>>;
// Key is trace mode,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not only trace mode, IIRC, for global settings the key is an empty string

using TraceConfigMap = std::unordered_map<std::string, TraceConfig>;
#if !defined(_WIN32) && defined(TRITON_ENABLE_TRACING)
using AbstractCarrier = otel_cntxt::propagation::TextMapCarrier;
Expand Down Expand Up @@ -168,7 +169,8 @@ class TraceManager {
void GetTraceSetting(
const std::string& model_name, TRITONSERVER_InferenceTraceLevel* level,
uint32_t* rate, int32_t* count, uint32_t* log_frequency,
std::string* filepath);
std::string* filepath, InferenceTraceMode* mode,
TraceConfigMap* config_map);

// Sets provided TraceSetting with correct trace settings for provided model.
void GetTraceSetting(
Expand Down
Loading