test: Test model_metrics config and document histogram buckets override #7752

Merged · 7 commits · Nov 6, 2024

Changes from 1 commit
19 changes: 18 additions & 1 deletion docs/user_guide/metrics.md
@@ -239,7 +239,24 @@ nv_inference_first_response_histogram_ms{model="my_model",version="1", le="5000"
nv_inference_first_response_histogram_ms{model="my_model",version="1", le="+Inf"} 37
```

Triton initializes histograms with default buckets for each, as shown above. Customization of buckets per metric is currently unsupported.
Triton initializes each histogram family with default buckets, as shown above.
Buckets can be overridden per family by specifying `model_metrics` in the
model configuration. For example:
```
// config.pbtxt
model_metrics {
metric_control: [
{
metric_identifier: {
family: "nv_inference_first_response_histogram_ms"
}
histogram_options: {
buckets: [ 1, 2, 4, 8 ]
}
}
]
}
```
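
With the override above, the scrape would expose one cumulative `_bucket` series per configured bound, plus `+Inf` (an illustrative example; the counts shown are hypothetical):
```
nv_inference_first_response_histogram_ms_bucket{model="my_model",version="1", le="1"} 0
nv_inference_first_response_histogram_ms_bucket{model="my_model",version="1", le="2"} 1
nv_inference_first_response_histogram_ms_bucket{model="my_model",version="1", le="4"} 2
nv_inference_first_response_histogram_ms_bucket{model="my_model",version="1", le="8"} 3
nv_inference_first_response_histogram_ms_bucket{model="my_model",version="1", le="+Inf"} 37
```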

#### Summaries

54 changes: 42 additions & 12 deletions qa/L0_metrics/histogram_metrics_test.py
@@ -40,31 +40,38 @@
import test_util as tu

MILLIS_PER_SEC = 1000
FIRST_RESPONSE_HISTOGRAM = "nv_inference_first_response_histogram_ms"


def get_histogram_metric_key(
metric_family, model_name, model_version, metric_type, le=""
):
if metric_type in ["count", "sum"]:
return f'{metric_family}_{metric_type}{{model="{model_name}",version="{model_version}"}}'
elif metric_type == "bucket":
return f'{metric_family}_{metric_type}{{model="{model_name}",version="{model_version}",le="{le}"}}'
else:
return None
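
For reference, a minimal sketch of the key this helper produces (hypothetical model name, following the Prometheus text format shown in the docs change above):

```python
# Hypothetical usage of get_histogram_metric_key as defined above:
key = get_histogram_metric_key(
    "nv_inference_first_response_histogram_ms", "my_model", "1", "bucket", le="1"
)
# key == 'nv_inference_first_response_histogram_ms_bucket{model="my_model",version="1",le="1"}'
```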


class TestHistogramMetrics(tu.TestResultCollector):
class HistogramMetricsTest(tu.TestResultCollector):
def setUp(self):
self.tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost")

def get_histogram_metrics(self, metric_family: str):
def get_metrics(self):
r = requests.get(f"http://{self.tritonserver_ipaddr}:8002/metrics")
r.raise_for_status()
return r.text

def get_histogram_metrics(self, metric_family: str):
# Regular expression matching every line of the given metric family
pattern = f"^{metric_family}.*"
histogram_dict = {}

metrics = self.get_metrics()

# Find all matches in the text
matches = re.findall(pattern, r.text, re.MULTILINE)
matches = re.findall(pattern, metrics, re.MULTILINE)

for match in matches:
key, value = match.rsplit(" ")
@@ -135,24 +142,23 @@ def test_ensemble_decoupled(self):
)

# Checks metrics output
first_response_family = "nv_inference_first_response_histogram_ms"
histogram_dict = self.get_histogram_metrics(first_response_family)
histogram_dict = self.get_histogram_metrics(FIRST_RESPONSE_HISTOGRAM)

def check_existing_metrics(model_name, wait_secs_per_req, delta):
metric_count = get_histogram_metric_key(
first_response_family, model_name, "1", "count"
FIRST_RESPONSE_HISTOGRAM, model_name, "1", "count"
)
model_sum = get_histogram_metric_key(
first_response_family, model_name, "1", "sum"
metric_sum = get_histogram_metric_key(
FIRST_RESPONSE_HISTOGRAM, model_name, "1", "sum"
)
# Test histogram count
self.assertIn(metric_count, histogram_dict)
self.assertEqual(histogram_dict[metric_count], request_num)
# Test histogram sum
self.assertIn(model_sum, histogram_dict)
self.assertIn(metric_sum, histogram_dict)
self.assertTrue(
wait_secs_per_req * MILLIS_PER_SEC * request_num
<= histogram_dict[model_sum]
<= histogram_dict[metric_sum]
< (wait_secs_per_req + delta) * MILLIS_PER_SEC * request_num
)
# Prometheus histogram buckets are tested in metrics_api_test.cc::HistogramAPIHelper
@@ -165,14 +171,38 @@ def check_existing_metrics(model_name, wait_secs_per_req, delta):

# Test non-decoupled model metrics
non_decoupled_model_count = get_histogram_metric_key(
first_response_family, non_decoupled_model_name, "1", "count"
FIRST_RESPONSE_HISTOGRAM, non_decoupled_model_name, "1", "count"
)
non_decoupled_model_sum = get_histogram_metric_key(
first_response_family, non_decoupled_model_name, "1", "sum"
FIRST_RESPONSE_HISTOGRAM, non_decoupled_model_name, "1", "sum"
)
self.assertNotIn(non_decoupled_model_count, histogram_dict)
self.assertNotIn(non_decoupled_model_sum, histogram_dict)

def test_buckets_override(self):
model_name = "async_execute_decouple"
metrics = self.get_metrics()
override_buckets = os.environ["OVERRIDE_BUCKETS"].split(",") + ["+Inf"]

# Check metric output
self.assertEqual(
metrics.count(FIRST_RESPONSE_HISTOGRAM + "_bucket"), len(override_buckets)
)
for le in override_buckets:
bucket_key = get_histogram_metric_key(
FIRST_RESPONSE_HISTOGRAM, model_name, "1", "bucket", le
)
self.assertIn(bucket_key, metrics)

def test_server_log_error(self):
# Test error in server log
with open(os.environ["SERVER_LOG"]) as f:
server_log = f.read()
server_log_error = os.environ["SERVER_LOG_ERROR"]
self.assertIn(server_log_error, server_log)


if __name__ == "__main__":
unittest.main()
2 changes: 1 addition & 1 deletion qa/L0_metrics/metrics_config_test.py
@@ -102,7 +102,7 @@ def test_cache_counters_missing(self):
def test_inf_histograms_decoupled_exist(self):
metrics = self._get_metrics()
for metric in INF_HISTOGRAM_DECOUPLED_PATTERNS:
for suffix in ["_count", "_sum", ""]:
for suffix in ["_count", "_sum", "_bucket"]:
self.assertIn(metric + suffix, metrics)

def test_inf_histograms_decoupled_missing(self):
117 changes: 109 additions & 8 deletions qa/L0_metrics/test.sh
@@ -46,6 +46,7 @@ BASE_SERVER_ARGS="--model-repository=${MODELDIR}"
SERVER_ARGS="${BASE_SERVER_ARGS}"
SERVER_LOG="./inference_server.log"
PYTHON_TEST="metrics_config_test.py"
HISTOGRAM_PYTEST="histogram_metrics_test.py"
source ../common/util.sh

CLIENT_LOG="client.log"
@@ -301,12 +302,12 @@ check_unit_test
kill_server

# Check default settings: Histograms should be disabled in decoupled model
decoupled_model_name="async_execute_decouple"
mkdir -p "${MODELDIR}/${decoupled_model_name}/1/"
cp ../python_models/${decoupled_model_name}/model.py ${MODELDIR}/${decoupled_model_name}/1/
cp ../python_models/${decoupled_model_name}/config.pbtxt ${MODELDIR}/${decoupled_model_name}/
decoupled_model="async_execute_decouple"
mkdir -p "${MODELDIR}/${decoupled_model}/1/"
cp ../python_models/${decoupled_model}/model.py ${MODELDIR}/${decoupled_model}/1/
cp ../python_models/${decoupled_model}/config.pbtxt ${MODELDIR}/${decoupled_model}/

SERVER_ARGS="${BASE_SERVER_ARGS} --load-model=${decoupled_model_name}"
SERVER_ARGS="${BASE_SERVER_ARGS} --load-model=${decoupled_model}"
run_and_check_server
python3 ${PYTHON_TEST} MetricsConfigTest.test_inf_counters_exist 2>&1 | tee ${CLIENT_LOG}
check_unit_test
@@ -321,7 +322,7 @@ check_unit_test
kill_server

# Enable histograms in decoupled model
SERVER_ARGS="${BASE_SERVER_ARGS} --load-model=${decoupled_model_name} --metrics-config histogram_latencies=true"
SERVER_ARGS="${BASE_SERVER_ARGS} --load-model=${decoupled_model} --metrics-config histogram_latencies=true"
run_and_check_server
python3 ${PYTHON_TEST} MetricsConfigTest.test_inf_counters_exist 2>&1 | tee ${CLIENT_LOG}
check_unit_test
@@ -460,17 +461,117 @@ check_unit_test "${expected_tests}"

### Test histogram data in ensemble decoupled model ###
MODELDIR="${PWD}/ensemble_decoupled"
SERVER_LOG="./histogram_ensemble_decoupled_server.log"
CLIENT_LOG="./histogram_ensemble_decoupled_client.log"
SERVER_ARGS="--model-repository=${MODELDIR} --metrics-config histogram_latencies=true --log-verbose=1"
PYTHON_TEST="histogram_metrics_test.py"
mkdir -p "${MODELDIR}"/ensemble/1
cp -r "${MODELDIR}"/async_execute_decouple "${MODELDIR}"/async_execute
sed -i "s/model_transaction_policy { decoupled: True }//" "${MODELDIR}"/async_execute/config.pbtxt

run_and_check_server
python3 ${PYTHON_TEST} 2>&1 | tee ${CLIENT_LOG}
python3 ${HISTOGRAM_PYTEST} HistogramMetricsTest.test_ensemble_decoupled 2>&1 | tee ${CLIENT_LOG}
kill_server
check_unit_test

### Test model metrics configuration ###
MODELDIR="${PWD}/model_metrics_model"
export SERVER_LOG="./model_metric_config_server.log"
CLIENT_LOG="./model_metric_config_client.log"
decoupled_model="async_execute_decouple"
SERVER_ARGS="--model-repository=${MODELDIR} --model-control-mode=explicit --load-model=${decoupled_model} --metrics-config histogram_latencies=true --log-verbose=1"
rm -rf "${MODELDIR}/${decoupled_model}"
mkdir -p "${MODELDIR}/${decoupled_model}/1/"
cp ../python_models/${decoupled_model}/model.py ${MODELDIR}/${decoupled_model}/1/

# Test valid model_metrics config
cp ../python_models/${decoupled_model}/config.pbtxt ${MODELDIR}/${decoupled_model}/
cat >> "${MODELDIR}/${decoupled_model}/config.pbtxt" << EOL
model_metrics {
metric_control: [
{
metric_identifier: {
family: "nv_inference_first_response_histogram_ms"
}
histogram_options: {
buckets: [ 1.0, 2.0, 4.0, 8.0 ]
}
}
]
}
EOL

run_and_check_server
export OVERRIDE_BUCKETS="1,2,4,8"
python3 ${HISTOGRAM_PYTEST} HistogramMetricsTest.test_buckets_override 2>&1 | tee ${CLIENT_LOG}
check_unit_test
kill_server
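
Outside the test harness, the override can be spot-checked against the live metrics endpoint (a minimal sketch, assuming the default metrics port 8002 used elsewhere in these tests):

```python
import requests

# Scrape the Prometheus metrics endpoint (port 8002 per the test defaults)
metrics = requests.get("http://localhost:8002/metrics").text

# Expect one cumulative _bucket series per override bound, plus +Inf
for le in ["1", "2", "4", "8", "+Inf"]:
    assert f'le="{le}"' in metrics, f"missing bucket bound {le}"
```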

# Test invalid model_metrics configs
cp ../python_models/${decoupled_model}/config.pbtxt ${MODELDIR}/${decoupled_model}/
cat >> "${MODELDIR}/${decoupled_model}/config.pbtxt" << EOL
model_metrics {
metric_control: [{}]
}
EOL

run_server
if [ "$SERVER_PID" != "0" ]; then
echo -e "*** FAILED: unexpected success starting $SERVER" >> $CLIENT_LOG
RET=1
kill_server
else
export SERVER_LOG_ERROR="model_control must specify 'metric_identifier'"
python3 ${HISTOGRAM_PYTEST} HistogramMetricsTest.test_server_log_error 2>&1 | tee ${CLIENT_LOG}
check_unit_test
fi

# Test invalid model_metrics messages
cp ../python_models/${decoupled_model}/config.pbtxt ${MODELDIR}/${decoupled_model}/
cat >> "${MODELDIR}/${decoupled_model}/config.pbtxt" << EOL
model_metrics {
metric_control: [
{
metric_identifier: {}
}
]
}
EOL

run_server
if [ "$SERVER_PID" != "0" ]; then
echo -e "*** FAILED: unexpected success starting $SERVER" >> $CLIENT_LOG
RET=1
kill_server
else
export SERVER_LOG_ERROR="metric_identifier must specify 'family'"
python3 ${HISTOGRAM_PYTEST} HistogramMetricsTest.test_server_log_error 2>&1 | tee ${CLIENT_LOG}
check_unit_test
fi

cp ../python_models/${decoupled_model}/config.pbtxt ${MODELDIR}/${decoupled_model}/
cat >> "${MODELDIR}/${decoupled_model}/config.pbtxt" << EOL
model_metrics {
metric_control: [
{
metric_identifier: {
family: ""
}
}
]
}
EOL

run_server
if [ "$SERVER_PID" != "0" ]; then
echo -e "*** FAILED: unexpected success starting $SERVER" >> $CLIENT_LOG
RET=1
kill_server
else
export SERVER_LOG_ERROR="metric_identifier must specify 'family'"
python3 ${HISTOGRAM_PYTEST} HistogramMetricsTest.test_server_log_error 2>&1 | tee ${CLIENT_LOG}
check_unit_test
fi

if [ $RET -eq 0 ]; then
echo -e "\n***\n*** Test Passed\n***"
else