triton-inference-server · indrajit96 · Jul 12, 2024 · Jul 5, 2024 · Jul 8, 2024 · Jul 8, 2024
diff --git a/qa/L0_request_cancellation/scheduler_test.py b/qa/L0_request_cancellation/scheduler_test.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -31,6 +31,7 @@
 import unittest
 
 import numpy as np
+import requests
 import tritonclient.grpc as grpcclient
 from tritonclient.utils import InferenceServerException
 
@@ -84,6 +85,20 @@ def _assert_streaming_response_is_cancelled(self, response):
                     cancelled_count += 1
         self.assertEqual(cancelled_count, 1)
 
+    def _get_metrics(self):
+        """Fetch the server's metrics endpoint and return the response body as text.
+
+        Raises:
+            requests.HTTPError: if the endpoint answers with an error status.
+            requests.Timeout: if the server does not respond in time.
+        """
+        metrics_url = "http://localhost:8002/metrics"
+        # Bound the wait so an unresponsive server fails the test instead of
+        # hanging it forever (requests applies no timeout by default).
+        r = requests.get(metrics_url, timeout=10)
+        r.raise_for_status()
+        return r.text
+
+    def _assert_cancel_metrics_sequence_oldest(self, model_name, count, metrics):
+        """Assert that the CANCELED request-failure counter of a model equals count.
+
+        Args:
+            model_name: value of the "model" label of the series to look for.
+            count: expected value of that series.
+            metrics: metrics text to search, one sample per line.
+        """
+        series = f'nv_inference_request_failure{{model="{model_name}",reason="CANCELED",version="1"}} '
+        # Compare the sample value numerically on the matching line. A plain
+        # substring check would also accept e.g. "330" when 33 is expected.
+        samples = [
+            line[len(series) :].split()
+            for line in metrics.splitlines()
+            if line.startswith(series)
+        ]
+        observed = [float(fields[0]) for fields in samples if fields]
+        self.assertIn(
+            float(count), observed, f"{series}{count} not found in metrics"
+        )
+
+    def _assert_cancel_metrics_sequence_direct(self, model_name, count, metrics):
+        """Assert that the CANCELED request-failure counter of a model equals count.
+
+        Args:
+            model_name: value of the "model" label of the series to look for.
+            count: expected value of that series.
+            metrics: metrics text to search, one sample per line.
+        """
+        series = f'nv_inference_request_failure{{model="{model_name}",reason="CANCELED",version="1"}} '
+        # Compare the sample value numerically on the matching line. A plain
+        # substring check would also accept e.g. "40" when 4 is expected.
+        samples = [
+            line[len(series) :].split()
+            for line in metrics.splitlines()
+            if line.startswith(series)
+        ]
+        observed = [float(fields[0]) for fields in samples if fields]
+        self.assertIn(
+            float(count), observed, f"{series}{count} not found in metrics"
+        )
+
     # Test queued requests on dynamic batch scheduler can be cancelled
     def test_dynamic_batch_scheduler_request_cancellation(self):
         model_name = "dynamic_batch"
@@ -228,6 +243,15 @@ def test_scheduler_streaming_request_cancellation(self):
         time.sleep(2)  # ensure reaper thread has responded
         self._assert_streaming_response_is_cancelled(response)
 
+    def test_zerror_metrics(self):
+        """Check the CANCELED failure counters reported for the sequence models.
+
+        NOTE(review): the "z" in the name presumably makes unittest's
+        alphabetical ordering run this after the tests whose cancellations
+        are being counted here - confirm.
+        """
+        scraped = self._get_metrics()
+        expectations = (
+            # All cancellations for the sequence_oldest model: 2 * 16 + 1
+            (self._assert_cancel_metrics_sequence_oldest, "sequence_oldest", 33),
+            # All cancellations for the sequence_direct model: 2 * 2
+            (self._assert_cancel_metrics_sequence_direct, "sequence_direct", 4),
+        )
+        for check, model_name, cancelled in expectations:
+            check(model_name, cancelled, scraped)
+
 
 if __name__ == "__main__":
     unittest.main()