feat: Support for request id field in generate API #7392

Merged: 9 commits, Jul 10, 2024
12 changes: 9 additions & 3 deletions docs/protocol/extension_generate.md
@@ -1,5 +1,5 @@
<!--
-# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -87,10 +87,12 @@ return an error.

$generate_request =
{
"id" : $string, #optional
"text_input" : $string,
"parameters" : $parameters #optional
}

* "id" : An identifier for this request. Optional; if specified, this identifier must be returned in the response.
* "text_input" : The text input that the model should generate output from.
* "parameters" : An optional object containing zero or more parameters for this
generate request expressed as key/value pairs. See
@@ -121,14 +123,15 @@ specification to set the parameters.
Below is an example of sending a generate request with the additional model parameters `stream` and `temperature`.

```
-$ curl -X POST localhost:8000/v2/models/mymodel/generate -d '{"text_input": "client input", "parameters": {"stream": false, "temperature": 0}}'
+$ curl -X POST localhost:8000/v2/models/mymodel/generate -d '{"id": "42", "text_input": "client input", "parameters": {"stream": false, "temperature": 0}}'

POST /v2/models/mymodel/generate HTTP/1.1
Host: localhost:8000
Content-Type: application/json
Content-Length: <xx>
{
-"text_input": "client input",
+"id" : "42",
+"text_input" : "client input",
"parameters" :
{
"stream": false,
```

@@ -145,11 +148,13 @@ the HTTP body.

$generate_response =
{
"id" : $string,
"model_name" : $string,
"model_version" : $string,
"text_output" : $string
}

* "id" : The "id" identifier given in the request, if any.
* "model_name" : The name of the model used for inference.
* "model_version" : The specific model version used for inference.
* "text_output" : The output of the inference.
@@ -159,6 +164,7 @@ the HTTP body.
```
200
{
"id" : "42",
"model_name" : "mymodel",
"model_version" : "1",
"text_output" : "model output"
```
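The request/response contract documented above can be sketched with a small helper. This is an illustrative sketch only; `build_generate_request` is a hypothetical name, not part of Triton's client API:

```python
import json


def build_generate_request(text_input, request_id=None, parameters=None):
    """Build a JSON body for POST /v2/models/<model>/generate.

    Per the schema above, "id" and "parameters" are optional and are
    included only when the caller supplies them.
    """
    body = {"text_input": text_input}
    if request_id is not None:
        body["id"] = request_id
    if parameters is not None:
        body["parameters"] = parameters
    return json.dumps(body)


# Matches the curl example above: an "id" of "42" plus two parameters.
body = build_generate_request(
    "client input",
    request_id="42",
    parameters={"stream": False, "temperature": 0},
)
print(body)
```

Because "id" is optional, omitting `request_id` simply leaves the field out of the body rather than sending an empty string.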
43 changes: 43 additions & 0 deletions qa/L0_http/generate_endpoint_test.py
@@ -142,6 +142,49 @@ def test_generate(self):
self.assertIn("TEXT", data)
self.assertEqual(text, data["TEXT"])

def test_request_id(self):
Review thread:

rmccorm4 (Contributor), Jul 9, 2024:
Thanks for adding the test! I will run a pipeline with these changes.
Pipeline ID: 16450437

rmccorm4 (Contributor), Jul 10, 2024:
Pipeline looks mostly good, and the new unit test passed. However, please update this variable from 15 to 16 to account for the new test added:
-EXPECTED_NUM_TESTS=15
+EXPECTED_NUM_TESTS=16

PR author (Contributor):
Thanks. Updated count to 16.

rmccorm4 (Contributor):
Thanks! Ran a new pipeline 16473896 and it passed 🚀 Once I verify the CLA, this can be merged 👍

# Setup text based input
text = "hello world"
request_id = "42"

# Test when request id in request body
inputs = {"PROMPT": text, "id": request_id, "STREAM": False}
r = self.generate(self._model_name, inputs)
r.raise_for_status()

self.assertIn("Content-Type", r.headers)
self.assertEqual(r.headers["Content-Type"], "application/json")

data = r.json()
self.assertIn("id", data)
self.assertEqual(request_id, data["id"])
self.assertIn("TEXT", data)
self.assertEqual(text, data["TEXT"])

# Test when request id not in request body
inputs = {"PROMPT": text, "STREAM": False}
r = self.generate(self._model_name, inputs)
r.raise_for_status()

self.assertIn("Content-Type", r.headers)
self.assertEqual(r.headers["Content-Type"], "application/json")

data = r.json()
self.assertNotIn("id", data)

# Test when request id is empty
inputs = {"PROMPT": text, "id": "", "STREAM": False}
r = self.generate(self._model_name, inputs)
r.raise_for_status()

self.assertIn("Content-Type", r.headers)
self.assertEqual(r.headers["Content-Type"], "application/json")

data = r.json()
self.assertNotIn("id", data)
self.assertIn("TEXT", data)
self.assertEqual(text, data["TEXT"])
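
The three cases this test exercises (id present, id absent, id empty) follow one rule: the request "id" is echoed into the response only when it is present and non-empty. A minimal sketch of that rule, with a hypothetical function name and not the server's actual C++ implementation:

```python
def response_with_optional_id(request_body, response_body):
    """Return a copy of response_body, adding "id" only when the
    request supplied a non-empty identifier (illustrative sketch)."""
    request_id = request_body.get("id", "")
    response = dict(response_body)
    if request_id:  # absent or empty string: no "id" in the response
        response["id"] = request_id
    return response


resp = {"model_name": "mymodel", "model_version": "1",
        "text_output": "model output"}
print(response_with_optional_id({"text_input": "hello world", "id": "42"}, resp))
```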

def test_generate_stream(self):
# Setup text-based input
text = "hello world"
2 changes: 1 addition & 1 deletion qa/L0_http/test.sh
@@ -662,7 +662,7 @@ fi
## Python Unit Tests
TEST_RESULT_FILE='test_results.txt'
PYTHON_TEST=generate_endpoint_test.py
-EXPECTED_NUM_TESTS=15
+EXPECTED_NUM_TESTS=16
set +e
python $PYTHON_TEST >$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
2 changes: 2 additions & 0 deletions src/http_server.cc
@@ -3327,6 +3327,8 @@ HTTPAPIServer::HandleGenerate(
// thus the string must live as long as the JSON message).
triton::common::TritonJson::Value request;
RETURN_AND_CALLBACK_IF_ERR(EVRequestToJson(req, &request), error_callback);
RETURN_AND_CALLBACK_IF_ERR(
ParseJsonTritonRequestID(request, irequest), error_callback);

RETURN_AND_CALLBACK_IF_ERR(
generate_request->ConvertGenerateRequest(