Skip to content

Commit

Permalink
chore: Update MLServer protobuf (#45)
Browse files Browse the repository at this point in the history
#### Motivation

Related to [updating the MLServer runtime image](kserve/modelmesh-serving#355), the `ModelRepository` endpoint was deprecated. 

References:
kserve/modelmesh-serving#159
SeldonIO/MLServer#616

#### Modifications
- Updated protobuf 
- Updated mock server testing
- Updated runtime-adapter code to call new endpoint

#### Result
- MLServer runtime adapter no longer uses the deprecated model repository API

Signed-off-by: Rafael Vasquez <[email protected]>
  • Loading branch information
rafvasq authored May 26, 2023
1 parent 600f092 commit f9dc1dc
Show file tree
Hide file tree
Showing 10 changed files with 1,631 additions and 1,513 deletions.
1,390 changes: 1,102 additions & 288 deletions internal/proto/mlserver/dataplane/dataplane.pb.go

Large diffs are not rendered by default.

159 changes: 158 additions & 1 deletion internal/proto/mlserver/dataplane/dataplane.proto
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copied from https://github.com/SeldonIO/MLServer/blob/9a7c77f93dd61ac395e389280195d8adc413aad7/proto/dataplane.proto
// Copied from https://github.com/SeldonIO/MLServer/blob/1.2.4/proto/dataplane.proto
syntax = "proto3";

package inference;
Expand All @@ -25,6 +25,18 @@ service GRPCInferenceService

// Perform inference using a specific model.
rpc ModelInfer(ModelInferRequest) returns (ModelInferResponse) {}

// Get the index of model repository contents.
rpc RepositoryIndex(RepositoryIndexRequest)
returns (RepositoryIndexResponse) {}

// Load or reload a model from a repository.
rpc RepositoryModelLoad(RepositoryModelLoadRequest)
returns (RepositoryModelLoadResponse) {}

// Unload a model.
rpc RepositoryModelUnload(RepositoryModelUnloadRequest)
returns (RepositoryModelUnloadResponse) {}
}


Expand Down Expand Up @@ -113,6 +125,10 @@ message ModelMetadataResponse
// The tensor shape. A variable-size dimension is represented
// by a -1 value.
repeated int64 shape = 3;

// Optional default parameters for input.
// NOTE: This is an extension to the standard
map<string, InferParameter> parameters = 4;
}

// The model name.
Expand All @@ -129,6 +145,10 @@ message ModelMetadataResponse

// The model's outputs.
repeated TensorMetadata outputs = 5;

// Optional default parameters for the request / response.
// NOTE: This is an extension to the standard
map<string, InferParameter> parameters = 6;
}

//
Expand All @@ -152,6 +172,8 @@ message ModelInferRequest
map<string, InferParameter> parameters = 4;

// The input tensor data.
// This field must not be specified if tensor contents are being specified
// in ModelInferRequest.raw_input_contents.
InferTensorContents contents = 5;
}

Expand Down Expand Up @@ -185,6 +207,25 @@ message ModelInferRequest
// The requested output tensors for the inference. Optional, if not
// specified all outputs produced by the model will be returned.
repeated InferRequestedOutputTensor outputs = 6;

// The data contained in an input tensor can be represented in "raw" bytes
// form or in the repeated type that matches the tensor's data type. Using
// the "raw" bytes form will typically allow higher performance due to the
// way protobuf allocation and reuse interacts with GRPC. For example, see
// https://github.com/grpc/grpc/issues/23231.
//
// To use the raw representation 'raw_input_contents' must be initialized
// with data for each tensor in the same order as 'inputs'. For each tensor,
// the size of this content must match what is expected by the tensor's shape
// and data type. The raw data must be the flattened, one-dimensional,
// row-major order of the tensor elements without any stride or padding
// between the elements. Note that the FP16 and BF16 data types must be
// represented as raw content as there is no specific data type for a 16-bit
// float type.
//
// If this field is specified then InferInputTensor::contents must not be
// specified for any input tensor.
repeated bytes raw_input_contents = 7;
}

message ModelInferResponse
Expand All @@ -205,6 +246,8 @@ message ModelInferResponse
map<string, InferParameter> parameters = 4;

// The output tensor data.
// This field must not be specified if tensor contents are being specified
// in ModelInferResponse.raw_output_contents.
InferTensorContents contents = 5;
}

Expand All @@ -222,6 +265,25 @@ message ModelInferResponse

// The output tensors holding inference results.
repeated InferOutputTensor outputs = 5;

// The data contained in an output tensor can be represented in "raw" bytes
// form or in the repeated type that matches the tensor's data type. Using
// the "raw" bytes form will typically allow higher performance due to the
// way protobuf allocation and reuse interacts with GRPC. For example, see
// https://github.com/grpc/grpc/issues/23231.
//
// To use the raw representation 'raw_output_contents' must be initialized
// with data for each tensor in the same order as 'outputs'. For each tensor,
// the size of this content must match what is expected by the tensor's shape
// and data type. The raw data must be the flattened, one-dimensional,
// row-major order of the tensor elements without any stride or padding
// between the elements. Note that the FP16 and BF16 data types must be
// represented as raw content as there is no specific data type for a 16-bit
// float type.
//
// If this field is specified then InferOutputTensor::contents must not be
// specified for any output tensor.
repeated bytes raw_output_contents = 6;
}


Expand Down Expand Up @@ -296,3 +358,98 @@ message InferTensorContents
// one-dimensional, row-major order of the tensor elements.
repeated bytes bytes_contents = 8;
}

//
// Messages for the Repository API
//
// NOTE: These messages previously lived in a separate protobuf
// definition. They have now been merged into the main
// GRPCInferenceService definition.
//


// A model repository parameter value.
//
// Exactly one of the oneof members is set per parameter. Used as the map
// value type for the optional `parameters` fields of the repository
// load/unload requests.
message ModelRepositoryParameter
{
  // The parameter value can be a boolean, an int64, a string or bytes.
  oneof parameter_choice
  {
    // A boolean parameter value.
    bool bool_param = 1;

    // An int64 parameter value.
    int64 int64_param = 2;

    // A string parameter value.
    string string_param = 3;

    // A bytes parameter value.
    bytes bytes_param = 4;
  }
}


// Request message for GRPCInferenceService.RepositoryIndex.
message RepositoryIndexRequest
{
  // The name of the repository. If empty the index is returned
  // for all repositories.
  string repository_name = 1;

  // If true return only models currently ready for inferencing.
  bool ready = 2;
}

// Response message for GRPCInferenceService.RepositoryIndex.
message RepositoryIndexResponse
{
  // Index entry describing a single model in the repository.
  message ModelIndex {
    // The name of the model.
    string name = 1;

    // The version of the model.
    string version = 2;

    // The state of the model.
    // NOTE(review): the set of valid state strings is not defined here —
    // confirm against the serving runtime's documentation.
    string state = 3;

    // The reason, if any, that the model is in the given state.
    string reason = 4;
  }

  // An index entry for each model.
  repeated ModelIndex models = 1;
}

// Request message for GRPCInferenceService.RepositoryModelLoad.
message RepositoryModelLoadRequest
{
  // The name of the repository to load from. If empty the model
  // is loaded from any repository.
  string repository_name = 1;

  // The name of the model to load, or reload.
  string model_name = 2;

  // Optional model repository request parameters.
  map<string, ModelRepositoryParameter> parameters = 3;
}

// Response message for GRPCInferenceService.RepositoryModelLoad.
// Intentionally empty: success is conveyed by the RPC completing without
// error, and fields can be added later without changing the RPC signature.
message RepositoryModelLoadResponse
{
}

// Request message for GRPCInferenceService.RepositoryModelUnload.
message RepositoryModelUnloadRequest
{
  // The name of the repository from which the model was originally
  // loaded. If empty the repository is not considered.
  string repository_name = 1;

  // The name of the model to unload.
  string model_name = 2;

  // Optional model repository request parameters.
  map<string, ModelRepositoryParameter> parameters = 3;
}

// Response message for GRPCInferenceService.RepositoryModelUnload.
// Intentionally empty: success is conveyed by the RPC completing without
// error, and fields can be added later without changing the RPC signature.
message RepositoryModelUnloadResponse
{
}
116 changes: 115 additions & 1 deletion internal/proto/mlserver/dataplane/dataplane_grpc.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit f9dc1dc

Please sign in to comment.