Skip to content

Commit

Permalink
chore: Update MLServer protobuf (#45)
Browse files Browse the repository at this point in the history
#### Motivation

Related to [updating the MLServer runtime image](kserve/modelmesh-serving#355), the `ModelRepository` endpoint was deprecated. 

References:
kserve/modelmesh-serving#159
SeldonIO/MLServer#616

#### Modifications
- Updated protobuf 
- Updated mock server testing
- Updated runtime-adapter code to call new endpoint

#### Result
- MLServer runtime adapter no longer uses the deprecated model repository API

Signed-off-by: Rafael Vasquez <[email protected]>
  • Loading branch information
rafvasq authored May 26, 2023
1 parent 600f092 commit f9dc1dc
Show file tree
Hide file tree
Showing 10 changed files with 1,631 additions and 1,513 deletions.
1,390 changes: 1,102 additions & 288 deletions internal/proto/mlserver/dataplane/dataplane.pb.go

Large diffs are not rendered by default.

159 changes: 158 additions & 1 deletion internal/proto/mlserver/dataplane/dataplane.proto
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copied from https://github.com/SeldonIO/MLServer/blob/9a7c77f93dd61ac395e389280195d8adc413aad7/proto/dataplane.proto
// Copied from https://github.com/SeldonIO/MLServer/blob/1.2.4/proto/dataplane.proto
syntax = "proto3";

package inference;
Expand All @@ -25,6 +25,18 @@ service GRPCInferenceService

// Perform inference using a specific model.
rpc ModelInfer(ModelInferRequest) returns (ModelInferResponse) {}

// Get the index of model repository contents.
rpc RepositoryIndex(RepositoryIndexRequest)
returns (RepositoryIndexResponse) {}

// Load or reload a model from a repository.
rpc RepositoryModelLoad(RepositoryModelLoadRequest)
returns (RepositoryModelLoadResponse) {}

// Unload a model.
rpc RepositoryModelUnload(RepositoryModelUnloadRequest)
returns (RepositoryModelUnloadResponse) {}
}


Expand Down Expand Up @@ -113,6 +125,10 @@ message ModelMetadataResponse
// The tensor shape. A variable-size dimension is represented
// by a -1 value.
repeated int64 shape = 3;

// Optional default parameters for input.
// NOTE: This is an extension to the standard
map<string, InferParameter> parameters = 4;
}

// The model name.
Expand All @@ -129,6 +145,10 @@ message ModelMetadataResponse

// The model's outputs.
repeated TensorMetadata outputs = 5;

// Optional default parameters for the request / response.
// NOTE: This is an extension to the standard
map<string, InferParameter> parameters = 6;
}

//
Expand All @@ -152,6 +172,8 @@ message ModelInferRequest
map<string, InferParameter> parameters = 4;

// The input tensor data.
// This field must not be specified if tensor contents are being specified
// in ModelInferRequest.raw_input_contents.
InferTensorContents contents = 5;
}

Expand Down Expand Up @@ -185,6 +207,25 @@ message ModelInferRequest
// The requested output tensors for the inference. Optional, if not
// specified all outputs produced by the model will be returned.
repeated InferRequestedOutputTensor outputs = 6;

// The data contained in an input tensor can be represented in "raw" bytes
// form or in the repeated type that matches the tensor's data type. Using
// the "raw" bytes form will typically allow higher performance due to the
// way protobuf allocation and reuse interacts with GRPC. For example, see
// https://github.com/grpc/grpc/issues/23231.
//
// To use the raw representation 'raw_input_contents' must be initialized
// with data for each tensor in the same order as 'inputs'. For each tensor,
// the size of this content must match what is expected by the tensor's shape
// and data type. The raw data must be the flattened, one-dimensional,
// row-major order of the tensor elements without any stride or padding
// between the elements. Note that the FP16 and BF16 data types must be
// represented as raw content as there is no specific data type for a 16-bit
// float type.
//
// If this field is specified then InferInputTensor::contents must not be
// specified for any input tensor.
repeated bytes raw_input_contents = 7;
}

message ModelInferResponse
Expand All @@ -205,6 +246,8 @@ message ModelInferResponse
map<string, InferParameter> parameters = 4;

// The output tensor data.
// This field must not be specified if tensor contents are being specified
// in ModelInferResponse.raw_output_contents.
InferTensorContents contents = 5;
}

Expand All @@ -222,6 +265,25 @@ message ModelInferResponse

// The output tensors holding inference results.
repeated InferOutputTensor outputs = 5;

// The data contained in an output tensor can be represented in "raw" bytes
// form or in the repeated type that matches the tensor's data type. Using
// the "raw" bytes form will typically allow higher performance due to the
// way protobuf allocation and reuse interacts with GRPC. For example, see
// https://github.com/grpc/grpc/issues/23231.
//
// To use the raw representation 'raw_output_contents' must be initialized
// with data for each tensor in the same order as 'outputs'. For each tensor,
// the size of this content must match what is expected by the tensor's shape
// and data type. The raw data must be the flattened, one-dimensional,
// row-major order of the tensor elements without any stride or padding
// between the elements. Note that the FP16 and BF16 data types must be
// represented as raw content as there is no specific data type for a 16-bit
// float type.
//
// If this field is specified then InferOutputTensor::contents must not be
// specified for any output tensor.
repeated bytes raw_output_contents = 6;
}


Expand Down Expand Up @@ -296,3 +358,98 @@ message InferTensorContents
// one-dimensional, row-major order of the tensor elements.
repeated bytes bytes_contents = 8;
}

//
// Messages for the Repository API
//
// NOTE: These messages previously lived in a separate protobuf
// definition. They have now been merged into the main
// GRPCInferenceService definition.
//


// A model repository parameter value.
//
// Exactly one of the oneof members is set per parameter. Used as the map
// value type for the optional `parameters` fields of the repository
// load/unload requests.
message ModelRepositoryParameter
{
  // The parameter value can be a boolean, an int64, a string or bytes.
  oneof parameter_choice
  {
    // A boolean parameter value.
    bool bool_param = 1;

    // An int64 parameter value.
    int64 int64_param = 2;

    // A string parameter value.
    string string_param = 3;

    // A bytes parameter value.
    bytes bytes_param = 4;
  }
}


// Request message for GRPCInferenceService.RepositoryIndex.
message RepositoryIndexRequest
{
  // The name of the repository. If empty the index is returned
  // for all repositories.
  string repository_name = 1;

  // If true return only models currently ready for inferencing.
  bool ready = 2;
}

// Response message for GRPCInferenceService.RepositoryIndex.
message RepositoryIndexResponse
{
  // Index entry describing a single model in the repository.
  message ModelIndex {
    // The name of the model.
    string name = 1;

    // The version of the model.
    string version = 2;

    // The state of the model.
    // NOTE(review): the set of valid state strings is not defined here —
    // confirm against the serving runtime's documentation.
    string state = 3;

    // The reason, if any, that the model is in the given state.
    string reason = 4;
  }

  // An index entry for each model.
  repeated ModelIndex models = 1;
}

// Request message for GRPCInferenceService.RepositoryModelLoad.
message RepositoryModelLoadRequest
{
  // The name of the repository to load from. If empty the model
  // is loaded from any repository.
  string repository_name = 1;

  // The name of the model to load, or reload.
  string model_name = 2;

  // Optional model repository request parameters.
  map<string, ModelRepositoryParameter> parameters = 3;
}

// Response message for GRPCInferenceService.RepositoryModelLoad.
// Intentionally empty: success is conveyed by the RPC completing without
// error, and fields can be added later without changing the RPC signature.
message RepositoryModelLoadResponse
{
}

// Request message for GRPCInferenceService.RepositoryModelUnload.
message RepositoryModelUnloadRequest
{
  // The name of the repository from which the model was originally
  // loaded. If empty the repository is not considered.
  string repository_name = 1;

  // The name of the model to unload.
  string model_name = 2;

  // Optional model repository request parameters.
  map<string, ModelRepositoryParameter> parameters = 3;
}

// Response message for GRPCInferenceService.RepositoryModelUnload.
// Intentionally empty: success is conveyed by the RPC completing without
// error, and fields can be added later without changing the RPC signature.
message RepositoryModelUnloadResponse
{
}
116 changes: 115 additions & 1 deletion internal/proto/mlserver/dataplane/dataplane_grpc.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit f9dc1dc

Please sign in to comment.