Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: Update MLServer protobuf #45

Merged
merged 4 commits into from
May 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,390 changes: 1,102 additions & 288 deletions internal/proto/mlserver/dataplane/dataplane.pb.go

Large diffs are not rendered by default.

159 changes: 158 additions & 1 deletion internal/proto/mlserver/dataplane/dataplane.proto
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copied from https://github.com/SeldonIO/MLServer/blob/9a7c77f93dd61ac395e389280195d8adc413aad7/proto/dataplane.proto
// Copied from https://github.com/SeldonIO/MLServer/blob/1.2.4/proto/dataplane.proto
syntax = "proto3";

package inference;
Expand All @@ -25,6 +25,18 @@ service GRPCInferenceService

// Perform inference using a specific model.
rpc ModelInfer(ModelInferRequest) returns (ModelInferResponse) {}

// Get the index of model repository contents.
rpc RepositoryIndex(RepositoryIndexRequest)
returns (RepositoryIndexResponse) {}

// Load or reload a model from a repository.
rpc RepositoryModelLoad(RepositoryModelLoadRequest)
returns (RepositoryModelLoadResponse) {}

// Unload a model.
rpc RepositoryModelUnload(RepositoryModelUnloadRequest)
returns (RepositoryModelUnloadResponse) {}
}


Expand Down Expand Up @@ -113,6 +125,10 @@ message ModelMetadataResponse
// The tensor shape. A variable-size dimension is represented
// by a -1 value.
repeated int64 shape = 3;

// Optional default parameters for input.
// NOTE: This is an extension to the standard
map<string, InferParameter> parameters = 4;
}

// The model name.
Expand All @@ -129,6 +145,10 @@ message ModelMetadataResponse

// The model's outputs.
repeated TensorMetadata outputs = 5;

// Optional default parameters for the request / response.
// NOTE: This is an extension to the standard
map<string, InferParameter> parameters = 6;
}

//
Expand All @@ -152,6 +172,8 @@ message ModelInferRequest
map<string, InferParameter> parameters = 4;

// The input tensor data.
// This field must not be specified if tensor contents are being specified
// in ModelInferRequest.raw_input_contents.
InferTensorContents contents = 5;
}

Expand Down Expand Up @@ -185,6 +207,25 @@ message ModelInferRequest
// The requested output tensors for the inference. Optional, if not
// specified all outputs produced by the model will be returned.
repeated InferRequestedOutputTensor outputs = 6;

// The data contained in an input tensor can be represented in "raw" bytes
// form or in the repeated type that matches the tensor's data type. Using
// the "raw" bytes form will typically allow higher performance due to the
// way protobuf allocation and reuse interacts with GRPC. For example, see
// https://github.com/grpc/grpc/issues/23231.
//
// To use the raw representation 'raw_input_contents' must be initialized
// with data for each tensor in the same order as 'inputs'. For each tensor,
// the size of this content must match what is expected by the tensor's shape
// and data type. The raw data must be the flattened, one-dimensional,
// row-major order of the tensor elements without any stride or padding
// between the elements. Note that the FP16 and BF16 data types must be
// represented as raw content as there is no specific data type for a 16-bit
// float type.
//
// If this field is specified then InferInputTensor::contents must not be
// specified for any input tensor.
repeated bytes raw_input_contents = 7;
}

message ModelInferResponse
Expand All @@ -205,6 +246,8 @@ message ModelInferResponse
map<string, InferParameter> parameters = 4;

// The output tensor data.
// This field must not be specified if tensor contents are being specified
// in ModelInferResponse.raw_output_contents.
InferTensorContents contents = 5;
}

Expand All @@ -222,6 +265,25 @@ message ModelInferResponse

// The output tensors holding inference results.
repeated InferOutputTensor outputs = 5;

// The data contained in an output tensor can be represented in "raw" bytes
// form or in the repeated type that matches the tensor's data type. Using
// the "raw" bytes form will typically allow higher performance due to the
// way protobuf allocation and reuse interacts with GRPC. For example, see
// https://github.com/grpc/grpc/issues/23231.
//
// To use the raw representation 'raw_output_contents' must be initialized
// with data for each tensor in the same order as 'outputs'. For each tensor,
// the size of this content must match what is expected by the tensor's shape
// and data type. The raw data must be the flattened, one-dimensional,
// row-major order of the tensor elements without any stride or padding
// between the elements. Note that the FP16 and BF16 data types must be
// represented as raw content as there is no specific data type for a 16-bit
// float type.
//
// If this field is specified then InferOutputTensor::contents must not be
// specified for any output tensor.
repeated bytes raw_output_contents = 6;
}


Expand Down Expand Up @@ -296,3 +358,98 @@ message InferTensorContents
// one-dimensional, row-major order of the tensor elements.
repeated bytes bytes_contents = 8;
}

//
// Messages for the Repository API
//
// NOTE: These messages used to exist previously on a different protobuf
// definition. However, they have now been merged with the main
// GRPCInferenceService.
//


// A model repository parameter value, used in the `parameters` maps of
// RepositoryModelLoadRequest and RepositoryModelUnloadRequest.
message ModelRepositoryParameter
{
  // The parameter value can be a boolean, an int64, a string or bytes.
  // At most one of the fields below may be set.
  oneof parameter_choice
  {
    // A boolean parameter value.
    bool bool_param = 1;

    // An int64 parameter value.
    int64 int64_param = 2;

    // A string parameter value.
    string string_param = 3;

    // A bytes parameter value.
    bytes bytes_param = 4;
  }
}


// Request message for the RepositoryIndex RPC, which lists the contents
// of one (or all) model repositories.
message RepositoryIndexRequest
{
  // The name of the repository. If empty the index is returned
  // for all repositories.
  string repository_name = 1;

  // If true return only models currently ready for inferencing.
  bool ready = 2;
}

// Response message for the RepositoryIndex RPC.
message RepositoryIndexResponse
{
  // Index entry for a model.
  message ModelIndex {
    // The name of the model.
    string name = 1;

    // The version of the model.
    string version = 2;

    // The state of the model.
    // NOTE(review): the set of valid state strings is defined by the
    // serving implementation and is not visible in this file — confirm
    // against the MLServer documentation.
    string state = 3;

    // The reason, if any, that the model is in the given state.
    string reason = 4;
  }

  // An index entry for each model.
  repeated ModelIndex models = 1;
}

// Request message for the RepositoryModelLoad RPC, which loads (or
// reloads) a model from a repository.
message RepositoryModelLoadRequest
{
  // The name of the repository to load from. If empty the model
  // is loaded from any repository.
  string repository_name = 1;

  // The name of the model to load, or reload.
  string model_name = 2;

  // Optional model repository request parameters.
  map<string, ModelRepositoryParameter> parameters = 3;
}

// Response message for the RepositoryModelLoad RPC.
// Intentionally empty: a dedicated message (rather than a shared empty
// type) allows fields to be added later without breaking the RPC signature.
message RepositoryModelLoadResponse
{
}

// Request message for the RepositoryModelUnload RPC, which unloads a model.
message RepositoryModelUnloadRequest
{
  // The name of the repository from which the model was originally
  // loaded. If empty the repository is not considered.
  string repository_name = 1;

  // The name of the model to unload.
  string model_name = 2;

  // Optional model repository request parameters.
  map<string, ModelRepositoryParameter> parameters = 3;
}

// Response message for the RepositoryModelUnload RPC.
// Intentionally empty: a dedicated message (rather than a shared empty
// type) allows fields to be added later without breaking the RPC signature.
message RepositoryModelUnloadResponse
{
}
116 changes: 115 additions & 1 deletion internal/proto/mlserver/dataplane/dataplane_grpc.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading