Added ONNX model gRPC protocol test on Triton (#1790)
Added Automation Test for RHOAIENG-11565
CFSNM authored Sep 19, 2024
2 parents c2822a7 + 4b75415 commit 2b2b2ed
Showing 7 changed files with 459 additions and 6 deletions.
326 changes: 326 additions & 0 deletions ods_ci/tests/Resources/Files/triton/grpc_predict_v2.proto
@@ -0,0 +1,326 @@
// Copyright 2020 kubeflow.org.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";
package inference;

// Inference Server GRPC endpoints.
service GRPCInferenceService
{
// The ServerLive API indicates if the inference server is able to receive
// and respond to metadata and inference requests.
rpc ServerLive(ServerLiveRequest) returns (ServerLiveResponse) {}

// The ServerReady API indicates if the server is ready for inferencing.
rpc ServerReady(ServerReadyRequest) returns (ServerReadyResponse) {}

// The ModelReady API indicates if a specific model is ready for inferencing.
rpc ModelReady(ModelReadyRequest) returns (ModelReadyResponse) {}

// The ServerMetadata API provides information about the server. Errors are
// indicated by the google.rpc.Status returned for the request. The OK code
// indicates success and other codes indicate failure.
rpc ServerMetadata(ServerMetadataRequest) returns (ServerMetadataResponse) {}

// The per-model metadata API provides information about a model. Errors are
// indicated by the google.rpc.Status returned for the request. The OK code
// indicates success and other codes indicate failure.
rpc ModelMetadata(ModelMetadataRequest) returns (ModelMetadataResponse) {}

// The ModelInfer API performs inference using the specified model. Errors are
// indicated by the google.rpc.Status returned for the request. The OK code
// indicates success and other codes indicate failure.
rpc ModelInfer(ModelInferRequest) returns (ModelInferResponse) {}
}

message ServerLiveRequest {}

message ServerLiveResponse
{
// True if the inference server is live, false if not live.
bool live = 1;
}

message ServerReadyRequest {}

message ServerReadyResponse
{
// True if the inference server is ready, false if not ready.
bool ready = 1;
}

message ModelReadyRequest
{
// The name of the model to check for readiness.
string name = 1;

// The version of the model to check for readiness. If not given the
// server will choose a version based on the model and internal policy.
string version = 2;
}

message ModelReadyResponse
{
// True if the model is ready, false if not ready.
bool ready = 1;
}

message ServerMetadataRequest {}

message ServerMetadataResponse
{
// The server name.
string name = 1;

// The server version.
string version = 2;

// The extensions supported by the server.
repeated string extensions = 3;
}

message ModelMetadataRequest
{
// The name of the model.
string name = 1;

// The version of the model to check for readiness. If not given the
// server will choose a version based on the model and internal policy.
string version = 2;
}

message ModelMetadataResponse
{
// Metadata for a tensor.
message TensorMetadata
{
// The tensor name.
string name = 1;

// The tensor data type.
string datatype = 2;

// The tensor shape. A variable-size dimension is represented
// by a -1 value.
repeated int64 shape = 3;
}

// The model name.
string name = 1;

// The versions of the model available on the server.
repeated string versions = 2;

// The model's platform. See Platforms.
string platform = 3;

// The model's inputs.
repeated TensorMetadata inputs = 4;

// The model's outputs.
repeated TensorMetadata outputs = 5;
}

message ModelInferRequest
{
// An input tensor for an inference request.
message InferInputTensor
{
// The tensor name.
string name = 1;

// The tensor data type.
string datatype = 2;

// The tensor shape.
repeated int64 shape = 3;

// Optional inference input tensor parameters.
map<string, InferParameter> parameters = 4;

// The tensor contents using a data-type format. This field must
// not be specified if "raw" tensor contents are being used for
// the inference request.
InferTensorContents contents = 5;
}

// An output tensor requested for an inference request.
message InferRequestedOutputTensor
{
// The tensor name.
string name = 1;

// Optional requested output tensor parameters.
map<string, InferParameter> parameters = 2;
}

// The name of the model to use for inferencing.
string model_name = 1;

// The version of the model to use for inference. If not given the
// server will choose a version based on the model and internal policy.
string model_version = 2;

// Optional identifier for the request. If specified will be
// returned in the response.
string id = 3;

// Optional inference parameters.
map<string, InferParameter> parameters = 4;

// The input tensors for the inference.
repeated InferInputTensor inputs = 5;

// The requested output tensors for the inference. Optional, if not
// specified all outputs produced by the model will be returned.
repeated InferRequestedOutputTensor outputs = 6;

// The data contained in an input tensor can be represented in "raw"
// bytes form or in the repeated type that matches the tensor's data
// type. To use the raw representation 'raw_input_contents' must be
// initialized with data for each tensor in the same order as
// 'inputs'. For each tensor, the size of this content must match
// what is expected by the tensor's shape and data type. The raw
// data must be the flattened, one-dimensional, row-major order of
// the tensor elements without any stride or padding between the
// elements. Note that the FP16 and BF16 data types must be represented as
// raw content as there is no specific data type for a 16-bit float type.
//
// If this field is specified then InferInputTensor::contents must
// not be specified for any input tensor.
repeated bytes raw_input_contents = 7;
}

message ModelInferResponse
{
// An output tensor returned for an inference request.
message InferOutputTensor
{
// The tensor name.
string name = 1;

// The tensor data type.
string datatype = 2;

// The tensor shape.
repeated int64 shape = 3;

// Optional output tensor parameters.
map<string, InferParameter> parameters = 4;

// The tensor contents using a data-type format. This field must
// not be specified if "raw" tensor contents are being used for
// the inference response.
InferTensorContents contents = 5;
}

// The name of the model used for inference.
string model_name = 1;

// The version of the model used for inference.
string model_version = 2;

// The id of the inference request if one was specified.
string id = 3;

// Optional inference response parameters.
map<string, InferParameter> parameters = 4;

// The output tensors holding inference results.
repeated InferOutputTensor outputs = 5;

// The data contained in an output tensor can be represented in
// "raw" bytes form or in the repeated type that matches the
// tensor's data type. To use the raw representation 'raw_output_contents'
// must be initialized with data for each tensor in the same order as
// 'outputs'. For each tensor, the size of this content must match
// what is expected by the tensor's shape and data type. The raw
// data must be the flattened, one-dimensional, row-major order of
// the tensor elements without any stride or padding between the
// elements. Note that the FP16 and BF16 data types must be represented as
// raw content as there is no specific data type for a 16-bit float type.
//
// If this field is specified then InferOutputTensor::contents must
// not be specified for any output tensor.
repeated bytes raw_output_contents = 6;
}

// An inference parameter value. The Parameters message describes a
// “name”/”value” pair, where the “name” is the name of the parameter
// and the “value” is a boolean, integer, or string corresponding to
// the parameter.
message InferParameter
{
// The parameter value can be a string, an int64, a boolean
// or a message specific to a predefined parameter.
oneof parameter_choice
{
// A boolean parameter value.
bool bool_param = 1;

// An int64 parameter value.
int64 int64_param = 2;

// A string parameter value.
string string_param = 3;
}
}

// The data contained in a tensor represented by the repeated type
// that matches the tensor's data type. Protobuf oneof is not used
// because oneofs cannot contain repeated fields.
message InferTensorContents
{
// Representation for BOOL data type. The size must match what is
// expected by the tensor's shape. The contents must be the flattened,
// one-dimensional, row-major order of the tensor elements.
repeated bool bool_contents = 1;

// Representation for INT8, INT16, and INT32 data types. The size
// must match what is expected by the tensor's shape. The contents
// must be the flattened, one-dimensional, row-major order of the
// tensor elements.
repeated int32 int_contents = 2;

// Representation for INT64 data types. The size must match what
// is expected by the tensor's shape. The contents must be the
// flattened, one-dimensional, row-major order of the tensor elements.
repeated int64 int64_contents = 3;

// Representation for UINT8, UINT16, and UINT32 data types. The size
// must match what is expected by the tensor's shape. The contents
// must be the flattened, one-dimensional, row-major order of the
// tensor elements.
repeated uint32 uint_contents = 4;

// Representation for UINT64 data types. The size must match what
// is expected by the tensor's shape. The contents must be the
// flattened, one-dimensional, row-major order of the tensor elements.
repeated uint64 uint64_contents = 5;

// Representation for FP32 data type. The size must match what is
// expected by the tensor's shape. The contents must be the flattened,
// one-dimensional, row-major order of the tensor elements.
repeated float fp32_contents = 6;

// Representation for FP64 data type. The size must match what is
// expected by the tensor's shape. The contents must be the flattened,
// one-dimensional, row-major order of the tensor elements.
repeated double fp64_contents = 7;

// Representation for BYTES data type. The size must match what is
// expected by the tensor's shape. The contents must be the flattened,
// one-dimensional, row-major order of the tensor elements.
repeated bytes bytes_contents = 8;
}
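
For reference, a minimal Python client sketch of the protocol defined above (not part of this commit). It assumes the stubs were generated from grpc_predict_v2.proto with grpcio-tools; the endpoint, TLS setup, and the input tensor name ("data_0") and shape are placeholders rather than values taken from the test.

# Sketch only: stubs assumed generated with
#   python -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. grpc_predict_v2.proto
import grpc
import grpc_predict_v2_pb2 as pb2
import grpc_predict_v2_pb2_grpc as pb2_grpc

# Placeholder endpoint and credentials.
channel = grpc.secure_channel("model-host.example.com:443", grpc.ssl_channel_credentials())
stub = pb2_grpc.GRPCInferenceServiceStub(channel)

# Liveness/readiness RPCs defined by the service above.
assert stub.ServerReady(pb2.ServerReadyRequest()).ready
assert stub.ModelReady(pb2.ModelReadyRequest(name="densenet_onnx")).ready

# Build a ModelInferRequest: one FP32 input tensor, flattened row-major.
infer_input = pb2.ModelInferRequest.InferInputTensor(
    name="data_0",                      # assumed densenet_onnx input name
    datatype="FP32",
    shape=[1, 3, 224, 224],             # assumed input shape
    contents=pb2.InferTensorContents(fp32_contents=[0.0] * (3 * 224 * 224)),
)
request = pb2.ModelInferRequest(
    model_name="densenet_onnx",
    id="test1",
    inputs=[infer_input],
    outputs=[pb2.ModelInferRequest.InferRequestedOutputTensor(name="fc6_1")],
)
response = stub.ModelInfer(request)
print(response.model_name, list(response.outputs[0].shape))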


@@ -0,0 +1 @@
{"modelName":"densenet_onnx","modelVersion":"1","id":"test1","outputs":[{"name":"fc6_1","datatype":"FP32","shape":["1000"]}],"rawOutputContents":["crbswY2eFMHy2ljBr3gZwk41nsGFaUfAwRMFwmtDScGn5h7BrHeHwdxaF8GCdt7A4DwkwezErcF98U/BQW4Iwbxw88BHxz7BmN4fwVPN7sGNiG7B5blvwUd0P8F3MqTB4RxUwbKIucHzFATCn6r7wTvnpcH5wczBxbHcwZmxvsE2KabByCZewaYnrkAMlIzBQuTfv1Mozz84gQ9B+IBkwanLXMGIUhXBIDmSwG6UfcHxIofBeiZKwTf1KEDOJ0PBAunpwdgtwMF3dCLB7/CNQDjkUEEnZu5AYr9PQCD7DUATXPlByjflwEOTasGPwhs95/zYQfriPcEGbXTBrvKpQHfmI8F/0ofBT3UfQZVHAEBHosw+kavuwUTvScGkB8a/m3UEwWWaT0B+3q3ApvsUQfTd6b/5zw5A6lbTwN7mC0Eq3wvCr6kJwovtc8DlbVjBtE8RwjkXA8JVDobBiAFfwUy7h8HLr9vBOZ5/waKBC8KYJojAJNBDwSJe6D/kgLrBVIeNwDm/z8BMv2HByQWnwFvF3sArnAnCQ0/QwSzPAcKd45PBIta0wYjICsLMab/BMNVWwb8KC8AVgiTCbRhSwSSN60BG/ePAse6rwXwZvMGfAgbCeYuTvvCQLcDvJonAP6qNwUsRFj9HFY1ARftxwaXOAsE46/rB8EmFwaBmz8FmkaPB/XuXwd+XTcEIDQbCXAW8wdX6ZcDhwQTCy3VCwWxDQcFKUszA6aYCwrWFpUDvVpjB6ondP0pUQcE9BWvAG6TYwcASsMEJa5nBsIC6waho8cGww+PB6ta6wbxBxjyvvOlAnwBXQeRlTcDGOCvA5cOpwJj/KkLaobG+18rdwDbLMkC0W4jALF6uwP5p2cBdSqbBh512wAbP1sGLRZrB2UxhwcfuGcFKOMnBPlh+wRB3p8Gl1jbBEQ+/v4NMJkGoerG+ApdbwcTZDcExhpPB7NQLwpOJ777S7IHBOLQNQUUNo79UXRrAc8wNwKkJCkG5X0BAFNUSwchLTsDHdTlAqJxSwHkrlkDtUI+8MjQ1wX4DVD5nY/xAmfPhQHht8T+0+4hBlbOEwCi+V0Gy8au9WvGtQAHNO8FG+6TBt100wV4Dk8GOEp3BxxekwT/52b92r8rB3CUmwby4LcFJdnHB0cwJQbTjv8CeYh3B82xmQAfyS0HNhSlBWaenwOTCW0AoQB3Bcd7xwIXQrUHo3ie/LGRiQXphF0EytZNBkQ8DQTASp8BfMo9AiR8iwaMbmEAkx9nAyzfyP1xFTsENAGNBzM+Tv37pjb5UCM7APYwVwW0UukCEoc5ACX+BwVWM/cBEcZxA56qHP2UiNUCSYzrBfIGswEJn1kHwkpnA501/weD5m0Bt8CzBz5r8QNNSzUB+x/s/9ymbP48J5b6ae4ZBzri0QcTDp0FCtGZBrvSlQaGtrcFRjJ7BmdHawf0zYMHKkjnB+ECGwQxM78DugSbBTOnJwZPTPsDMZMDB1QwzwbaenMFAwo9BXFwVQRrQmz/0OL/BCHB/P84i5sFUmP7AEAsswWX0hcHqmJrBfS+TwXUyicEepmVA1vS2wQOJ88GHK7/BZs/BwclFm8GRoNLBn+QDwXdfqsB+nmFAFqxNQRmHkMFU5qLBJCnOwOB7q0DQS/bAzTeDwdm0uEGFOEVBJLugQVeFtsCqObTA9YWtwKTqEMLAyprBPml2wdO0WUC/t+rA5mMbQXFtkMFeyZRBwfWywSstE8FdGsC/1+eKQRPUq7+c/ObBCZBBwNaKsMDpu9hADkbHv29S1MF/iTrBqs4TwvL5BsJd3pTBmITIwSpjZ8Flo9/Bbm20wXY3HMI29RPCpuICwup/LMI3wPXBZ9DOwUjeEMLMa/7Bz7gFwp9wfMFHxjPBIB20wTVmoMHR7KvBeEiWwQhSU8HJXwTBRmEOwvt/n8He5AvCEmkzwRFYLMFP2jjBB2jNwUhwCcHCUx/BkZ1BwS/r+8HIX+zBp2bgwBtyGMFz3uzBfjsdwrhsBsL9H4XBFm/gwCr328F+uDXBG+PVwfXgMsBxZoXB3uK7wUlJy8G5tZfBrr9WwdpAB8Kf25jBFjRTwZYhFMLJqETBSQqKwVzvgMG0h8LBQviZwaHCvMF6YfFBG99XwYO7rED1LIpAw8RDQfYE2MES+hxB+zNGQZVjV8GiYZlBWhUewdLeg0L2raXBFWJuQYfGiEFtzPA+5Vv9QLZDQEBAPNvAqOAdQhkNt0GtM2Y/0DAKwcJfiDz6aK9Bq5PjQZgApL+vrarBAM9XQWDnfkGLJypAEarxQGiMM8Fl3AVCa1cFQRak7sG81P/BAda1vyO3rUG3kZ9AtBOFQaFkj0BBB3FBcM2xQcErfUFgZpnA8hztQGiOUz+2zH9Btr0OQU0dIcF5D8nBPl0NwFnHEMHbNTXBctmqQOcoVkEZTpe/rm6oQf3lMkLBYKzB4n1PQWOmTsGL/9pAMKOjQNblqEHonttBRH0TwTtsyr9PEVjBamqFQTvAjEE58LlBySEPwc+MMsE5Jp1AtB+EwfsaQkEyvwDBhYkvwSrkG0CDGjhCel49vrQNw0DUXDRBUSHpwHhJh0An1ctBvQYLQUOvykANcZtB1qA7QVZ5xkDnAWHB69YawQXDB8HiuYJBA0SPQN78tMHHIA3BGP23QTiktUDfiIjB0iqFwReXqr8LJwBCaoNxQmRIVkLdtxxC9Cb0QJj8VUHmOUJAigJvwayBaMDxXHNBeQHaQOArPcH5hae/qLkJQnlKnsDest1BgmlkwZx2X0Gi8dVBQ+vLQTCFAUHa30C/bEbDwCJ/BEJbCRlCA9HXP57QnMHOtmVCJoeXQT9qHkJgGhs/lNXDQXrE6UEC7ZjBAoGRwZgLw0H6aXS+BQ3YwaUI+kGe48hBScYqQjw9wkDdMvJBYfOBQUmNq8FQlCtBaX2YQbwUTUJt4sxBDwSAQcIHl0Fz9TDBNTFeQbCzGkHQqZ1A6h3wvvt5L0KPAbRBA4QrwGhX1cADW9NAosItwaelbkFq6NVADy28QUEfkMBafZDBOolTQYt7K0HGhfJBFUwXQDKI7kEfBBrAlcpMwW4+vkFuS2FB8HBJQFVhQMGlU6ZB9gqAQeOtUMBztutBXfxUQVQRAUG4jG9ARCeSQVaX1b4hUZZAWkwNQSdF7UBYOb7ALOyPP8YQScGvYbdACM+bwYfXwUEPd8lBLp/RQdnsWEHwDcNAOCTiwewGx0G2VhtCSYSLvzaHIr/ybLZATErGQc9jE0GQ3cs/EH7WQGaGEkEkcra/R9J4Qau7AkIgKx9B3VH6QFOPcEKp3nVBWTxMwaXdl0C7BM3AjxejQdpShcE73zBBThCcQdPZ28FKZDZBVJMBwfualEG7WEBCCLNWQaDgEsKZ9b9BqbYeQT7PGcC1a+hB
GEX2wH7NpMEAnhhCq9yxQQfWAkHXJ9NBoAQvvgcs1UFtvsZBR8bswOr5C8F8mwFCNsbHQaS2xsAWWzrBakuBQYEmIkF1uyBA1kIZQcIQpUHjLqxB11u2wdDaGEGki2rBqQvowFKE/UEF3uTA/QlOQbfkREHF1qnAcfJSwfn9JEHNnAW/em6hQLmFvEHho/hAhGgGwrxrskGMrYdAq+dJPqW0ikEI1KhBNEIGQnMRU8FS+YxBPWttQFaPhEFDiB0/KnFAwed2ocDA6pLA2/j6wVup3762GYBBzCsYP2mouL/+DYM+ifhJwExf3cApzf/Ad6imQHPbaMDP3qlBWtGwwPB2RLxF8MQ/z1edPwelI0FAOb7AooLSQCTktcAEG+tBE6KHQe6s6sAForFB2UikQPlGAsIRfDrBgw2/QVpveUG6WAQ/uC4kQWlphEG8ZlNCVlA3QoLjZMECaQRCqrgewRwT20H8iBzBZCHcQYkcvsDs+BVBKRO5Pnq13EHsvGNB1JHXwMqe8UHmsEBB+kcgwK+nukC0giPBgSumwd7f2EEZSVfBf5IwQfsLmkFQs5ZB24+6QaRrtcHkLLM/L4jVPpzdc0EnNt9A05X6wJyRT0IZe+S/++BfwRv3JsAaKppBA+MoQTSR08EF1NtBJn8dQRmRAMLHw8vAwRgIQkc6A0L2PgvAtHKnQV0/m0HgNDLBpomjvxt1eT/eovVB6YYPQdx4XsGZrR5Ah7vBwL5Zt0GfdYNADTknwWzvBkJHyhNCrmB9QYycWsGf94NBKJ8GQfZSuUHyNjHALCVfQS9jyEBaKmBB9PeuQfytnMHmA2vAqiOPQdwq1MB6DCfApxEDwVuxP0FgqKRByOkUQDtUPsF3HUPASOiHQRsLdEEu8JG/fk+eQcgZG78hyFxBnvP+P9u3hkE2CxLBh6moQW7lJcFvYgRBvgyiwMaYlUAblTtCcEYEQfXyXsHC6oFA7QrSQfCLokEdI19B4F/ywMjGyUG1RPhB6NDIQYXc+MBE7idBzFMtQcZtRMHG7JbBEEp9wUOI28GxW/1BZ/10QVdqfkEbsaBA8H28QYEzg0BsYi9BlmH0QArmQ0F99D5Bcu84Qq6mvMHDENBAaqJqQtEfOEF6EhlC60T8QSHrQkAcDtJA0yKrQCTNAcFA2FPAVTKkwbfrK0Lqs+BAogAev5E7DkFiBInARb0bQaMw0D/FrhfBn6dnwSlKOULEP5TAnYCkQXvhFcGkDrVBdn0fQFGpqsGYLDBBp6VxwRKY0EFBFo/BZ6WlQYSpgsAaAKPAi164QXWXlUFE76g/k1uhwUpLW0HuHXTB+IstQfpDzUC9XW/ACTA9QclM8kGxhwzBDwQTQRLxk0ELp4BBqc3LQQPHFEKkxiZCKy7RQPK6AcF0HCNCwHm7QfOCQ0E1ylFBBTgiwV7njEGt8rVB9abWQV4XzT/6Fu5AXX0kwedACML+Ev3BUKUGQC7KQULFOA9BMpncQb/n40FiF9pBwQclwRE6UkH+mjRBWJalQMcx/EArevXAODoLwXKIVUFfzYBAO1nWQG0+kEEjWqhBtsuvwD+qCEFhaRZA7nn6voqpwEHRZBZBt89Dv1JEOMFfYavB1u4tQO5TlD8xF1vA7k2ZvpIQG8FB629B6KemQVoelUFl67VBfZqKQU6UV0EAhBHBNCwEQin06UBfib1AsfsGQbsUb8BKyAlBSaN0QW63zcDjTAhBvaAGwZD+RkAOGxvBKlIIQo1aLEJMy39CqhEAQoOoxsCtt+NADdXiPymzDMGoeoDBnjwJwYaWQcF4GShB+8sdvzs1JcGnHjHByDiAwBwcl0EWbVvAegd1wO9soEG9BwrCBmWiQd2Bib+8jYjBw2Jhv5Adu8G4BIHBv0z5wWZ3BcKElV/BMe6wQAPnvcGmp5JAqHhTwQ=="]}
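
The blob above is the expected ModelInferResponse for the densenet_onnx model, with the "fc6_1" scores carried in rawOutputContents as base64-encoded raw bytes. A small decoding sketch follows; the local file name is hypothetical, and little-endian FP32 is an assumption based on the declared datatype.

# Sketch: decode rawOutputContents back into the declared FP32 tensor.
import base64
import json
import numpy as np

with open("expected_response.json") as f:   # hypothetical local copy of the file above
    expected = json.load(f)

raw = base64.b64decode(expected["rawOutputContents"][0])
scores = np.frombuffer(raw, dtype="<f4")    # FP32, flattened row-major (endianness assumed)
assert scores.shape[0] == int(expected["outputs"][0]["shape"][0])  # 1000 class scores
print("top class index:", int(scores.argmax()))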
@@ -0,0 +1,63 @@
apiVersion: serving.kserve.io/v1alpha1
kind: ServingRuntime
metadata:
  name: triton-kserve-grpc
spec:
  annotations:
    prometheus.kserve.io/path: /metrics
    prometheus.kserve.io/port: "8002"
  containers:
    - args:
        - tritonserver
        - --model-store=/mnt/models
        - --grpc-port=9000
        - --http-port=8080
        - --allow-grpc=true
        - --allow-http=true
      image: nvcr.io/nvidia/tritonserver:23.05-py3
      name: kserve-container
      ports:
        - containerPort: 9000
          name: h2c
          protocol: TCP
      volumeMounts:
        - mountPath: /dev/shm
          name: shm
      resources:
        limits:
          cpu: "1"
          memory: 2Gi
        requests:
          cpu: "1"
          memory: 2Gi
  protocolVersions:
    - v2
    - grpc-v2
  supportedModelFormats:
    - autoSelect: true
      name: tensorrt
      priority: 1
      version: "8"
    - autoSelect: true
      name: tensorflow
      priority: 1
      version: "1"
    - autoSelect: true
      name: tensorflow
      priority: 1
      version: "2"
    - autoSelect: true
      name: onnx
      priority: 1
      version: "1"
    - name: pytorch
      version: "1"
    - autoSelect: true
      name: triton
      priority: 1
      version: "2"
  volumes:
    - emptyDir:
        medium: Memory
        sizeLimit: 2Gi
      name: shm
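
A hedged sketch of how an InferenceService could reference this runtime for an ONNX model, using the Kubernetes Python client. The name, namespace, and storageUri are placeholders, and the test itself most likely drives deployment through the repository's Robot Framework keywords rather than this client.

# Sketch: create an InferenceService bound to the triton-kserve-grpc runtime above.
from kubernetes import client, config

isvc = {
    "apiVersion": "serving.kserve.io/v1beta1",
    "kind": "InferenceService",
    "metadata": {"name": "densenet-onnx", "namespace": "triton-demo"},  # placeholders
    "spec": {
        "predictor": {
            "model": {
                "modelFormat": {"name": "onnx"},
                "protocolVersion": "grpc-v2",          # matches protocolVersions above
                "runtime": "triton-kserve-grpc",       # the ServingRuntime defined above
                "storageUri": "s3://example-bucket/densenet_onnx/",  # placeholder
            }
        }
    },
}

config.load_kube_config()
client.CustomObjectsApi().create_namespaced_custom_object(
    group="serving.kserve.io", version="v1beta1",
    namespace="triton-demo", plural="inferenceservices", body=isvc,
)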