-
Notifications
You must be signed in to change notification settings - Fork 81
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added onnx model gRPC protocol test on triton #1790
Merged
Merged
Changes from 11 commits
Commits
Show all changes
18 commits
Select commit
Hold shift + click to select a range
ee600cb
Initial commit
Raghul-M 1fa3c31
initial commit
Raghul-M 743488e
Initial changes
Raghul-M 49bad00
merged changes
Raghul-M 58a99e3
modified files
Raghul-M 0747689
changes
Raghul-M a3eaece
added proto file
Raghul-M 422caa1
modified code
Raghul-M 6c7ed8b
changes
Raghul-M 71f1987
resolved merge conflicts
Raghul-M e1f291a
inference comparison
Raghul-M a574c77
resolved comparison and added delete step
Raghul-M 3e1a843
resolved comparison and added delete step
Raghul-M 79d205d
resolved comparison
Raghul-M fb91233
removed sleep
Raghul-M b5de823
xpath changes
Raghul-M c007ab3
RunTHisTest removed
Raghul-M 4b75415
replaced sleep with timeout
Raghul-M File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
326 changes: 326 additions & 0 deletions
326
ods_ci/tests/Resources/Files/triton/grpc_predict_v2.proto
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,326 @@ | ||
// Copyright 2020 kubeflow.org. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
syntax = "proto3"; | ||
package inference; | ||
|
||
// Inference Server gRPC endpoints. Every method below is a unary RPC: it | ||
// takes a single request message and returns a single response message. | ||
service GRPCInferenceService | ||
{ | ||
// The ServerLive API indicates if the inference server is able to receive | ||
// and respond to metadata and inference requests. | ||
rpc ServerLive(ServerLiveRequest) returns (ServerLiveResponse) {} | ||
|
||
// The ServerReady API indicates if the server is ready for inferencing. | ||
rpc ServerReady(ServerReadyRequest) returns (ServerReadyResponse) {} | ||
|
||
// The ModelReady API indicates if a specific model is ready for inferencing. | ||
// The model is selected by the name and optional version in the request. | ||
rpc ModelReady(ModelReadyRequest) returns (ModelReadyResponse) {} | ||
|
||
// The ServerMetadata API provides information about the server. Errors are | ||
// indicated by the google.rpc.Status returned for the request. The OK code | ||
// indicates success and other codes indicate failure. | ||
rpc ServerMetadata(ServerMetadataRequest) returns (ServerMetadataResponse) {} | ||
|
||
// The per-model metadata API provides information about a model. Errors are | ||
// indicated by the google.rpc.Status returned for the request. The OK code | ||
// indicates success and other codes indicate failure. | ||
rpc ModelMetadata(ModelMetadataRequest) returns (ModelMetadataResponse) {} | ||
|
||
// The ModelInfer API performs inference using the specified model. Errors are | ||
// indicated by the google.rpc.Status returned for the request. The OK code | ||
// indicates success and other codes indicate failure. | ||
rpc ModelInfer(ModelInferRequest) returns (ModelInferResponse) {} | ||
} | ||
|
||
// Request message for the ServerLive RPC. It declares no fields. | ||
message ServerLiveRequest {} | ||
|
||
// Response message for the ServerLive RPC. | ||
message ServerLiveResponse | ||
{ | ||
// True if the inference server is live, false if not live. | ||
bool live = 1; | ||
} | ||
|
||
// Request message for the ServerReady RPC. It declares no fields. | ||
message ServerReadyRequest {} | ||
|
||
// Response message for the ServerReady RPC. | ||
message ServerReadyResponse | ||
{ | ||
// True if the inference server is ready, false if not ready. | ||
bool ready = 1; | ||
} | ||
|
||
// Request message for the ModelReady RPC. | ||
message ModelReadyRequest | ||
{ | ||
// The name of the model to check for readiness. | ||
string name = 1; | ||
|
||
// The version of the model to check for readiness. If not given, the | ||
// server will choose a version based on the model and internal policy. | ||
string version = 2; | ||
} | ||
|
||
// Response message for the ModelReady RPC. | ||
message ModelReadyResponse | ||
{ | ||
// True if the model is ready, false if not ready. | ||
bool ready = 1; | ||
} | ||
|
||
// Request message for the ServerMetadata RPC. It declares no fields. | ||
message ServerMetadataRequest {} | ||
|
||
// Response message for the ServerMetadata RPC. | ||
message ServerMetadataResponse | ||
{ | ||
// The server name. | ||
string name = 1; | ||
|
||
// The server version. | ||
string version = 2; | ||
|
||
// The names of the extensions supported by the server. | ||
repeated string extensions = 3; | ||
} | ||
|
||
// Request message for the ModelMetadata RPC. | ||
message ModelMetadataRequest | ||
{ | ||
// The name of the model. | ||
string name = 1; | ||
|
||
// The version of the model whose metadata is requested. If not given, the | ||
// server will choose a version based on the model and internal policy. | ||
string version = 2; | ||
} | ||
|
||
// Response message for the ModelMetadata RPC. | ||
message ModelMetadataResponse | ||
{ | ||
// Metadata for a tensor. Used for both the model's inputs and outputs. | ||
message TensorMetadata | ||
{ | ||
// The tensor name. | ||
string name = 1; | ||
|
||
// The tensor data type. | ||
string datatype = 2; | ||
|
||
// The tensor shape, one entry per dimension. A variable-size | ||
// dimension is represented by a -1 value. | ||
repeated int64 shape = 3; | ||
} | ||
|
||
// The model name. | ||
string name = 1; | ||
|
||
// The versions of the model available on the server. | ||
repeated string versions = 2; | ||
|
||
// The model's platform. See Platforms. | ||
string platform = 3; | ||
|
||
// Metadata for each of the model's inputs. | ||
repeated TensorMetadata inputs = 4; | ||
|
||
// Metadata for each of the model's outputs. | ||
repeated TensorMetadata outputs = 5; | ||
} | ||
|
||
// Request message for the ModelInfer RPC. | ||
message ModelInferRequest | ||
{ | ||
// An input tensor for an inference request. | ||
message InferInputTensor | ||
{ | ||
// The tensor name. | ||
string name = 1; | ||
|
||
// The tensor data type. | ||
string datatype = 2; | ||
|
||
// The tensor shape, one entry per dimension. | ||
repeated int64 shape = 3; | ||
|
||
// Optional inference input tensor parameters, keyed by parameter name. | ||
map<string, InferParameter> parameters = 4; | ||
|
||
// The tensor contents using a data-type format. This field must | ||
// not be specified if "raw" tensor contents (raw_input_contents) are | ||
// being used for the inference request. | ||
InferTensorContents contents = 5; | ||
} | ||
|
||
// An output tensor requested for an inference request. | ||
message InferRequestedOutputTensor | ||
{ | ||
// The tensor name. | ||
string name = 1; | ||
|
||
// Optional requested output tensor parameters, keyed by parameter name. | ||
map<string, InferParameter> parameters = 2; | ||
} | ||
|
||
// The name of the model to use for inferencing. | ||
string model_name = 1; | ||
|
||
// The version of the model to use for inference. If not given, the | ||
// server will choose a version based on the model and internal policy. | ||
string model_version = 2; | ||
|
||
// Optional identifier for the request. If specified, it is returned | ||
// in the 'id' field of the response. | ||
string id = 3; | ||
|
||
// Optional inference parameters, keyed by parameter name. | ||
map<string, InferParameter> parameters = 4; | ||
|
||
// The input tensors for the inference. | ||
repeated InferInputTensor inputs = 5; | ||
|
||
// The requested output tensors for the inference. Optional; if not | ||
// specified, all outputs produced by the model will be returned. | ||
repeated InferRequestedOutputTensor outputs = 6; | ||
|
||
// The data contained in an input tensor can be represented in "raw" | ||
// bytes form or in the repeated type that matches the tensor's data | ||
// type. To use the raw representation 'raw_input_contents' must be | ||
// initialized with data for each tensor in the same order as | ||
// 'inputs'. For each tensor, the size of this content must match | ||
// what is expected by the tensor's shape and data type. The raw | ||
// data must be the flattened, one-dimensional, row-major order of | ||
// the tensor elements without any stride or padding between the | ||
// elements. Note that the FP16 and BF16 data types must be represented as | ||
// raw content as there is no specific data type for a 16-bit float type. | ||
// | ||
// If this field is specified then InferInputTensor::contents must | ||
// not be specified for any input tensor. | ||
repeated bytes raw_input_contents = 7; | ||
} | ||
|
||
// Response message for the ModelInfer RPC. | ||
message ModelInferResponse | ||
{ | ||
// An output tensor returned for an inference request. | ||
message InferOutputTensor | ||
{ | ||
// The tensor name. | ||
string name = 1; | ||
|
||
// The tensor data type. | ||
string datatype = 2; | ||
|
||
// The tensor shape, one entry per dimension. | ||
repeated int64 shape = 3; | ||
|
||
// Optional output tensor parameters, keyed by parameter name. | ||
map<string, InferParameter> parameters = 4; | ||
|
||
// The tensor contents using a data-type format. This field must | ||
// not be specified if "raw" tensor contents (raw_output_contents) are | ||
// being used for the inference response. | ||
InferTensorContents contents = 5; | ||
} | ||
|
||
// The name of the model used for inference. | ||
string model_name = 1; | ||
|
||
// The version of the model used for inference. | ||
string model_version = 2; | ||
|
||
// The id of the inference request if one was specified. | ||
string id = 3; | ||
|
||
// Optional inference response parameters, keyed by parameter name. | ||
map<string, InferParameter> parameters = 4; | ||
|
||
// The output tensors holding inference results. | ||
repeated InferOutputTensor outputs = 5; | ||
|
||
// The data contained in an output tensor can be represented in | ||
// "raw" bytes form or in the repeated type that matches the | ||
// tensor's data type. To use the raw representation 'raw_output_contents' | ||
// must be initialized with data for each tensor in the same order as | ||
// 'outputs'. For each tensor, the size of this content must match | ||
// what is expected by the tensor's shape and data type. The raw | ||
// data must be the flattened, one-dimensional, row-major order of | ||
// the tensor elements without any stride or padding between the | ||
// elements. Note that the FP16 and BF16 data types must be represented as | ||
// raw content as there is no specific data type for a 16-bit float type. | ||
// | ||
// If this field is specified then InferOutputTensor::contents must | ||
// not be specified for any output tensor. | ||
repeated bytes raw_output_contents = 6; | ||
} | ||
|
||
// An inference parameter value. Parameters are carried in | ||
// map<string, InferParameter> fields: the map key is the "name" of the | ||
// parameter and this message holds its "value", which is a boolean, | ||
// an integer, or a string. | ||
message InferParameter | ||
{ | ||
// The parameter value can be a boolean, an int64, or a string. | ||
// Because this is a oneof, at most one of the members below is set. | ||
oneof parameter_choice | ||
{ | ||
// A boolean parameter value. | ||
bool bool_param = 1; | ||
|
||
// An int64 parameter value. | ||
int64 int64_param = 2; | ||
|
||
// A string parameter value. | ||
string string_param = 3; | ||
} | ||
} | ||
|
||
// The data contained in a tensor represented by the repeated type | ||
// that matches the tensor's data type. Protobuf oneof is not used | ||
// because oneofs cannot contain repeated fields. There is no field for | ||
// 16-bit float data types in this message. | ||
message InferTensorContents | ||
{ | ||
// Representation for BOOL data type. The size must match what is | ||
// expected by the tensor's shape. The contents must be the flattened, | ||
// one-dimensional, row-major order of the tensor elements. | ||
repeated bool bool_contents = 1; | ||
|
||
// Representation for INT8, INT16, and INT32 data types. The size | ||
// must match what is expected by the tensor's shape. The contents | ||
// must be the flattened, one-dimensional, row-major order of the | ||
// tensor elements. | ||
repeated int32 int_contents = 2; | ||
|
||
// Representation for INT64 data types. The size must match what | ||
// is expected by the tensor's shape. The contents must be the | ||
// flattened, one-dimensional, row-major order of the tensor elements. | ||
repeated int64 int64_contents = 3; | ||
|
||
// Representation for UINT8, UINT16, and UINT32 data types. The size | ||
// must match what is expected by the tensor's shape. The contents | ||
// must be the flattened, one-dimensional, row-major order of the | ||
// tensor elements. | ||
repeated uint32 uint_contents = 4; | ||
|
||
// Representation for UINT64 data types. The size must match what | ||
// is expected by the tensor's shape. The contents must be the | ||
// flattened, one-dimensional, row-major order of the tensor elements. | ||
repeated uint64 uint64_contents = 5; | ||
|
||
// Representation for FP32 data type. The size must match what is | ||
// expected by the tensor's shape. The contents must be the flattened, | ||
// one-dimensional, row-major order of the tensor elements. | ||
repeated float fp32_contents = 6; | ||
|
||
// Representation for FP64 data type. The size must match what is | ||
// expected by the tensor's shape. The contents must be the flattened, | ||
// one-dimensional, row-major order of the tensor elements. | ||
repeated double fp64_contents = 7; | ||
|
||
// Representation for BYTES data type, one bytes entry per tensor | ||
// element. The size must match what is expected by the tensor's shape. | ||
// The contents must be the flattened, one-dimensional, row-major order | ||
// of the tensor elements. | ||
repeated bytes bytes_contents = 8; | ||
} |
1 change: 1 addition & 0 deletions
1
ods_ci/tests/Resources/Files/triton/kserve-triton-onnx-gRPC-input.json
Large diffs are not rendered by default.
Oops, something went wrong.
1 change: 1 addition & 0 deletions
1
ods_ci/tests/Resources/Files/triton/kserve-triton-onnx-gRPC-output.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"modelName":"densenet_onnx","modelVersion":"1","id":"test1","outputs":[{"name":"fc6_1","datatype":"FP32","shape":["1000"]}],"rawOutputContents":["crbswY2eFMHy2ljBr3gZwk41nsGFaUfAwRMFwmtDScGn5h7BrHeHwdxaF8GCdt7A4DwkwezErcF98U/BQW4Iwbxw88BHxz7BmN4fwVPN7sGNiG7B5blvwUd0P8F3MqTB4RxUwbKIucHzFATCn6r7wTvnpcH5wczBxbHcwZmxvsE2KabByCZewaYnrkAMlIzBQuTfv1Mozz84gQ9B+IBkwanLXMGIUhXBIDmSwG6UfcHxIofBeiZKwTf1KEDOJ0PBAunpwdgtwMF3dCLB7/CNQDjkUEEnZu5AYr9PQCD7DUATXPlByjflwEOTasGPwhs95/zYQfriPcEGbXTBrvKpQHfmI8F/0ofBT3UfQZVHAEBHosw+kavuwUTvScGkB8a/m3UEwWWaT0B+3q3ApvsUQfTd6b/5zw5A6lbTwN7mC0Eq3wvCr6kJwovtc8DlbVjBtE8RwjkXA8JVDobBiAFfwUy7h8HLr9vBOZ5/waKBC8KYJojAJNBDwSJe6D/kgLrBVIeNwDm/z8BMv2HByQWnwFvF3sArnAnCQ0/QwSzPAcKd45PBIta0wYjICsLMab/BMNVWwb8KC8AVgiTCbRhSwSSN60BG/ePAse6rwXwZvMGfAgbCeYuTvvCQLcDvJonAP6qNwUsRFj9HFY1ARftxwaXOAsE46/rB8EmFwaBmz8FmkaPB/XuXwd+XTcEIDQbCXAW8wdX6ZcDhwQTCy3VCwWxDQcFKUszA6aYCwrWFpUDvVpjB6ondP0pUQcE9BWvAG6TYwcASsMEJa5nBsIC6waho8cGww+PB6ta6wbxBxjyvvOlAnwBXQeRlTcDGOCvA5cOpwJj/KkLaobG+18rdwDbLMkC0W4jALF6uwP5p2cBdSqbBh512wAbP1sGLRZrB2UxhwcfuGcFKOMnBPlh+wRB3p8Gl1jbBEQ+/v4NMJkGoerG+ApdbwcTZDcExhpPB7NQLwpOJ777S7IHBOLQNQUUNo79UXRrAc8wNwKkJCkG5X0BAFNUSwchLTsDHdTlAqJxSwHkrlkDtUI+8MjQ1wX4DVD5nY/xAmfPhQHht8T+0+4hBlbOEwCi+V0Gy8au9WvGtQAHNO8FG+6TBt100wV4Dk8GOEp3BxxekwT/52b92r8rB3CUmwby4LcFJdnHB0cwJQbTjv8CeYh3B82xmQAfyS0HNhSlBWaenwOTCW0AoQB3Bcd7xwIXQrUHo3ie/LGRiQXphF0EytZNBkQ8DQTASp8BfMo9AiR8iwaMbmEAkx9nAyzfyP1xFTsENAGNBzM+Tv37pjb5UCM7APYwVwW0UukCEoc5ACX+BwVWM/cBEcZxA56qHP2UiNUCSYzrBfIGswEJn1kHwkpnA501/weD5m0Bt8CzBz5r8QNNSzUB+x/s/9ymbP48J5b6ae4ZBzri0QcTDp0FCtGZBrvSlQaGtrcFRjJ7BmdHawf0zYMHKkjnB+ECGwQxM78DugSbBTOnJwZPTPsDMZMDB1QwzwbaenMFAwo9BXFwVQRrQmz/0OL/BCHB/P84i5sFUmP7AEAsswWX0hcHqmJrBfS+TwXUyicEepmVA1vS2wQOJ88GHK7/BZs/BwclFm8GRoNLBn+QDwXdfqsB+nmFAFqxNQRmHkMFU5qLBJCnOwOB7q0DQS/bAzTeDwdm0uEGFOEVBJLugQVeFtsCqObTA9YWtwKTqEMLAyprBPml2wdO0WUC/t+rA5mMbQXFtkMFeyZRBwfWywSstE8FdGsC/1+eKQRPUq7+c/ObBCZBBwNaKsMDpu9hADkbHv29S1MF/iTrBqs4TwvL5BsJd3pTBmITIwSpjZ8Flo9/Bbm20wXY3HMI29RPCpuICwup/LMI3w
PXBZ9DOwUjeEMLMa/7Bz7gFwp9wfMFHxjPBIB20wTVmoMHR7KvBeEiWwQhSU8HJXwTBRmEOwvt/n8He5AvCEmkzwRFYLMFP2jjBB2jNwUhwCcHCUx/BkZ1BwS/r+8HIX+zBp2bgwBtyGMFz3uzBfjsdwrhsBsL9H4XBFm/gwCr328F+uDXBG+PVwfXgMsBxZoXB3uK7wUlJy8G5tZfBrr9WwdpAB8Kf25jBFjRTwZYhFMLJqETBSQqKwVzvgMG0h8LBQviZwaHCvMF6YfFBG99XwYO7rED1LIpAw8RDQfYE2MES+hxB+zNGQZVjV8GiYZlBWhUewdLeg0L2raXBFWJuQYfGiEFtzPA+5Vv9QLZDQEBAPNvAqOAdQhkNt0GtM2Y/0DAKwcJfiDz6aK9Bq5PjQZgApL+vrarBAM9XQWDnfkGLJypAEarxQGiMM8Fl3AVCa1cFQRak7sG81P/BAda1vyO3rUG3kZ9AtBOFQaFkj0BBB3FBcM2xQcErfUFgZpnA8hztQGiOUz+2zH9Btr0OQU0dIcF5D8nBPl0NwFnHEMHbNTXBctmqQOcoVkEZTpe/rm6oQf3lMkLBYKzB4n1PQWOmTsGL/9pAMKOjQNblqEHonttBRH0TwTtsyr9PEVjBamqFQTvAjEE58LlBySEPwc+MMsE5Jp1AtB+EwfsaQkEyvwDBhYkvwSrkG0CDGjhCel49vrQNw0DUXDRBUSHpwHhJh0An1ctBvQYLQUOvykANcZtB1qA7QVZ5xkDnAWHB69YawQXDB8HiuYJBA0SPQN78tMHHIA3BGP23QTiktUDfiIjB0iqFwReXqr8LJwBCaoNxQmRIVkLdtxxC9Cb0QJj8VUHmOUJAigJvwayBaMDxXHNBeQHaQOArPcH5hae/qLkJQnlKnsDest1BgmlkwZx2X0Gi8dVBQ+vLQTCFAUHa30C/bEbDwCJ/BEJbCRlCA9HXP57QnMHOtmVCJoeXQT9qHkJgGhs/lNXDQXrE6UEC7ZjBAoGRwZgLw0H6aXS+BQ3YwaUI+kGe48hBScYqQjw9wkDdMvJBYfOBQUmNq8FQlCtBaX2YQbwUTUJt4sxBDwSAQcIHl0Fz9TDBNTFeQbCzGkHQqZ1A6h3wvvt5L0KPAbRBA4QrwGhX1cADW9NAosItwaelbkFq6NVADy28QUEfkMBafZDBOolTQYt7K0HGhfJBFUwXQDKI7kEfBBrAlcpMwW4+vkFuS2FB8HBJQFVhQMGlU6ZB9gqAQeOtUMBztutBXfxUQVQRAUG4jG9ARCeSQVaX1b4hUZZAWkwNQSdF7UBYOb7ALOyPP8YQScGvYbdACM+bwYfXwUEPd8lBLp/RQdnsWEHwDcNAOCTiwewGx0G2VhtCSYSLvzaHIr/ybLZATErGQc9jE0GQ3cs/EH7WQGaGEkEkcra/R9J4Qau7AkIgKx9B3VH6QFOPcEKp3nVBWTxMwaXdl0C7BM3AjxejQdpShcE73zBBThCcQdPZ28FKZDZBVJMBwfualEG7WEBCCLNWQaDgEsKZ9b9BqbYeQT7PGcC1a+hBGEX2wH7NpMEAnhhCq9yxQQfWAkHXJ9NBoAQvvgcs1UFtvsZBR8bswOr5C8F8mwFCNsbHQaS2xsAWWzrBakuBQYEmIkF1uyBA1kIZQcIQpUHjLqxB11u2wdDaGEGki2rBqQvowFKE/UEF3uTA/QlOQbfkREHF1qnAcfJSwfn9JEHNnAW/em6hQLmFvEHho/hAhGgGwrxrskGMrYdAq+dJPqW0ikEI1KhBNEIGQnMRU8FS+YxBPWttQFaPhEFDiB0/KnFAwed2ocDA6pLA2/j6wVup3762GYBBzCsYP2mouL/+DYM+ifhJwExf3cApzf/Ad6imQHPbaMDP3qlBWtGwwPB2RLxF8MQ/z1edPwelI0FAOb7AooLSQCTktcAEG+tBE6KHQe6s6sAForFB2UikQPlGAsIRfDrBgw2/QVpveUG6WAQ/uC4kQWlphEG8Z
lNCVlA3QoLjZMECaQRCqrgewRwT20H8iBzBZCHcQYkcvsDs+BVBKRO5Pnq13EHsvGNB1JHXwMqe8UHmsEBB+kcgwK+nukC0giPBgSumwd7f2EEZSVfBf5IwQfsLmkFQs5ZB24+6QaRrtcHkLLM/L4jVPpzdc0EnNt9A05X6wJyRT0IZe+S/++BfwRv3JsAaKppBA+MoQTSR08EF1NtBJn8dQRmRAMLHw8vAwRgIQkc6A0L2PgvAtHKnQV0/m0HgNDLBpomjvxt1eT/eovVB6YYPQdx4XsGZrR5Ah7vBwL5Zt0GfdYNADTknwWzvBkJHyhNCrmB9QYycWsGf94NBKJ8GQfZSuUHyNjHALCVfQS9jyEBaKmBB9PeuQfytnMHmA2vAqiOPQdwq1MB6DCfApxEDwVuxP0FgqKRByOkUQDtUPsF3HUPASOiHQRsLdEEu8JG/fk+eQcgZG78hyFxBnvP+P9u3hkE2CxLBh6moQW7lJcFvYgRBvgyiwMaYlUAblTtCcEYEQfXyXsHC6oFA7QrSQfCLokEdI19B4F/ywMjGyUG1RPhB6NDIQYXc+MBE7idBzFMtQcZtRMHG7JbBEEp9wUOI28GxW/1BZ/10QVdqfkEbsaBA8H28QYEzg0BsYi9BlmH0QArmQ0F99D5Bcu84Qq6mvMHDENBAaqJqQtEfOEF6EhlC60T8QSHrQkAcDtJA0yKrQCTNAcFA2FPAVTKkwbfrK0Lqs+BAogAev5E7DkFiBInARb0bQaMw0D/FrhfBn6dnwSlKOULEP5TAnYCkQXvhFcGkDrVBdn0fQFGpqsGYLDBBp6VxwRKY0EFBFo/BZ6WlQYSpgsAaAKPAi164QXWXlUFE76g/k1uhwUpLW0HuHXTB+IstQfpDzUC9XW/ACTA9QclM8kGxhwzBDwQTQRLxk0ELp4BBqc3LQQPHFEKkxiZCKy7RQPK6AcF0HCNCwHm7QfOCQ0E1ylFBBTgiwV7njEGt8rVB9abWQV4XzT/6Fu5AXX0kwedACML+Ev3BUKUGQC7KQULFOA9BMpncQb/n40FiF9pBwQclwRE6UkH+mjRBWJalQMcx/EArevXAODoLwXKIVUFfzYBAO1nWQG0+kEEjWqhBtsuvwD+qCEFhaRZA7nn6voqpwEHRZBZBt89Dv1JEOMFfYavB1u4tQO5TlD8xF1vA7k2ZvpIQG8FB629B6KemQVoelUFl67VBfZqKQU6UV0EAhBHBNCwEQin06UBfib1AsfsGQbsUb8BKyAlBSaN0QW63zcDjTAhBvaAGwZD+RkAOGxvBKlIIQo1aLEJMy39CqhEAQoOoxsCtt+NADdXiPymzDMGoeoDBnjwJwYaWQcF4GShB+8sdvzs1JcGnHjHByDiAwBwcl0EWbVvAegd1wO9soEG9BwrCBmWiQd2Bib+8jYjBw2Jhv5Adu8G4BIHBv0z5wWZ3BcKElV/BMe6wQAPnvcGmp5JAqHhTwQ=="]} | ||
63 changes: 63 additions & 0 deletions
63
ods_ci/tests/Resources/Files/triton/triton_onnx_gRPC_servingruntime.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
# ServingRuntime that starts tritonserver with both gRPC (port 9000) and | ||
# HTTP (port 8080) enabled; only port 9000 is listed under ports. | ||
apiVersion: serving.kserve.io/v1alpha1 | ||
kind: ServingRuntime | ||
metadata: | ||
name: triton-kserve-grpc | ||
spec: | ||
annotations: | ||
prometheus.kserve.io/path: /metrics | ||
prometheus.kserve.io/port: "8002" | ||
containers: | ||
- args: | ||
- tritonserver | ||
- --model-store=/mnt/models | ||
- --grpc-port=9000 | ||
- --http-port=8080 | ||
- --allow-grpc=true | ||
- --allow-http=true | ||
image: nvcr.io/nvidia/tritonserver:23.05-py3 | ||
name: kserve-container | ||
ports: | ||
# The port value matches the --grpc-port argument above. | ||
- containerPort: 9000 | ||
name: h2c | ||
protocol: TCP | ||
volumeMounts: | ||
- mountPath: /dev/shm | ||
name: shm | ||
resources: | ||
limits: | ||
cpu: "1" | ||
memory: 2Gi | ||
requests: | ||
cpu: "1" | ||
memory: 2Gi | ||
protocolVersions: | ||
- v2 | ||
- grpc-v2 | ||
supportedModelFormats: | ||
- autoSelect: true | ||
name: tensorrt | ||
priority: 1 | ||
version: "8" | ||
- autoSelect: true | ||
name: tensorflow | ||
priority: 1 | ||
version: "1" | ||
- autoSelect: true | ||
name: tensorflow | ||
priority: 1 | ||
version: "2" | ||
- autoSelect: true | ||
name: onnx | ||
priority: 1 | ||
version: "1" | ||
# NOTE(review): unlike the other formats, pytorch sets no autoSelect or | ||
# priority - confirm this is intentional. | ||
- name: pytorch | ||
version: "1" | ||
- autoSelect: true | ||
name: triton | ||
priority: 1 | ||
version: "2" | ||
volumes: | ||
# NOTE(review): "emptyDir: null" is followed by medium/sizeLimit; confirm | ||
# that these two keys are nested under emptyDir as intended. | ||
- emptyDir: null | ||
medium: Memory | ||
sizeLimit: 2Gi | ||
name: shm |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Aren't the input and output the same as the ones used in the REST test?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's different
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why? Is it because it is using a different model? Otherwise I wouldn't expect the input to change based on the protocol.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The input is different, so the output is also different for REST and gRPC
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, of course. I meant that I was expecting the input (and therefore the output) for the same model to be the same regardless of the protocol.