From 4823f0449645cdb7cbf864a9e78be094315cf225 Mon Sep 17 00:00:00 2001
From: Nick Hill
Date: Thu, 25 Aug 2022 16:56:40 -0700
Subject: [PATCH] feat: Add V2 Inference API support for OVMS runtime

Motivation

Intel's OpenVINO Model Server has historically only supported the v1
(TFS) prediction API, but may also support v2 in upcoming versions.

Modifications

Adjust the built-in OVMS adapter to advertise the V2 gRPC Service APIs
in addition to V1.

Result

KServe V2 Inference API will work with OVMS in ModelMesh Serving once
supported by the OVMS container.

Signed-off-by: Nick Hill
---
 model-mesh-ovms-adapter/server/const.go    | 6 ++++--
 model-mesh-ovms-adapter/server/server.go   | 8 ++++++--
 model-mesh-triton-adapter/server/server.go | 2 --
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/model-mesh-ovms-adapter/server/const.go b/model-mesh-ovms-adapter/server/const.go
index 1b05d540..8221e648 100644
--- a/model-mesh-ovms-adapter/server/const.go
+++ b/model-mesh-ovms-adapter/server/const.go
@@ -14,6 +14,8 @@ package server
 
 const (
-	ovmsModelSubdir   string = "_ovms_models"
-	onnxModelFilename string = "model.onnx"
+	tfsGrpcServiceName      string = "tensorflow.serving.PredictionService"
+	kServeV2GrpcServiceName string = "inference.GRPCInferenceService"
+	ovmsModelSubdir         string = "_ovms_models"
+	onnxModelFilename       string = "model.onnx"
 )
 
diff --git a/model-mesh-ovms-adapter/server/server.go b/model-mesh-ovms-adapter/server/server.go
index 8a71bc4c..679cf397 100644
--- a/model-mesh-ovms-adapter/server/server.go
+++ b/model-mesh-ovms-adapter/server/server.go
@@ -221,10 +221,14 @@ func (s *OvmsAdapterServer) RuntimeStatus(ctx context.Context, req *mmesh.Runtim
 	runtimeStatus.RuntimeVersion = s.AdapterConfig.RuntimeVersion
 	runtimeStatus.LimitModelConcurrency = s.AdapterConfig.LimitModelConcurrency > 0
 
-	// OVMS only supports the Predict API currently
+	path_1 := []uint32{1}
 	path_1_1 := []uint32{1, 1} // PredictRequest[model_spec][name]
 	mis := make(map[string]*mmesh.RuntimeStatusResponse_MethodInfo)
-	mis["tensorflow.serving.PredictionService/Predict"] = &mmesh.RuntimeStatusResponse_MethodInfo{IdInjectionPath: path_1_1}
+	// V1 (TFS) API
+	mis[tfsGrpcServiceName+"/Predict"] = &mmesh.RuntimeStatusResponse_MethodInfo{IdInjectionPath: path_1_1}
+	// KServe V2 API
+	mis[kServeV2GrpcServiceName+"/ModelInfer"] = &mmesh.RuntimeStatusResponse_MethodInfo{IdInjectionPath: path_1}
+	mis[kServeV2GrpcServiceName+"/ModelMetadata"] = &mmesh.RuntimeStatusResponse_MethodInfo{IdInjectionPath: path_1}
 	runtimeStatus.MethodInfos = mis
 
 	log.Info("runtimeStatus", "Status", runtimeStatus)
diff --git a/model-mesh-triton-adapter/server/server.go b/model-mesh-triton-adapter/server/server.go
index 5dc126c4..7c38bb19 100644
--- a/model-mesh-triton-adapter/server/server.go
+++ b/model-mesh-triton-adapter/server/server.go
@@ -261,8 +261,6 @@ func (s *TritonAdapterServer) RuntimeStatus(ctx context.Context, req *mmesh.Runt
 
 	path1 := []uint32{1}
 	mis := make(map[string]*mmesh.RuntimeStatusResponse_MethodInfo)
-
-	// only support Transform for now
 	mis[tritonServiceName+"/ModelInfer"] = &mmesh.RuntimeStatusResponse_MethodInfo{IdInjectionPath: path1}
 	mis[tritonServiceName+"/ModelMetadata"] = &mmesh.RuntimeStatusResponse_MethodInfo{IdInjectionPath: path1}
 	runtimeStatus.MethodInfos = mis
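
-- 
Note (after the signature separator, so `git am` still applies cleanly;
this is context, not part of the diff): each IdInjectionPath value above
is a chain of protobuf field numbers identifying the string field into
which ModelMesh injects its internal model ID before forwarding a
request. {1, 1} points at PredictRequest.model_spec.name in the TFS API;
{1} points at ModelInferRequest.model_name (and ModelMetadataRequest.name)
in the KServe V2 API. The Go sketch below is a minimal illustration of
that idea using proto3 wire-format merge semantics; injectModelID and the
example model ID are hypothetical and are not ModelMesh's actual
injection code.

package main

import (
	"fmt"

	"google.golang.org/protobuf/encoding/protowire"
)

// injectModelID (hypothetical) appends the model ID to the raw request
// bytes at the field-number path taken from IdInjectionPath. Because
// proto3 merges duplicate fields on the wire -- embedded messages
// field by field, with the last value of a singular scalar winning --
// appending the field is equivalent to overwriting it. Assumes path is
// non-empty and addresses nested messages ending in a string field.
func injectModelID(raw []byte, path []uint32, modelID string) []byte {
	// Innermost field: the length-delimited string holding the model name.
	b := protowire.AppendTag(nil, protowire.Number(path[len(path)-1]), protowire.BytesType)
	b = protowire.AppendString(b, modelID)
	// Wrap in one embedded message per remaining path element, inside out.
	for i := len(path) - 2; i >= 0; i-- {
		msg := protowire.AppendTag(nil, protowire.Number(path[i]), protowire.BytesType)
		b = protowire.AppendBytes(msg, b)
	}
	return append(raw, b...)
}

func main() {
	// {1, 1}: PredictRequest.model_spec.name (TFS Predict).
	fmt.Printf("tfs v1:    %x\n", injectModelID(nil, []uint32{1, 1}, "example-model-id"))
	// {1}: ModelInferRequest.model_name (KServe V2 ModelInfer).
	fmt.Printf("kserve v2: %x\n", injectModelID(nil, []uint32{1}, "example-model-id"))
}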