From 4823f0449645cdb7cbf864a9e78be094315cf225 Mon Sep 17 00:00:00 2001
From: Nick Hill
Date: Thu, 25 Aug 2022 16:56:40 -0700
Subject: [PATCH] feat: Add V2 Inference API support for OVMS runtime

Motivation

Intel's OpenVINO Model Server has historically only supported the v1
(TFS) prediction API, but may also support v2 in upcoming versions.

Modifications

Adjust the built-in OVMS adapter to advertise the V2 gRPC Service APIs
in addition to V1.

Result

KServe V2 Inference API will work with OVMS in ModelMesh Serving once
supported by the OVMS container.

Signed-off-by: Nick Hill
---
 model-mesh-ovms-adapter/server/const.go    | 6 ++++--
 model-mesh-ovms-adapter/server/server.go   | 8 ++++++--
 model-mesh-triton-adapter/server/server.go | 2 --
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/model-mesh-ovms-adapter/server/const.go b/model-mesh-ovms-adapter/server/const.go
index 1b05d540..8221e648 100644
--- a/model-mesh-ovms-adapter/server/const.go
+++ b/model-mesh-ovms-adapter/server/const.go
@@ -14,6 +14,8 @@ package server
 
 const (
-	ovmsModelSubdir   string = "_ovms_models"
-	onnxModelFilename string = "model.onnx"
+	tfsGrpcServiceName      string = "tensorflow.serving.PredictionService"
+	kServeV2GrpcServiceName string = "inference.GRPCInferenceService"
+	ovmsModelSubdir         string = "_ovms_models"
+	onnxModelFilename       string = "model.onnx"
 )
 
diff --git a/model-mesh-ovms-adapter/server/server.go b/model-mesh-ovms-adapter/server/server.go
index 8a71bc4c..679cf397 100644
--- a/model-mesh-ovms-adapter/server/server.go
+++ b/model-mesh-ovms-adapter/server/server.go
@@ -221,10 +221,14 @@ func (s *OvmsAdapterServer) RuntimeStatus(ctx context.Context, req *mmesh.Runtim
 	runtimeStatus.RuntimeVersion = s.AdapterConfig.RuntimeVersion
 	runtimeStatus.LimitModelConcurrency = s.AdapterConfig.LimitModelConcurrency > 0
 
-	// OVMS only supports the Predict API currently
+	path_1 := []uint32{1}
 	path_1_1 := []uint32{1, 1} // PredictRequest[model_spec][name]
 	mis := make(map[string]*mmesh.RuntimeStatusResponse_MethodInfo)
-	mis["tensorflow.serving.PredictionService/Predict"] = &mmesh.RuntimeStatusResponse_MethodInfo{IdInjectionPath: path_1_1}
+	// V1 (TFS) API
+	mis[tfsGrpcServiceName+"/Predict"] = &mmesh.RuntimeStatusResponse_MethodInfo{IdInjectionPath: path_1_1}
+	// KServe V2 API
+	mis[kServeV2GrpcServiceName+"/ModelInfer"] = &mmesh.RuntimeStatusResponse_MethodInfo{IdInjectionPath: path_1}
+	mis[kServeV2GrpcServiceName+"/ModelMetadata"] = &mmesh.RuntimeStatusResponse_MethodInfo{IdInjectionPath: path_1}
 	runtimeStatus.MethodInfos = mis
 
 	log.Info("runtimeStatus", "Status", runtimeStatus)
diff --git a/model-mesh-triton-adapter/server/server.go b/model-mesh-triton-adapter/server/server.go
index 5dc126c4..7c38bb19 100644
--- a/model-mesh-triton-adapter/server/server.go
+++ b/model-mesh-triton-adapter/server/server.go
@@ -261,8 +261,6 @@ func (s *TritonAdapterServer) RuntimeStatus(ctx context.Context, req *mmesh.Runt
 
 	path1 := []uint32{1}
 	mis := make(map[string]*mmesh.RuntimeStatusResponse_MethodInfo)
-
-	// only support Transform for now
 	mis[tritonServiceName+"/ModelInfer"] = &mmesh.RuntimeStatusResponse_MethodInfo{IdInjectionPath: path1}
 	mis[tritonServiceName+"/ModelMetadata"] = &mmesh.RuntimeStatusResponse_MethodInfo{IdInjectionPath: path1}
 	runtimeStatus.MethodInfos = mis
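
-- 
Note (after the signature separator, so `git am` still applies cleanly;
this is context, not part of the diff): each IdInjectionPath value above
is a chain of protobuf field numbers identifying the string field into
which ModelMesh injects its internal model ID before forwarding a
request. {1, 1} points at PredictRequest.model_spec.name in the TFS API;
{1} points at ModelInferRequest.model_name (and ModelMetadataRequest.name)
in the KServe V2 API. The Go sketch below is a minimal illustration of
that idea using proto3 wire-format merge semantics; injectModelID and the
example model ID are hypothetical and are not ModelMesh's actual
injection code.

package main

import (
	"fmt"

	"google.golang.org/protobuf/encoding/protowire"
)

// injectModelID (hypothetical) appends the model ID to the raw request
// bytes at the field-number path taken from IdInjectionPath. Because
// proto3 merges duplicate fields on the wire -- embedded messages
// field by field, with the last value of a singular scalar winning --
// appending the field is equivalent to overwriting it. Assumes path is
// non-empty and addresses nested messages ending in a string field.
func injectModelID(raw []byte, path []uint32, modelID string) []byte {
	// Innermost field: the length-delimited string holding the model name.
	b := protowire.AppendTag(nil, protowire.Number(path[len(path)-1]), protowire.BytesType)
	b = protowire.AppendString(b, modelID)
	// Wrap in one embedded message per remaining path element, inside out.
	for i := len(path) - 2; i >= 0; i-- {
		msg := protowire.AppendTag(nil, protowire.Number(path[i]), protowire.BytesType)
		b = protowire.AppendBytes(msg, b)
	}
	return append(raw, b...)
}

func main() {
	// {1, 1}: PredictRequest.model_spec.name (TFS Predict).
	fmt.Printf("tfs v1:    %x\n", injectModelID(nil, []uint32{1, 1}, "example-model-id"))
	// {1}: ModelInferRequest.model_name (KServe V2 ModelInfer).
	fmt.Printf("kserve v2: %x\n", injectModelID(nil, []uint32{1}, "example-model-id"))
}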