Skip to content

Commit

Permalink
feat: Pass ServingRuntime inference endpoint to built-in adapter (#227)
Browse files Browse the repository at this point in the history
#### Motivation

Some built-in runtime adapters may need the model server's inference endpoint in addition to the "management" port number that is already passed, since the two can differ.

#### Modifications

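- Set a new `RUNTIME_DATA_ENDPOINT` env var on the built-in adapter container, taken from the ServingRuntime's gRPC data endpoint when one is specified and otherwise set to `port:<runtime management port>`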
- Derive the `ADAPTER_PORT` env var value from the ServingRuntime `grpcEndpoint` field (which must be of the form `port:N`) instead of hardcoding it to 8085; 8085 remains the default when the field is not set


Signed-off-by: Nick Hill <[email protected]>
  • Loading branch information
njhill authored Sep 8, 2022
1 parent fc1a494 commit 96ca69d
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 2 deletions.
28 changes: 26 additions & 2 deletions controllers/modelmesh/runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ package modelmesh

import (
"fmt"
"regexp"
"strconv"
"strings"

Expand Down Expand Up @@ -187,15 +188,38 @@ func (m *Deployment) addRuntimeToDeployment(deployment *appsv1.Deployment) error
ReadOnly: true,
})

var rtDataEndpoint string
if rt.Spec.GrpcDataEndpoint != nil {
rtDataEndpoint = *rt.Spec.GrpcDataEndpoint
if err := addDomainSocketMount(rt, &builtInAdapterContainer); err != nil {
return err
}
} else {
rtDataEndpoint = fmt.Sprintf("port:%d", rt.Spec.BuiltInAdapter.RuntimeManagementPort)
}

adapterPort := "8085"
if rt.Spec.GrpcMultiModelManagementEndpoint != nil {
ep := *rt.Spec.GrpcMultiModelManagementEndpoint
if match, _ := regexp.MatchString("^port:[0-9]+$", ep); !match {
return fmt.Errorf("Built-in adapter grpcEndpoint must be of the form \"port:N\": %s", ep)
}
adapterPort = strings.TrimPrefix(ep, "port:")
}

builtInAdapterContainer.Env = []corev1.EnvVar{
{
Name: "ADAPTER_PORT",
Value: "8085",
Value: adapterPort,
},
{
Name: "RUNTIME_PORT",
Value: strconv.Itoa(rt.Spec.BuiltInAdapter.RuntimeManagementPort),
},
{
Name: "RUNTIME_DATA_ENDPOINT",
Value: rtDataEndpoint,
},
{
Name: "CONTAINER_MEM_REQ_BYTES",
ValueFrom: &corev1.EnvVarSource{
Expand All @@ -222,7 +246,7 @@ func (m *Deployment) addRuntimeToDeployment(deployment *appsv1.Deployment) error
Value: runtimeVersion,
},
{}, {}, {}, {}, // allocate larger array to avoid reallocation
}[:7]
}[:8]

if mlc, ok := rt.Annotations["maxLoadingConcurrency"]; ok {
builtInAdapterContainer.Env = append(builtInAdapterContainer.Env, corev1.EnvVar{
Expand Down
10 changes: 10 additions & 0 deletions controllers/testdata/servingruntime_controller.golden
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ spec:
value: "8085"
- name: RUNTIME_PORT
value: "8001"
- name: RUNTIME_DATA_ENDPOINT
value: port:8001
- name: CONTAINER_MEM_REQ_BYTES
valueFrom:
resourceFieldRef:
Expand Down Expand Up @@ -362,6 +364,8 @@ spec:
value: "8085"
- name: RUNTIME_PORT
value: "8001"
- name: RUNTIME_DATA_ENDPOINT
value: port:8001
- name: CONTAINER_MEM_REQ_BYTES
valueFrom:
resourceFieldRef:
Expand Down Expand Up @@ -566,6 +570,8 @@ spec:
value: "8085"
- name: RUNTIME_PORT
value: "8001"
- name: RUNTIME_DATA_ENDPOINT
value: port:8001
- name: CONTAINER_MEM_REQ_BYTES
valueFrom:
resourceFieldRef:
Expand Down Expand Up @@ -810,6 +816,8 @@ spec:
value: "8085"
- name: RUNTIME_PORT
value: "8888"
- name: RUNTIME_DATA_ENDPOINT
value: port:8001
- name: CONTAINER_MEM_REQ_BYTES
valueFrom:
resourceFieldRef:
Expand Down Expand Up @@ -1046,6 +1054,8 @@ spec:
value: "8085"
- name: RUNTIME_PORT
value: "8001"
- name: RUNTIME_DATA_ENDPOINT
value: port:8001
- name: CONTAINER_MEM_REQ_BYTES
valueFrom:
resourceFieldRef:
Expand Down

0 comments on commit 96ca69d

Please sign in to comment.