Skip to content

Commit

Permalink
update caikit+tgis runtime yaml
Browse files Browse the repository at this point in the history
  • Loading branch information
bdattoma committed Feb 14, 2024
1 parent a56e45f commit 8d5900e
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,41 @@ spec:
containers:
# Text-generation backend container, started with `text-generation-launcher`.
- name: kserve-container
  image: quay.io/modh/text-generation-inference@sha256:a17a2868644929ee844ceb2778ac3f6db0936824d9b89d11ea7aa059466fcd0b
  # Note: cannot add readiness/liveness probes to this container because
  # knative will refuse them.
  command: ["text-generation-launcher"]
  # Passes /mnt/models/artifacts/ to the launcher as --model-name.
  args: ["--model-name=/mnt/models/artifacts/"]
  env:
  - name: TRANSFORMERS_CACHE
    value: /tmp/transformers_cache
  # resources: # configure as required
  #   requests:
  #     cpu: 8
  #     memory: 16Gi
# Caikit container; its command runs the caikit gRPC server module.
- name: transformer-container
  image: quay.io/modh/caikit-tgis-serving@sha256:ce6b66bb847608dac5eacd7f9123d2a076a06893d7f37f2da5876a8930527513
  command: ["python", "-m", "caikit.runtime.grpc_server"]
  env:
  - name: RUNTIME_LOCAL_MODELS_DIR
    value: /mnt/models
  ports:
  # Port 8085/TCP, exposed under the port name `h2c`.
  - containerPort: 8085
    name: h2c
    protocol: TCP
  # Both probes are exec probes that run `python -m caikit_health_probe`
  # with the probe type as the final argument.
  readinessProbe:
    exec:
      command:
      - python
      - -m
      - caikit_health_probe
      - readiness
  livenessProbe:
    exec:
      command:
      - python
      - -m
      - caikit_health_probe
      - liveness
  # resources: # configure as required
  #   requests:
  #     cpu: 8
  #     memory: 16Gi
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,43 @@ spec:
# Text-generation backend container, started with `text-generation-launcher`.
- name: kserve-container
  image: quay.io/modh/text-generation-inference@sha256:a17a2868644929ee844ceb2778ac3f6db0936824d9b89d11ea7aa059466fcd0b
  command: ["text-generation-launcher"]
  # Passes /mnt/models/artifacts/ to the launcher as --model-name.
  args: ["--model-name=/mnt/models/artifacts/"]
  env:
  - name: TRANSFORMERS_CACHE
    value: /tmp/transformers_cache
  # resources: # configure as required
  #   requests:
  #     cpu: 8
  #     memory: 16Gi
# Caikit container for the HTTP variant: no explicit command is set here, and
# the env flags below switch gRPC off and HTTP on.
- name: transformer-container
  image: quay.io/modh/caikit-tgis-serving@sha256:ce6b66bb847608dac5eacd7f9123d2a076a06893d7f37f2da5876a8930527513
  env:
  - name: RUNTIME_LOCAL_MODELS_DIR
    value: /mnt/models
  - name: TRANSFORMERS_CACHE
    value: /tmp/transformers_cache
  # Env values must be strings, so the boolean-looking flags stay quoted.
  - name: RUNTIME_GRPC_ENABLED
    value: "false"
  - name: RUNTIME_HTTP_ENABLED
    value: "true"
  ports:
  - containerPort: 8080
    protocol: TCP
  # Both probes are exec probes that run `python -m caikit_health_probe`
  # with the probe type as the final argument.
  readinessProbe:
    exec:
      command:
      - python
      - -m
      - caikit_health_probe
      - readiness
  livenessProbe:
    exec:
      command:
      - python
      - -m
      - caikit_health_probe
      - liveness
  # resources: # configure as required
  #   requests:
  #     cpu: 8
  #     memory: 16Gi

0 comments on commit 8d5900e

Please sign in to comment.