Update yaml file for the tutorials (#98)
Signed-off-by: junchenj <[email protected]>
junchenj authored Feb 9, 2025
1 parent 1ad41fd commit b06f7b3
Showing 7 changed files with 30 additions and 5 deletions.
11 changes: 6 additions & 5 deletions tutorials/01-minimal-helm-installation.md
@@ -36,9 +36,10 @@ The vLLM Production Stack repository provides a predefined configuration file, `
 
 ```yaml
 servingEngineSpec:
+  runtimeClassName: ""
   modelSpec:
   - name: "opt125m"
-    repository: "lmcache/vllm-openai"
+    repository: "vllm/vllm-openai"
     tag: "latest"
     modelURL: "facebook/opt-125m"

@@ -48,8 +49,6 @@ servingEngineSpec:
     requestMemory: "16Gi"
     requestGPU: 1
 
-    # set replicaCount to 2 or more to set up multiple vLLM instances
-
     pvcStorage: "10Gi"
 ```

@@ -65,13 +64,15 @@ Explanation of the key fields:
 - **`requestGPU`**: Specifies the number of GPUs required.
 - **`pvcStorage`**: Allocates persistent storage for the model.
 
+**Note:** If you intend to set up TWO vllm pods, please refer to `tutorials/assets/values-01-2pods-minimal-example.yaml`.
+
 #### 1.2: Deploy the Helm Chart
 
 Deploy the Helm chart using the predefined configuration file:
 
 ```bash
-helm repo add vllm https://vllm-project.github.io/production-stack
-helm install vllm vllm/vllm-stack -f tutorials/assets/values-01-minimal-example.yaml
+sudo helm repo add vllm https://vllm-project.github.io/production-stack
+sudo helm install vllm vllm/vllm-stack -f tutorials/assets/values-01-minimal-example.yaml
 ```
 
 Explanation of the command:
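For context, the change above only adds `sudo` to the tutorial's two Helm commands; the release name and values file are unchanged. Below is a minimal sanity check after installing, sketched under the assumption that `kubectl` points at the same cluster and follows the tutorial's `sudo` convention (exact pod names depend on the chart's templates):

```bash
# Inspect the Helm release created by the tutorial's install command
sudo helm status vllm

# List the pods the chart created; names vary with the chart version
sudo kubectl get pods
```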
19 changes: 19 additions & 0 deletions tutorials/assets/values-01-2pods-minimal-example.yaml
@@ -0,0 +1,19 @@
+servingEngineSpec:
+  runtimeClassName: ""
+  modelSpec:
+  - name: "opt125m"
+    repository: "vllm/vllm-openai"
+    tag: "latest"
+    modelURL: "facebook/opt-125m"
+
+    replicaCount: 2
+
+    requestCPU: 6
+    requestMemory: "16Gi"
+    requestGPU: 0.5
+
+    pvcStorage: "10Gi"
+
+    vllmConfig:
+      maxModelLen: 1024
+      extraArgs: ["--disable-log-requests", "--gpu-memory-utilization", "0.4"]
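This new file is what the **Note** added to `tutorials/01-minimal-helm-installation.md` points to. A hedged reading of the two-pod settings: `replicaCount: 2` starts two vLLM pods, and `requestGPU: 0.5` together with `--gpu-memory-utilization 0.4` appears intended to let both replicas share one GPU, which only works if the cluster's device plugin supports fractional GPU requests. Deploying it would follow the same pattern as the minimal example:

```bash
# Same chart and release pattern as the tutorial's install command,
# pointed at the new two-pod values file
sudo helm install vllm vllm/vllm-stack -f tutorials/assets/values-01-2pods-minimal-example.yaml
```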
1 change: 1 addition & 0 deletions tutorials/assets/values-01-minimal-example.yaml
@@ -1,4 +1,5 @@
 servingEngineSpec:
+  runtimeClassName: ""
   modelSpec:
   - name: "opt125m"
     repository: "vllm/vllm-openai"
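The same one-line addition, `runtimeClassName: ""`, recurs in every values file below. As a sketch of what the field controls, assuming the chart forwards it to the pods' `spec.runtimeClassName`: the empty string keeps the cluster's default container runtime, while a named Kubernetes RuntimeClass can be selected instead. The `"nvidia"` value below is purely illustrative, not something this commit sets:

```yaml
servingEngineSpec:
  # Default after this commit: use the cluster's default container runtime
  runtimeClassName: ""
  # Illustrative alternative (assumed cluster setup): select a RuntimeClass
  # named "nvidia" where one is defined for GPU workloads
  # runtimeClassName: "nvidia"
```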
1 change: 1 addition & 0 deletions tutorials/assets/values-02-basic-config.yaml
@@ -1,4 +1,5 @@
 servingEngineSpec:
+  runtimeClassName: ""
   modelSpec:
   - name: "llama3"
     repository: "vllm/vllm-openai"
1 change: 1 addition & 0 deletions tutorials/assets/values-03-match-pv.yaml
@@ -1,4 +1,5 @@
 servingEngineSpec:
+  runtimeClassName: ""
   modelSpec:
   - name: "llama3"
     repository: "vllm/vllm-openai"
1 change: 1 addition & 0 deletions tutorials/assets/values-04-multiple-models.yaml
@@ -1,4 +1,5 @@
 servingEngineSpec:
+  runtimeClassName: ""
   modelSpec:
   - name: "llama3"
     repository: "vllm/vllm-openai"
1 change: 1 addition & 0 deletions tutorials/assets/values-05-cpu-offloading.yaml
@@ -1,4 +1,5 @@
 servingEngineSpec:
+  runtimeClassName: ""
   modelSpec:
   - name: "mistral"
     repository: "lmcache/vllm-openai"
