Skip to content

Commit

Permalink
feat: support resource limits, added docs for them
Browse files Browse the repository at this point in the history
Close #2

Signed-off-by: Neko Ayaka <[email protected]>
  • Loading branch information
nekomeowww committed Apr 25, 2024
1 parent aa668b5 commit c464635
Show file tree
Hide file tree
Showing 10 changed files with 263 additions and 9 deletions.
11 changes: 8 additions & 3 deletions api/ollama/v1/model_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,20 +59,25 @@ type ModelSpec struct {
// +patchMergeKey=name
// +patchStrategy=merge
ImagePullSecrets []corev1.LocalObjectReference `json:"imagePullSecrets,omitempty" patchStrategy:"merge" patchMergeKey:"name" protobuf:"bytes,4,rep,name=imagePullSecrets"`
// Compute Resources required by this container.
// Cannot be updated.
// More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
// +optional
Resources corev1.ResourceRequirements `json:"resources,omitempty" protobuf:"bytes,5,opt,name=resources"`
// storageClassName is the name of StorageClass to which this persistent volume belongs. Empty value
// means that this volume does not belong to any StorageClass.
// +optional
StorageClassName *string `json:"storageClassName,omitempty" protobuf:"bytes,5,opt,name=storageClassName"`
StorageClassName *string `json:"storageClassName,omitempty" protobuf:"bytes,6,opt,name=storageClassName"`
// persistentVolumeClaimVolumeSource represents a reference to a
// PersistentVolumeClaim in the same namespace.
// More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims
// +optional
PersistentVolumeClaim *corev1.PersistentVolumeClaimVolumeSource `json:"persistentVolumeClaim,omitempty" protobuf:"bytes,6,opt,name=persistentVolumeClaim"`
PersistentVolumeClaim *corev1.PersistentVolumeClaimVolumeSource `json:"persistentVolumeClaim,omitempty" protobuf:"bytes,7,opt,name=persistentVolumeClaim"`
// spec defines a specification of a persistent volume owned by the cluster.
// Provisioned by an administrator.
// More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistent-volumes
// +optional
PersistentVolume *ModelPersistentVolumeSpec `json:"persistentVolume,omitempty" protobuf:"bytes,7,opt,name=persistentVolume"`
PersistentVolume *ModelPersistentVolumeSpec `json:"persistentVolume,omitempty" protobuf:"bytes,8,opt,name=persistentVolume"`
}

type ConditionType string
Expand Down
58 changes: 58 additions & 0 deletions config/crd/bases/ollama.ayaka.io_models.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,64 @@ spec:
zero and not specified. Defaults to 1.
format: int32
type: integer
resources:
description: |-
Compute Resources required by this container.
Cannot be updated.
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
properties:
claims:
description: |-
Claims lists the names of resources, defined in spec.resourceClaims,
that are used by this container.
This is an alpha field and requires enabling the
DynamicResourceAllocation feature gate.
This field is immutable. It can only be set for containers.
items:
description: ResourceClaim references one entry in PodSpec.ResourceClaims.
properties:
name:
description: |-
Name must match the name of one entry in pod.spec.resourceClaims of
the Pod where this field is used. It makes that resource available
inside a container.
type: string
required:
- name
type: object
type: array
x-kubernetes-list-map-keys:
- name
x-kubernetes-list-type: map
limits:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: |-
Limits describes the maximum amount of compute resources allowed.
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
type: object
requests:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: |-
Requests describes the minimum amount of compute resources required.
If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
otherwise to an implementation-defined value. Requests cannot exceed Limits.
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
type: object
type: object
storageClassName:
description: |-
storageClassName is the name of StorageClass to which this persistent volume belongs. Empty value
Expand Down
81 changes: 81 additions & 0 deletions docs/pages/en/references/cli/commands/deploy.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,14 @@ kollama deploy phi --expose
kollama deploy phi --expose --service-type=LoadBalancer
```

### Deploy [`Model`](/pages/en/references/crd/model) with resources limits

The following example deploys the `phi` model with CPU limit to `1` and memory limit to `1Gi`.

```shell
kollama deploy phi --limit=cpu=1 --limit=memory=1Gi
```

## Flags

### `--namespace`
Expand All @@ -55,6 +63,79 @@ Model image to deploy.
- If not specified, the [`Model`](/pages/en/references/crd/model) name will be used as the image name (will be pulled from `registry.ollama.ai/library/<model name>` by default if no registry is specified). For example, if the [`Model`](/pages/en/references/crd/model) name is `phi`, the image name will be `registry.ollama.ai/library/phi:latest`.
- If not specified, the tag will be `latest`.

### `--limit` (supports multiple flags)

> Multiple limits can be specified by using the flag multiple times.

Resource limits for the deployed [`Model`](/pages/en/references/crd/model). This is useful for clusters that don't have a large enough number of resources, or if you want to deploy multiple [`Models`](/pages/en/references/crd/model) in a cluster with limited resources.

::: tip For resource limits on NVIDIA, AMD GPUs...

In Kubernetes, any GPU resource follows this pattern for resources labels:

```yaml
resources:
limits:
gpu-vendor.example/example-gpu: 1 # requesting 1 GPU
```
Using `nvidia.com/gpu` allows you to limit the number of NVIDIA GPUs; therefore, when using `kollama deploy` you can use `--limit nvidia.com/gpu=1` to specify the number of NVIDIA GPUs as `1`:

```shell
kollama deploy phi --limit=nvidia.com/gpu=1
```

This is what it may look like in the YAML configuration file:


```yaml
resources:
limits:
nvidia.com/gpu: 1 # requesting 1 GPU # [!code focus]
```

> [Documentation on using resource labels with `nvidia/k8s-device-plugin`](https://github.com/NVIDIA/k8s-device-plugin?tab=readme-ov-file#enabling-gpu-support-in-kubernetes)

Using `amd.com/gpu` allows you to limit the number of AMD GPUs, therefore when using `kollama deploy` you can use `--limit amd.com/gpu=1` to specify the number of AMD GPUs as `1`.

```shell
kollama deploy phi --limit=amd.com/gpu=1
```

This is what it may look like in the YAML configuration file:

```yaml
resources:
limits:
amd.com/gpu: 1 # requesting a GPU # [!code focus]
```

> [Example YAML manifest of labels with `ROCm/k8s-device-plugin`](https://github.com/ROCm/k8s-device-plugin/blob/4607bf06b700e53803d566e0bf9555f773f0b4f1/example/pod/alexnet-gpu.yaml)

You can read more here: [Schedule GPUs | Kubernetes](https://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus/)

:::

::: details I have deployed [`Model`](/pages/en/references/crd/model), but I want to change the resource limit...

Of course you can, with the [`kubectl set resources`](https://kubernetes.io/zh-cn/docs/reference/kubectl/generated/kubectl_set/kubectl_set_resources/) command, you can change the resource limit:

```shell
kubectl set resources deployment -l model.ollama.ayaka.io=<model name> --limits cpu=4
```

For memory limits:

```shell
kubectl set resources deployment -l model.ollama.ayaka.io=<model name> --limits memory=8Gi
```

:::

The format is `<resource>=<quantity>`.

For example: `--limit=cpu=1` `--limit=memory=1Gi`.

### `--storage-class`

```shell
Expand Down
9 changes: 9 additions & 0 deletions docs/pages/en/references/crd/model.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,15 @@ spec:
# Use the model image `phi`
image: phi
imagePullPolicy: IfNotPresent
resources:
limits:
cpu: 4
memory: 8Gi
nvidia.com/gpu: 1 # If you got GPUs
requests:
cpu: 4
memory: 8Gi
nvidia.com/gpu: 1 # If you got GPUs
storageClassName: local-path
# If you have your own PersistentVolumeClaim created
persistentVolumeClaim: your-pvc
Expand Down
79 changes: 79 additions & 0 deletions docs/pages/zh-CN/references/cli/commands/deploy.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,14 @@ kollama deploy phi --expose --node-port=30000
kollama deploy phi --expose --service-type=LoadBalancer
```

### 部署有着资源限制的 [`Model`](/pages/zh-CN/references/crd/model)

下面的示例部署了 `phi` 模型,并限制 CPU 使用率为 `1` 个核心,内存使用量为 `1Gi`。

```shell
kollama deploy phi --limit=cpu=1 --limit=memory=1Gi
```

## 选项

### `--namespace`
Expand All @@ -71,6 +79,77 @@ kollama deploy phi --image=registry.ollama.ai/library/phi:latest
- 如果未指定,将使用 [`Model`](/pages/zh-CN/references/crd/model) 名称作为镜像名称(如果未指定镜像仓库(Registry),这个时候会默认从 `registry.ollama.ai/library/<model name>` 拉取)。例如,如果 [`Model`](/pages/zh-CN/references/crd/model) 名称是 `phi`,最终获取的镜像名称将是 `registry.ollama.ai/library/phi:latest`
- 如果没有指定,将会使用 `latest` 标签。

### `--limit`(支持多次使用)

> 多次使用该选项可指定多个资源限制。

为即将部署的 [`Model`](/pages/zh-CN/references/crd/model) 指定资源限制。这对于没有足够多资源的集群,或者是希望在有限资源的集群中部署多个 [`Model`](/pages/zh-CN/references/crd/model) 是非常有用的。

::: tip 对于 NVIDIA、AMD GPU 的资源限制...

在 Kubernetes 中,任何 GPU 资源都遵循这个格式:

```yaml
resources:
limits:
gpu-vendor.example/example-gpu: 1 # requesting 1 GPU
```
使用 `nvidia.com/gpu` 可以限制 NVIDIA GPU 的数量,因此,在使用 `kollama deploy` 时,你可以使用 `--limit nvidia.com/gpu=1` 来指定 NVIDIA GPU 的数量为 `1`:

```shell
kollama deploy phi --limit=nvidia.com/gpu=1
```

```yaml
resources:
limits:
nvidia.com/gpu: 1 # requesting 1 GPU # [!code focus]
```

> [有关配合 `nvidia/k8s-device-plugin` 使用资源标签的文档](https://github.com/NVIDIA/k8s-device-plugin?tab=readme-ov-file#enabling-gpu-support-in-kubernetes)

使用 `amd.com/gpu` 可以限制 AMD GPU 的数量,在使用 `kollama deploy` 时,你可以使用 `--limit amd.com/gpu=1` 来指定 AMD GPU 的数量为 `1`。

```shell
kollama deploy phi --limit=amd.com/gpu=1
```

最终会渲染为:

```yaml
resources:
limits:
amd.com/gpu: 1 # requesting a GPU # [!code focus]
```

> [关于配合 `ROCm/k8s-device-plugin` 使用 Label 的 YAML 配置文件的示例](https://github.com/ROCm/k8s-device-plugin/blob/4607bf06b700e53803d566e0bf9555f773f0b4f1/example/pod/alexnet-gpu.yaml)

你可以在这里阅读更多:[调度 GPUs | Kubernetes](https://kubernetes.io/zh-cn/docs/tasks/manage-gpus/scheduling-gpus/)

:::

::: details 我已经部署过 [`Model`](/pages/zh-CN/references/crd/model),但是我想要更改资源限制...

当然可以,用 [`kubectl set resources`](https://kubernetes.io/zh-cn/docs/reference/kubectl/generated/kubectl_set/kubectl_set_resources/) 命令来可以更改资源限制:

```shell
kubectl set resources deployment -l model.ollama.ayaka.io=<model name> --limits cpu=4
```

更改内存限制:

```shell
kubectl set resources deployment -l model.ollama.ayaka.io=<model name> --limits memory=8Gi
```

:::

格式是 `<resource>=<quantity>`。

例如:`--limit=cpu=1` `--limit=memory=1Gi`。


### `--storage-class`

```shell
Expand Down
9 changes: 9 additions & 0 deletions docs/pages/zh-CN/references/crd/model.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,15 @@ spec:
# Use the model image `phi`
image: phi
imagePullPolicy: IfNotPresent
resources:
limits:
cpu: 4
memory: 8Gi
nvidia.com/gpu: 1 # If you got GPUs
requests:
cpu: 4
memory: 8Gi
nvidia.com/gpu: 1 # If you got GPUs
storageClassName: local-path
# If you have your own PersistentVolumeClaim created
persistentVolumeClaim: your-pvc
Expand Down
8 changes: 8 additions & 0 deletions internal/cli/kollama/cmd_deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ type CmdDeployOptions struct {
storageClass string
pvAccessMode string

resourceLimits []string

genericiooptions.IOStreams
}

Expand Down Expand Up @@ -138,6 +140,12 @@ func NewCmdDeploy(streams genericiooptions.IOStreams) *cobra.Command {
"default if no registry is specified), the tag will be latest.",
)

// Register the repeatable --limit flag. Each occurrence appends one
// <resource>=<quantity> entry (e.g. cpu=1, memory=1Gi, nvidia.com/gpu=1)
// to o.resourceLimits.
// Fix: the original concatenation was missing a separator between
// "--limit=memory=1Gi" and "Multiple limits...", rendering the help text
// as "...memory=1GiMultiple limits..."; it also left a trailing space.
cmd.Flags().StringArrayVar(&o.resourceLimits, "limit", []string{}, ""+
	"Resource limits for the model. The format is <resource>=<quantity>. "+
	"For example: --limit=cpu=1 --limit=memory=1Gi. "+
	"Multiple limits can be specified by using the flag multiple times.",
)

cmd.Flags().StringVarP(&o.storageClass, "storage-class", "", "", ""+
"StorageClass to use for the model's associated PersistentVolumeClaim. If not specified, "+
"the default StorageClass will be used.",
Expand Down
2 changes: 1 addition & 1 deletion pkg/model/image_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ func EnsureImageStoreStatefulSetCreated(
},
Spec: corev1.PodSpec{
Containers: []corev1.Container{
NewOllamaServerContainer(false),
NewOllamaServerContainer(false, corev1.ResourceRequirements{}),
},
RestartPolicy: corev1.RestartPolicyAlways,
Volumes: []corev1.Volume{
Expand Down
6 changes: 3 additions & 3 deletions pkg/model/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ func ModelLabels(name string) map[string]string {

func ImageStoreLabels(name string) map[string]string {
return map[string]string{
"app": name,
"app": "ollama-image-store",
"model.ollama.ayaka.io": name,
"model.ollama.ayaka.io/type": "image-store",
}
Expand Down Expand Up @@ -103,10 +103,10 @@ func EnsureDeploymentCreated(
},
Spec: corev1.PodSpec{
InitContainers: []corev1.Container{
NewOllamaPullerContainer(image, namespace),
NewOllamaPullerContainer(image, namespace, model.Spec.Resources),
},
Containers: []corev1.Container{
NewOllamaServerContainer(true),
NewOllamaServerContainer(true, model.Spec.Resources),
},
Volumes: []corev1.Volume{
{
Expand Down
Loading

0 comments on commit c464635

Please sign in to comment.