Skip to content

Commit

Permalink
feat: support resource limits, added docs for them
Browse files Browse the repository at this point in the history
Close #2

Signed-off-by: Neko Ayaka <[email protected]>
  • Loading branch information
nekomeowww committed Apr 25, 2024
1 parent aa668b5 commit c464635
Show file tree
Hide file tree
Showing 10 changed files with 263 additions and 9 deletions.
11 changes: 8 additions & 3 deletions api/ollama/v1/model_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,20 +59,25 @@ type ModelSpec struct {
// +patchMergeKey=name
// +patchStrategy=merge
ImagePullSecrets []corev1.LocalObjectReference `json:"imagePullSecrets,omitempty" patchStrategy:"merge" patchMergeKey:"name" protobuf:"bytes,4,rep,name=imagePullSecrets"`
// Compute Resources required by this container.
// Cannot be updated.
// More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
// +optional
Resources corev1.ResourceRequirements `json:"resources,omitempty" protobuf:"bytes,5,opt,name=resources"`
// storageClassName is the name of StorageClass to which this persistent volume belongs. Empty value
// means that this volume does not belong to any StorageClass.
// +optional
StorageClassName *string `json:"storageClassName,omitempty" protobuf:"bytes,5,opt,name=storageClassName"`
StorageClassName *string `json:"storageClassName,omitempty" protobuf:"bytes,6,opt,name=storageClassName"`
// persistentVolumeClaimVolumeSource represents a reference to a
// PersistentVolumeClaim in the same namespace.
// More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims
// +optional
PersistentVolumeClaim *corev1.PersistentVolumeClaimVolumeSource `json:"persistentVolumeClaim,omitempty" protobuf:"bytes,6,opt,name=persistentVolumeClaim"`
PersistentVolumeClaim *corev1.PersistentVolumeClaimVolumeSource `json:"persistentVolumeClaim,omitempty" protobuf:"bytes,7,opt,name=persistentVolumeClaim"`
// spec defines a specification of a persistent volume owned by the cluster.
// Provisioned by an administrator.
// More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistent-volumes
// +optional
PersistentVolume *ModelPersistentVolumeSpec `json:"persistentVolume,omitempty" protobuf:"bytes,7,opt,name=persistentVolume"`
PersistentVolume *ModelPersistentVolumeSpec `json:"persistentVolume,omitempty" protobuf:"bytes,8,opt,name=persistentVolume"`
}

type ConditionType string
Expand Down
58 changes: 58 additions & 0 deletions config/crd/bases/ollama.ayaka.io_models.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,64 @@ spec:
zero and not specified. Defaults to 1.
format: int32
type: integer
resources:
description: |-
Compute Resources required by this container.
Cannot be updated.
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
properties:
claims:
description: |-
Claims lists the names of resources, defined in spec.resourceClaims,
that are used by this container.
This is an alpha field and requires enabling the
DynamicResourceAllocation feature gate.
This field is immutable. It can only be set for containers.
items:
description: ResourceClaim references one entry in PodSpec.ResourceClaims.
properties:
name:
description: |-
Name must match the name of one entry in pod.spec.resourceClaims of
the Pod where this field is used. It makes that resource available
inside a container.
type: string
required:
- name
type: object
type: array
x-kubernetes-list-map-keys:
- name
x-kubernetes-list-type: map
limits:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: |-
Limits describes the maximum amount of compute resources allowed.
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
type: object
requests:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: |-
Requests describes the minimum amount of compute resources required.
If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
otherwise to an implementation-defined value. Requests cannot exceed Limits.
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
type: object
type: object
storageClassName:
description: |-
storageClassName is the name of StorageClass to which this persistent volume belongs. Empty value
Expand Down
81 changes: 81 additions & 0 deletions docs/pages/en/references/cli/commands/deploy.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,14 @@ kollama deploy phi --expose
kollama deploy phi --expose --service-type=LoadBalancer
```

### Deploy [`Model`](/pages/en/references/crd/model) with resources limits

The following example deploys the `phi` model with CPU limit to `1` and memory limit to `1Gi`.

```shell
kollama deploy phi --limit=cpu=1 --limit=memory=1Gi
```

## Flags

### `--namespace`
Expand All @@ -55,6 +63,79 @@ Model image to deploy.
- If not specified, the [`Model`](/pages/en/references/crd/model) name will be used as the image name (will be pulled from `registry.ollama.ai/library/<model name>` by default if no registry is specified). For example, if the [`Model`](/pages/en/references/crd/model) name is `phi`, the image name will be `registry.ollama.ai/library/phi:latest`.
- If not specified, the tag will be `latest`.

### `--limit` (supports multiple flags)

> Multiple limits can be specified by using the flag multiple times.

Resource limits for the deployed [`Model`](/pages/en/references/crd/model). This is useful for clusters that don't have a large enough number of resources, or if you want to deploy multiple [`Models`](/pages/en/references/crd/model) in a cluster with limited resources.

::: tip For resource limits on NVIDIA, AMD GPUs...

In Kubernetes, any GPU resource follows this pattern for resources labels:

```yaml
resources:
limits:
gpu-vendor.example/example-gpu: 1 # requesting 1 GPU
```
Using `nvidia.com/gpu` allows you to limit the number of NVIDIA GPUs; therefore, when using `kollama deploy` you can use `--limit nvidia.com/gpu=1` to specify the number of NVIDIA GPUs as `1`:

```shell
kollama deploy phi --limit=nvidia.com/gpu=1
```

This is what it may look like in the YAML configuration file:


```yaml
resources:
limits:
nvidia.com/gpu: 1 # requesting 1 GPU # [!code focus]
```

> [Documentation on using resource labels with `nvidia/k8s-device-plugin`](https://github.com/NVIDIA/k8s-device-plugin?tab=readme-ov-file#enabling-gpu-support-in-kubernetes)

Using `amd.com/gpu` allows you to limit the number of AMD GPUs, therefore when using `kollama deploy` you can use `--limit amd.com/gpu=1` to specify the number of AMD GPUs as `1`.

```shell
kollama deploy phi --limit=amd.com/gpu=1
```

This is what it may look like in the YAML configuration file:

```yaml
resources:
limits:
amd.com/gpu: 1 # requesting a GPU # [!code focus]
```

> [Example YAML manifest of labels with `ROCm/k8s-device-plugin`](https://github.com/ROCm/k8s-device-plugin/blob/4607bf06b700e53803d566e0bf9555f773f0b4f1/example/pod/alexnet-gpu.yaml)

You can read more here: [Schedule GPUs | Kubernetes](https://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus/)

:::

::: details I have deployed [`Model`](/pages/en/references/crd/model), but I want to change the resource limit...

Of course you can, with the [`kubectl set resources`](https://kubernetes.io/zh-cn/docs/reference/kubectl/generated/kubectl_set/kubectl_set_resources/) command, you can change the resource limit:

```shell
kubectl set resources deployment -l model.ollama.ayaka.io=<model name> --limits cpu=4
```

For memory limits:

```shell
kubectl set resources deployment -l model.ollama.ayaka.io=<model name> --limits memory=8Gi
```

:::

The format is `<resource>=<quantity>`.

For example: `--limit=cpu=1` `--limit=memory=1Gi`.

### `--storage-class`

```shell
Expand Down
9 changes: 9 additions & 0 deletions docs/pages/en/references/crd/model.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,15 @@ spec:
# Use the model image `phi`
image: phi
imagePullPolicy: IfNotPresent
resources:
limits:
cpu: 4
memory: 8Gi
nvidia.com/gpu: 1 # If you got GPUs
requests:
cpu: 4
memory: 8Gi
nvidia.com/gpu: 1 # If you got GPUs
storageClassName: local-path
# If you have your own PersistentVolumeClaim created
persistentVolumeClaim: your-pvc
Expand Down
79 changes: 79 additions & 0 deletions docs/pages/zh-CN/references/cli/commands/deploy.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,14 @@ kollama deploy phi --expose --node-port=30000
kollama deploy phi --expose --service-type=LoadBalancer
```

### 部署有着资源限制的 [`Model`](/pages/zh-CN/references/crd/model)

下面的示例部署了 `phi` 模型,并限制 CPU 使用率为 `1` 个核心,内存使用量为 `1Gi`。

```shell
kollama deploy phi --limit=cpu=1 --limit=memory=1Gi
```

## 选项

### `--namespace`
Expand All @@ -71,6 +79,77 @@ kollama deploy phi --image=registry.ollama.ai/library/phi:latest
- 如果未指定,将使用 [`Model`](/pages/zh-CN/references/crd/model) 名称作为镜像名称(如果未指定镜像仓库(Registry),这个时候会默认从 `registry.ollama.ai/library/<model name>` 拉取)。例如,如果 [`Model`](/pages/zh-CN/references/crd/model) 名称是 `phi`,最终获取的镜像名称将是 `registry.ollama.ai/library/phi:latest`
- 如果没有指定,将会使用 `latest` 标签。

### `--limit`(支持多次使用)

> 多次使用该选项可指定多个资源限制。

为即将部署的 [`Model`](/pages/zh-CN/references/crd/model) 指定资源限制。这对于没有足够多资源的集群,或者是希望在有限资源的集群中部署多个 [`Model`](/pages/zh-CN/references/crd/model) 是非常有用的。

::: tip 对于 NVIDIA、AMD GPU 的资源限制...

在 Kubernetes 中,任何 GPU 资源都遵循这个格式:

```yaml
resources:
limits:
gpu-vendor.example/example-gpu: 1 # requesting 1 GPU
```
使用 `nvidia.com/gpu` 可以限制 NVIDIA GPU 的数量,因此,在使用 `kollama deploy` 时,你可以使用 `--limit nvidia.com/gpu=1` 来指定 NVIDIA GPU 的数量为 `1`:

```shell
kollama deploy phi --limit=nvidia.com/gpu=1
```

```yaml
resources:
limits:
nvidia.com/gpu: 1 # requesting 1 GPU # [!code focus]
```

> [有关配合 `nvidia/k8s-device-plugin` 使用资源标签的文档](https://github.com/NVIDIA/k8s-device-plugin?tab=readme-ov-file#enabling-gpu-support-in-kubernetes)

使用 `amd.com/gpu` 可以限制 AMD GPU 的数量,在使用 `kollama deploy` 时,你可以使用 `--limit amd.com/gpu=1` 来指定 AMD GPU 的数量为 `1`。

```shell
kollama deploy phi --limit=amd.com/gpu=1
```

最终会渲染为:

```yaml
resources:
limits:
amd.com/gpu: 1 # requesting a GPU # [!code focus]
```

> [关于配合 `ROCm/k8s-device-plugin` 使用 Label 的 YAML 配置文件的示例](https://github.com/ROCm/k8s-device-plugin/blob/4607bf06b700e53803d566e0bf9555f773f0b4f1/example/pod/alexnet-gpu.yaml)

你可以在这里阅读更多:[调度 GPUs | Kubernetes](https://kubernetes.io/zh-cn/docs/tasks/manage-gpus/scheduling-gpus/)

:::

::: details 我已经部署过 [`Model`](/pages/zh-CN/references/crd/model),但是我想要更改资源限制...

当然可以,用 [`kubectl set resources`](https://kubernetes.io/zh-cn/docs/reference/kubectl/generated/kubectl_set/kubectl_set_resources/) 命令来可以更改资源限制:

```shell
kubectl set resources deployment -l model.ollama.ayaka.io=<model name> --limits cpu=4
```

更改内存限制:

```shell
kubectl set resources deployment -l model.ollama.ayaka.io=<model name> --limits memory=8Gi
```

:::

格式是 `<resource>=<quantity>`。

例如:`--limit=cpu=1` `--limit=memory=1Gi`。


### `--storage-class`

```shell
Expand Down
9 changes: 9 additions & 0 deletions docs/pages/zh-CN/references/crd/model.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,15 @@ spec:
# Use the model image `phi`
image: phi
imagePullPolicy: IfNotPresent
resources:
limits:
cpu: 4
memory: 8Gi
nvidia.com/gpu: 1 # If you got GPUs
requests:
cpu: 4
memory: 8Gi
nvidia.com/gpu: 1 # If you got GPUs
storageClassName: local-path
# If you have your own PersistentVolumeClaim created
persistentVolumeClaim: your-pvc
Expand Down
8 changes: 8 additions & 0 deletions internal/cli/kollama/cmd_deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ type CmdDeployOptions struct {
storageClass string
pvAccessMode string

resourceLimits []string

genericiooptions.IOStreams
}

Expand Down Expand Up @@ -138,6 +140,12 @@ func NewCmdDeploy(streams genericiooptions.IOStreams) *cobra.Command {
"default if no registry is specified), the tag will be latest.",
)

// Register the repeatable --limit flag. Each occurrence appends one
// <resource>=<quantity> entry (e.g. cpu=1, memory=1Gi, nvidia.com/gpu=1)
// to o.resourceLimits.
// Fix: the original concatenation was missing a separator between
// "--limit=memory=1Gi" and "Multiple limits...", rendering the help text
// as "...memory=1GiMultiple limits..."; it also left a trailing space.
cmd.Flags().StringArrayVar(&o.resourceLimits, "limit", []string{}, ""+
	"Resource limits for the model. The format is <resource>=<quantity>. "+
	"For example: --limit=cpu=1 --limit=memory=1Gi. "+
	"Multiple limits can be specified by using the flag multiple times.",
)

cmd.Flags().StringVarP(&o.storageClass, "storage-class", "", "", ""+
"StorageClass to use for the model's associated PersistentVolumeClaim. If not specified, "+
"the default StorageClass will be used.",
Expand Down
2 changes: 1 addition & 1 deletion pkg/model/image_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ func EnsureImageStoreStatefulSetCreated(
},
Spec: corev1.PodSpec{
Containers: []corev1.Container{
NewOllamaServerContainer(false),
NewOllamaServerContainer(false, corev1.ResourceRequirements{}),
},
RestartPolicy: corev1.RestartPolicyAlways,
Volumes: []corev1.Volume{
Expand Down
6 changes: 3 additions & 3 deletions pkg/model/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ func ModelLabels(name string) map[string]string {

func ImageStoreLabels(name string) map[string]string {
return map[string]string{
"app": name,
"app": "ollama-image-store",
"model.ollama.ayaka.io": name,
"model.ollama.ayaka.io/type": "image-store",
}
Expand Down Expand Up @@ -103,10 +103,10 @@ func EnsureDeploymentCreated(
},
Spec: corev1.PodSpec{
InitContainers: []corev1.Container{
NewOllamaPullerContainer(image, namespace),
NewOllamaPullerContainer(image, namespace, model.Spec.Resources),
},
Containers: []corev1.Container{
NewOllamaServerContainer(true),
NewOllamaServerContainer(true, model.Spec.Resources),
},
Volumes: []corev1.Volume{
{
Expand Down
Loading

0 comments on commit c464635

Please sign in to comment.