Adding filestore support
chiayi committed Dec 21, 2023
1 parent a358be2 commit 7790e20
Showing 11 changed files with 113 additions and 2 deletions.
3 changes: 3 additions & 0 deletions gke-platform/modules/gke_standard/main.tf
@@ -50,6 +50,9 @@ resource "google_container_cluster" "ml_cluster" {
gcs_fuse_csi_driver_config {
enabled = true
}
gcp_filestore_csi_driver_config {
enabled = true
}
}

release_channel {
2 changes: 2 additions & 0 deletions jupyter-on-gke/README.md
@@ -130,6 +130,8 @@ Currently there are 2 choices for storage:
2. GCSFuse - `gcsfuse.csi.storage.gke.io` uses GCS buckets and requires users to pre-create buckets with the name format `gcsfuse-{username}`
3. Filestore - `filestore.csi.storage.gke.io` uses Filestore auto-provisioning to create persistent volumes. Uses the `Retain` reclaim policy
For more information about persistent storage and the available options, visit [here](https://github.com/GoogleCloudPlatform/ai-on-gke/blob/main/jupyter-on-gke/storage.md)
## Running GPT-J-6B
(One changed file could not be displayed.)
14 changes: 13 additions & 1 deletion jupyter-on-gke/jupyter_config/config-selfauth.yaml
@@ -92,7 +92,7 @@ singleuser:
gke-gcsfuse/ephemeral-storage-limit: 10Gi
storage:
dynamic:
pvcNameTemplate: claim-{username}
pvcNameTemplate: ${pvc_template}
# More info on kubespawner overrides: https://jupyterhub-kubespawner.readthedocs.io/en/latest/spawner.html#kubespawner.KubeSpawner
# profile example:
# - display_name: "Learning Data Science"
@@ -137,6 +137,10 @@ singleuser:
mountOptions: "uid=1000,gid=100,o=noexec,implicit-dirs,dir-mode=777,file-mode=777"
node_selector:
iam.gke.io/gke-metadata-server-enabled: "true"
Filestore:
display_name: "Filestore"
kubespawner_override:
storage_class: "filestore-storage-class"
default: true
- display_name: "GPU T4"
description: "Creates GPU VMs (T4) as the compute for notebook execution"
@@ -163,6 +167,10 @@ singleuser:
mountOptions: "uid=1000,gid=100,o=noexec,implicit-dirs,dir-mode=777,file-mode=777"
node_selector:
iam.gke.io/gke-metadata-server-enabled: "true"
Filestore:
display_name: "Filestore"
kubespawner_override:
storage_class: "filestore-storage-class"
kubespawner_override:
image: jupyter/tensorflow-notebook:python-3.10
extra_resource_limits:
@@ -196,6 +204,10 @@ singleuser:
mountOptions: "uid=1000,gid=100,o=noexec,implicit-dirs,dir-mode=777,file-mode=777"
node_selector:
iam.gke.io/gke-metadata-server-enabled: "true"
Filestore:
display_name: "Filestore"
kubespawner_override:
storage_class: "filestore-storage-class"
kubespawner_override:
image: jupyter/tensorflow-notebook:python-3.10
extra_resource_limits:
7 changes: 6 additions & 1 deletion jupyter-on-gke/jupyterhub.tf
@@ -62,7 +62,7 @@ resource "google_project_service" "project_service" {

# Creates a "Brand", equivalent to the OAuth consent screen on Cloud console
resource "google_iap_brand" "project_brand" {
count = var.add_auth && var.add_auth && var.brand == "" ? 1 : 0
count = var.add_auth && var.brand == "" ? 1 : 0
support_email = var.support_email
application_title = "Cloud IAP protected Application"
project = var.project_id
@@ -105,6 +105,10 @@ module "iap_auth" {
]
}

resource "kubectl_manifest" "filestore_storage_class" {
yaml_body = templatefile("${path.module}/persistent_storage_deployments/filestore-storage-class.yaml", {})
}

module "workload_identity_service_account" {
source = "./service_accounts_module"

@@ -126,6 +130,7 @@ resource "helm_release" "jupyterhub" {
project_number = data.google_project.project.number
authenticator_class = var.add_auth ? "'gcpiapjwtauthenticator.GCPIAPAuthenticator'" : "dummy"
service_type = var.add_auth ? "NodePort" : "LoadBalancer"
pvc_template = var.enable_shared_pv ? "filestore-claim" : "claim-{username}"
})
]

18 changes: 18 additions & 0 deletions jupyter-on-gke/persistent_storage_deployments/filestore-pv.yaml
@@ -0,0 +1,18 @@
apiVersion: v1
kind: PersistentVolume
metadata:
  name: filestore-pv
spec:
  storageClassName: ""
  capacity:
    storage: 1Ti
  accessModes:
    - ReadWriteMany
  persistentVolumeReclaimPolicy: Retain
  volumeMode: Filesystem
  csi:
    driver: filestore.csi.storage.gke.io
    volumeHandle: "modeInstance/<FILESTORE_INSTANCE_LOCATION>/<FILESTORE_INSTANCE_NAME>/<FILESTORE_SHARE_NAME>"
    volumeAttributes:
      ip: <FILESTORE_INSTANCE_IP>
      volume: <FILESTORE_SHARE_NAME>
13 changes: 13 additions & 0 deletions jupyter-on-gke/persistent_storage_deployments/filestore-pvc.yaml
@@ -0,0 +1,13 @@
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: filestore-claim
  namespace: <NAMESPACE>
spec:
  accessModes:
    - ReadWriteMany
  storageClassName: ""
  volumeName: filestore-pv
  resources:
    requests:
      storage: 1Ti
11 changes: 11 additions & 0 deletions jupyter-on-gke/persistent_storage_deployments/filestore-storage-class.yaml
@@ -0,0 +1,11 @@
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: filestore-storage-class
provisioner: filestore.csi.storage.gke.io
allowVolumeExpansion: true
reclaimPolicy: Retain
parameters:
  tier: standard
  network: default
volumeBindingMode: WaitForFirstConsumer
37 changes: 37 additions & 0 deletions jupyter-on-gke/storage.md
@@ -9,3 +9,40 @@ GCSFuse allows users to mount GCS Buckets as their local filesystem. This option
![Profiles Page](images/gcs_bucket.png)

Since this bucket lives in GCS, there is built-in permission control and access outside of the cluster.

## Filestore

Filestore has built-in auto-provisioning. This means that the first time this option is picked by a user, a Filestore instance is automatically provisioned and then mounted to the Jupyter instance.

**Important note:** Currently the tier used is the `standard` tier. Other tiers may require different handling.

Filestore can be accessed by any GCE instance within the same VPC network.
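
As a rough sketch rather than something taken from this repository: selecting the Filestore option makes the spawner request a claim against the `filestore-storage-class` StorageClass added by this commit, and the Filestore CSI driver then provisions an instance to back it. The username, namespace, access mode, and size below are hypothetical, and the exact claim KubeSpawner generates may differ:

```yaml
# Approximate shape of the claim created for a hypothetical user "alice"
# in a hypothetical "jupyter" namespace.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: claim-alice                  # from the default pvcNameTemplate: claim-{username}
  namespace: jupyter
spec:
  accessModes:
    - ReadWriteOnce
  storageClassName: filestore-storage-class
  resources:
    requests:
      storage: 1Ti                   # standard-tier Filestore instances start at 1 TiB
```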

### Using the same PV within one namespace

The current PVC naming template is determined by the JupyterHub config:

```yaml
singleuser:
  ...
  storage:
    dynamic:
      pvcNameTemplate: claim-{username}
  ...
```

This means that every single user will have their own PVC+PV. Users that want to share the PV within the same namespace can set the `pvcNameTemplate` to a more generic, static template, as in the sketch below. **Important:** Changing the `pvcNameTemplate` means that when re-mounting, the name of the PVC will also have to match this new template.
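
For example, a shared setup could use a fixed name; `filestore-claim` below matches the claim name that this commit's `enable_shared_pv` flag plugs into the Helm values (a minimal sketch):

```yaml
singleuser:
  storage:
    dynamic:
      # One fixed name instead of claim-{username}: every user's pod binds
      # to the same PVC, and therefore to the same Filestore-backed PV.
      pvcNameTemplate: filestore-claim
```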

### Remounting a Filestore instance

A new Filestore instance will be created every time the option is selected unless there is already an existing PVC matching the format `claim-{username}`. This means that reusing an existing Filestore instance requires manual remounting.

1. Open the Filestore page in the [Cloud console](https://console.cloud.google.com/filestore/instances). It will look something like this:

![Filestore instance](images/filestore_instance_screenshot.png)

2. Fill out both `filestore-pv.yaml` and `filestore-pvc.yaml` under the `persistent_storage_deployments` directory. The Cloud console will have the necessary information (a filled-out sketch follows this list).

3. Deploy the PV/PVC into the correct namespace (the PV is a cluster-scoped resource, so the namespace only applies to the PVC).

4. Log in as the user and select Filestore as the persistent storage option.
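
As a filled-out sketch: all of the location, instance name, share name, IP, and namespace values below are placeholders for illustration; use the values shown on the Filestore page in the Cloud console and the namespace JupyterHub is deployed in.

```yaml
# filestore-pv.yaml: hypothetical values, not real infrastructure
apiVersion: v1
kind: PersistentVolume
metadata:
  name: filestore-pv
spec:
  storageClassName: ""
  capacity:
    storage: 1Ti
  accessModes:
    - ReadWriteMany
  persistentVolumeReclaimPolicy: Retain
  volumeMode: Filesystem
  csi:
    driver: filestore.csi.storage.gke.io
    volumeHandle: "modeInstance/us-central1-b/my-filestore/vol1"  # modeInstance/<location>/<instance>/<share>
    volumeAttributes:
      ip: 10.0.0.2       # Filestore instance IP from the console
      volume: vol1
---
# filestore-pvc.yaml: deployed into the JupyterHub namespace
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: filestore-claim
  namespace: jupyter     # hypothetical namespace
spec:
  accessModes:
    - ReadWriteMany
  storageClassName: ""
  volumeName: filestore-pv
  resources:
    requests:
      storage: 1Ti
```

Applying both manifests (for example, `kubectl apply -f filestore-pv.yaml -f filestore-pvc.yaml`) and then selecting Filestore in the spawner reuses the existing instance instead of provisioning a new one.
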
4 changes: 4 additions & 0 deletions jupyter-on-gke/variable_definitions.md
@@ -72,3 +72,7 @@ Client ID of an OAuth client created by the user for enabling IAP. When this var
### client_secret

Client Secret associated with the [client ID](#client_id). This variable will only be used when the client id is filled out.

### enable_shared_pv

Enables a common PV/PVC to be shared by all users within the same namespace. When this flag is enabled, the PV/PVCs provisioned through the storage classes are shared instead of being created per user.
6 changes: 6 additions & 0 deletions jupyter-on-gke/variables.tf
@@ -89,4 +89,10 @@ variable "client_secret" {
description = "Client secret used for enabling IAP"
default = ""
sensitive = true
}

variable "enable_shared_pv" {
type = bool
description = "Enables filestore storage option so that all users will share the same pv/pvc under the same namespace"
default = false
}
