Skip to content

Commit

Permalink
Add a SCRATCH_BUCKET per dask hub
Browse files Browse the repository at this point in the history
A temporary bucket whose objects are automatically deleted once they
are 7 days old, and which gives full access to all the users on that
hub. See
pangeo-data/pangeo-cloud-federation#610
for reasons why this is very useful.
  • Loading branch information
yuvipanda committed Mar 3, 2021
1 parent 3fcfc00 commit 67bbeec
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 1 deletion.
7 changes: 7 additions & 0 deletions hub-templates/daskhub/templates/env-vars.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# ConfigMap publishing the hub's scratch bucket details. The two data keys
# are consumed through configMapKeyRef entries (SCRATCH_BUCKET_NAME and
# SCRATCH_BUCKET_PROTOCOL) in the chart's values.yaml.
kind: ConfigMap
apiVersion: v1
metadata:
  name: cloud-env-vars
data:
  # Quoted so the rendered bucket name is always parsed as a YAML string.
  scratch-bucket-name: {{ include "daskhub.scratchBucket.name" . | quote }}
  scratch-bucket-protocol: "gcs"
30 changes: 30 additions & 0 deletions hub-templates/daskhub/templates/storage.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{{- /*
daskhub.scratchBucket.name renders the name of the per-hub scratch bucket as
<iam.projectId>-<release name>-scratch-bucket.
Rendering fails when iam.projectId is unset, because the name would
otherwise silently start with "-".
*/ -}}
{{- define "daskhub.scratchBucket.name" -}}
{{ required "iam.projectId must be set to name the scratch bucket" .Values.iam.projectId }}-{{ .Release.Name }}-scratch-bucket
{{- end }}
# Per-hub scratch bucket, declared as a cnrm StorageBucket resource.
apiVersion: storage.cnrm.cloud.google.com/v1beta1
kind: StorageBucket
metadata:
  annotations:
    cnrm.cloud.google.com/force-destroy: "false"
  # Quoted so the rendered name is always parsed as a YAML string.
  name: {{ include "daskhub.scratchBucket.name" . | quote }}
spec:
  bucketPolicyOnly: true
  lifecycleRule:
    # Delete objects once they reach an age of 7 (days).
    - action:
        type: Delete
      condition:
        age: 7
---
# Grants the hub's service account a role on the scratch bucket.
apiVersion: iam.cnrm.cloud.google.com/v1beta1
kind: IAMPolicyMember
metadata:
  name: scratch-bucket-binding
spec:
  member: 'serviceAccount:{{ include "daskhub.serviceAccountName" . }}@{{ .Values.iam.projectId }}.iam.gserviceaccount.com'
  # This gives users the ability to delete the bucket too :(
  # But without this, I think you can't list objects in the bucket
  # NOTE(review): roles/storage.objectAdmin may cover object listing without
  # granting bucket deletion - confirm against the clients in use before
  # narrowing the role.
  role: roles/storage.admin
  resourceRef:
    apiVersion: storage.cnrm.cloud.google.com/v1beta1
    kind: StorageBucket
    name: {{ include "daskhub.scratchBucket.name" . | quote }}
39 changes: 38 additions & 1 deletion hub-templates/daskhub/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,48 @@ base-hub:
extraEnv:
  # The default worker image matches the singleuser image.
  DASK_GATEWAY__CLUSTER__OPTIONS__IMAGE: '{JUPYTER_IMAGE_SPEC}'
  # Scratch bucket details are read from the cloud-env-vars ConfigMap
  # rather than hard-coded here.
  SCRATCH_BUCKET_PROTOCOL:
    valueFrom:
      configMapKeyRef:
        name: cloud-env-vars
        key: scratch-bucket-protocol
  SCRATCH_BUCKET_NAME:
    valueFrom:
      configMapKeyRef:
        name: cloud-env-vars
        key: scratch-bucket-name
hub:
networkPolicy:
enabled: false
extraConfig:
daskhub-01-add-dask-gateway-values: |
daskhub-01-dependent-env-vars: |
  # Explicitly add user env vars that derive from other env vars.
  # A $(VAR) reference only works when the env var it refers to is already
  # defined, and the dict form of z2jh extraEnv doesn't preserve ordering,
  # so the derived variables are appended to the container's env list here
  # instead of being declared by name in extraEnv.
  from kubernetes import client

  dependent_env_vars = [
      {
          'name': 'SCRATCH_BUCKET',
          'value': "$(SCRATCH_BUCKET_PROTOCOL)://$(SCRATCH_BUCKET_NAME)/$(JUPYTERHUB_USER)"
      },
      {
          'name': 'PANGEO_SCRATCH',
          'value': '$(SCRATCH_BUCKET)'
      },
  ]

  def modify_pod_hook(spawner, pod):
      """Append the derived env vars to the first container of the pod.

      Returns the same pod object after modifying it in place.
      """
      # FIXME: Make sure sidecars are never first containers
      user_container = pod.spec.containers[0]
      # env may be unset on the container model; start from an empty list
      # so the appends below cannot fail on None.
      if user_container.env is None:
          user_container.env = []
      for denv in dependent_env_vars:
          user_container.env.append(client.V1EnvVar(**denv))
      return pod

  c.KubeSpawner.modify_pod_hook = modify_pod_hook
daskhub-02-add-dask-gateway-values: |
# 1. Sets `DASK_GATEWAY__PROXY_ADDRESS` in the singleuser environment.
# 2. Adds the URL for the Dask Gateway JupyterHub service.
import os
Expand Down

0 comments on commit 67bbeec

Please sign in to comment.