fix(app): case insensitive project and namespace (#858)

enhancement(app): add the ability to do simple migrations of the jupyterserver k8s resources
SwissDataScienceCenter · Jan 19, 2022 · 56c2db4 · 56c2db4
1 parent e290fba
commit 56c2db4
Show file tree

Hide file tree

Showing 12 changed files with 251 additions and 6 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -13,6 +13,7 @@ WORKDIR /renku-notebooks
 RUN pipenv install --system --deploy
 
 COPY renku_notebooks /renku-notebooks/renku_notebooks
+COPY resource_schema_migrations /resource_schema_migrations
 # Set up the flask app
 ENV FLASK_APP=/renku-notebooks/renku-notebooks:create_app
 

diff --git a/Dockerfile.tests b/Dockerfile.tests
@@ -26,6 +26,7 @@ WORKDIR /renku-notebooks
 RUN pipenv install --dev
 
 COPY renku_notebooks /renku-notebooks/renku_notebooks
+COPY resource_schema_migrations /resource_schema_migrations
 COPY tests /renku-notebooks/tests
 
 CMD ["pipenv", "run", "pytest", "tests/integration"]
diff --git a/helm-chart/renku-notebooks/templates/hpa.yaml b/helm-chart/renku-notebooks/templates/hpa.yaml
@@ -6,7 +6,7 @@ metadata:
 spec:
   scaleTargetRef:
     apiVersion: apps/v1
-    kind: Deployment
+    kind: StatefulSet
     name: {{ template "notebooks.fullname" . }}
   minReplicas: {{ .Values.autoscaling.minReplicas }}
   maxReplicas: {{ .Values.autoscaling.maxReplicas }}

diff --git a/...renku-notebooks/templates/deployment.yaml → ...enku-notebooks/templates/statefulset.yaml b/...renku-notebooks/templates/deployment.yaml → ...enku-notebooks/templates/statefulset.yaml
@@ -1,5 +1,5 @@
 apiVersion: apps/v1
-kind: Deployment
+kind: StatefulSet
 metadata:
   name: {{ template "notebooks.fullname" . }}
   labels:
@@ -11,6 +11,7 @@ spec:
   {{- if not .Values.autoscaling.enabled }}
   replicas: {{ .Values.replicaCount }}
   {{- end }}
+  serviceName: {{ template "notebooks.fullname" . }}
   selector:
     matchLabels:
       app: {{ template "notebooks.name" . }}
@@ -144,6 +145,28 @@ spec:
             periodSeconds: 30
           resources:
 {{ toYaml .Values.resources | indent 12 }}
+      initContainers:
+        - name: k8s-resource-schema-migrations
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          env:
+            - name: CRD_GROUP
+              value: {{ .Values.amalthea.crdApiGroup }}
+            - name: CRD_VERSION
+              value: {{ .Values.amalthea.crdApiVersion }}
+            - name: CRD_PLURAL
+              value: {{ .Values.amalthea.crdNames.plural }}
+            - name: K8S_NAMESPACE
+              value: {{ .Release.Namespace }}
+            - name: POD_NAME
+              valueFrom:
+                fieldRef:
+                  apiVersion: v1
+                  fieldPath: metadata.name
+          command:
+            - python
+          args:
+            - /resource_schema_migrations/run_all.py
       volumes:
         - name: server-options
           configMap:

diff --git a/renku_notebooks/api/classes/server.py b/renku_notebooks/api/classes/server.py
@@ -80,6 +80,12 @@ def _check_flask_config(self):
     @property
     def server_name(self):
         """Make the name that is used to identify a unique user session"""
+        prefix = current_app.config["RENKU_ANNOTATION_PREFIX"]
+        if self.js is not None:
+            try:
+                return self.js["metadata"]["annotations"][f"{prefix}servername"]
+            except KeyError:
+                pass  # make server name from params - required fields missing from k8s resource
         return make_server_name(
             self._user.safe_username,
             self.namespace,
@@ -294,6 +300,9 @@ def _get_session_manifest(self):
             f"{prefix}commit-sha": self.commit_sha,
             f"{prefix}gitlabProjectId": None,
             f"{prefix}safe-username": self._user.safe_username,
+            f"{prefix}schemaVersion": current_app.config[
+                "CURRENT_RESOURCE_SCHEMA_VERSION"
+            ],
         }
         annotations = {
             f"{prefix}commit-sha": self.commit_sha,

diff --git a/renku_notebooks/api/custom_fields.py b/renku_notebooks/api/custom_fields.py
@@ -126,3 +126,27 @@ def memory_value_validation(x):
     missing=config.SERVER_OPTIONS_DEFAULTS["lfs_auto_fetch"],
     required=False,
 )
+
+
+class LowercaseString(fields.String):
+    """Basic class for a string field that always serializes
+    and deserializes to lowercase string. Used for parameters that are
+    case insensitive."""
+
+    def _serialize(self, value, attr, obj, **kwargs):
+        value = super()._serialize(value, attr, obj, **kwargs)
+        if type(value) is str:
+            return value.lower()
+        else:
+            raise ValidationError(
+                f"The value {value} is not type string, but {type(value)}."
+            )
+
+    def _deserialize(self, value, attr, data, **kwargs):
+        value = super()._deserialize(value, attr, data, **kwargs)
+        if type(value) is str:
+            return value.lower()
+        else:
+            raise ValidationError(
+                f"The value {value} is not type string, but {type(value)}."
+            )
diff --git a/renku_notebooks/api/schemas.py b/renku_notebooks/api/schemas.py
@@ -26,6 +26,7 @@
     serverOptionRequestUrlValue,
     serverOptionRequestLfsAutoFetchValue,
     serverOptionRequestGpuValue,
+    LowercaseString,
 )
 from .classes.server import UserServer
 from .classes.user import User
@@ -72,8 +73,11 @@ def validate_server_options(self, data, **kwargs):
 class LaunchNotebookRequest(Schema):
     """Used to validate the requesting for launching a jupyter server"""
 
-    namespace = fields.Str(required=True)
-    project = fields.Str(required=True)
+    # namespaces in gitlab are NOT case sensitive
+    namespace = LowercaseString(required=True)
+    # project names in gitlab are NOT case sensitive
+    project = LowercaseString(required=True)
+    # branch names in gitlab are case sensitive
     branch = fields.Str(missing="master")
     commit_sha = fields.Str(required=True)
     notebook = fields.Str(missing=None)
@@ -309,9 +313,12 @@ class Meta:
         # passing unknown params does not error, but the params are ignored
         unknown = EXCLUDE
 
-    project = fields.String(required=False, default=None)
+    # project names in gitlab are NOT case sensitive
+    project = LowercaseString(required=False, default=None)
     commit_sha = fields.String(required=False, default=None)
-    namespace = fields.String(required=False, default=None)
+    # namespaces in gitlab are NOT case sensitive
+    namespace = LowercaseString(required=False, default=None)
+    # branch names in gitlab are case sensitive
     branch = fields.String(required=False, default=None)
 
 

diff --git a/renku_notebooks/config.py b/renku_notebooks/config.py
@@ -125,3 +125,4 @@
 SESSION_AFFINITY = safe_load(os.environ.get("SESSION_AFFINITY", "{}"))
 SESSION_TOLERATIONS = safe_load(os.environ.get("SESSION_TOLERATIONS", "[]"))
 ENFORCE_CPU_LIMITS = os.getenv("ENFORCE_CPU_LIMITS", "off")
+CURRENT_RESOURCE_SCHEMA_VERSION = "1"
diff --git a/resource_schema_migrations/README.md b/resource_schema_migrations/README.md
@@ -0,0 +1,23 @@
+# Resource Schema Migrations
+
+In most cases the notebooks code is compatible with old versions of the JuyterServer CRD and the values of existing
+JupyterServer resources. However sometimes changes are made that require all existing JupyterServer resources
+to be adjusted and migrated to avoid interruption of users' work.
+
+The migrations here are python scripts that utilize the python k8s SDK and modify
+existing resources in the cluster so that they properly function after a new version of the notebook
+service has been released.
+
+The migrations run in an `init` container before the notebook service starts. Since the notebook service
+is a `StatefulSet` the migrations will only truly run in the first pod of the deployment - i.e. the one whose
+name ends with `-0`.
+
+The migrations are made to be idempotent - which means that running a single migration more than once 
+will not provide different results. I.e. if you run the same migration twice, the second time
+around the changes will not be applied to any pods because the label of the jupyterserver resources
+will prevent applying a migration to the wrong servers or to servers that have already been patched.
+
+**WARNING**: The migrations do not support downgrading the notebook service. In the case 
+where it is required to downgrade the notebook service and migrations were applied by
+the upgrades that will be rolled back, then all active sessions should be deleted prior to
+the downgrade to avoid problems.
diff --git a/resource_schema_migrations/config.py b/resource_schema_migrations/config.py
@@ -0,0 +1,5 @@
+import os
+
+PAGINATION_LIMIT = 50
+POD_NAME = os.environ.get("POD_NAME")
+SCHEMA_VERSION_LABEL_NAME = "schemaVersion"
diff --git a/resource_schema_migrations/migration_1.py b/resource_schema_migrations/migration_1.py
@@ -0,0 +1,80 @@
+from kubernetes import client
+from kubernetes import config as k8s_config
+
+from run_all import parse_args
+import config
+
+
+def adjust_annotations(args):
+    k8s_config.load_config()
+    k8s_api = client.CustomObjectsApi(client.ApiClient())
+
+    print(
+        "Running migration 1: Patching projecName and namespace "
+        "in annotations to be lowercarse."
+    )
+
+    annotation_keys = [
+        f"{args.prefix}projectName",
+        f"{args.prefix}namespace",
+    ]
+    next_page = ""
+    dry_run_prefix = "DRY RUN: " if args.dry_run else ""
+
+    while (True):
+        jss = k8s_api.list_namespaced_custom_object(
+            version=args.api_version,
+            namespace=args.namespace,
+            plural=args.plural,
+            limit=config.PAGINATION_LIMIT,
+            group=args.group,
+            _continue=next_page,
+            # select only servers that do not have a schema version label
+            label_selector=f"!{args.prefix}{config.SCHEMA_VERSION_LABEL_NAME}",
+        )
+
+        for js in jss["items"]:
+            annotation_patches = {}
+            js_name = js["metadata"]["name"]
+            print(f"Checking session {js_name}")
+            for annotation_key in annotation_keys:
+                annotations = js["metadata"]["annotations"]
+                try:
+                    annotation_val = annotations[annotation_key]
+                except KeyError:
+                    print(f"Annotation {annotation_key} not found in {js_name}.")
+                    continue
+                if annotation_val != annotation_val.lower():
+                    print(
+                        f"{dry_run_prefix}Patching {js_name} for annotation {annotation_key}: "
+                        f"{annotation_val} --> {annotation_val.lower()}"
+                    )
+                    annotation_patches[annotation_key] = annotation_val.lower()
+                else:
+                    print(f"No need to patch {js_name} for annotation {annotation_key}")
+
+                patch = {
+                    "metadata": {
+                        "labels": {f"{args.prefix}{config.SCHEMA_VERSION_LABEL_NAME}": "1"},
+                    }
+                }
+                if len(annotation_patches.keys()) > 0:
+                    patch["metadata"]["annotations"] = annotation_patches
+                if not args.dry_run:
+                    k8s_api.patch_namespaced_custom_object(
+                        group=args.group,
+                        version=args.api_version,
+                        namespace=args.namespace,
+                        plural=args.plural,
+                        name=js_name,
+                        body=patch,
+                    )
+
+        next_page = jss["metadata"].get("continue")
+        if next_page is None or next_page == "":
+            break
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    adjust_annotations(args)
diff --git a/resource_schema_migrations/run_all.py b/resource_schema_migrations/run_all.py
@@ -0,0 +1,71 @@
+import argparse
+import os
+import re
+
+import migration_1
+import config
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "-n",
+        "--namespace",
+        required=False,
+        default=os.environ.get("K8S_NAMESPACE", "default"),
+        type=str,
+        help="The k8s namespace where to run.",
+    )
+    parser.add_argument(
+        "-d",
+        "--dry-run",
+        action="store_true",
+        required=False,
+        help="If set, no changes are made at all.",
+    )
+    parser.add_argument(
+        "-g",
+        "--group",
+        type=str,
+        required=False,
+        default=os.environ.get("CRD_GROUP", "amalthea.dev"),
+        help="The group for the jupyterserver CRD.",
+    )
+    parser.add_argument(
+        "-a",
+        "--api-version",
+        type=str,
+        required=False,
+        default=os.environ.get("CRD_VERSION", "v1alpha1"),
+        help="The api version for the jupyterserver CRD.",
+    )
+    parser.add_argument(
+        "-p",
+        "--plural",
+        type=str,
+        required=False,
+        default=os.environ.get("CRD_PLURAL", "jupyterservers"),
+        help="The plural name for the jupyterserver CRD.",
+    )
+    parser.add_argument(
+        "--prefix",
+        type=str,
+        required=False,
+        default="renku.io/",
+        help="The renku k8s annotation prefix.",
+    )
+    args = parser.parse_args()
+    return args
+
+
+def run_all(args):
+    # Run all migrations in order
+    print("Starting k8s resource migrations.")
+    migration_1.adjust_annotations(args)
+
+
+if __name__ == "__main__":
+    if config.POD_NAME is not None and re.match(r".*-0$", config.POD_NAME) is not None:
+        # This is the first pod in the deployment - only that one will run the migrations
+        args = parse_args()
+        run_all(args)