From 7bac57c293136ae5cab74d77740d2fc5a03ac226 Mon Sep 17 00:00:00 2001 From: Min RK Date: Thu, 8 Jun 2023 12:51:16 +0200 Subject: [PATCH 01/18] add 'safe' slug scheme - always produces valid values - uses names, etc. as-is, when valid - uses stripped name--hash when needed --- kubespawner/slugs.py | 118 +++++++++++++++++++++++++++++++++++++++++ kubespawner/spawner.py | 53 +++++++++++++++--- 2 files changed, 165 insertions(+), 6 deletions(-) create mode 100644 kubespawner/slugs.py diff --git a/kubespawner/slugs.py b/kubespawner/slugs.py new file mode 100644 index 00000000..b1dd8a44 --- /dev/null +++ b/kubespawner/slugs.py @@ -0,0 +1,118 @@ +"""Tools for generating + +Requirements: + +- always valid for arbitary strings +- no collisions +""" +import hashlib +import re +import string + +_alphanum = set(string.ascii_letters + string.digits) +_alphanum_lower = set(string.ascii_lowercase + string.digits) +_lower_plus_hyphen = _alphanum_lower | {'-'} + +# patterns _do not_ need to cover length or start/end conditions, +# which are handled separately +_object_pattern = re.compile(r'^[a-z0-9\.-]+$') +_label_pattern = re.compile(r'^[a-z0-9\.-_]+$', flags=re.IGNORECASE) + + +def _is_valid_general( + s, starts_with=None, ends_with=None, pattern=None, max_length=None +): + """General is_valid check + + Checks rules: + """ + if not 1 <= len(s) <= max_length: + return False + if starts_with and not s.startswith(starts_with): + return False + if ends_with and not s.endswith(ends_with): + return False + if pattern and not pattern.match(s): + return False + return True + + +def is_valid_object_name(s): + """is_valid check for object names""" + # object rules: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + return _is_valid_general( + s, + starts_with=_alphanum_lower, + ends_with=_alphanum_lower, + pattern=_object_pattern, + max_length=255, + ) + + +def is_valid_label(s): + """is_valid check for label values""" + # label rules: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set + return _is_valid_general( + s, + starts_with=_alphanum, + ends_with=_alphanum, + pattern=_label_pattern, + max_length=63, + ) + + +def is_valid_default(s): + """Strict is_valid + + Returns True if it's valid for _all_ our known uses + + So we can more easily have a single is_valid check. + + - object names have stricter character rules, but have longer max length + - labels have short max length, but allow uppercase + """ + return _is_valid_general( + s, + starts_with=_alphanum_lower, + ends_with=_alphanum_lower, + pattern=_object_pattern, + max_length=63, + ) + + +def strip_and_hash(name): + """Generate an always-safe, unique string for any input""" + # make sure we start with a prefix + # quick, short hash to avoid name collsions + name_hash = hashlib.sha256(name).hexdigest()[:8] + safe_chars = [c for c in name.lower() if c in _lower_plus_hyphen] + safe_name = ''.join(safe_chars[:24]) + if not safe_name: + safe_name = 'x' + if safe_name.startswith('-'): + # starting with '-' is generally not allowed, + # start with 'j-' instead + # Question: always do this so it's consistent, instead of as-needed? + safe_name = f"j{safe_name}" + return f"{safe_name}--{name_hash}" + + +def safe_slug(name, is_valid=is_valid_default): + """Always generate a safe slug + + is_valid should be a callable that returns True if a given string follows appropriate rules, + and False if it does not. + + Given a string, if it's already valid, use it. + If it's not valid, follow a safe encoding scheme that ensures: + + 1. validity, and + 2. no collisions + """ + if '--' in name: + # don't accept any names that could collide with the safe slug + return strip_and_hash(name) + if is_valid(name): + return name + else: + return strip_and_hash(name) diff --git a/kubespawner/spawner.py b/kubespawner/spawner.py index 9e88ee0c..9a4566d0 100644 --- a/kubespawner/spawner.py +++ b/kubespawner/spawner.py @@ -25,6 +25,7 @@ from traitlets import ( Bool, Dict, + Enum, Integer, List, Unicode, @@ -44,6 +45,7 @@ make_service, ) from .reflector import ResourceReflector +from .slugs import is_valid_label, safe_slug from .utils import recursive_update @@ -294,6 +296,31 @@ def __init__(self, *args, **kwargs): """, ) + slug_scheme = Enum( + "escape", + choices=["escape", "safe"], + config=True, + help="""Select the scheme for producing slugs. + + Can be 'escape' or 'safe'. + + 'escape' is the default for backward-compatibility, + escaping strings for safety. + + but 'safe' is preferred as it produces both: + + - better values, where possible (no `-2d` inserted to escape hyphens) + - always valid names, avoiding issues where escaping produces invalid names, + stripping characters and appending hashes where needed for names + that are not already valid. + + + + + .. versionadded:: 6.1 + """, + ) + user_namespace_labels = Dict( config=True, help=""" @@ -1809,18 +1836,32 @@ def _expand_user_properties(self, template): safe_username = escapism.escape( self.user.name, safe=safe_chars, escape_char='-' ).lower() + + if self.slug_scheme == "safe": + # safe slug scheme escapes _after_ rendering the template + username = self.user.name + servername = raw_servername + else: + username = safe_username + servername = safe_servername + rendered = template.format( userid=self.user.id, - username=safe_username, + username=username, + escaped_username=safe_username, unescaped_username=self.user.name, legacy_escape_username=legacy_escaped_username, - servername=safe_servername, + servername=servername, unescaped_servername=raw_servername, + escaped_servername=safe_servername, hubnamespace=hub_namespace, ) # strip trailing - delimiter in case of empty servername. # k8s object names cannot have trailing - - return rendered.rstrip("-") + rendered = rendered.rstrip("-") + if self.slug_scheme == "safe": + rendered = safe_slug(rendered) + return rendered def _expand_all(self, src): if isinstance(src, list): @@ -1836,9 +1877,9 @@ def _build_common_labels(self, extra_labels): # Default set of labels, picked up from # https://github.com/helm/helm-www/blob/HEAD/content/en/docs/chart_best_practices/labels.md labels = { - 'hub.jupyter.org/username': escapism.escape( - self.user.name, safe=self.safe_chars, escape_char='-' - ).lower() + 'hub.jupyter.org/username': safe_slug( + self.user.name, is_valid=is_valid_label + ), } labels.update(extra_labels) labels.update(self.common_labels) From bdf0f37d49179ab07029a5b60d775ebfee1e9a84 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 12 Jun 2024 07:59:05 +0000 Subject: [PATCH 02/18] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- kubespawner/slugs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/kubespawner/slugs.py b/kubespawner/slugs.py index b1dd8a44..0a0b8fc8 100644 --- a/kubespawner/slugs.py +++ b/kubespawner/slugs.py @@ -5,6 +5,7 @@ - always valid for arbitary strings - no collisions """ + import hashlib import re import string From a1b524dd28e12af8855da9720bd0a102d1cb1a88 Mon Sep 17 00:00:00 2001 From: Min RK Date: Wed, 12 Jun 2024 14:42:01 +0200 Subject: [PATCH 03/18] toward compatible transition for safe slugs - safe scheme by default - preserve old scheme if found - persist pvc_name and attempt to handle lack of persistence from before - add handle_legacy_names and use_legacy_labels configuration for legacy labels --- kubespawner/slugs.py | 10 +- kubespawner/spawner.py | 267 ++++++++++++++++++++++++++++++++++------- tests/test_slugs.py | 21 ++++ 3 files changed, 248 insertions(+), 50 deletions(-) create mode 100644 tests/test_slugs.py diff --git a/kubespawner/slugs.py b/kubespawner/slugs.py index 0a0b8fc8..06ebba57 100644 --- a/kubespawner/slugs.py +++ b/kubespawner/slugs.py @@ -1,4 +1,4 @@ -"""Tools for generating +"""Tools for generating slugs like k8s object names and labels Requirements: @@ -10,9 +10,9 @@ import re import string -_alphanum = set(string.ascii_letters + string.digits) -_alphanum_lower = set(string.ascii_lowercase + string.digits) -_lower_plus_hyphen = _alphanum_lower | {'-'} +_alphanum = tuple(string.ascii_letters + string.digits) +_alphanum_lower = tuple(string.ascii_lowercase + string.digits) +_lower_plus_hyphen = _alphanum_lower + ('-',) # patterns _do not_ need to cover length or start/end conditions, # which are handled separately @@ -85,7 +85,7 @@ def strip_and_hash(name): """Generate an always-safe, unique string for any input""" # make sure we start with a prefix # quick, short hash to avoid name collsions - name_hash = hashlib.sha256(name).hexdigest()[:8] + name_hash = hashlib.sha256(name.encode("utf8")).hexdigest()[:8] safe_chars = [c for c in name.lower() if c in _lower_plus_hyphen] safe_name = ''.join(safe_chars[:24]) if not safe_name: diff --git a/kubespawner/spawner.py b/kubespawner/spawner.py index d4ac3075..9c6d5cfa 100644 --- a/kubespawner/spawner.py +++ b/kubespawner/spawner.py @@ -185,6 +185,8 @@ def __init__(self, *args, **kwargs): # By now, all the traitlets have been set, so we can use them to # compute other attributes + # namespace, pod_name, dns_name are persisted in state + # thse same assignments should match clear_state if self.enable_user_namespaces: self.namespace = self._expand_user_properties(self.user_namespace_template) self.log.info(f"Using user namespace: {self.namespace}") @@ -193,9 +195,13 @@ def __init__(self, *args, **kwargs): self.dns_name = self.dns_name_template.format( namespace=self.namespace, name=self.pod_name ) + self.secret_name = self._expand_user_properties(self.secret_name_template) self.pvc_name = self._expand_user_properties(self.pvc_name_template) + # _pvc_exists indicates whether we've checked at least once that our pvc name is right + # only persist pvc name in state if pvc exists + self._pvc_exists = False # initialized from load_state or start if self.working_dir: self.working_dir = self._expand_user_properties(self.working_dir) if self.port == 0: @@ -316,27 +322,44 @@ def __init__(self, *args, **kwargs): ) slug_scheme = Enum( - "escape", - choices=["escape", "safe"], + default_value="safe", + values=["safe", "escape"], config=True, - help="""Select the scheme for producing slugs. + help="""Select the scheme for producing slugs such as pod names, etc. - Can be 'escape' or 'safe'. + Can be 'safe' or 'escape'. - 'escape' is the default for backward-compatibility, - escaping strings for safety. + 'escape' is the legacy scheme, used in kubespawner < 7. + Pick this to minimize changes when upgrading from kubespawner 6. + + The way templates are computed is different between the two schemes: + + 'escape' scheme: + + - does not guarantee correct names, e.g. does not handle capital letters or length + - escapes fields one at a time in templates: + - `{username}` is the _escaped_ username + - `{servername}` is the _escaped_ server name + + 'safe' scheme: + + - should guarantee correct names + - escapes the whole string at once + - escapes only if needed + - in templates: + - `{username}` is the user's actual name + - `{escaped_username}` is the escaped username, used in the previous scheme + - `{servername}` is the user's actual name + - `{escaped_servername}` is the escaped server name, used in the previous scheme - but 'safe' is preferred as it produces both: + 'safe' is the default and preferred as it produces both: - better values, where possible (no `-2d` inserted to escape hyphens) - - always valid names, avoiding issues where escaping produces invalid names, + - always valid names, avoiding issues where escaping produced invalid names, stripping characters and appending hashes where needed for names that are not already valid. - - - - .. versionadded:: 6.1 + .. versionadded:: 7 """, ) @@ -687,10 +710,6 @@ def _deprecated_changed(self, change): { 'app.kubernetes.io/name': 'jupyterhub', 'app.kubernetes.io/managed-by': 'kubespawner', - # app and heritage are older variants of the modern - # app.kubernetes.io labels - 'app': 'jupyterhub', - 'heritage': 'jupyterhub', }, config=True, help=""" @@ -702,6 +721,23 @@ def _deprecated_changed(self, change): """, ) + @default("common_labels") + def _common_labels_default(self): + labels = { + 'app.kubernetes.io/name': 'jupyterhub', + 'app.kubernetes.io/managed-by': 'kubespawner', + } + if self.use_legacy_labels: + labels.update( + { + # app and heritage are older variants of the modern + # app.kubernetes.io labels + 'app': 'jupyterhub', + 'heritage': 'jupyterhub', + } + ) + return labels + extra_labels = Dict( config=True, help=""" @@ -1373,6 +1409,31 @@ def _validate_image_pull_secrets(self, proposal): """, ) + handle_legacy_names = Bool( + True, + config=True, + help="""handle legacy names and labels + + kubespawner 7 changed the scheme for computing names and labels to be more reliably valid. + In order to preserve backward compatibility, the old names must be handled in some places. + + You can safely disable this if no PVCs were created or running servers were started + before upgrading to kubespawner 7. + """, + ) + + use_legacy_labels = Bool( + True, + config=True, + help="""Apply legacy labels + + Kubespawner 7 adopted standard labels. + These can be disabled if no running Spawners were started before upgrading to kubespawner 7. + + use_legacy_labels MUST be True if handle_legacy_names is True. + """, + ) + # FIXME: Don't override 'default_value' ("") or 'allow_none' (False) (Breaking change) scheduler_name = Unicode( None, @@ -1839,9 +1900,12 @@ def _set_deprecated(name, new_name, version, self, value): ) del _deprecated_name - def _expand_user_properties(self, template): + def _expand_user_properties(self, template, slug_scheme=None): # Make sure username and servername match the restrictions for DNS labels # Note: '-' is not in safe_chars, as it is being used as escape character + if slug_scheme is None: + slug_scheme = self.slug_scheme + safe_chars = set(string.ascii_lowercase + string.digits) raw_servername = self.name or '' @@ -1860,13 +1924,33 @@ def _expand_user_properties(self, template): self.user.name, safe=safe_chars, escape_char='-' ).lower() - if self.slug_scheme == "safe": + if slug_scheme == "safe": # safe slug scheme escapes _after_ rendering the template username = self.user.name servername = raw_servername - else: + # but not escaping before joining {username}--{servername} + # reintroduces the possibility of collision + # e.g. {user--name}--{server} == {user}--{name--server} + # and {user-}--{server} == {user}--{-server} + # in that case, double-escape username and server name. + # double-escape will not collide, + # because it will match always match this condition and be escaped again + if ( + '--' in username + or '--' in servername + or username.endswith("-") + or servername.startswith("-") + ): + username = safe_slug(username) + if servername: + servername = safe_slug(servername) + elif slug_scheme == "escape": username = safe_username servername = safe_servername + else: + raise ValueError( + f"slug scheme must be 'safe' or 'escape', not '{slug_scheme}'" + ) rendered = template.format( userid=self.user.id, @@ -1880,10 +1964,16 @@ def _expand_user_properties(self, template): hubnamespace=hub_namespace, ) # strip trailing - delimiter in case of empty servername. - # k8s object names cannot have trailing - - rendered = rendered.rstrip("-") - if self.slug_scheme == "safe": + # k8s object names cannot have trailing '-' + if slug_scheme == "safe": + if not (username.endswith("-") or self.name.endswith("-")): + # strip trailing - delimiter _unless_ it's actually the end of the user or server name + rendered = rendered.rstrip("-") + # 'safe' slug scheme does processing after rendered = safe_slug(rendered) + else: + # safe to strip trailing - in escape scheme because escaped username/servername will never end in '-' + rendered = rendered.rstrip("-") return rendered def _expand_env(self, env): @@ -1924,12 +2014,17 @@ def _build_pod_labels(self, extra_labels): labels.update( { 'app.kubernetes.io/component': self.component_label, - 'component': self.component_label, - 'hub.jupyter.org/servername': escapism.escape( - self.name, safe=self.safe_chars, escape_char='-' - ).lower(), + 'hub.jupyter.org/servername': safe_slug( + self.name, is_valid=is_valid_label + ), } ) + if self.use_legacy_labels: + labels.update( + { + 'component': self.component_label, + } + ) return labels def _build_common_annotations(self, extra_annotations): @@ -2150,15 +2245,20 @@ def get_pvc_manifest(self): labels = self._build_common_labels(self._expand_all(self.storage_extra_labels)) labels.update( { - # The component label has been set to singleuser-storage, but should - # probably have been set to singleuser-server (self.component_label) - # as that ties it to the user pods kubespawner creates. Due to that, - # the newly introduced label app.kubernetes.io/component gets - # singleuser-server (self.component_label) as a value instead. 'app.kubernetes.io/component': self.component_label, - 'component': 'singleuser-storage', } ) + if self.use_legacy_labels: + labels.update( + { + # The component label has been set to singleuser-storage, but should + # probably have been set to singleuser-server (self.component_label) + # as that ties it to the user pods kubespawner creates. Due to that, + # the newly introduced label app.kubernetes.io/component gets + # singleuser-server (self.component_label) as a value instead. + 'component': 'singleuser-storage', + } + ) annotations = self._build_common_annotations( self._expand_all(self.storage_extra_annotations) @@ -2214,9 +2314,15 @@ def get_state(self): calculated dynamically, or it changes between restarts. """ state = super().get_state() + # these should only be persisted if our pod is running + # but we don't have a sync check for that state['pod_name'] = self.pod_name state['namespace'] = self.namespace state['dns_name'] = self.dns_name + + # persist pvc name only if it's established that it exists + if self._pvc_exists: + state['pvc_name'] = self.pvc_name return state def get_env(self): @@ -2247,7 +2353,9 @@ def load_state(self, state): be the case. This allows us to continue serving from the old pods with the old names. - For a similar reason, we also save the namespace. + For a similar reason, we also save the namespace, dns name, pvc name. + Anything where changing a template may break something after Hub restart + should be persisted here. """ if 'pod_name' in state: self.pod_name = state['pod_name'] @@ -2258,6 +2366,29 @@ def load_state(self, state): if 'dns_name' in state: self.dns_name = state['dns_name'] + if 'pvc_name' in state: + self.pvc_name = state['pvc_name'] + # indicate that we've already checked that self.pvc_name is correct + # and we don't need to check for legacy names anymore + self._pvc_exists = True + + def clear_state(self): + """Reset state for a stopped server + + This should reset all state values to new values, + except those that should persist across Spawner restarts (e.g. pvc_name) + """ + super().clear_state() + # this should be the same initialization as __init__ / trait defaults + # this allows changing config to take effect after a server restart + if self.enable_user_namespaces: + self.namespace = self._expand_user_properties(self.user_namespace_template) + + self.pod_name = self._expand_user_properties(self.pod_name_template) + self.dns_name = self.dns_name_template.format( + namespace=self.namespace, name=self.pod_name + ) + async def poll(self): """ Check if the pod is still running. @@ -2526,20 +2657,18 @@ async def _start_watching_pods(self, replace=False): If replace=True, a running pod reflector will be stopped and a new one started (for recovering from possible errors). """ + if self.use_legacy_labels: + # For safe upgrades to new labels, monitor on old labels + # it is safe to disable this as soon as the last server _started_ + # was started after upgrading to kubespawner 7 + # Related to https://github.com/jupyterhub/kubespawner/issues/834 + labels = {"component": self.component_label} + else: + labels = {"app.kubernetes.io/component": self.component_label} return await self._start_reflector( kind="pods", reflector_class=PodReflector, - # NOTE: We monitor resources with the old component label instead of - # the modern app.kubernetes.io/component label. A change here - # is only non-breaking if we can assume the running resources - # monitored can be detected by either old or new labels. - # - # The modern labels were added to resources created by - # KubeSpawner 6.3 first adopted in z2jh 4.0. - # - # Related to https://github.com/jupyterhub/kubespawner/issues/834 - # - labels={"component": self.component_label}, + labels=labels, omit_namespace=self.enable_user_namespaces, replace=replace, ) @@ -2624,6 +2753,7 @@ async def _make_create_pvc_request(self, pvc, request_timeout): # error for quota being exceeded. This is because quota is # checked before determining if the PVC needed to be # created. + pvc_name = pvc.metadata.name try: self.log.info( @@ -2738,6 +2868,25 @@ async def _make_create_resource_request(self, kind, manifest): else: return True + async def _check_pvc_exists(self, pvc_name, namespace): + """Return True/False if a pvc exists""" + try: + await exponential_backoff( + partial( + self.api.read_namespaced_persistent_volume_claim, + name=pvc_name, + namespace=self.namespace, + ), + f"Could not check if PVC {pvc_name} exists", + timeout=self.k8s_api_request_retry_timeout, + ) + except ApiException as e: + if e.status == 404: + return False + else: + raise + return True + async def _start(self): """Start the user's pod""" @@ -2778,8 +2927,34 @@ async def _start(self): self._last_event = events[-1]["metadata"]["uid"] if self.storage_pvc_ensure: - pvc = self.get_pvc_manifest() + if self.handle_legacy_names and not self._pvc_exists: + # pvc name wasn't reliably persisted before kubespawner 17, so if the name changed, + # check if a pvc with the legacy name exists and use it + # this will be persisted in state on next launch in the future, + # so the comparison below will be False for launches after the first. + legacy_pvc_name = self._expand_user_properties( + self.pvc_name_template, slug_scheme="escape" + ) + if legacy_pvc_name != self.pvc_name: + self.log.debug( + f"Checking for legacy-named pvc {legacy_pvc_name} for {self.user.name}" + ) + if await self._check_pvc_exists(self.pvc_name, self.namespace): + # if current name exists: use it + self._pvc_exists = True + else: + # current name doesn't exist, check if legacy name exists + if await self._check_pvc_exists( + legacy_pvc_name, self.namespace + ): + # legacy name exists, use it to avoid data loss + self.log.warning( + f"Using legacy pvc {legacy_pvc_name} for {self.user.name}" + ) + self.pvc_name = legacy_pvc_name + self._pvc_exists = True + pvc = self.get_pvc_manifest() # If there's a timeout, just let it propagate await exponential_backoff( partial( @@ -2789,6 +2964,8 @@ async def _start(self): # Each req should be given k8s_api_request_timeout seconds. timeout=self.k8s_api_request_retry_timeout, ) + # indicate that pvc name is known and should be persisted + self._pvc_exists = True # If we run into a 409 Conflict error, it means a pod with the # same name already exists. We stop it, wait for it to stop, and diff --git a/tests/test_slugs.py b/tests/test_slugs.py new file mode 100644 index 00000000..6f281417 --- /dev/null +++ b/tests/test_slugs.py @@ -0,0 +1,21 @@ +import pytest + +from kubespawner.slugs import safe_slug + + +@pytest.mark.parametrize( + "name, expected", + [ + ("jupyter-alex", "jupyter-alex"), + ("jupyter-Alex", "jupyter-alex--3a1c285c"), + ("jupyter-üni", "jupyter-ni--a5aaf5dd"), + ("endswith-", "endswith---165f1166"), + ("-start", "j-start--f587e2dc"), + ("j-start--f587e2dc", "j-start--f587e2dc--f007ef7c"), + ("x" * 65, "xxxxxxxxxxxxxxxxxxxxxxxx--9537c5fd"), + ("x" * 66, "xxxxxxxxxxxxxxxxxxxxxxxx--6eb879f1"), + ], +) +def test_safe_slug(name, expected): + slug = safe_slug(name) + assert slug == expected From 058afd57c97468ac73338bfbd14226381d0fae1e Mon Sep 17 00:00:00 2001 From: Min RK Date: Mon, 17 Jun 2024 09:06:03 +0200 Subject: [PATCH 04/18] revert use_legacy_labels config no need to make legacy component labels optional --- kubespawner/spawner.py | 77 ++++++++++++------------------------------ 1 file changed, 22 insertions(+), 55 deletions(-) diff --git a/kubespawner/spawner.py b/kubespawner/spawner.py index 9c6d5cfa..64c48acd 100644 --- a/kubespawner/spawner.py +++ b/kubespawner/spawner.py @@ -710,6 +710,10 @@ def _deprecated_changed(self, change): { 'app.kubernetes.io/name': 'jupyterhub', 'app.kubernetes.io/managed-by': 'kubespawner', + # app and heritage are older variants of the modern + # app.kubernetes.io labels + 'app': 'jupyterhub', + 'heritage': 'jupyterhub', }, config=True, help=""" @@ -721,23 +725,6 @@ def _deprecated_changed(self, change): """, ) - @default("common_labels") - def _common_labels_default(self): - labels = { - 'app.kubernetes.io/name': 'jupyterhub', - 'app.kubernetes.io/managed-by': 'kubespawner', - } - if self.use_legacy_labels: - labels.update( - { - # app and heritage are older variants of the modern - # app.kubernetes.io labels - 'app': 'jupyterhub', - 'heritage': 'jupyterhub', - } - ) - return labels - extra_labels = Dict( config=True, help=""" @@ -1422,18 +1409,6 @@ def _validate_image_pull_secrets(self, proposal): """, ) - use_legacy_labels = Bool( - True, - config=True, - help="""Apply legacy labels - - Kubespawner 7 adopted standard labels. - These can be disabled if no running Spawners were started before upgrading to kubespawner 7. - - use_legacy_labels MUST be True if handle_legacy_names is True. - """, - ) - # FIXME: Don't override 'default_value' ("") or 'allow_none' (False) (Breaking change) scheduler_name = Unicode( None, @@ -2014,17 +1989,12 @@ def _build_pod_labels(self, extra_labels): labels.update( { 'app.kubernetes.io/component': self.component_label, + 'component': self.component_label, 'hub.jupyter.org/servername': safe_slug( self.name, is_valid=is_valid_label ), } ) - if self.use_legacy_labels: - labels.update( - { - 'component': self.component_label, - } - ) return labels def _build_common_annotations(self, extra_annotations): @@ -2245,20 +2215,15 @@ def get_pvc_manifest(self): labels = self._build_common_labels(self._expand_all(self.storage_extra_labels)) labels.update( { + # The component label has been set to singleuser-storage, but should + # probably have been set to singleuser-server (self.component_label) + # as that ties it to the user pods kubespawner creates. Due to that, + # the newly introduced label app.kubernetes.io/component gets + # singleuser-server (self.component_label) as a value instead. 'app.kubernetes.io/component': self.component_label, + 'component': 'singleuser-storage', } ) - if self.use_legacy_labels: - labels.update( - { - # The component label has been set to singleuser-storage, but should - # probably have been set to singleuser-server (self.component_label) - # as that ties it to the user pods kubespawner creates. Due to that, - # the newly introduced label app.kubernetes.io/component gets - # singleuser-server (self.component_label) as a value instead. - 'component': 'singleuser-storage', - } - ) annotations = self._build_common_annotations( self._expand_all(self.storage_extra_annotations) @@ -2657,18 +2622,20 @@ async def _start_watching_pods(self, replace=False): If replace=True, a running pod reflector will be stopped and a new one started (for recovering from possible errors). """ - if self.use_legacy_labels: - # For safe upgrades to new labels, monitor on old labels - # it is safe to disable this as soon as the last server _started_ - # was started after upgrading to kubespawner 7 - # Related to https://github.com/jupyterhub/kubespawner/issues/834 - labels = {"component": self.component_label} - else: - labels = {"app.kubernetes.io/component": self.component_label} return await self._start_reflector( kind="pods", reflector_class=PodReflector, - labels=labels, + # NOTE: We monitor resources with the old component label instead of + # the modern app.kubernetes.io/component label. A change here + # is only non-breaking if we can assume the running resources + # monitored can be detected by either old or new labels. + # + # The modern labels were added to resources created by + # KubeSpawner 7 first adopted in z2jh 4.0. + # + # Related to https://github.com/jupyterhub/kubespawner/issues/834 + # + labels={"component": self.component_label}, omit_namespace=self.enable_user_namespaces, replace=replace, ) From 6968b68ca9aa0997d231d26f3e4bd0e4f2030ec4 Mon Sep 17 00:00:00 2001 From: Min RK Date: Mon, 17 Jun 2024 11:17:33 +0200 Subject: [PATCH 05/18] let safe slug scheme live side-by-side with escape scheme - add user_server to template namespace - add `safe_` prefixed fields to template namespace using new 'safe' slug scheme - add pod_name, namespace, pvc_name to template namespace --- kubespawner/slugs.py | 63 +++++++++++++------ kubespawner/spawner.py | 134 +++++++++++++++++++++++++---------------- tests/test_slugs.py | 54 ++++++++++++++--- 3 files changed, 174 insertions(+), 77 deletions(-) diff --git a/kubespawner/slugs.py b/kubespawner/slugs.py index 06ebba57..6044e75c 100644 --- a/kubespawner/slugs.py +++ b/kubespawner/slugs.py @@ -19,15 +19,23 @@ _object_pattern = re.compile(r'^[a-z0-9\.-]+$') _label_pattern = re.compile(r'^[a-z0-9\.-_]+$', flags=re.IGNORECASE) +# match two or more hyphens +_hyphen_plus_pattern = re.compile('--+') + +# length of hash suffix +_hash_length = 8 + def _is_valid_general( - s, starts_with=None, ends_with=None, pattern=None, max_length=None + s, starts_with=None, ends_with=None, pattern=None, min_length=None, max_length=None ): """General is_valid check Checks rules: """ - if not 1 <= len(s) <= max_length: + if min_length and len(s) < min_length: + return False + if max_length and len(s) > max_length: return False if starts_with and not s.startswith(starts_with): return False @@ -47,12 +55,16 @@ def is_valid_object_name(s): ends_with=_alphanum_lower, pattern=_object_pattern, max_length=255, + min_length=1, ) def is_valid_label(s): """is_valid check for label values""" # label rules: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set + if not s: + # empty strings are valid labels + return True return _is_valid_general( s, starts_with=_alphanum, @@ -77,28 +89,40 @@ def is_valid_default(s): starts_with=_alphanum_lower, ends_with=_alphanum_lower, pattern=_object_pattern, + min_length=1, max_length=63, ) -def strip_and_hash(name): - """Generate an always-safe, unique string for any input""" - # make sure we start with a prefix - # quick, short hash to avoid name collsions - name_hash = hashlib.sha256(name.encode("utf8")).hexdigest()[:8] - safe_chars = [c for c in name.lower() if c in _lower_plus_hyphen] - safe_name = ''.join(safe_chars[:24]) +def strip_and_hash(name, max_length=32): + """Generate an always-safe, unique string for any input + + truncates name to max_length - len(hash_suffix) to fit in max_length + after adding hash suffix + """ + name_length = max_length - (_hash_length + 3) + if name_length < 1: + raise ValueError(f"Cannot make safe names shorter than {_hash_length + 4}") + # quick, short hash to avoid name collisions + name_hash = hashlib.sha256(name.encode("utf8")).hexdigest()[:_hash_length] + # compute safe slug from name (don't worry about collisions, hash handles that) + # cast to lowercase, exclude all but lower & hyphen + safe_name = ''.join([c for c in name.lower() if c in _lower_plus_hyphen]) + # strip leading '-' + # squash repeated '--' to one + safe_name = _hyphen_plus_pattern.sub("-", safe_name.lstrip("-")) + # truncate to 24 chars, strip trailing '-' + safe_name = safe_name[:name_length].rstrip("-") if not safe_name: + # make sure it's non-empty safe_name = 'x' - if safe_name.startswith('-'): - # starting with '-' is generally not allowed, - # start with 'j-' instead - # Question: always do this so it's consistent, instead of as-needed? - safe_name = f"j{safe_name}" - return f"{safe_name}--{name_hash}" + # due to stripping of '-' above, + # the result will always have _exactly_ '---', never '--' nor '----' + # use '---' to avoid colliding with `{username}--{servername}` template join + return f"{safe_name}---{name_hash}" -def safe_slug(name, is_valid=is_valid_default): +def safe_slug(name, is_valid=is_valid_default, max_length=None): """Always generate a safe slug is_valid should be a callable that returns True if a given string follows appropriate rules, @@ -112,8 +136,9 @@ def safe_slug(name, is_valid=is_valid_default): """ if '--' in name: # don't accept any names that could collide with the safe slug - return strip_and_hash(name) - if is_valid(name): + return strip_and_hash(name, max_length=max_length or 32) + # allow max_length override for truncated sub-strings + if is_valid(name) and (max_length is None or len(name) <= max_length): return name else: - return strip_and_hash(name) + return strip_and_hash(name, max_length=max_length or 32) diff --git a/kubespawner/spawner.py b/kubespawner/spawner.py index 64c48acd..0feb1099 100644 --- a/kubespawner/spawner.py +++ b/kubespawner/spawner.py @@ -186,9 +186,11 @@ def __init__(self, *args, **kwargs): # compute other attributes # namespace, pod_name, dns_name are persisted in state - # thse same assignments should match clear_state + # these same assignments should match clear_state if self.enable_user_namespaces: - self.namespace = self._expand_user_properties(self.user_namespace_template) + self.namespace = self._expand_user_properties( + self.user_namespace_template, scheme="slug" + ) self.log.info(f"Using user namespace: {self.namespace}") self.pod_name = self._expand_user_properties(self.pod_name_template) @@ -542,7 +544,7 @@ def _namespace_default(self): ) pod_name_template = Unicode( - 'jupyter-{username}--{servername}', + 'jupyter-{user_server}', config=True, help=""" Template to use to form the name of user's pods. @@ -573,7 +575,7 @@ def _namespace_default(self): The IP address (or hostname) of user's pods which KubeSpawner connects to. If you do not specify the value, KubeSpawner will use the pod IP. - e.g. `jupyter-{username}--{servername}.notebooks.jupyterhub.svc.cluster.local`, + e.g. `{pod_name}.notebooks.jupyterhub.svc.cluster.local`, `{username}`, `{userid}`, `{servername}`, `{hubnamespace}`, `{unescaped_username}`, and `{unescaped_servername}` will be expanded if @@ -617,7 +619,7 @@ def _namespace_default(self): """, ) pvc_name_template = Unicode( - 'claim-{username}--{servername}', + 'claim-{user_server}', config=True, help=""" Template to use to form the name of user's pvc. @@ -1884,71 +1886,100 @@ def _expand_user_properties(self, template, slug_scheme=None): safe_chars = set(string.ascii_lowercase + string.digits) raw_servername = self.name or '' - safe_servername = escapism.escape( + escaped_servername = escapism.escape( raw_servername, safe=safe_chars, escape_char='-' ).lower() + # TODO: measure string template? + # for object names, max is 255, so very unlikely to exceed + # but labels are only 64, so adding a few fields together could easily exceed length limit + # if the per-field limit is more than half the whole budget + # for now, recommend {user_server} anywhere both username and servername are desired + _slug_max_length = 48 + + if raw_servername: + safe_servername = safe_slug(raw_servername, max_length=_slug_max_length) + else: + safe_servername = "" + + username = raw_username = self.user.name + safe_username = safe_slug(raw_username, max_length=_slug_max_length) + + # compute safe_user_server = {username}--{servername} + if ( + # escaping after joining means + # possible collision with `--` delimiter + '--' in username + or '--' in raw_servername + or username.endswith("-") + or raw_servername.startswith("-") + # length exceeded + or len(safe_username) + len(safe_username) + 2 > _slug_max_length + ): + # need double-escape if there's a chance of collision + safe_user_server = safe_slug( + f"{safe_username}--{safe_servername}", max_length=_slug_max_length + ) + else: + if raw_servername: + safe_user_server = safe_slug( + f"{username}--{raw_servername}", max_length=_slug_max_length + ) + else: + safe_user_server = safe_username + hub_namespace = self._namespace_default() if hub_namespace == "default": hub_namespace = "user" - legacy_escaped_username = ''.join( - [s if s in safe_chars else '-' for s in self.user.name.lower()] - ) - safe_username = escapism.escape( + escaped_username = escapism.escape( self.user.name, safe=safe_chars, escape_char='-' ).lower() if slug_scheme == "safe": - # safe slug scheme escapes _after_ rendering the template - username = self.user.name - servername = raw_servername - # but not escaping before joining {username}--{servername} - # reintroduces the possibility of collision - # e.g. {user--name}--{server} == {user}--{name--server} - # and {user-}--{server} == {user}--{-server} - # in that case, double-escape username and server name. - # double-escape will not collide, - # because it will match always match this condition and be escaped again - if ( - '--' in username - or '--' in servername - or username.endswith("-") - or servername.startswith("-") - ): - username = safe_slug(username) - if servername: - servername = safe_slug(servername) - elif slug_scheme == "escape": username = safe_username servername = safe_servername + user_server = safe_user_server + elif slug_scheme == "escape": + # backward-compatible 'escape' scheme is not safe + username = escaped_username + servername = escaped_servername + if servername: + user_server = f"{escaped_username}--{escaped_servername}" + else: + user_server = escaped_username else: raise ValueError( f"slug scheme must be 'safe' or 'escape', not '{slug_scheme}'" ) - rendered = template.format( + ns = dict( + # raw values, always consistent userid=self.user.id, - username=username, - escaped_username=safe_username, unescaped_username=self.user.name, - legacy_escape_username=legacy_escaped_username, - servername=servername, unescaped_servername=raw_servername, - escaped_servername=safe_servername, hubnamespace=hub_namespace, + # scheme-dependent + username=username, + servername=servername, + user_server=user_server, + # safe values (new 'safe' scheme) + safe_username=safe_username, + safe_servername=safe_servername, + safe_user_server=safe_user_server, + # legacy values (old 'escape' scheme) + escaped_username=escaped_username, + escaped_servername=escaped_servername, + escaped_user_server=f"{escaped_username}--{escaped_servername}", ) - # strip trailing - delimiter in case of empty servername. - # k8s object names cannot have trailing '-' - if slug_scheme == "safe": - if not (username.endswith("-") or self.name.endswith("-")): - # strip trailing - delimiter _unless_ it's actually the end of the user or server name - rendered = rendered.rstrip("-") - # 'safe' slug scheme does processing after - rendered = safe_slug(rendered) - else: - # safe to strip trailing - in escape scheme because escaped username/servername will never end in '-' - rendered = rendered.rstrip("-") + # add some resolved values so they can be referred to. + # these may not be defined yet (i.e. when computing the values themselves). + for attr_name in ("pod_name", "pvc_name", "namespace"): + ns[attr_name] = getattr(self, attr_name, f"{attr_name}_unavailable!") + + rendered = template.format(**ns) + # strip trailing '-' in case of old '{username}--{servername}' template + rendered = rendered.rstrip("-") return rendered def _expand_env(self, env): @@ -2054,12 +2085,11 @@ def _get_pod_url(self, pod): hostname = f"[{hostname}]" if self.pod_connect_ip: + # pod_connect_ip is not a slug hostname = ".".join( [ - s.rstrip("-") - for s in self._expand_user_properties(self.pod_connect_ip).split( - "." - ) + self._expand_user_properties(s) if '{' in s else s + for s in self.pod_connect_ip.split(".") ] ) @@ -2277,6 +2307,8 @@ def get_state(self): We also save the namespace and DNS name for use cases where the namespace is calculated dynamically, or it changes between restarts. + + `pvc_name` is also saved, to prevent data loss if template changes across restarts. """ state = super().get_state() # these should only be persisted if our pod is running diff --git a/tests/test_slugs.py b/tests/test_slugs.py index 6f281417..c3cf2c7f 100644 --- a/tests/test_slugs.py +++ b/tests/test_slugs.py @@ -1,21 +1,61 @@ import pytest -from kubespawner.slugs import safe_slug +from kubespawner.slugs import is_valid_label, safe_slug @pytest.mark.parametrize( "name, expected", [ ("jupyter-alex", "jupyter-alex"), - ("jupyter-Alex", "jupyter-alex--3a1c285c"), - ("jupyter-üni", "jupyter-ni--a5aaf5dd"), + ("jupyter-Alex", "jupyter-alex---3a1c285c"), + ("jupyter-üni", "jupyter-ni---a5aaf5dd"), ("endswith-", "endswith---165f1166"), - ("-start", "j-start--f587e2dc"), - ("j-start--f587e2dc", "j-start--f587e2dc--f007ef7c"), - ("x" * 65, "xxxxxxxxxxxxxxxxxxxxxxxx--9537c5fd"), - ("x" * 66, "xxxxxxxxxxxxxxxxxxxxxxxx--6eb879f1"), + ("-start", "start---f587e2dc"), + ("username--servername", "username-servername---d957f1de"), + ("start---f587e2dc", "start-f587e2dc---cc5bb9c9"), + pytest.param("x" * 63, "x" * 63, id="x63"), + pytest.param("x" * 64, "xxxxxxxxxxxxxxxxxxxxx---7ce10097", id="x64"), + pytest.param("x" * 65, "xxxxxxxxxxxxxxxxxxxxx---9537c5fd", id="x65"), + ("", "x---e3b0c442"), ], ) def test_safe_slug(name, expected): slug = safe_slug(name) assert slug == expected + + +@pytest.mark.parametrize( + "max_length, length, expected", + [ + (16, 16, "x" * 16), + (16, 17, "xxxxx---d04fd59f"), + (11, 16, "error"), + (12, 16, "x---9c572959"), + ], +) +def test_safe_slug_max_length(max_length, length, expected): + name = "x" * length + if expected == "error": + with pytest.raises(ValueError): + safe_slug(name, max_length=max_length) + return + + slug = safe_slug(name, max_length=max_length) + assert slug == expected + + +@pytest.mark.parametrize( + "name, expected", + [ + ("", ""), + ("x", "x"), + ("-x", "x---a4209624"), + ("x-", "x---c8b60efc"), + pytest.param("x" * 63, "x" * 63, id="x64"), + pytest.param("x" * 64, "xxxxxxxxxxxxxxxxxxxxx---7ce10097", id="x64"), + pytest.param("x" * 65, "xxxxxxxxxxxxxxxxxxxxx---9537c5fd", id="x65"), + ], +) +def test_safe_slug_label(name, expected): + slug = safe_slug(name, is_valid=is_valid_label) + assert slug == expected From 45299ce26c67b93f92c628c0bf3f06e43d90e8a0 Mon Sep 17 00:00:00 2001 From: Min RK Date: Mon, 17 Jun 2024 16:12:12 +0200 Subject: [PATCH 06/18] add multi_slug mechanism for multi-word slugs (username--servername---hash) --- kubespawner/slugs.py | 74 +++++++++++++++++++++++++++++++++++------- kubespawner/spawner.py | 27 +++++++-------- tests/test_spawner.py | 12 +++---- 3 files changed, 80 insertions(+), 33 deletions(-) diff --git a/kubespawner/slugs.py b/kubespawner/slugs.py index 6044e75c..ff08cb51 100644 --- a/kubespawner/slugs.py +++ b/kubespawner/slugs.py @@ -94,6 +94,31 @@ def is_valid_default(s): ) +def _extract_safe_name(name, max_length): + """Generate safe substring of a name + + Guarantees: + + - always starts and ends with a lowercase letter or number + - never more than one hyphen in a row (no '--') + - only contains lowercase letters, numbers, and hyphens + - length at least 1 ('x' if other rules strips down to empty string) + - max length not exceeded + """ + # compute safe slug from name (don't worry about collisions, hash handles that) + # cast to lowercase, exclude all but lower & hyphen + # we could keep numbers, but it must not _start_ with a number + safe_name = ''.join([c for c in name.lower() if c in _lower_plus_hyphen]) + # strip leading '-', squash repeated '--' to '-' + safe_name = _hyphen_plus_pattern.sub("-", safe_name.lstrip("-")) + # truncate to max_length chars, strip trailing '-' + safe_name = safe_name[:max_length].rstrip("-") + if not safe_name: + # make sure it's non-empty + safe_name = 'x' + return safe_name + + def strip_and_hash(name, max_length=32): """Generate an always-safe, unique string for any input @@ -105,18 +130,8 @@ def strip_and_hash(name, max_length=32): raise ValueError(f"Cannot make safe names shorter than {_hash_length + 4}") # quick, short hash to avoid name collisions name_hash = hashlib.sha256(name.encode("utf8")).hexdigest()[:_hash_length] - # compute safe slug from name (don't worry about collisions, hash handles that) - # cast to lowercase, exclude all but lower & hyphen - safe_name = ''.join([c for c in name.lower() if c in _lower_plus_hyphen]) - # strip leading '-' - # squash repeated '--' to one - safe_name = _hyphen_plus_pattern.sub("-", safe_name.lstrip("-")) - # truncate to 24 chars, strip trailing '-' - safe_name = safe_name[:name_length].rstrip("-") - if not safe_name: - # make sure it's non-empty - safe_name = 'x' - # due to stripping of '-' above, + safe_name = _extract_safe_name(name, name_length) + # due to stripping of '-' in _extract_safe_name, # the result will always have _exactly_ '---', never '--' nor '----' # use '---' to avoid colliding with `{username}--{servername}` template join return f"{safe_name}---{name_hash}" @@ -142,3 +157,38 @@ def safe_slug(name, is_valid=is_valid_default, max_length=None): return name else: return strip_and_hash(name, max_length=max_length or 32) + + +def multi_slug(names, max_length=48): + """multi-component slug with single hash on the end + + same as strip_and_hash, but name components are joined with '--', + so it looks like: + + {name1}--{name2}---{hash} + + In order to avoid hash collisions on boundaries, use `\\xFF` as delimiter + """ + hasher = hashlib.sha256() + hasher.update(names[0].encode("utf8")) + for name in names[1:]: + # \xFF can't occur as a start byte in UTF8 + # so use it as a word delimiter to make sure overlapping words don't collide + hasher.update(b"\xFF") + hasher.update(name.encode("utf8")) + hash = hasher.hexdigest()[:_hash_length] + + name_slugs = [] + available_chars = max_length - (_hash_length + 1) + # allocate equal space per name + # per_name accounts for '{name}--', so really two less + per_name = available_chars // len(names) + name_max_length = per_name - 2 + if name_max_length < 2: + raise ValueError(f"Not enough characters for {len(names)} names: {max_length}") + for name in names: + name_slugs.append(_extract_safe_name(name, name_max_length)) + + # by joining names with '--', this cannot collide with single-hashed names, + # which can only contain '-' and the '---' hash delimiter once + return f"{'--'.join(name_slugs)}---{hash}" diff --git a/kubespawner/spawner.py b/kubespawner/spawner.py index 0feb1099..c2dfa599 100644 --- a/kubespawner/spawner.py +++ b/kubespawner/spawner.py @@ -48,7 +48,7 @@ make_service, ) from .reflector import ResourceReflector -from .slugs import is_valid_label, safe_slug +from .slugs import is_valid_label, multi_slug, safe_slug from .utils import recursive_format, recursive_update @@ -189,7 +189,7 @@ def __init__(self, *args, **kwargs): # these same assignments should match clear_state if self.enable_user_namespaces: self.namespace = self._expand_user_properties( - self.user_namespace_template, scheme="slug" + self.user_namespace_template, slug_scheme="safe" ) self.log.info(f"Using user namespace: {self.namespace}") @@ -1907,24 +1907,21 @@ def _expand_user_properties(self, template, slug_scheme=None): # compute safe_user_server = {username}--{servername} if ( - # escaping after joining means - # possible collision with `--` delimiter - '--' in username - or '--' in raw_servername - or username.endswith("-") - or raw_servername.startswith("-") - # length exceeded - or len(safe_username) + len(safe_username) + 2 > _slug_max_length + # double-escape if safe names are too long after join + len(safe_username) + len(safe_servername) + 2 + > _slug_max_length ): # need double-escape if there's a chance of collision - safe_user_server = safe_slug( - f"{safe_username}--{safe_servername}", max_length=_slug_max_length + safe_user_server = multi_slug( + [username, raw_servername], max_length=_slug_max_length ) else: if raw_servername: - safe_user_server = safe_slug( - f"{username}--{raw_servername}", max_length=_slug_max_length - ) + # choices: + # - {safe_username}--{safe_servername} # could get 2 hashes + # - always {multi_slug} # always a hash for named servers + # - safe_slug({username}--{servername}) # lots of possible collisions to handle specially + safe_user_server = f"{safe_username}--{safe_servername}" else: safe_user_server = safe_username diff --git a/tests/test_spawner.py b/tests/test_spawner.py index e3859179..b57c116d 100644 --- a/tests/test_spawner.py +++ b/tests/test_spawner.py @@ -1418,7 +1418,7 @@ async def test_pod_name_escaping(): spawner = KubeSpawner(config=c, user=user, orm_spawner=orm_spawner, _mock=True) - assert spawner.pod_name == "jupyter-some-5fuser--test-2dserver-21" + assert spawner.pod_name == "jupyter-someuser---7d3a4d4e--test-server---cb54a9af" async def test_pod_name_custom_template(): @@ -1442,15 +1442,15 @@ async def test_pod_name_collision(): user2 = MockUser() user2.name = "user-has" orm_spawner2 = Spawner() - orm_spawner2.name = "2ddash" + orm_spawner2.name = "dash" spawner = KubeSpawner(user=user1, orm_spawner=orm_spawner1, _mock=True) - assert spawner.pod_name == "jupyter-user-2dhas-2ddash" - assert spawner.pvc_name == "claim-user-2dhas-2ddash" + assert spawner.pod_name == "jupyter-user-has-dash" + assert spawner.pvc_name == "claim-user-has-dash" named_spawner = KubeSpawner(user=user2, orm_spawner=orm_spawner2, _mock=True) - assert named_spawner.pod_name == "jupyter-user-2dhas--2ddash" + assert named_spawner.pod_name == "jupyter-user-has--dash" assert spawner.pod_name != named_spawner.pod_name - assert named_spawner.pvc_name == "claim-user-2dhas--2ddash" + assert named_spawner.pvc_name == "claim-user-has--dash" assert spawner.pvc_name != named_spawner.pvc_name From e24b1f6decec82565d4f28ad604aa1598b67fd1f Mon Sep 17 00:00:00 2001 From: Min RK Date: Wed, 26 Jun 2024 11:07:54 +0200 Subject: [PATCH 07/18] sub '-' for any sequence of unsafe characters more recognizable with common separators like '@', '_', '.' --- kubespawner/slugs.py | 16 +++++++--------- tests/test_slugs.py | 2 ++ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/kubespawner/slugs.py b/kubespawner/slugs.py index ff08cb51..ee3a283d 100644 --- a/kubespawner/slugs.py +++ b/kubespawner/slugs.py @@ -19,8 +19,8 @@ _object_pattern = re.compile(r'^[a-z0-9\.-]+$') _label_pattern = re.compile(r'^[a-z0-9\.-_]+$', flags=re.IGNORECASE) -# match two or more hyphens -_hyphen_plus_pattern = re.compile('--+') +# match anything that's not lowercase alphanumeric (will be stripped, replaced with '-') +_non_alphanum_pattern = re.compile(r'[^a-z0-9]+') # length of hash suffix _hash_length = 8 @@ -106,13 +106,11 @@ def _extract_safe_name(name, max_length): - max length not exceeded """ # compute safe slug from name (don't worry about collisions, hash handles that) - # cast to lowercase, exclude all but lower & hyphen - # we could keep numbers, but it must not _start_ with a number - safe_name = ''.join([c for c in name.lower() if c in _lower_plus_hyphen]) - # strip leading '-', squash repeated '--' to '-' - safe_name = _hyphen_plus_pattern.sub("-", safe_name.lstrip("-")) - # truncate to max_length chars, strip trailing '-' - safe_name = safe_name[:max_length].rstrip("-") + # cast to lowercase + # replace any sequence of non-alphanumeric characters with a single '-' + safe_name = _non_alphanum_pattern.sub("-", name.lower()) + # truncate to max_length chars, strip '-' off ends + safe_name = safe_name.lstrip("-")[:max_length].rstrip("-") if not safe_name: # make sure it's non-empty safe_name = 'x' diff --git a/tests/test_slugs.py b/tests/test_slugs.py index c3cf2c7f..e4845c56 100644 --- a/tests/test_slugs.py +++ b/tests/test_slugs.py @@ -10,6 +10,8 @@ ("jupyter-Alex", "jupyter-alex---3a1c285c"), ("jupyter-üni", "jupyter-ni---a5aaf5dd"), ("endswith-", "endswith---165f1166"), + ("user@email.com", "user-email-com---0925f997"), + ("user-_@_emailß.com", "user-email-com---7e3a7efd"), ("-start", "start---f587e2dc"), ("username--servername", "username-servername---d957f1de"), ("start---f587e2dc", "start-f587e2dc---cc5bb9c9"), From cbd983be223d17b3b3c566786d7390418e8382c9 Mon Sep 17 00:00:00 2001 From: Min RK Date: Wed, 26 Jun 2024 11:17:20 +0200 Subject: [PATCH 08/18] restore trailing hyphen logic --- kubespawner/spawner.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/kubespawner/spawner.py b/kubespawner/spawner.py index c2dfa599..d9afa37f 100644 --- a/kubespawner/spawner.py +++ b/kubespawner/spawner.py @@ -1975,8 +1975,11 @@ def _expand_user_properties(self, template, slug_scheme=None): ns[attr_name] = getattr(self, attr_name, f"{attr_name}_unavailable!") rendered = template.format(**ns) - # strip trailing '-' in case of old '{username}--{servername}' template - rendered = rendered.rstrip("-") + # strip trailing - delimiter in case of empty servername and old {username}--{servername} template + # but only if trailing '-' is added by the template rendering, + # and not in the template itself + if not template.endswith("-"): + rendered = rendered.rstrip("-") return rendered def _expand_env(self, env): From 15ead3c7cea4778b8679165eb0a081adaccfc658 Mon Sep 17 00:00:00 2001 From: Min RK Date: Wed, 26 Jun 2024 11:18:24 +0200 Subject: [PATCH 09/18] track kubespawner version in state, annotations makes it easier to handle "did it upgrade?" --- kubespawner/spawner.py | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/kubespawner/spawner.py b/kubespawner/spawner.py index d9afa37f..f18f1115 100644 --- a/kubespawner/spawner.py +++ b/kubespawner/spawner.py @@ -18,6 +18,7 @@ from urllib.parse import urlparse import escapism +import jupyterhub from jinja2 import ChoiceLoader, Environment, FileSystemLoader, PackageLoader from jupyterhub.spawner import Spawner from jupyterhub.traitlets import Callable, Command @@ -38,6 +39,7 @@ validate, ) +from . import __version__ from .clients import load_config, shared_client from .objects import ( make_namespace, @@ -2033,6 +2035,8 @@ def _build_common_annotations(self, extra_annotations): annotations = {'hub.jupyter.org/username': self.user.name} if self.name: annotations['hub.jupyter.org/servername'] = self.name + annotations["hub.jupyter.org/kubespawner-version"] = __version__ + annotations["hub.jupyter.org/jupyterhub-version"] = jupyterhub.__version__ annotations.update(extra_annotations) return annotations @@ -2311,6 +2315,7 @@ def get_state(self): `pvc_name` is also saved, to prevent data loss if template changes across restarts. """ state = super().get_state() + state["kubespawner_version"] = __version__ # these should only be persisted if our pod is running # but we don't have a sync check for that state['pod_name'] = self.pod_name @@ -2340,6 +2345,9 @@ def get_env(self): return env + # remember version of kubespawner that state was loaded from + _state_kubespawner_version = None + def load_state(self, state): """ Load state from storage required to reinstate this user's pod @@ -2369,6 +2377,19 @@ def load_state(self, state): # and we don't need to check for legacy names anymore self._pvc_exists = True + if 'kubespawner_version' in state: + self._state_kubespawner_version = state["kubespawner_version"] + elif state: + self.log.warning( + f"Loading state for {self.user.name}/{self.name} from unknown prior version of kubespawner (likely 6.x), will attempt to upgrade." + ) + # if there was any state to load, we assume 'unknown' version + # (most likely 6.x, prior to 'safe' slug scheme) + self._state_kubespawner_version = "unknown" + else: + # None means no state loaded (i.e. fresh launch) + self._state_kubespawner_version = None + def clear_state(self): """Reset state for a stopped server @@ -2926,11 +2947,16 @@ async def _start(self): self._last_event = events[-1]["metadata"]["uid"] if self.storage_pvc_ensure: - if self.handle_legacy_names and not self._pvc_exists: - # pvc name wasn't reliably persisted before kubespawner 17, so if the name changed, - # check if a pvc with the legacy name exists and use it - # this will be persisted in state on next launch in the future, + if ( + self.handle_legacy_names + and not self._pvc_exists + and self._state_kubespawner_version == "unknown" + ): + # pvc name wasn't reliably persisted before kubespawner 7, + # so if the name changed check if a pvc with the legacy name exists and use it. + # This will be persisted in state on next launch in the future, # so the comparison below will be False for launches after the first. + # this check will only work if pvc_name_template itself has not changed across the upgrade. legacy_pvc_name = self._expand_user_properties( self.pvc_name_template, slug_scheme="escape" ) From fc4f628c9eaed3c39e8f47f3e4cf4c888c8065dd Mon Sep 17 00:00:00 2001 From: Min RK Date: Wed, 26 Jun 2024 11:28:03 +0200 Subject: [PATCH 10/18] allow opting out of persisted pvc name with remember_pvc_name = False --- kubespawner/spawner.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/kubespawner/spawner.py b/kubespawner/spawner.py index f18f1115..d73388df 100644 --- a/kubespawner/spawner.py +++ b/kubespawner/spawner.py @@ -646,6 +646,20 @@ def _namespace_default(self): """, ) + remember_pvc_name = Bool( + True, + config=True, + help=""" + Remember the PVC name across restarts and configuration changes. + + If True, once the PVC has been created, its name will be remembered and reused + and changing pvc_name_template will have no effect on servers that have previously mounted PVCs. + If False, changing pvc_name_template or slug_scheme may detatch servers from their PVCs. + + `False` is the behavior of kubespawner prior to version 7. + """, + ) + component_label = Unicode( 'singleuser-server', config=True, @@ -2323,6 +2337,8 @@ def get_state(self): state['dns_name'] = self.dns_name # persist pvc name only if it's established that it exists + # ignore 'remember_pvc_name' config here so the info is available + # so future calls to load_state can decide whether to use it or not if self._pvc_exists: state['pvc_name'] = self.pvc_name return state @@ -2371,7 +2387,7 @@ def load_state(self, state): if 'dns_name' in state: self.dns_name = state['dns_name'] - if 'pvc_name' in state: + if 'pvc_name' in state and self.remember_pvc_name: self.pvc_name = state['pvc_name'] # indicate that we've already checked that self.pvc_name is correct # and we don't need to check for legacy names anymore From fd4135f656ca7aa72ed03ce18f43191f9994a4ca Mon Sep 17 00:00:00 2001 From: Min RK Date: Wed, 26 Jun 2024 13:24:32 +0200 Subject: [PATCH 11/18] document new template scheme and upgrade notes consolidate references to template fields now that there's a doc for it --- docs/source/index.md | 3 +- docs/source/ssl.md | 6 +- docs/source/templates.md | 156 +++++++++++++++++++++++++++++++++++++ kubespawner/spawner.py | 162 ++++++++++++++++----------------------- tests/test_slugs.py | 2 +- 5 files changed, 227 insertions(+), 102 deletions(-) create mode 100644 docs/source/templates.md diff --git a/docs/source/index.md b/docs/source/index.md index 92726951..d13bc854 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -13,7 +13,7 @@ management of containerized applications. If you want to run a JupyterHub setup that needs to scale across multiple nodes (anything with over ~50 simultaneous users), Kubernetes is a wonderful way to do it. Features include: -- Easily and elasticly run anywhere between 2 and thousands of nodes with the +- Easily and elastically run anywhere between 2 and thousands of nodes with the same set of powerful abstractions. Scale up and down as required by simply adding or removing nodes. @@ -81,5 +81,6 @@ utils ```{toctree} :maxdepth: 2 :caption: Reference +templates changelog ``` diff --git a/docs/source/ssl.md b/docs/source/ssl.md index 9af8bcfb..a6677f96 100644 --- a/docs/source/ssl.md +++ b/docs/source/ssl.md @@ -8,7 +8,7 @@ If enabled, the Kubespawner will mount the internal_ssl certificates as Kubernet To enable, use the following settings: -``` +```python c.JupyterHub.internal_ssl = True c.JupyterHub.spawner_class = 'kubespawner.KubeSpawner' @@ -16,8 +16,8 @@ c.JupyterHub.spawner_class = 'kubespawner.KubeSpawner' Further configuration can be specified with the following (listed with their default values): -``` -c.KubeSpawner.secret_name_template = "jupyter-{username}{servername}" +```python +c.KubeSpawner.secret_name_template = "{pod_name}" c.KubeSpawner.secret_mount_path = "/etc/jupyterhub/ssl/" ``` diff --git a/docs/source/templates.md b/docs/source/templates.md new file mode 100644 index 00000000..6b17da17 --- /dev/null +++ b/docs/source/templates.md @@ -0,0 +1,156 @@ +(templates)= + +# Templated fields + +Several fields in KubeSpawner can be resolved as string templates, +so each user server can get distinct values from the same configuration. + +String templates use the Python formatting convention of `f"{fieldname}"`, +so for example the default `pod_name_template` of `"jupyter-{user_server}"` will produce: + +| username | server name | pod name | +| ---------------- | ----------- | ---------------------------------------------- | +| `user` | `''` | `jupyter-user` | +| `user` | `server` | `jupyter-user--server` | +| `user@email.com` | `Some Name` | `jupyter-user-email-com--some-name---0c1fe94b` | + +## templated properties + +Some common templated fields: + +- [pod_name_template](#KubeSpawner.pod_name_template) +- [pvc_name_template](#KubeSpawner.pvc_name_template) +- [working_dir](#KubeSpawner.working_dir) + +## fields + +The following fields are available in templates: + +| field | description | +| ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `{username}` | the username passed through the configured slug scheme | +| `{servername}` | the name of the server passed through the configured slug scheme (`''` for the user's default server) | +| `{user_server}` | the username and servername together as a single slug. This should be used most places for a unique string for a given user's server (new in kubespawner 7). | +| `{unescaped_username}` | the actual username without escaping (no guarantees about value, except as enforced by your Authenticator) | +| `{unescaped_servername}` | the actual server name without escaping (no guarantees about value) | +| `{pod_name}` | the resolved pod name, often a good choice if you need a starting point for other resources (new in kubespawner 7) | +| `{pvc_name}` | the resolved PVC name (new in kubespawner 7) | +| `{namespace}` | the kubernetes namespace of the server (new in kubespawner 7) | +| `{hubnamespace}` | the kubernetes namespace of the Hub | + +Because there are two escaping schemes for `username`, `servername`, and `user_server`, you can explicitly select one or the other on a per-template-field basis with the prefix `safe_` or `escaped_`: + +| field | description | +| ----------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------- | +| `{escaped_username}` | the username passed through the old 'escape' slug scheme | +| `{escaped_servername}` | the server name passed through the 'escape' slug scheme | +| `{escaped_user_server}` | the username and servername together as a single slug, identical to `"{escaped_username}--{escaped_servername}".rstrip("-")` (new in kubespawner 7) | +| `{safe_username}` | the username passed through the 'safe' slug scheme (new in kubespawner 7) | +| `{safe_servername}` | the server name passed through the 'safe' slug scheme (new in kubespawner 7) | +| `{safe_user_server}` | the username and server name together as a 'safe' slug (new in kubespawner 7) | + +These may be useful during a transition upgrading a deployment from an earlier version of kubespawner. + +The value of the unprefixed `username`, etc. is goverend by the [](#KubeSpawner.slug_scheme) configuration, and always matches exactly one of these values. + +## Template tips + +In general, these guidelines should help you pick fields to use in your template strings: + +- use `{user_server}` when a string should be unique _per server_ (e.g. pod name) +- use `{username}` when it should be unique per user, but shared across named servers (sometimes chosen for PVCs) +- use `{escaped_}` prefix if you need to keep certain values unchanged in a deployment upgrading from kubespawner \< 7 +- `{pod_name}` can be re-used anywhere you want to create more resources associated with a given pod, + to avoid repeating yourself + +## Changing template configuration + +Changing configuration should not generally affect _running_ servers. +However, when changing a property that may need to persist across user server restarts, special consideration may be required. +For example, changing `pvc_name` or `working_dir` could result in disconnecting a user's server from data loaded in previous sessions. +This may be your intention or not! KubeSpawner cannot know. + +`pvc_name` is handled specially, to avoid losing access to data. +If `KubeSpawner.remember_pvc_name` is True, once a server has started, a server's PVC name cannot be changed by configuration. +Any future launch will use the previous `pvc_name`, regardless of change in configuration. +If you _want_ to change the names of mounted PVCs, you can set + +```python +c.KubeSpawner.remember_pvc_name = False +``` + +This handling isn't general for PVCs, only specifically the default `pvc_name`. +If you have defined your own volumes, you need to handle changes to these yourself. + +## Upgrading from kubespawner \< 7 + +Prior to kubespawner 7, an escaping scheme was used that ensured values were _unique_, +but did not always ensure fields were _valid_. +In particular: + +- start/end rules were not enforced +- length was not enforced + +This meant that e.g. usernames that start with a capital letter or were very long could result in servers failing to start because the escaping scheme produced an invalid label. +To solve this, a new 'safe' scheme has been added in kubespawner 7 for computing template strings, +which aims to guarantee to always produce valid object names and labels. +The new scheme is the default in kubespawner 7. + +You can select the scheme with: + +```python +c.KubeSpawner.slug_scheme = "escape" # no changes from kubespawner 6 +c.KubeSpawner.slug_scheme = "safe" # default for kubespawner 7 +``` + +The new scheme has the following rules: + +- the length of any _single_ template field is limited to 48 characters (the total length of the string is not enforced) +- the result will only contain lowercase ascii letters, numbers, and `-` +- it will always start and end with a letter or number +- if a name is 'safe', it is used unmodified +- if any escaping is required, a truncated safe subset of characters is used, followed by `---{hash}` where `{hash}` is a checksum of the original input string +- `-` shall not occur in sequences of more than one consecutive `-`, except where inserted by the escaping mechanism +- if no safe characters are present, 'x' is used for the 'safe' subset + +Since length requirements are applied on a per-field basis, a new `{user_server}` field is added, +which computes a single valid slug following the above rules which is unique for a given user server. +The general form is: + +``` +{username}--{servername}---{hash} +``` + +where + +- `--{servername}` is only present for non-empty server names +- `---{hash}` is only present if escaping is required for _either_ username or servername, and hashes the combination of user and server. + +This `{user_server}` is the recommended value to use in pod names, etc. +In the escape scheme, `{user_server}` is identical to the previous value used in default templates: `{username}--{servername}`, +so it should be safe to upgrade previous templated using `{username}--{servername}` to `{user_server}` or `{escaped_user_server}`. + +In the vast majority of cases (where no escaping is required), the 'safe' scheme produces identical results to the 'escape' scheme. +Probably the most common case where the two differ is in the presence of single `-` characters, which the `escape` scheme escaped to `-2d`, while the 'safe' scheme does not. + +Examples: + +| name | escape scheme | safe scheme | +| username | username | username | +| has-hyphen | has-2dhyphen | has-hyphen | +| Capital | `-43apital` (error) | `capital---1a1cf792` | +| user@email.com | 'user-40email-2ecom' | 'user-email-com---0925f997' | +| 'a-very-long-name-that-is-too-long-for-sixty-four-character-labels' | 'a-2dvery-2dlong-2dname-2dthat-2dis-2dtoo-2dlong-2dfor-2dsixty-2dfour-2dcharacter-2dlabels' (error) | 'a-very-long-name-that-is-too-long-for---29ac5fd2' | +| ALLCAPS | '-41-4c-4c-43-41-50-53' (error) | 'allcaps---27c6794c'| + +Most changed names won't have a practical effect. +However, to avoid `pvc_name` changing even though KubeSpawner 6 didn't persist it, +on first launch (for each server) after upgrade KubeSpawner checks if: + +1. `pvc_name_template` produces a different result with `scheme='escape'` +1. a pvc with the old 'escaped' name exists + +and if such a pvc exists, the older name is used instead of the new one (it is then remembered for subsequent launches, according to `remember_pvc_name`). +This is an attempt to respect the `remember_pvc_name` configuration, even though the old name is not technically recorded. +We can infer the old value, as long as configuration has not changed. +This will only work if upgrading KubeSpawer does not _also_ coincide with a change in the `pvc_name_template` configuration. diff --git a/kubespawner/spawner.py b/kubespawner/spawner.py index d73388df..f30cc670 100644 --- a/kubespawner/spawner.py +++ b/kubespawner/spawner.py @@ -341,20 +341,13 @@ def __init__(self, *args, **kwargs): 'escape' scheme: - does not guarantee correct names, e.g. does not handle capital letters or length - - escapes fields one at a time in templates: - - `{username}` is the _escaped_ username - - `{servername}` is the _escaped_ server name 'safe' scheme: - should guarantee correct names - - escapes the whole string at once - escapes only if needed - - in templates: - - `{username}` is the user's actual name - - `{escaped_username}` is the escaped username, used in the previous scheme - - `{servername}` is the user's actual name - - `{escaped_servername}` is the escaped server name, used in the previous scheme + - enforces length requirements + - uses hash to avoid collisions when escaping is required 'safe' is the default and preferred as it produces both: @@ -376,11 +369,10 @@ def __init__(self, *args, **kwargs): Note that these are only set when the namespaces are created, not later when this setting is updated. - `{username}`, `{userid}`, `{servername}`, `{hubnamespace}`, - `{unescaped_username}`, and `{unescaped_servername}` will be expanded if - found within strings of this configuration. The username and servername - come escaped to follow the `DNS label standard - `__. + .. seealso:: + + :ref:`templates` for information on fields available in template strings. + """, ) @@ -393,11 +385,10 @@ def __init__(self, *args, **kwargs): Note that these are only set when the namespaces are created, not later when this setting is updated. - `{username}`, `{userid}`, `{servername}`, `{hubnamespace}`, - `{unescaped_username}`, and `{unescaped_servername}` will be expanded if - found within strings of this configuration. The username and servername - come escaped to follow the `DNS label standard - `__. + .. seealso:: + + :ref:`templates` for information on fields available in template strings. + """, ) @@ -408,11 +399,10 @@ def __init__(self, *args, **kwargs): Template to use to form the namespace of user's pods (only if enable_user_namespaces is True). - `{username}`, `{userid}`, `{servername}`, `{hubnamespace}`, - `{unescaped_username}`, and `{unescaped_servername}` will be expanded if - found within strings of this configuration. The username and servername - come escaped to follow the `DNS label standard - `__. + .. seealso:: + + :ref:`templates` for information on fields available in template strings. + """, ) @@ -496,11 +486,10 @@ def _namespace_default(self): The working directory where the Notebook server will be started inside the container. Defaults to `None` so the working directory will be the one defined in the Dockerfile. - `{username}`, `{userid}`, `{servername}`, `{hubnamespace}`, - `{unescaped_username}`, and `{unescaped_servername}` will be expanded if - found within strings of this configuration. The username and servername - come escaped to follow the `DNS label standard - `__. + .. seealso:: + + :ref:`templates` for information on fields available in template strings. + """, ) @@ -551,18 +540,14 @@ def _namespace_default(self): help=""" Template to use to form the name of user's pods. - `{username}`, `{userid}`, `{servername}`, `{hubnamespace}`, - `{unescaped_username}`, and `{unescaped_servername}` will be expanded if - found within strings of this configuration. The username and servername - come escaped to follow the `DNS label standard - `__. - - Trailing `-` characters are stripped for safe handling of empty server names (user default servers). - This must be unique within the namespace the pods are being spawned in, so if you are running multiple jupyterhubs spawning in the same namespace, consider setting this to be something more unique. + .. seealso:: + + :ref:`templates` for information on fields available in template strings. + .. versionchanged:: 0.12 `--` delimiter added to the template, where it was implicitly added to the `servername` field before. @@ -579,11 +564,9 @@ def _namespace_default(self): e.g. `{pod_name}.notebooks.jupyterhub.svc.cluster.local`, - `{username}`, `{userid}`, `{servername}`, `{hubnamespace}`, - `{unescaped_username}`, and `{unescaped_servername}` will be expanded if - found within strings of this configuration. The username and servername - come escaped to follow the `DNS label standard - `__. + .. seealso:: + + :ref:`templates` for information on fields available in template strings. Trailing `-` characters in each domain level are stripped for safe handling of empty server names (user default servers). @@ -626,11 +609,6 @@ def _namespace_default(self): help=""" Template to use to form the name of user's pvc. - `{username}`, `{userid}`, `{servername}`, `{hubnamespace}`, - `{unescaped_username}`, and `{unescaped_servername}` will be expanded if - found within strings of this configuration. The username and servername - come escaped to follow the `DNS label standard - `__. Trailing `-` characters are stripped for safe handling of empty server names (user default servers). @@ -638,6 +616,10 @@ def _namespace_default(self): in, so if you are running multiple jupyterhubs spawning in the same namespace, consider setting this to be something more unique. + .. seealso:: + + :ref:`templates` for information on fields available in template strings. + .. versionchanged:: 0.12 `--` delimiter added to the template, where it was implicitly added to the `servername` field before. @@ -674,20 +656,12 @@ def _namespace_default(self): ) secret_name_template = Unicode( - 'jupyter-{username}{servername}', + '{pod_name}', config=True, help=""" Template to use to form the name of user's secret. - `{username}`, `{userid}`, `{servername}`, `{hubnamespace}`, - `{unescaped_username}`, and `{unescaped_servername}` will be expanded if - found within strings of this configuration. The username and servername - come escaped to follow the `DNS label standard - `__. - - This must be unique within the namespace the pvc are being spawned - in, so if you are running multiple jupyterhubs spawning in the - same namespace, consider setting this to be something more unique. + Default: same as `pod_name`. It is unlikely that this should be changed. """, ) @@ -757,11 +731,9 @@ def _deprecated_changed(self, change): See `the Kubernetes documentation `__ for more info on what labels are and why you might want to use them! - `{username}`, `{userid}`, `{servername}`, `{hubnamespace}`, - `{unescaped_username}`, and `{unescaped_servername}` will be expanded if - found within strings of this configuration. The username and servername - come escaped to follow the `DNS label standard - `__. + .. seealso:: + + :ref:`templates` for information on fields available in template strings. """, ) @@ -778,9 +750,10 @@ def _deprecated_changed(self, change): See `the Kubernetes documentation `__ for more info on what annotations are and why you might want to use them! - `{username}`, `{userid}`, `{servername}`, `{hubnamespace}`, - `{unescaped_username}`, and `{unescaped_servername}` will be expanded if - found within strings of this configuration. + .. seealso:: + + :ref:`templates` for information on fields available in template strings. + """, ) @@ -1123,11 +1096,10 @@ def _validate_image_pull_secrets(self, proposal): for more information on the various kinds of volumes available and their options. Your kubernetes cluster must already be configured to support the volume types you want to use. - `{username}`, `{userid}`, `{servername}`, `{hubnamespace}`, - `{unescaped_username}`, and `{unescaped_servername}` will be expanded if - found within strings of this configuration. The username and servername - come escaped to follow the `DNS label standard - `__. + .. seealso:: + + :ref:`templates` for information on fields available in template strings. + """, ) @@ -1146,11 +1118,10 @@ def _validate_image_pull_secrets(self, proposal): See `the Kubernetes documentation `__ for more information on how the `volumeMount` item works. - `{username}`, `{userid}`, `{servername}`, `{hubnamespace}`, - `{unescaped_username}`, and `{unescaped_servername}` will be expanded if - found within strings of this configuration. The username and servername - come escaped to follow the `DNS label standard - `__. + .. seealso:: + + :ref:`templates` for information on fields available in template strings. + """, ) @@ -1190,9 +1161,10 @@ def _validate_image_pull_secrets(self, proposal): See `the Kubernetes documentation `__ for more info on what annotations are and why you might want to use them! - `{username}`, `{userid}`, `{servername}`, `{hubnamespace}`, - `{unescaped_username}`, and `{unescaped_servername}` will be expanded if - found within strings of this configuration. + .. seealso:: + + :ref:`templates` for information on fields available in template strings. + """, ) @@ -1208,11 +1180,10 @@ def _validate_image_pull_secrets(self, proposal): See `the Kubernetes documentation `__ for more info on what labels are and why you might want to use them! - `{username}`, `{userid}`, `{servername}`, `{hubnamespace}`, - `{unescaped_username}`, and `{unescaped_servername}` will be expanded if - found within strings of this configuration. The username and servername - come escaped to follow the `DNS label standard - `__. + .. seealso:: + + :ref:`templates` for information on fields available in template strings. + """, ) @@ -1270,11 +1241,10 @@ def _validate_image_pull_secrets(self, proposal): c.KubeSpawner.storage_selector = {'matchLabels':{'content': 'jupyter'}} - `{username}`, `{userid}`, `{servername}`, `{hubnamespace}`, - `{unescaped_username}`, and `{unescaped_servername}` will be expanded if - found within strings of this configuration. The username and servername - come escaped to follow the `DNS label standard - `__. + .. seealso:: + + :ref:`templates` for information on fields available in template strings. + """, ) @@ -1406,11 +1376,10 @@ def _validate_image_pull_secrets(self, proposal): "command": ["/usr/local/bin/supercronic", "/etc/crontab"] }] - `{username}`, `{userid}`, `{servername}`, `{hubnamespace}`, - `{unescaped_username}`, and `{unescaped_servername}` will be expanded if - found within strings of this configuration. The username and servername - come escaped to follow the `DNS label standard - `__. + .. seealso:: + + :ref:`templates` for information on fields available in template strings. + """, ) @@ -2330,8 +2299,9 @@ def get_state(self): """ state = super().get_state() state["kubespawner_version"] = __version__ - # these should only be persisted if our pod is running + # pod_name, dns_name should only be persisted if our pod is running # but we don't have a sync check for that + # is that true for namespace as well? (namespace affects pvc) state['pod_name'] = self.pod_name state['namespace'] = self.namespace state['dns_name'] = self.dns_name @@ -2415,13 +2385,11 @@ def clear_state(self): super().clear_state() # this should be the same initialization as __init__ / trait defaults # this allows changing config to take effect after a server restart - if self.enable_user_namespaces: - self.namespace = self._expand_user_properties(self.user_namespace_template) - self.pod_name = self._expand_user_properties(self.pod_name_template) self.dns_name = self.dns_name_template.format( namespace=self.namespace, name=self.pod_name ) + # reset namespace as well? async def poll(self): """ diff --git a/tests/test_slugs.py b/tests/test_slugs.py index e4845c56..bad39885 100644 --- a/tests/test_slugs.py +++ b/tests/test_slugs.py @@ -53,7 +53,7 @@ def test_safe_slug_max_length(max_length, length, expected): ("x", "x"), ("-x", "x---a4209624"), ("x-", "x---c8b60efc"), - pytest.param("x" * 63, "x" * 63, id="x64"), + pytest.param("x" * 63, "x" * 63, id="x63"), pytest.param("x" * 64, "xxxxxxxxxxxxxxxxxxxxx---7ce10097", id="x64"), pytest.param("x" * 65, "xxxxxxxxxxxxxxxxxxxxx---9537c5fd", id="x65"), ], From 3ece5696baede1f725bccd297a5f7bb68d3034fa Mon Sep 17 00:00:00 2001 From: Min RK Date: Wed, 26 Jun 2024 13:30:16 +0200 Subject: [PATCH 12/18] update some test expectations --- tests/test_spawner.py | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/tests/test_spawner.py b/tests/test_spawner.py index b57c116d..ba3a6ce0 100644 --- a/tests/test_spawner.py +++ b/tests/test_spawner.py @@ -5,6 +5,7 @@ from functools import partial from unittest.mock import Mock +import jupyterhub import pytest from jupyterhub.objects import Hub, Server from jupyterhub.orm import Spawner @@ -19,8 +20,10 @@ ) from traitlets.config import Config +import kubespawner from kubespawner import KubeSpawner from kubespawner.objects import make_owner_reference, make_service +from kubespawner.slugs import safe_slug class MockUser(Mock): @@ -1367,9 +1370,10 @@ async def test_spawner_env(): "CALLABLE": lambda spawner: spawner.user.name + " (callable)", } spawner = KubeSpawner(config=c, _mock=True) + slug = safe_slug(spawner.user.name) env = spawner.get_env() assert env["STATIC"] == "static" - assert env["EXPANDED"] == "mock-5fname (expanded)" + assert env["EXPANDED"] == f"{slug} (expanded)" assert env["ESCAPED"] == "{username}" assert env["CALLABLE"] == "mock_name (callable)" @@ -1378,7 +1382,7 @@ async def test_jupyterhub_supplied_env(): cookie_options = {"samesite": "None", "secure": True} c = Config() - c.KubeSpawner.environment = {"HELLO": "It's {username}"} + c.KubeSpawner.environment = {"HELLO": "It's {unescaped_username}"} spawner = KubeSpawner(config=c, _mock=True, cookie_options=cookie_options) pod_manifest = await spawner.get_pod_manifest() @@ -1386,7 +1390,7 @@ async def test_jupyterhub_supplied_env(): env = pod_manifest.spec.containers[0].env # Set via .environment, must be expanded - assert V1EnvVar("HELLO", "It's mock-5fname") in env + assert V1EnvVar("HELLO", "It's mock_name") in env # Set by JupyterHub itself, must not be expanded assert V1EnvVar("JUPYTERHUB_COOKIE_OPTIONS", json.dumps(cookie_options)) in env @@ -1418,18 +1422,18 @@ async def test_pod_name_escaping(): spawner = KubeSpawner(config=c, user=user, orm_spawner=orm_spawner, _mock=True) - assert spawner.pod_name == "jupyter-someuser---7d3a4d4e--test-server---cb54a9af" + assert spawner.pod_name == "jupyter-some-user---7d3a4d4e--test-server---cb54a9af" async def test_pod_name_custom_template(): user = MockUser() user.name = "some_user" - pod_name_template = "prefix-{username}-suffix" + pod_name_template = "prefix-{user_server}-suffix" spawner = KubeSpawner(user=user, pod_name_template=pod_name_template, _mock=True) - assert spawner.pod_name == "prefix-some-5fuser-suffix" + assert spawner.pod_name == "prefix-some-user---7d3a4d4e-suffix" async def test_pod_name_collision(): @@ -1508,6 +1512,8 @@ async def test_pod_connect_ip(kube_ns, kube_client, config, hub_pod, hub): async def test_get_pvc_manifest(): c = Config() + username = "mock_name" + slug = safe_slug(username) c.KubeSpawner.pvc_name_template = "user-{username}" c.KubeSpawner.storage_extra_labels = {"user": "{username}"} c.KubeSpawner.storage_extra_annotations = {"user": "{username}"} @@ -1518,10 +1524,10 @@ async def test_get_pvc_manifest(): manifest = spawner.get_pvc_manifest() assert isinstance(manifest, V1PersistentVolumeClaim) - assert manifest.metadata.name == "user-mock-5fname" + assert manifest.metadata.name == f"user-{slug}" assert manifest.metadata.labels == { - "user": "mock-5fname", - "hub.jupyter.org/username": "mock-5fname", + "user": slug, + "hub.jupyter.org/username": username, "app.kubernetes.io/name": "jupyterhub", "app.kubernetes.io/managed-by": "kubespawner", "app.kubernetes.io/component": "singleuser-server", @@ -1530,10 +1536,12 @@ async def test_get_pvc_manifest(): "component": "singleuser-storage", } assert manifest.metadata.annotations == { - "user": "mock-5fname", - "hub.jupyter.org/username": "mock_name", + "user": slug, + "hub.jupyter.org/username": username, + "hub.jupyter.org/jupyterhub-version": jupyterhub.__version__, + "hub.jupyter.org/kubespawner-version": kubespawner.__version__, } - assert manifest.spec.selector == {"matchLabels": {"user": "mock-5fname"}} + assert manifest.spec.selector == {"matchLabels": {"user": slug}} async def test_variable_expansion(ssl_app): From f96ab3803e9abf4368a907d58f5e01aaa8879d7a Mon Sep 17 00:00:00 2001 From: Min RK Date: Wed, 26 Jun 2024 14:44:04 +0200 Subject: [PATCH 13/18] exercise pvc_name upgrade cases - upgrade from 6.x after default slug scheme change preserves pvc attachment - old name check doesn't happen if loaded state from more kubespawner 7 --- tests/test_spawner.py | 78 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/tests/test_spawner.py b/tests/test_spawner.py index ba3a6ce0..0477e6d8 100644 --- a/tests/test_spawner.py +++ b/tests/test_spawner.py @@ -834,6 +834,84 @@ async def test_spawn_start_restore_namespace( assert isinstance(status, int) +@pytest.mark.parametrize("handle_legacy_names", [True, False]) +async def test_spawn_start_upgrade_pvc_name( + config, + hub, + handle_legacy_names, +): + # Emulate stopping JupyterHub and starting with different settings while some pods still exist + user = MockUser(name="needs-Escape") + spawner_args = dict( + hub=hub, + user=user, + orm_spawner=MockOrmSpawner(), + config=config, + api_token="abc123", + oauth_client_id="unused", + ) + config.KubeSpawner.storage_pvc_ensure = True + config.KubeSpawner.storage_capacity = "100M" + config.KubeSpawner.slug_scheme = "escape" + config.KubeSpawner.handle_legacy_names = handle_legacy_names + + # Save state with old config + old_spawner = KubeSpawner( + **spawner_args, pvc_name_template="claim-{username}--{servername}" + ) + old_spawner.load_state({}) + assert old_spawner._state_kubespawner_version is None + + old_spawner_pvc_name = old_spawner.pvc_name + + # launch old pod + await old_spawner.start() + assert old_spawner._pvc_exists + old_state = old_spawner.get_state() + # subset keys to what's actually stored in earlier versions of kubespawner + old_state = {key: old_state[key] for key in ("namespace", "pod_name", "dns_name")} + + await old_spawner.stop() + + # Change config + config.KubeSpawner.slug_scheme = "safe" + + # Load old state + spawner = KubeSpawner(**spawner_args) + spawner.load_state(old_state) + assert spawner._state_kubespawner_version == "unknown" + # value changed from config + new_spawner_pvc_name = spawner.pvc_name + assert spawner.pvc_name != old_spawner_pvc_name + # start checks for old name and uses it if it exists + await spawner.start() + if handle_legacy_names: + assert spawner.pvc_name == old_spawner_pvc_name + else: + assert spawner.pvc_name == new_spawner_pvc_name + assert spawner._pvc_exists + await spawner.stop() + new_state = spawner.get_state() + assert new_state["pvc_name"] == spawner.pvc_name + + # one more time, this time shouldn't need to call _check_pvc_exists + config.KubeSpawner.pvc_name_template = "shouldnt-be-used" + spawner = KubeSpawner(**spawner_args) + + def _shouldnt_check(): + pytest.fail("shouldn't have called _check_pvc_exists") + + spawner._check_pvc_exists = _shouldnt_check + spawner.load_state(new_state) + assert spawner._pvc_exists + assert spawner._state_kubespawner_version == kubespawner.__version__ + + await spawner.start() + assert spawner.pvc_name == new_state["pvc_name"] + assert spawner._pvc_exists + await spawner.stop() + + async def test_get_pod_manifest_tolerates_mixed_input(): """ Test that the get_pod_manifest function can handle a either a dictionary or From 09ade1c7011ab378a58d6f5d56428fc342868b5f Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Sat, 20 Jul 2024 21:40:47 +0200 Subject: [PATCH 14/18] Fix markdown table formatting --- docs/source/templates.md | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/docs/source/templates.md b/docs/source/templates.md index 6b17da17..12b791b5 100644 --- a/docs/source/templates.md +++ b/docs/source/templates.md @@ -135,13 +135,14 @@ Probably the most common case where the two differ is in the presence of single Examples: -| name | escape scheme | safe scheme | -| username | username | username | -| has-hyphen | has-2dhyphen | has-hyphen | -| Capital | `-43apital` (error) | `capital---1a1cf792` | -| user@email.com | 'user-40email-2ecom' | 'user-email-com---0925f997' | -| 'a-very-long-name-that-is-too-long-for-sixty-four-character-labels' | 'a-2dvery-2dlong-2dname-2dthat-2dis-2dtoo-2dlong-2dfor-2dsixty-2dfour-2dcharacter-2dlabels' (error) | 'a-very-long-name-that-is-too-long-for---29ac5fd2' | -| ALLCAPS | '-41-4c-4c-43-41-50-53' (error) | 'allcaps---27c6794c'| +| name | escape scheme | safe scheme | +| ------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------- | -------------------------------------------------- | +| `username` | `username` | `username` | +| `has-hyphen` | `has-2dhyphen` | `has-hyphen` | +| `Capital` | `-43apital` (error) | `capital---1a1cf792` | +| `user@email.com` | `user-40email-2ecom` | `user-email-com---0925f997` | +| `a-very-long-name-that-is-too-long-for-sixty-four-character-labels` | `a-2dvery-2dlong-2dname-2dthat-2dis-2dtoo-2dlong-2dfor-2dsixty-2dfour-2dcharacter-2dlabels` (error) | `a-very-long-name-that-is-too-long-for---29ac5fd2` | +| `ALLCAPS` | `-41-4c-4c-43-41-50-53` (error) | `allcaps---27c6794c` | Most changed names won't have a practical effect. However, to avoid `pvc_name` changing even though KubeSpawner 6 didn't persist it, From 10ba1fb8121eceb56b1a0993e1dd390d214ec2ca Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Sat, 20 Jul 2024 22:51:22 +0200 Subject: [PATCH 15/18] Document escaped_username and escaped_servername to be added in v7 --- docs/source/templates.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/templates.md b/docs/source/templates.md index 12b791b5..b41c807a 100644 --- a/docs/source/templates.md +++ b/docs/source/templates.md @@ -42,8 +42,8 @@ Because there are two escaping schemes for `username`, `servername`, and `user_s | field | description | | ----------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------- | -| `{escaped_username}` | the username passed through the old 'escape' slug scheme | -| `{escaped_servername}` | the server name passed through the 'escape' slug scheme | +| `{escaped_username}` | the username passed through the old 'escape' slug scheme (new in kubespawner 7) | +| `{escaped_servername}` | the server name passed through the 'escape' slug scheme (new in kubespawner 7) | | `{escaped_user_server}` | the username and servername together as a single slug, identical to `"{escaped_username}--{escaped_servername}".rstrip("-")` (new in kubespawner 7) | | `{safe_username}` | the username passed through the 'safe' slug scheme (new in kubespawner 7) | | `{safe_servername}` | the server name passed through the 'safe' slug scheme (new in kubespawner 7) | From d4c230895c56b26319eb1844408108f8ebdd0f0f Mon Sep 17 00:00:00 2001 From: Min RK Date: Tue, 30 Jul 2024 14:07:40 +0200 Subject: [PATCH 16/18] clearer comment about values being loaded by get_state --- kubespawner/spawner.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kubespawner/spawner.py b/kubespawner/spawner.py index f30cc670..b3649a2f 100644 --- a/kubespawner/spawner.py +++ b/kubespawner/spawner.py @@ -187,8 +187,14 @@ def __init__(self, *args, **kwargs): # By now, all the traitlets have been set, so we can use them to # compute other attributes - # namespace, pod_name, dns_name are persisted in state + # namespace, pod_name, etc. are persisted in state + # so values set here are only _default_ values. + # If this Spawner has ever launched before, + # these values will be be overridden in `get_state()` + # # these same assignments should match clear_state + # for transitive values (pod_name, dns_name) + # but not persistent values (namespace, pvc_name) if self.enable_user_namespaces: self.namespace = self._expand_user_properties( self.user_namespace_template, slug_scheme="safe" From 97399f0aaffa11f887ab68ef2151d650c2fb0a3b Mon Sep 17 00:00:00 2001 From: Min RK Date: Tue, 30 Jul 2024 19:51:39 +0200 Subject: [PATCH 17/18] remove hardcoded safe slug scheme from namespace --- kubespawner/spawner.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kubespawner/spawner.py b/kubespawner/spawner.py index 5b36a4d5..74650f2f 100644 --- a/kubespawner/spawner.py +++ b/kubespawner/spawner.py @@ -196,9 +196,7 @@ def __init__(self, *args, **kwargs): # for transitive values (pod_name, dns_name) # but not persistent values (namespace, pvc_name) if self.enable_user_namespaces: - self.namespace = self._expand_user_properties( - self.user_namespace_template, slug_scheme="safe" - ) + self.namespace = self._expand_user_properties(self.user_namespace_template) self.log.info(f"Using user namespace: {self.namespace}") self.pod_name = self._expand_user_properties(self.pod_name_template) From 6232c4b3f3df80d8c29a03cde7e9874129e73d19 Mon Sep 17 00:00:00 2001 From: Min RK Date: Wed, 31 Jul 2024 15:41:55 +0200 Subject: [PATCH 18/18] only handle legacy pvc name when remember_pvc_name is true since it is an attempt to 'remember' an old name from kubespawner 6 which never remembers --- kubespawner/spawner.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kubespawner/spawner.py b/kubespawner/spawner.py index 74650f2f..023d6869 100644 --- a/kubespawner/spawner.py +++ b/kubespawner/spawner.py @@ -1455,6 +1455,9 @@ def _validate_image_pull_secrets(self, proposal): kubespawner 7 changed the scheme for computing names and labels to be more reliably valid. In order to preserve backward compatibility, the old names must be handled in some places. + Currently, this only affects `pvc_name` + and has no effect when `remember_pvc_name` is False. + You can safely disable this if no PVCs were created or running servers were started before upgrading to kubespawner 7. """, @@ -3107,6 +3110,7 @@ async def _start(self): if self.storage_pvc_ensure: if ( self.handle_legacy_names + and self.remember_pvc_name and not self._pvc_exists and self._state_kubespawner_version == "unknown" ):