diff --git a/kubespawner/slugs.py b/kubespawner/slugs.py index 6044e75c..ff08cb51 100644 --- a/kubespawner/slugs.py +++ b/kubespawner/slugs.py @@ -94,6 +94,31 @@ def is_valid_default(s): ) +def _extract_safe_name(name, max_length): + """Generate safe substring of a name + + Guarantees: + + - always starts and ends with a lowercase letter or number + - never more than one hyphen in a row (no '--') + - only contains lowercase letters, numbers, and hyphens + - length at least 1 ('x' if other rules strips down to empty string) + - max length not exceeded + """ + # compute safe slug from name (don't worry about collisions, hash handles that) + # cast to lowercase, exclude all but lower & hyphen + # we could keep numbers, but it must not _start_ with a number + safe_name = ''.join([c for c in name.lower() if c in _lower_plus_hyphen]) + # strip leading '-', squash repeated '--' to '-' + safe_name = _hyphen_plus_pattern.sub("-", safe_name.lstrip("-")) + # truncate to max_length chars, strip trailing '-' + safe_name = safe_name[:max_length].rstrip("-") + if not safe_name: + # make sure it's non-empty + safe_name = 'x' + return safe_name + + def strip_and_hash(name, max_length=32): """Generate an always-safe, unique string for any input @@ -105,18 +130,8 @@ def strip_and_hash(name, max_length=32): raise ValueError(f"Cannot make safe names shorter than {_hash_length + 4}") # quick, short hash to avoid name collisions name_hash = hashlib.sha256(name.encode("utf8")).hexdigest()[:_hash_length] - # compute safe slug from name (don't worry about collisions, hash handles that) - # cast to lowercase, exclude all but lower & hyphen - safe_name = ''.join([c for c in name.lower() if c in _lower_plus_hyphen]) - # strip leading '-' - # squash repeated '--' to one - safe_name = _hyphen_plus_pattern.sub("-", safe_name.lstrip("-")) - # truncate to 24 chars, strip trailing '-' - safe_name = safe_name[:name_length].rstrip("-") - if not safe_name: - # make sure it's non-empty - safe_name = 'x' - # due to stripping of '-' above, + safe_name = _extract_safe_name(name, name_length) + # due to stripping of '-' in _extract_safe_name, # the result will always have _exactly_ '---', never '--' nor '----' # use '---' to avoid colliding with `{username}--{servername}` template join return f"{safe_name}---{name_hash}" @@ -142,3 +157,38 @@ def safe_slug(name, is_valid=is_valid_default, max_length=None): return name else: return strip_and_hash(name, max_length=max_length or 32) + + +def multi_slug(names, max_length=48): + """multi-component slug with single hash on the end + + same as strip_and_hash, but name components are joined with '--', + so it looks like: + + {name1}--{name2}---{hash} + + In order to avoid hash collisions on boundaries, use `\\xFF` as delimiter + """ + hasher = hashlib.sha256() + hasher.update(names[0].encode("utf8")) + for name in names[1:]: + # \xFF can't occur as a start byte in UTF8 + # so use it as a word delimiter to make sure overlapping words don't collide + hasher.update(b"\xFF") + hasher.update(name.encode("utf8")) + hash = hasher.hexdigest()[:_hash_length] + + name_slugs = [] + available_chars = max_length - (_hash_length + 1) + # allocate equal space per name + # per_name accounts for '{name}--', so really two less + per_name = available_chars // len(names) + name_max_length = per_name - 2 + if name_max_length < 2: + raise ValueError(f"Not enough characters for {len(names)} names: {max_length}") + for name in names: + name_slugs.append(_extract_safe_name(name, name_max_length)) + + # by joining names with '--', this cannot collide with single-hashed names, + # which can only contain '-' and the '---' hash delimiter once + return f"{'--'.join(name_slugs)}---{hash}" diff --git a/kubespawner/spawner.py b/kubespawner/spawner.py index 0feb1099..c2dfa599 100644 --- a/kubespawner/spawner.py +++ b/kubespawner/spawner.py @@ -48,7 +48,7 @@ make_service, ) from .reflector import ResourceReflector -from .slugs import is_valid_label, safe_slug +from .slugs import is_valid_label, multi_slug, safe_slug from .utils import recursive_format, recursive_update @@ -189,7 +189,7 @@ def __init__(self, *args, **kwargs): # these same assignments should match clear_state if self.enable_user_namespaces: self.namespace = self._expand_user_properties( - self.user_namespace_template, scheme="slug" + self.user_namespace_template, slug_scheme="safe" ) self.log.info(f"Using user namespace: {self.namespace}") @@ -1907,24 +1907,21 @@ def _expand_user_properties(self, template, slug_scheme=None): # compute safe_user_server = {username}--{servername} if ( - # escaping after joining means - # possible collision with `--` delimiter - '--' in username - or '--' in raw_servername - or username.endswith("-") - or raw_servername.startswith("-") - # length exceeded - or len(safe_username) + len(safe_username) + 2 > _slug_max_length + # double-escape if safe names are too long after join + len(safe_username) + len(safe_servername) + 2 + > _slug_max_length ): # need double-escape if there's a chance of collision - safe_user_server = safe_slug( - f"{safe_username}--{safe_servername}", max_length=_slug_max_length + safe_user_server = multi_slug( + [username, raw_servername], max_length=_slug_max_length ) else: if raw_servername: - safe_user_server = safe_slug( - f"{username}--{raw_servername}", max_length=_slug_max_length - ) + # choices: + # - {safe_username}--{safe_servername} # could get 2 hashes + # - always {multi_slug} # always a hash for named servers + # - safe_slug({username}--{servername}) # lots of possible collisions to handle specially + safe_user_server = f"{safe_username}--{safe_servername}" else: safe_user_server = safe_username diff --git a/tests/test_spawner.py b/tests/test_spawner.py index e3859179..b57c116d 100644 --- a/tests/test_spawner.py +++ b/tests/test_spawner.py @@ -1418,7 +1418,7 @@ async def test_pod_name_escaping(): spawner = KubeSpawner(config=c, user=user, orm_spawner=orm_spawner, _mock=True) - assert spawner.pod_name == "jupyter-some-5fuser--test-2dserver-21" + assert spawner.pod_name == "jupyter-someuser---7d3a4d4e--test-server---cb54a9af" async def test_pod_name_custom_template(): @@ -1442,15 +1442,15 @@ async def test_pod_name_collision(): user2 = MockUser() user2.name = "user-has" orm_spawner2 = Spawner() - orm_spawner2.name = "2ddash" + orm_spawner2.name = "dash" spawner = KubeSpawner(user=user1, orm_spawner=orm_spawner1, _mock=True) - assert spawner.pod_name == "jupyter-user-2dhas-2ddash" - assert spawner.pvc_name == "claim-user-2dhas-2ddash" + assert spawner.pod_name == "jupyter-user-has-dash" + assert spawner.pvc_name == "claim-user-has-dash" named_spawner = KubeSpawner(user=user2, orm_spawner=orm_spawner2, _mock=True) - assert named_spawner.pod_name == "jupyter-user-2dhas--2ddash" + assert named_spawner.pod_name == "jupyter-user-has--dash" assert spawner.pod_name != named_spawner.pod_name - assert named_spawner.pvc_name == "claim-user-2dhas--2ddash" + assert named_spawner.pvc_name == "claim-user-has--dash" assert spawner.pvc_name != named_spawner.pvc_name