Skip to content

Commit

Permalink
add multi_slug mechanism for multi-word slugs (username--servername--…
Browse files Browse the repository at this point in the history
…-hash)
  • Loading branch information
minrk committed Jun 17, 2024
1 parent 6968b68 commit 45299ce
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 33 deletions.
74 changes: 62 additions & 12 deletions kubespawner/slugs.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,31 @@ def is_valid_default(s):
)


def _extract_safe_name(name, max_length):
"""Generate safe substring of a name
Guarantees:
- always starts and ends with a lowercase letter or number
- never more than one hyphen in a row (no '--')
- only contains lowercase letters, numbers, and hyphens
- length at least 1 ('x' if other rules strips down to empty string)
- max length not exceeded
"""
# compute safe slug from name (don't worry about collisions, hash handles that)
# cast to lowercase, exclude all but lower & hyphen
# we could keep numbers, but it must not _start_ with a number
safe_name = ''.join([c for c in name.lower() if c in _lower_plus_hyphen])
# strip leading '-', squash repeated '--' to '-'
safe_name = _hyphen_plus_pattern.sub("-", safe_name.lstrip("-"))
# truncate to max_length chars, strip trailing '-'
safe_name = safe_name[:max_length].rstrip("-")
if not safe_name:
# make sure it's non-empty
safe_name = 'x'
return safe_name


def strip_and_hash(name, max_length=32):
"""Generate an always-safe, unique string for any input
Expand All @@ -105,18 +130,8 @@ def strip_and_hash(name, max_length=32):
raise ValueError(f"Cannot make safe names shorter than {_hash_length + 4}")
# quick, short hash to avoid name collisions
name_hash = hashlib.sha256(name.encode("utf8")).hexdigest()[:_hash_length]
# compute safe slug from name (don't worry about collisions, hash handles that)
# cast to lowercase, exclude all but lower & hyphen
safe_name = ''.join([c for c in name.lower() if c in _lower_plus_hyphen])
# strip leading '-'
# squash repeated '--' to one
safe_name = _hyphen_plus_pattern.sub("-", safe_name.lstrip("-"))
# truncate to 24 chars, strip trailing '-'
safe_name = safe_name[:name_length].rstrip("-")
if not safe_name:
# make sure it's non-empty
safe_name = 'x'
# due to stripping of '-' above,
safe_name = _extract_safe_name(name, name_length)
# due to stripping of '-' in _extract_safe_name,
# the result will always have _exactly_ '---', never '--' nor '----'
# use '---' to avoid colliding with `{username}--{servername}` template join
return f"{safe_name}---{name_hash}"
Expand All @@ -142,3 +157,38 @@ def safe_slug(name, is_valid=is_valid_default, max_length=None):
return name
else:
return strip_and_hash(name, max_length=max_length or 32)


def multi_slug(names, max_length=48):
"""multi-component slug with single hash on the end
same as strip_and_hash, but name components are joined with '--',
so it looks like:
{name1}--{name2}---{hash}
In order to avoid hash collisions on boundaries, use `\\xFF` as delimiter
"""
hasher = hashlib.sha256()
hasher.update(names[0].encode("utf8"))
for name in names[1:]:
# \xFF can't occur as a start byte in UTF8
# so use it as a word delimiter to make sure overlapping words don't collide
hasher.update(b"\xFF")
hasher.update(name.encode("utf8"))
hash = hasher.hexdigest()[:_hash_length]

name_slugs = []
available_chars = max_length - (_hash_length + 1)
# allocate equal space per name
# per_name accounts for '{name}--', so really two less
per_name = available_chars // len(names)
name_max_length = per_name - 2
if name_max_length < 2:
raise ValueError(f"Not enough characters for {len(names)} names: {max_length}")
for name in names:
name_slugs.append(_extract_safe_name(name, name_max_length))

# by joining names with '--', this cannot collide with single-hashed names,
# which can only contain '-' and the '---' hash delimiter once
return f"{'--'.join(name_slugs)}---{hash}"
27 changes: 12 additions & 15 deletions kubespawner/spawner.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
make_service,
)
from .reflector import ResourceReflector
from .slugs import is_valid_label, safe_slug
from .slugs import is_valid_label, multi_slug, safe_slug
from .utils import recursive_format, recursive_update


Expand Down Expand Up @@ -189,7 +189,7 @@ def __init__(self, *args, **kwargs):
# these same assignments should match clear_state
if self.enable_user_namespaces:
self.namespace = self._expand_user_properties(
self.user_namespace_template, scheme="slug"
self.user_namespace_template, slug_scheme="safe"
)
self.log.info(f"Using user namespace: {self.namespace}")

Expand Down Expand Up @@ -1907,24 +1907,21 @@ def _expand_user_properties(self, template, slug_scheme=None):

# compute safe_user_server = {username}--{servername}
if (
# escaping after joining means
# possible collision with `--` delimiter
'--' in username
or '--' in raw_servername
or username.endswith("-")
or raw_servername.startswith("-")
# length exceeded
or len(safe_username) + len(safe_username) + 2 > _slug_max_length
# double-escape if safe names are too long after join
len(safe_username) + len(safe_servername) + 2
> _slug_max_length
):
# need double-escape if there's a chance of collision
safe_user_server = safe_slug(
f"{safe_username}--{safe_servername}", max_length=_slug_max_length
safe_user_server = multi_slug(
[username, raw_servername], max_length=_slug_max_length
)
else:
if raw_servername:
safe_user_server = safe_slug(
f"{username}--{raw_servername}", max_length=_slug_max_length
)
# choices:
# - {safe_username}--{safe_servername} # could get 2 hashes
# - always {multi_slug} # always a hash for named servers
# - safe_slug({username}--{servername}) # lots of possible collisions to handle specially
safe_user_server = f"{safe_username}--{safe_servername}"
else:
safe_user_server = safe_username

Expand Down
12 changes: 6 additions & 6 deletions tests/test_spawner.py
Original file line number Diff line number Diff line change
Expand Up @@ -1418,7 +1418,7 @@ async def test_pod_name_escaping():

spawner = KubeSpawner(config=c, user=user, orm_spawner=orm_spawner, _mock=True)

assert spawner.pod_name == "jupyter-some-5fuser--test-2dserver-21"
assert spawner.pod_name == "jupyter-someuser---7d3a4d4e--test-server---cb54a9af"


async def test_pod_name_custom_template():
Expand All @@ -1442,15 +1442,15 @@ async def test_pod_name_collision():
user2 = MockUser()
user2.name = "user-has"
orm_spawner2 = Spawner()
orm_spawner2.name = "2ddash"
orm_spawner2.name = "dash"

spawner = KubeSpawner(user=user1, orm_spawner=orm_spawner1, _mock=True)
assert spawner.pod_name == "jupyter-user-2dhas-2ddash"
assert spawner.pvc_name == "claim-user-2dhas-2ddash"
assert spawner.pod_name == "jupyter-user-has-dash"
assert spawner.pvc_name == "claim-user-has-dash"
named_spawner = KubeSpawner(user=user2, orm_spawner=orm_spawner2, _mock=True)
assert named_spawner.pod_name == "jupyter-user-2dhas--2ddash"
assert named_spawner.pod_name == "jupyter-user-has--dash"
assert spawner.pod_name != named_spawner.pod_name
assert named_spawner.pvc_name == "claim-user-2dhas--2ddash"
assert named_spawner.pvc_name == "claim-user-has--dash"
assert spawner.pvc_name != named_spawner.pvc_name


Expand Down

0 comments on commit 45299ce

Please sign in to comment.