Skip to content

Commit

Permalink
let safe slug scheme live side-by-side with escape scheme
Browse files Browse the repository at this point in the history
- add user_server to template namespace
- add `safe_` prefixed fields to template namespace using new 'safe' slug scheme
- add pod_name, namespace, pvc_name to template namespace
  • Loading branch information
minrk committed Jun 17, 2024
1 parent 058afd5 commit 6968b68
Show file tree
Hide file tree
Showing 3 changed files with 174 additions and 77 deletions.
63 changes: 44 additions & 19 deletions kubespawner/slugs.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,23 @@
_object_pattern = re.compile(r'^[a-z0-9\.-]+$')
_label_pattern = re.compile(r'^[a-z0-9\.-_]+$', flags=re.IGNORECASE)

# match two or more hyphens
_hyphen_plus_pattern = re.compile('--+')

# length of hash suffix
_hash_length = 8


def _is_valid_general(
s, starts_with=None, ends_with=None, pattern=None, max_length=None
s, starts_with=None, ends_with=None, pattern=None, min_length=None, max_length=None
):
"""General is_valid check
Checks rules:
"""
if not 1 <= len(s) <= max_length:
if min_length and len(s) < min_length:
return False
if max_length and len(s) > max_length:
return False
if starts_with and not s.startswith(starts_with):
return False
Expand All @@ -47,12 +55,16 @@ def is_valid_object_name(s):
ends_with=_alphanum_lower,
pattern=_object_pattern,
max_length=255,
min_length=1,
)


def is_valid_label(s):
"""is_valid check for label values"""
# label rules: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set
if not s:
# empty strings are valid labels
return True
return _is_valid_general(
s,
starts_with=_alphanum,
Expand All @@ -77,28 +89,40 @@ def is_valid_default(s):
starts_with=_alphanum_lower,
ends_with=_alphanum_lower,
pattern=_object_pattern,
min_length=1,
max_length=63,
)


def strip_and_hash(name):
"""Generate an always-safe, unique string for any input"""
# make sure we start with a prefix
# quick, short hash to avoid name collsions
name_hash = hashlib.sha256(name.encode("utf8")).hexdigest()[:8]
safe_chars = [c for c in name.lower() if c in _lower_plus_hyphen]
safe_name = ''.join(safe_chars[:24])
def strip_and_hash(name, max_length=32):
"""Generate an always-safe, unique string for any input
truncates name to max_length - len(hash_suffix) to fit in max_length
after adding hash suffix
"""
name_length = max_length - (_hash_length + 3)
if name_length < 1:
raise ValueError(f"Cannot make safe names shorter than {_hash_length + 4}")
# quick, short hash to avoid name collisions
name_hash = hashlib.sha256(name.encode("utf8")).hexdigest()[:_hash_length]
# compute safe slug from name (don't worry about collisions, hash handles that)
# cast to lowercase, exclude all but lower & hyphen
safe_name = ''.join([c for c in name.lower() if c in _lower_plus_hyphen])
# strip leading '-'
# squash repeated '--' to one
safe_name = _hyphen_plus_pattern.sub("-", safe_name.lstrip("-"))
# truncate to 24 chars, strip trailing '-'
safe_name = safe_name[:name_length].rstrip("-")
if not safe_name:
# make sure it's non-empty
safe_name = 'x'
if safe_name.startswith('-'):
# starting with '-' is generally not allowed,
# start with 'j-' instead
# Question: always do this so it's consistent, instead of as-needed?
safe_name = f"j{safe_name}"
return f"{safe_name}--{name_hash}"
# due to stripping of '-' above,
# the result will always have _exactly_ '---', never '--' nor '----'
# use '---' to avoid colliding with `{username}--{servername}` template join
return f"{safe_name}---{name_hash}"


def safe_slug(name, is_valid=is_valid_default):
def safe_slug(name, is_valid=is_valid_default, max_length=None):
"""Always generate a safe slug
is_valid should be a callable that returns True if a given string follows appropriate rules,
Expand All @@ -112,8 +136,9 @@ def safe_slug(name, is_valid=is_valid_default):
"""
if '--' in name:
# don't accept any names that could collide with the safe slug
return strip_and_hash(name)
if is_valid(name):
return strip_and_hash(name, max_length=max_length or 32)
# allow max_length override for truncated sub-strings
if is_valid(name) and (max_length is None or len(name) <= max_length):
return name
else:
return strip_and_hash(name)
return strip_and_hash(name, max_length=max_length or 32)
134 changes: 83 additions & 51 deletions kubespawner/spawner.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,9 +186,11 @@ def __init__(self, *args, **kwargs):
# compute other attributes

# namespace, pod_name, dns_name are persisted in state
# thse same assignments should match clear_state
# these same assignments should match clear_state
if self.enable_user_namespaces:
self.namespace = self._expand_user_properties(self.user_namespace_template)
self.namespace = self._expand_user_properties(
self.user_namespace_template, scheme="slug"
)
self.log.info(f"Using user namespace: {self.namespace}")

self.pod_name = self._expand_user_properties(self.pod_name_template)
Expand Down Expand Up @@ -542,7 +544,7 @@ def _namespace_default(self):
)

pod_name_template = Unicode(
'jupyter-{username}--{servername}',
'jupyter-{user_server}',
config=True,
help="""
Template to use to form the name of user's pods.
Expand Down Expand Up @@ -573,7 +575,7 @@ def _namespace_default(self):
The IP address (or hostname) of user's pods which KubeSpawner connects to.
If you do not specify the value, KubeSpawner will use the pod IP.
e.g. `jupyter-{username}--{servername}.notebooks.jupyterhub.svc.cluster.local`,
e.g. `{pod_name}.notebooks.jupyterhub.svc.cluster.local`,
`{username}`, `{userid}`, `{servername}`, `{hubnamespace}`,
`{unescaped_username}`, and `{unescaped_servername}` will be expanded if
Expand Down Expand Up @@ -617,7 +619,7 @@ def _namespace_default(self):
""",
)
pvc_name_template = Unicode(
'claim-{username}--{servername}',
'claim-{user_server}',
config=True,
help="""
Template to use to form the name of user's pvc.
Expand Down Expand Up @@ -1884,71 +1886,100 @@ def _expand_user_properties(self, template, slug_scheme=None):
safe_chars = set(string.ascii_lowercase + string.digits)

raw_servername = self.name or ''
safe_servername = escapism.escape(
escaped_servername = escapism.escape(
raw_servername, safe=safe_chars, escape_char='-'
).lower()

# TODO: measure string template?
# for object names, max is 255, so very unlikely to exceed
# but labels are only 64, so adding a few fields together could easily exceed length limit
# if the per-field limit is more than half the whole budget
# for now, recommend {user_server} anywhere both username and servername are desired
_slug_max_length = 48

if raw_servername:
safe_servername = safe_slug(raw_servername, max_length=_slug_max_length)
else:
safe_servername = ""

username = raw_username = self.user.name
safe_username = safe_slug(raw_username, max_length=_slug_max_length)

# compute safe_user_server = {username}--{servername}
if (
# escaping after joining means
# possible collision with `--` delimiter
'--' in username
or '--' in raw_servername
or username.endswith("-")
or raw_servername.startswith("-")
# length exceeded
or len(safe_username) + len(safe_username) + 2 > _slug_max_length
):
# need double-escape if there's a chance of collision
safe_user_server = safe_slug(
f"{safe_username}--{safe_servername}", max_length=_slug_max_length
)
else:
if raw_servername:
safe_user_server = safe_slug(
f"{username}--{raw_servername}", max_length=_slug_max_length
)
else:
safe_user_server = safe_username

hub_namespace = self._namespace_default()
if hub_namespace == "default":
hub_namespace = "user"

legacy_escaped_username = ''.join(
[s if s in safe_chars else '-' for s in self.user.name.lower()]
)
safe_username = escapism.escape(
escaped_username = escapism.escape(
self.user.name, safe=safe_chars, escape_char='-'
).lower()

if slug_scheme == "safe":
# safe slug scheme escapes _after_ rendering the template
username = self.user.name
servername = raw_servername
# but not escaping before joining {username}--{servername}
# reintroduces the possibility of collision
# e.g. {user--name}--{server} == {user}--{name--server}
# and {user-}--{server} == {user}--{-server}
# in that case, double-escape username and server name.
# double-escape will not collide,
# because it will match always match this condition and be escaped again
if (
'--' in username
or '--' in servername
or username.endswith("-")
or servername.startswith("-")
):
username = safe_slug(username)
if servername:
servername = safe_slug(servername)
elif slug_scheme == "escape":
username = safe_username
servername = safe_servername
user_server = safe_user_server
elif slug_scheme == "escape":
# backward-compatible 'escape' scheme is not safe
username = escaped_username
servername = escaped_servername
if servername:
user_server = f"{escaped_username}--{escaped_servername}"
else:
user_server = escaped_username
else:
raise ValueError(
f"slug scheme must be 'safe' or 'escape', not '{slug_scheme}'"
)

rendered = template.format(
ns = dict(
# raw values, always consistent
userid=self.user.id,
username=username,
escaped_username=safe_username,
unescaped_username=self.user.name,
legacy_escape_username=legacy_escaped_username,
servername=servername,
unescaped_servername=raw_servername,
escaped_servername=safe_servername,
hubnamespace=hub_namespace,
# scheme-dependent
username=username,
servername=servername,
user_server=user_server,
# safe values (new 'safe' scheme)
safe_username=safe_username,
safe_servername=safe_servername,
safe_user_server=safe_user_server,
# legacy values (old 'escape' scheme)
escaped_username=escaped_username,
escaped_servername=escaped_servername,
escaped_user_server=f"{escaped_username}--{escaped_servername}",
)
# strip trailing - delimiter in case of empty servername.
# k8s object names cannot have trailing '-'
if slug_scheme == "safe":
if not (username.endswith("-") or self.name.endswith("-")):
# strip trailing - delimiter _unless_ it's actually the end of the user or server name
rendered = rendered.rstrip("-")
# 'safe' slug scheme does processing after
rendered = safe_slug(rendered)
else:
# safe to strip trailing - in escape scheme because escaped username/servername will never end in '-'
rendered = rendered.rstrip("-")
# add some resolved values so they can be referred to.
# these may not be defined yet (i.e. when computing the values themselves).
for attr_name in ("pod_name", "pvc_name", "namespace"):
ns[attr_name] = getattr(self, attr_name, f"{attr_name}_unavailable!")

rendered = template.format(**ns)
# strip trailing '-' in case of old '{username}--{servername}' template
rendered = rendered.rstrip("-")
return rendered

def _expand_env(self, env):
Expand Down Expand Up @@ -2054,12 +2085,11 @@ def _get_pod_url(self, pod):
hostname = f"[{hostname}]"

if self.pod_connect_ip:
# pod_connect_ip is not a slug
hostname = ".".join(
[
s.rstrip("-")
for s in self._expand_user_properties(self.pod_connect_ip).split(
"."
)
self._expand_user_properties(s) if '{' in s else s
for s in self.pod_connect_ip.split(".")
]
)

Expand Down Expand Up @@ -2277,6 +2307,8 @@ def get_state(self):
We also save the namespace and DNS name for use cases where the namespace is
calculated dynamically, or it changes between restarts.
`pvc_name` is also saved, to prevent data loss if template changes across restarts.
"""
state = super().get_state()
# these should only be persisted if our pod is running
Expand Down
54 changes: 47 additions & 7 deletions tests/test_slugs.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,61 @@
import pytest

from kubespawner.slugs import safe_slug
from kubespawner.slugs import is_valid_label, safe_slug


@pytest.mark.parametrize(
"name, expected",
[
("jupyter-alex", "jupyter-alex"),
("jupyter-Alex", "jupyter-alex--3a1c285c"),
("jupyter-üni", "jupyter-ni--a5aaf5dd"),
("jupyter-Alex", "jupyter-alex---3a1c285c"),
("jupyter-üni", "jupyter-ni---a5aaf5dd"),
("endswith-", "endswith---165f1166"),
("-start", "j-start--f587e2dc"),
("j-start--f587e2dc", "j-start--f587e2dc--f007ef7c"),
("x" * 65, "xxxxxxxxxxxxxxxxxxxxxxxx--9537c5fd"),
("x" * 66, "xxxxxxxxxxxxxxxxxxxxxxxx--6eb879f1"),
("-start", "start---f587e2dc"),
("username--servername", "username-servername---d957f1de"),
("start---f587e2dc", "start-f587e2dc---cc5bb9c9"),
pytest.param("x" * 63, "x" * 63, id="x63"),
pytest.param("x" * 64, "xxxxxxxxxxxxxxxxxxxxx---7ce10097", id="x64"),
pytest.param("x" * 65, "xxxxxxxxxxxxxxxxxxxxx---9537c5fd", id="x65"),
("", "x---e3b0c442"),
],
)
def test_safe_slug(name, expected):
slug = safe_slug(name)
assert slug == expected


@pytest.mark.parametrize(
"max_length, length, expected",
[
(16, 16, "x" * 16),
(16, 17, "xxxxx---d04fd59f"),
(11, 16, "error"),
(12, 16, "x---9c572959"),
],
)
def test_safe_slug_max_length(max_length, length, expected):
name = "x" * length
if expected == "error":
with pytest.raises(ValueError):
safe_slug(name, max_length=max_length)
return

slug = safe_slug(name, max_length=max_length)
assert slug == expected


@pytest.mark.parametrize(
"name, expected",
[
("", ""),
("x", "x"),
("-x", "x---a4209624"),
("x-", "x---c8b60efc"),
pytest.param("x" * 63, "x" * 63, id="x64"),
pytest.param("x" * 64, "xxxxxxxxxxxxxxxxxxxxx---7ce10097", id="x64"),
pytest.param("x" * 65, "xxxxxxxxxxxxxxxxxxxxx---9537c5fd", id="x65"),
],
)
def test_safe_slug_label(name, expected):
slug = safe_slug(name, is_valid=is_valid_label)
assert slug == expected

0 comments on commit 6968b68

Please sign in to comment.