Skip to content

Commit

Permalink
Clean kubernetes labels
Browse files Browse the repository at this point in the history
  • Loading branch information
dhpollack committed Jan 18, 2023
1 parent 0f7d910 commit 2503653
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 3 deletions.
6 changes: 4 additions & 2 deletions metaflow/plugins/kubernetes/kubernetes_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,8 +186,10 @@ def echo(msg, stream="stderr", job_id=None):

# `labels` is a tuple of strings, or a tuple holding a single comma-separated
# string; split every entry on commas and convert the result to a dict
labels = KubernetesDecorator.parse_kube_list(
[l for l_tmp in labels for l in l_tmp.split(",")], False
labels = KubernetesDecorator.clean_kube_labels(
KubernetesDecorator.parse_kube_list(
[l for l_tmp in labels for l in l_tmp.split(",")], False
)
)

def _sync_metadata():
Expand Down
27 changes: 26 additions & 1 deletion metaflow/plugins/kubernetes/kubernetes_decorator.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import hashlib
import json
import os
import platform
import re
import sys
from typing import List, Union
from typing import Dict, List, Optional, Union

from metaflow.decorators import StepDecorator
from metaflow.exception import MetaflowException
Expand Down Expand Up @@ -116,6 +118,7 @@ def __init__(self, attributes=None, statically_defined=False):
self.attributes["labels"] = self.parse_kube_list(
self.attributes["labels"].split(","), False
)
self.attributes["labels"] = self.clean_kube_labels(self.attributes["labels"])

if isinstance(self.attributes["node_selector"], str):
self.attributes["node_selector"] = self.parse_kube_list(
Expand Down Expand Up @@ -411,3 +414,25 @@ def parse_kube_list(items: Union[str, List[str]], requires_both: bool = True):
return ret
except (AttributeError, IndexError):
raise KubernetesException("Unable to parse kubernetes list: %s" % items)

@staticmethod
def clean_kube_labels(
    labels: Optional[Dict[str, Optional[str]]],
    max_len: int = 63,
    regex_sub: str = r"^[^a-z0-9A-Z]*|[^a-zA-Z0-9_\-\.]|[^a-z0-9A-Z]*$",
):
    """Sanitize label values so they fit the given length and character rules.

    Inspired by the Apache Airflow label cleaner.

    For every non-empty value, all matches of ``regex_sub`` are removed. If
    that changed the value, or the cleaned value is longer than ``max_len``,
    the value is truncated and suffixed with ``-<hash>``, where ``<hash>`` is
    an 18-character blake2b hex digest of the cleaned value. Keys are left
    untouched.

    Parameters
    ----------
    labels : dict or None
        Mapping of label name to label value. Values may be ``None`` or
        empty; those are passed through unchanged.
    max_len : int
        Maximum length of a cleaned value (default 63).
    regex_sub : str
        Regular expression whose matches are deleted from each value.

    Returns
    -------
    dict or None
        A new dict with cleaned values, or ``labels`` itself when it is
        falsy (``None`` or empty).
    """

    def clean_label(s: Optional[str]):
        if not s:
            # allow empty label
            return s
        s_clean = re.sub(regex_sub, "", s)
        if len(s_clean) <= max_len and s == s_clean:
            # Already valid: keep the value exactly as given.
            return s_clean
        # NOTE(review): the hash is taken over the *cleaned* text, so inputs
        # that differ only in stripped characters map to the same value --
        # confirm this is intended.
        clean_hash = hashlib.blake2b(s_clean.encode(), digest_size=9).hexdigest()
        # Room left for the readable prefix; clamp so a small ``max_len``
        # cannot turn the slice bound negative.
        keep = max(max_len - len(clean_hash) - 1, 0)
        prefix = s_clean[:keep]
        if not prefix:
            # Nothing readable survived (e.g. the value was all punctuation).
            # Emitting "-<hash>" would start with "-", so return the hash
            # alone, cut to ``max_len`` if necessary.
            return clean_hash[: max(max_len, 0)]
        return f"{prefix}-{clean_hash}"

    if not labels:
        return labels
    return {k: clean_label(v) for k, v in labels.items()}

0 comments on commit 2503653

Please sign in to comment.