Skip to content

Commit

Permalink
feat: ensure notebook endpoints do their job (#388)
Browse files Browse the repository at this point in the history
This work brings the notebook related endpoints to a working state
to serve as replacement for the renku-notebooks external service.

Short summary:
- API tests have been added
- Code has been fixed to answer requests and handle exceptions correctly
- Automated test cluster creation has been added using k3d

---------

Co-authored-by: Tasko Olevski <tasko.olevski@sdsc.ethz.ch>
  • Loading branch information
sgaist and olevski committed Nov 6, 2024
1 parent fa3f88b commit 6359891
Show file tree
Hide file tree
Showing 27 changed files with 654 additions and 89 deletions.
1 change: 0 additions & 1 deletion .devcontainer/.poetry_cache/.keep
Original file line number Diff line number Diff line change
@@ -1 +0,0 @@

4 changes: 2 additions & 2 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
"command": "poetry self add poetry-polylith-plugin"
},
"ghcr.io/devcontainers/features/docker-in-docker:2": {},
"ghcr.io/mpriscella/features/kind:1": {},
"ghcr.io/devcontainers-contrib/features/gh-release:1": {
"repo": "authzed/zed",
"binaryNames": "zed"
Expand All @@ -28,7 +27,8 @@
"ghcr.io/EliiseS/devcontainer-features/bash-profile:1": {
"command": "alias k=kubectl"
},
"ghcr.io/devcontainers-contrib/features/rclone:1": {}
"ghcr.io/devcontainers-contrib/features/rclone:1": {},
"./k3d": {}
},
"overrideFeatureInstallOrder": [
"ghcr.io/devcontainers-contrib/features/poetry",
Expand Down
17 changes: 17 additions & 0 deletions .devcontainer/k3d/devcontainer-feature.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"id": "k3d",
"version": "1.0.0",
"name": "k3s based kubernetes cluster in docker",
"postCreateCommand": "k3d --version",
"installsAfter": [
"ghcr.io/devcontainers-contrib/features/bash-command"
],
"options": {
"k3d_version": {
"type": "string",
"description": "k3d version to install",
"proposals": ["latest", "5.7.4"],
"default": "latest"
}
}
}
14 changes: 14 additions & 0 deletions .devcontainer/k3d/install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Install k3d (k3s-in-docker) for the devcontainer feature.
# K3D_VERSION comes from the feature option "k3d_version":
#   "none"   -> skip installation entirely
#   "latest" -> install the newest release
#   other    -> resolve to a matching release tag and install that version
if [ "${K3D_VERSION}" != "none" ]; then
    echo "Downloading k3d..."
    if [ "${K3D_VERSION}" = "latest" ]; then
        # Install the latest release via the official k3d installer script.
        curl -sSL https://raw.githubusercontent.com/k3d-io/k3d/main/install.sh | bash
    else
        # Resolve a possibly-partial version (e.g. "5.7") to a concrete git tag.
        # NOTE(review): k3d is hosted at k3d-io/k3d; the previous URL pointed at
        # the nonexistent kubernetes/K3D repository, so tag lookup could not work.
        find_version_from_git_tags K3D_VERSION https://github.com/k3d-io/k3d
        # The installer expects a "v"-prefixed tag (e.g. v5.7.4).
        if [ "${K3D_VERSION::1}" != "v" ]; then
            K3D_VERSION="v${K3D_VERSION}"
        fi
        # Install the requested tag via the official k3d installer script.
        curl -sSL https://raw.githubusercontent.com/k3d-io/k3d/main/install.sh | TAG="${K3D_VERSION}" bash
    fi
fi
5 changes: 5 additions & 0 deletions .github/workflows/test_publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,11 @@ jobs:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- uses: actions/cache/restore@v3
name: Restore cache
with:
path: ${{ env.CACHE_PATH }}
key: ${{ env.CACHE_KEY }}
- name: Set Git config
shell: bash
run: |
Expand Down
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@ repos:
- id: check-toml
- id: debug-statements
- id: end-of-file-fixer
exclude: 'components/renku_data_services/message_queue/(avro_models|schemas)'
- id: mixed-line-ending
- id: trailing-whitespace
exclude: 'components/renku_data_services/message_queue/(avro_models|schemas)'
- repo: https://github.com/asottile/yesqa
rev: v1.5.0
hooks:
Expand Down
18 changes: 6 additions & 12 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
.PHONY: schemas tests test_setup main_tests schemathesis_tests collect_coverage style_checks pre_commit_checks run download_avro check_avro avro_models update_avro kind_cluster install_amaltheas all
.PHONY: schemas tests test_setup main_tests schemathesis_tests collect_coverage style_checks pre_commit_checks run download_avro check_avro avro_models update_avro k3d_cluster install_amaltheas all

AMALTHEA_JS_VERSION ?= 0.12.2
AMALTHEA_SESSIONS_VERSION ?= 0.0.9-new-operator-chart
AMALTHEA_SESSIONS_VERSION ?= 0.0.10-new-operator-chart
codegen_params = --input-file-type openapi --output-model-type pydantic_v2.BaseModel --use-double-quotes --target-python-version 3.12 --collapse-root-models --field-constraints --strict-nullable --set-default-enum-member --openapi-scopes schemas paths parameters --set-default-enum-member --use-one-literal-as-default --use-default

define test_apispec_up_to_date
Expand Down Expand Up @@ -151,21 +151,15 @@ help: ## Display this help.

##@ Helm/k8s

kind_cluster: ## Creates a kind cluster for testing
kind delete cluster
docker network rm -f kind
docker network create -d=bridge -o com.docker.network.bridge.enable_ip_masquerade=true -o com.docker.network.driver.mtu=1500 --ipv6=false kind
kind create cluster --config kind_config.yaml
kubectl apply -f https://raw.githubusercontent.com/kubernetes/ingress-nginx/main/deploy/static/provider/kind/deploy.yaml
echo "Waiting for ingress controller to initialize"
sleep 15
kubectl wait --namespace ingress-nginx --for=condition=ready pod --selector=app.kubernetes.io/component=controller --timeout=90s
k3d_cluster: ## Creates a k3d cluster for testing
k3d cluster delete
k3d cluster create --agents 1 --k3s-arg --disable=metrics-server@server:0

install_amaltheas: ## Installs both versions of amalthea. NOTE: It uses the currently active k8s context.
helm repo add renku https://swissdatasciencecenter.github.io/helm-charts
helm repo update
helm upgrade --install amalthea-js renku/amalthea --version $(AMALTHEA_JS_VERSION)
helm upgrade --install amalthea-sessions amalthea-sessions-0.0.9-new-operator-chart.tgz --version $(AMALTHEA_SESSIONS_VERSION)
helm upgrade --install amalthea-se renku/amalthea-sessions --version ${AMALTHEA_SESSIONS_VERSION}

# TODO: Add the version variables from the top of the file here when the charts are fully published
amalthea_schema: ## Regenerates pydantic classes from CRDs
Expand Down
27 changes: 27 additions & 0 deletions components/renku_data_services/base_api/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,3 +180,30 @@ async def decorated_function(*args: _P.args, **kwargs: _P.kwargs) -> _T:
return response

return decorated_function


def internal_gitlab_authenticate(
    authenticator: Authenticator,
) -> Callable[
    [Callable[Concatenate[Request, APIUser, APIUser, _P], Coroutine[Any, Any, _T]]],
    Callable[Concatenate[Request, APIUser, _P], Coroutine[Any, Any, _T]],
]:
    """Decorator for a Sanic handler that adds a user for the internal gitlab user.

    The decorated handler declares an extra ``internal_gitlab_user: APIUser``
    parameter (third positional argument); this decorator resolves that user
    from the ``Gitlab-Access-Token`` request header and injects it, so callers
    see a handler taking only ``(request, user, *args, **kwargs)``.
    """

    def decorator(
        f: Callable[Concatenate[Request, APIUser, APIUser, _P], Coroutine[Any, Any, _T]],
    ) -> Callable[Concatenate[Request, APIUser, _P], Coroutine[Any, Any, _T]]:
        @wraps(f)
        async def decorated_function(
            request: Request,
            user: APIUser,
            *args: _P.args,
            **kwargs: _P.kwargs,
        ) -> _T:
            # NOTE(review): a missing header yields the literal string "None"
            # because of str(...); presumably the authenticator rejects that
            # token — confirm this is intended rather than passing None through.
            access_token = str(request.headers.get("Gitlab-Access-Token"))
            internal_gitlab_user = await authenticator.authenticate(access_token, request)
            return await f(request, user, internal_gitlab_user, *args, **kwargs)

        return decorated_function

    return decorator
9 changes: 3 additions & 6 deletions components/renku_data_services/notebooks/api.spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -454,11 +454,8 @@ components:
message:
type: string
example: "Something went wrong - please try again later"
required:
- "code"
- "message"
required:
- "error"
required: ["code", "message"]
required: ["error"]
Generated:
properties:
enabled:
Expand Down Expand Up @@ -881,7 +878,7 @@ components:
type: integer
description: The size of disk storage for the session, in gigabytes
resource_class_id:
default:
default:
nullable: true
type: integer
cloudstorage:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def session_tolerations(server: "UserServer") -> list[dict[str, Any]]:
"op": "add",
"path": "/statefulset/spec/template/spec/tolerations",
"value": default_tolerations
+ [i.json_match_expression() for i in server.server_options.tolerations],
+ [toleration.json_match_expression() for toleration in server.server_options.tolerations],
}
],
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -312,9 +312,9 @@ def certificates_container(config: _NotebooksConfig) -> tuple[client.V1Container
projected=client.V1ProjectedVolumeSource(
default_mode=440,
sources=[
{"secret": {"name": i.get("secret")}}
for i in config.sessions.ca_certs.secrets
if isinstance(i, dict) and i.get("secret") is not None
{"secret": {"name": secret.get("secret")}}
for secret in config.sessions.ca_certs.secrets
if isinstance(secret, dict) and secret.get("secret") is not None
],
),
)
Expand Down
25 changes: 16 additions & 9 deletions components/renku_data_services/notebooks/api/classes/k8s_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,11 @@ async def get_pod_logs(self, name: str, max_log_lines: Optional[int] = None) ->
"""Get the logs of all containers in the session."""
pod = cast(Pod, await Pod.get(name=name, namespace=self.namespace))
logs: dict[str, str] = {}
containers = [i.name for i in pod.spec.containers] + [i.name for i in pod.spec.initContainers]
containers = [container.name for container in pod.spec.containers + pod.spec.get("initContainers", [])]
for container in containers:
try:
# NOTE: calling pod.logs without a container name set crashes the library
clogs: list[str] = [i async for i in pod.logs(container=container, tail_lines=max_log_lines)]
clogs: list[str] = [clog async for clog in pod.logs(container=container, tail_lines=max_log_lines)]
except NotFoundError:
raise errors.MissingResourceError(message=f"The session pod {name} does not exist.")
except ServerError as err:
Expand Down Expand Up @@ -243,8 +243,10 @@ async def patch_statefulset_tokens(self, name: str, renku_tokens: RenkuTokens) -
except NotFoundError:
return None

containers: list[V1Container] = [V1Container(**i) for i in sts.spec.template.spec.containers]
init_containers: list[V1Container] = [V1Container(**i) for i in sts.spec.template.spec.init_containers]
containers: list[V1Container] = [V1Container(**container) for container in sts.spec.template.spec.containers]
init_containers: list[V1Container] = [
V1Container(**container) for container in sts.spec.template.spec.init_containers
]

git_proxy_container_index, git_proxy_container = next(
((i, c) for i, c in enumerate(containers) if c.name == "git-proxy"),
Expand Down Expand Up @@ -368,7 +370,7 @@ async def list_servers(self, safe_username: str) -> list[_SessionType]:
)
raise JSCacheError(f"The JSCache produced an unexpected status code: {res.status_code}")

return [self.server_type.model_validate(i) for i in res.json()]
return [self.server_type.model_validate(server) for server in res.json()]

async def get_server(self, name: str) -> _SessionType | None:
"""Get a specific jupyter server."""
Expand Down Expand Up @@ -441,7 +443,11 @@ async def get_server_logs(
) -> dict[str, str]:
"""Get the logs from the server."""
# NOTE: this get_server ensures the user has access to the server without it you could read someone elses logs
_ = await self.get_server(server_name, safe_username)
server = await self.get_server(server_name, safe_username)
if not server:
raise MissingResourceError(
f"Cannot find server {server_name} for user " f"{safe_username} to retrieve logs."
)
pod_name = f"{server_name}-0"
return await self.renku_ns_client.get_pod_logs(pod_name, max_log_lines)

Expand Down Expand Up @@ -481,9 +487,10 @@ async def delete_server(self, server_name: str, safe_username: str) -> None:
"""Delete the server."""
server = await self.get_server(server_name, safe_username)
if not server:
return None
await self.renku_ns_client.delete_server(server_name)
return None
raise MissingResourceError(
f"Cannot find server {server_name} for user " f"{safe_username} in order to delete it."
)
return await self.renku_ns_client.delete_server(server_name)

async def patch_tokens(self, server_name: str, renku_tokens: RenkuTokens, gitlab_token: GitlabToken) -> None:
"""Patch the Renku and Gitlab access tokens used in a session."""
Expand Down
42 changes: 30 additions & 12 deletions components/renku_data_services/notebooks/api/classes/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import Any
from urllib.parse import urljoin, urlparse

from gitlab.v4.objects.projects import Project
from sanic.log import logger

from renku_data_services.base_models import AnonymousAPIUser, AuthenticatedAPIUser
Expand All @@ -22,7 +23,6 @@
from renku_data_services.notebooks.api.classes.cloud_storage import ICloudStorageRequest
from renku_data_services.notebooks.api.classes.k8s_client import JupyterServerV1Alpha1Kr8s, K8sClient
from renku_data_services.notebooks.api.classes.repository import GitProvider, Repository
from renku_data_services.notebooks.api.classes.user import NotebooksGitlabClient
from renku_data_services.notebooks.api.schemas.secrets import K8sUserSecrets
from renku_data_services.notebooks.api.schemas.server_options import ServerOptions
from renku_data_services.notebooks.config import _NotebooksConfig
Expand Down Expand Up @@ -155,7 +155,8 @@ async def start(self) -> JupyterServerV1Alpha1 | None:
f"or Docker resources are missing: {', '.join(errors)}"
)
)
manifest = JupyterServerV1Alpha1.model_validate(await self._get_session_manifest())
session_manifest = await self._get_session_manifest()
manifest = JupyterServerV1Alpha1.model_validate(session_manifest)
return await self._k8s_client.create_server(manifest, self.safe_username)

@staticmethod
Expand Down Expand Up @@ -321,7 +322,9 @@ def get_labels(self) -> dict[str, str | None]:
f"{prefix}commit-sha": None,
f"{prefix}gitlabProjectId": None,
f"{prefix}safe-username": self.safe_username,
f"{prefix}quota": self.server_options.priority_class,
f"{prefix}quota": self.server_options.priority_class
if self.server_options.priority_class is not None
else "",
f"{prefix}userId": self._user.id,
}
return labels
Expand Down Expand Up @@ -378,23 +381,23 @@ def __init__(
workspace_mount_path: Path,
work_dir: Path,
config: _NotebooksConfig,
gitlab_client: NotebooksGitlabClient,
gitlab_project: Project | None,
internal_gitlab_user: APIUser,
using_default_image: bool = False,
is_image_private: bool = False,
**_: dict,
):
self.gitlab_client = gitlab_client
self.gitlab_project = gitlab_project
self.internal_gitlab_user = internal_gitlab_user
gitlab_project_name = f"{namespace}/{project}"
gitlab_project = self.gitlab_client.get_renku_project(gitlab_project_name)
self.gitlab_project_name = f"{namespace}/{project}"
single_repository = (
Repository(
url=gitlab_project.http_url_to_repo,
dirname=gitlab_project.path,
url=self.gitlab_project.http_url_to_repo,
dirname=self.gitlab_project.path,
branch=branch,
commit_sha=commit_sha,
)
if gitlab_project is not None
if self.gitlab_project is not None
else None
)

Expand Down Expand Up @@ -422,8 +425,6 @@ def __init__(
self.commit_sha = commit_sha
self.notebook = notebook
self.git_host = urlparse(config.git.url).netloc
self.gitlab_project_name = gitlab_project_name
self.gitlab_project = gitlab_project
self.single_repository = single_repository

def _get_start_errors(self) -> list[str]:
Expand Down Expand Up @@ -509,6 +510,7 @@ def __init__(
internal_gitlab_user: APIUser,
using_default_image: bool = False,
is_image_private: bool = False,
**_: dict,
):
super().__init__(
user=user,
Expand All @@ -531,11 +533,27 @@ def __init__(
self.project_id = project_id
self.launcher_id = launcher_id

def get_labels(self) -> dict[str, str | None]:
"""Get the labels of the jupyter server."""
prefix = self._get_renku_annotation_prefix()
labels = super().get_labels()

# for validation purpose
for item in ["commit-sha", "gitlabProjectId"]:
labels[f"{prefix}{item}"] = ""

return labels

def get_annotations(self) -> dict[str, str | None]:
"""Get the annotations of the session."""
prefix = self._get_renku_annotation_prefix()
annotations = super().get_annotations()
annotations[f"{prefix}renkuVersion"] = "2.0"
annotations[f"{prefix}projectId"] = self.project_id
annotations[f"{prefix}launcherId"] = self.launcher_id

# for validation purpose
for item in ["commit-sha", "branch", "git-host", "namespace", "projectName", "gitlabProjectId", "repository"]:
annotations[f"{prefix}{item}"] = ""

return annotations
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def server_name(self) -> str:
@property
def hibernation(self) -> Optional[dict[str, Any]]:
"""Return hibernation annotation."""
hibernation = self.manifest.metadata.annotations.get("hibernation")
hibernation = self.manifest.metadata.annotations.get("renku.io/hibernation")
return json.loads(hibernation) if hibernation else None

@property
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,12 @@ def __post_init__(self) -> None:
self.storage = 1
elif self.storage is None and not self.gigabytes:
self.storage = 1_000_000_000
if not all([isinstance(i, NodeAffinity) for i in self.node_affinities]):
if not all([isinstance(affinity, NodeAffinity) for affinity in self.node_affinities]):
raise ProgrammingError(
message="Cannot create a ServerOptions dataclass with node "
"affinities that are not of type NodeAffinity"
)
if not all([isinstance(i, Toleration) for i in self.tolerations]):
if not all([isinstance(toleration, Toleration) for toleration in self.tolerations]):
raise ProgrammingError(
message="Cannot create a ServerOptions dataclass with tolerations that are not of type Toleration"
)
Expand Down
2 changes: 1 addition & 1 deletion components/renku_data_services/notebooks/apispec.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: api.spec.yaml
# timestamp: 2024-09-23T08:31:51+00:00
# timestamp: 2024-09-24T09:26:37+00:00

from __future__ import annotations

Expand Down
Loading

0 comments on commit 6359891

Please sign in to comment.