From 8d3ff8177209cbfb05f1cebf83a03936034ce2d7 Mon Sep 17 00:00:00 2001 From: Aaron Liang Date: Mon, 25 Mar 2024 10:27:05 -0700 Subject: [PATCH] add rag kuberay and jupyterhub image --- applications/rag/main.tf | 13 ++++++----- .../config-selfauth-autopilot.yaml | 10 ++++----- .../jupyter_config/config-selfauth.yaml | 8 +++---- .../jupyter_image/notebook_image/Dockerfile | 3 +++ .../notebook_image/cloudbuild.yaml | 22 +++++++++++++++++++ .../notebook_image/requirements.txt | 5 +++++ modules/jupyter/main.tf | 4 ++++ modules/jupyter/variables.tf | 2 +- .../kuberay-cluster/kuberay_image/Dockerfile | 3 +++ .../kuberay_image/cloudbuild.yaml | 22 +++++++++++++++++++ .../kuberay_image/requirements.txt | 9 ++++++++ modules/kuberay-cluster/main.tf | 3 ++- modules/kuberay-cluster/values.yaml | 2 +- modules/kuberay-cluster/variables.tf | 6 +++++ 14 files changed, 94 insertions(+), 18 deletions(-) create mode 100644 modules/jupyter/jupyter_image/notebook_image/Dockerfile create mode 100644 modules/jupyter/jupyter_image/notebook_image/cloudbuild.yaml create mode 100644 modules/jupyter/jupyter_image/notebook_image/requirements.txt create mode 100644 modules/kuberay-cluster/kuberay_image/Dockerfile create mode 100644 modules/kuberay-cluster/kuberay_image/cloudbuild.yaml create mode 100644 modules/kuberay-cluster/kuberay_image/requirements.txt diff --git a/applications/rag/main.tf b/applications/rag/main.tf index 1c015b93f..b7f3d94ea 100644 --- a/applications/rag/main.tf +++ b/applications/rag/main.tf @@ -179,12 +179,12 @@ module "cloudsql" { } module "jupyterhub" { - source = "../../modules/jupyter" - providers = { helm = helm.rag, kubernetes = kubernetes.rag } - namespace = local.kubernetes_namespace - project_id = var.project_id - gcs_bucket = var.gcs_bucket - add_auth = var.jupyter_add_auth + source = "../../modules/jupyter" + providers = { helm = helm.rag, kubernetes = kubernetes.rag } + namespace = local.kubernetes_namespace + project_id = var.project_id + gcs_bucket = 
var.gcs_bucket + add_auth = var.jupyter_add_auth autopilot_cluster = local.enable_autopilot workload_identity_service_account = local.jupyter_service_account @@ -228,6 +228,7 @@ module "kuberay-cluster" { grafana_host = module.kuberay-monitoring.grafana_uri disable_network_policy = var.disable_ray_cluster_network_policy depends_on = [module.kuberay-operator] + use_custom_image = true # IAP Auth parameters add_auth = var.ray_dashboard_add_auth diff --git a/modules/jupyter/jupyter_config/config-selfauth-autopilot.yaml b/modules/jupyter/jupyter_config/config-selfauth-autopilot.yaml index 0e1c985e1..6e6b52731 100644 --- a/modules/jupyter/jupyter_config/config-selfauth-autopilot.yaml +++ b/modules/jupyter/jupyter_config/config-selfauth-autopilot.yaml @@ -91,8 +91,8 @@ singleuser: extraLabels: ${indent(4, chomp(jsonencode(additional_labels)))} image: - name: jupyter/tensorflow-notebook - tag: python-3.10 + name: ${notebook_image} + tag: ${notebook_image_tag} startTimeout: 1000 extraAnnotations: gke-gcsfuse/volumes: "true" @@ -168,7 +168,7 @@ singleuser: bucketName: gcsfuse-{username} mountOptions: "uid=1000,gid=100,o=noexec,implicit-dirs,dir-mode=777,file-mode=777" kubespawner_override: - image: jupyter/tensorflow-notebook:python-3.10 + image: ${notebook_image}:${notebook_image_tag} extra_resource_limits: nvidia.com/gpu: "2" extra_resource_guarantees: @@ -199,7 +199,7 @@ singleuser: bucketName: gcsfuse-{username} mountOptions: "uid=1000,gid=100,o=noexec,implicit-dirs,dir-mode=777,file-mode=777" kubespawner_override: - image: jupyter/tensorflow-notebook:python-3.10 + image: ${notebook_image}:${notebook_image_tag} extra_resource_limits: nvidia.com/gpu: "2" extra_resource_guarantees: @@ -232,7 +232,7 @@ singleuser: bucketName: gcsfuse-{username} mountOptions: "uid=1000,gid=100,o=noexec,implicit-dirs,dir-mode=777,file-mode=777" kubespawner_override: - image: jupyter/tensorflow-notebook:python-3.10 + image: ${notebook_image}:${notebook_image_tag} extra_resource_limits: 
nvidia.com/gpu: "2" extra_resource_guarantees: diff --git a/modules/jupyter/jupyter_config/config-selfauth.yaml b/modules/jupyter/jupyter_config/config-selfauth.yaml index f763e871f..e7c476bde 100644 --- a/modules/jupyter/jupyter_config/config-selfauth.yaml +++ b/modules/jupyter/jupyter_config/config-selfauth.yaml @@ -89,8 +89,8 @@ singleuser: extraLabels: ${indent(4, chomp(jsonencode(additional_labels)))} image: - name: jupyter/tensorflow-notebook - tag: python-3.10 + name: ${notebook_image} + tag: ${notebook_image_tag} startTimeout: 1000 extraAnnotations: gke-gcsfuse/volumes: "true" @@ -209,7 +209,7 @@ singleuser: bucketName: gcsfuse-{username} mountOptions: "uid=1000,gid=100,o=noexec,implicit-dirs,dir-mode=777,file-mode=777" kubespawner_override: - image: jupyter/tensorflow-notebook:python-3.10 + image: ${notebook_image}:${notebook_image_tag} extra_resource_limits: # number of gpus needed on the node nvidia.com/gpu: "2" @@ -240,7 +240,7 @@ singleuser: bucketName: gcsfuse-{username} mountOptions: "uid=1000,gid=100,o=noexec,implicit-dirs,dir-mode=777,file-mode=777" kubespawner_override: - image: jupyter/tensorflow-notebook:python-3.10 + image: ${notebook_image}:${notebook_image_tag} extra_resource_limits: nvidia.com/gpu: "2" # GPU requests diff --git a/modules/jupyter/jupyter_image/notebook_image/Dockerfile b/modules/jupyter/jupyter_image/notebook_image/Dockerfile new file mode 100644 index 000000000..302691aa5 --- /dev/null +++ b/modules/jupyter/jupyter_image/notebook_image/Dockerfile @@ -0,0 +1,3 @@ +FROM jupyter/tensorflow-notebook:python-3.10 +COPY requirements.txt ./requirements.txt +RUN pip install --no-cache-dir -r ./requirements.txt diff --git a/modules/jupyter/jupyter_image/notebook_image/cloudbuild.yaml b/modules/jupyter/jupyter_image/notebook_image/cloudbuild.yaml new file mode 100644 index 000000000..d1a4acfa2 --- /dev/null +++ b/modules/jupyter/jupyter_image/notebook_image/cloudbuild.yaml @@ -0,0 +1,22 @@ +# Copyright 2023 Google LLC +# +# Licensed 
under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# to build, run `gcloud builds submit --config cloudbuild.yaml .` in directory +steps: +- name: 'gcr.io/cloud-builders/docker' + args: [ 'pull', 'docker.io/jupyter/tensorflow-notebook:python-3.10' ] +- name: 'gcr.io/cloud-builders/docker' + args: [ 'build', '-t', '/', '.' ] +images: +- '/' \ No newline at end of file diff --git a/modules/jupyter/jupyter_image/notebook_image/requirements.txt b/modules/jupyter/jupyter_image/notebook_image/requirements.txt new file mode 100644 index 000000000..397916625 --- /dev/null +++ b/modules/jupyter/jupyter_image/notebook_image/requirements.txt @@ -0,0 +1,5 @@ +langchain==0.1.9 +ray==2.9.3 +datasets==2.18.0 +sentence-transformers==2.5.1 +kaggle==1.6.6 \ No newline at end of file diff --git a/modules/jupyter/main.tf b/modules/jupyter/main.tf index c2346e285..faa92d74f 100644 --- a/modules/jupyter/main.tf +++ b/modules/jupyter/main.tf @@ -121,6 +121,8 @@ resource "helm_release" "jupyterhub" { gcs_bucket = var.gcs_bucket k8s_service_account = var.workload_identity_service_account ephemeral_storage = var.ephemeral_storage + notebook_image = "jupyter/tensorflow-notebook" + notebook_image_tag = "python-3.10" }) ] : [templatefile("${path.module}/jupyter_config/config-selfauth.yaml", { password = var.add_auth ? 
"dummy" : random_password.generated_password[0].result @@ -135,6 +137,8 @@ resource "helm_release" "jupyterhub" { gcs_bucket = var.gcs_bucket k8s_service_account = var.workload_identity_service_account ephemeral_storage = var.ephemeral_storage + notebook_image = "jupyter/tensorflow-notebook" + notebook_image_tag = "python-3.10" }) ] depends_on = [module.jupyterhub-workload-identity] diff --git a/modules/jupyter/variables.tf b/modules/jupyter/variables.tf index 3afe2e235..d03c0262d 100644 --- a/modules/jupyter/variables.tf +++ b/modules/jupyter/variables.tf @@ -134,4 +134,4 @@ variable "ephemeral_storage" { variable "autopilot_cluster" { type = bool -} +} \ No newline at end of file diff --git a/modules/kuberay-cluster/kuberay_image/Dockerfile b/modules/kuberay-cluster/kuberay_image/Dockerfile new file mode 100644 index 000000000..2a450aed9 --- /dev/null +++ b/modules/kuberay-cluster/kuberay_image/Dockerfile @@ -0,0 +1,3 @@ +FROM rayproject/ray:2.9.3-py310-gpu +COPY requirements.txt ./requirements.txt +RUN pip install --no-cache-dir -r ./requirements.txt diff --git a/modules/kuberay-cluster/kuberay_image/cloudbuild.yaml b/modules/kuberay-cluster/kuberay_image/cloudbuild.yaml new file mode 100644 index 000000000..99718ed76 --- /dev/null +++ b/modules/kuberay-cluster/kuberay_image/cloudbuild.yaml @@ -0,0 +1,22 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# to build, run `gcloud builds submit --config cloudbuild.yaml .` in directory +steps: +- name: 'gcr.io/cloud-builders/docker' + args: [ 'pull', 'docker.io/rayproject/ray:2.9.3-py310-gpu' ] +- name: 'gcr.io/cloud-builders/docker' + args: [ 'build', '-t', '/', '.' ] +images: +- '/' \ No newline at end of file diff --git a/modules/kuberay-cluster/kuberay_image/requirements.txt b/modules/kuberay-cluster/kuberay_image/requirements.txt new file mode 100644 index 000000000..c5529eeb9 --- /dev/null +++ b/modules/kuberay-cluster/kuberay_image/requirements.txt @@ -0,0 +1,9 @@ +langchain==0.1.9 +transformers==4.38.1 +sentence-transformers==2.5.1 +pyarrow +datasets==2.18.0 +torch==2.0.1 +cloud-sql-python-connector[pg8000]==1.7.0 +SQLAlchemy==2.0.7 +huggingface_hub==0.21.3 \ No newline at end of file diff --git a/modules/kuberay-cluster/main.tf b/modules/kuberay-cluster/main.tf index de73c56af..50af5d391 100644 --- a/modules/kuberay-cluster/main.tf +++ b/modules/kuberay-cluster/main.tf @@ -44,7 +44,8 @@ resource "helm_release" "ray-cluster" { security_context = local.security_context secret_name = var.db_secret_name cloudsql_instance_connection_name = local.cloudsql_instance_connection_name - image_tag = var.enable_gpu ? "2.9.3-py310-gpu" : "2.9.3-py310" + image = var.use_custom_image ? "us-central1-docker.pkg.dev/ai-on-gke/rag-on-gke/ray-image" : "rayproject/ray" + image_tag = var.enable_gpu ? "2.9.3-py310-gpu" : var.use_custom_image ? "2.9.3-py310-gpu" : "2.9.3-py310" resource_requests = var.enable_gpu ? { "cpu" = "8" "memory" = "32G" diff --git a/modules/kuberay-cluster/values.yaml b/modules/kuberay-cluster/values.yaml index c4cc7d0d6..cec35e35f 100644 --- a/modules/kuberay-cluster/values.yaml +++ b/modules/kuberay-cluster/values.yaml @@ -21,7 +21,7 @@ image: # Replace this with your own image if needed. 
- repository: rayproject/ray + repository: ${image} tag: ${image_tag} pullPolicy: IfNotPresent diff --git a/modules/kuberay-cluster/variables.tf b/modules/kuberay-cluster/variables.tf index fc87ee509..e3d1243cc 100644 --- a/modules/kuberay-cluster/variables.tf +++ b/modules/kuberay-cluster/variables.tf @@ -239,4 +239,10 @@ variable "members_allowlist" { type = string default = "" ## keeping it string type to support single field input for marketplace UI. +} + +variable "use_custom_image" { + type = bool + description = "If running RAG, set this var to true to use a custom image with pre-installed libraries" + default = false } \ No newline at end of file