Skip to content

Commit

Permalink
add rag kuberay and jupyterhub image
Browse files Browse the repository at this point in the history
  • Loading branch information
chiayi committed Mar 26, 2024
1 parent 9c6b97b commit 8d3ff81
Show file tree
Hide file tree
Showing 14 changed files with 94 additions and 18 deletions.
13 changes: 7 additions & 6 deletions applications/rag/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -179,12 +179,12 @@ module "cloudsql" {
}

module "jupyterhub" {
source = "../../modules/jupyter"
providers = { helm = helm.rag, kubernetes = kubernetes.rag }
namespace = local.kubernetes_namespace
project_id = var.project_id
gcs_bucket = var.gcs_bucket
add_auth = var.jupyter_add_auth
source = "../../modules/jupyter"
providers = { helm = helm.rag, kubernetes = kubernetes.rag }
namespace = local.kubernetes_namespace
project_id = var.project_id
gcs_bucket = var.gcs_bucket
add_auth = var.jupyter_add_auth

autopilot_cluster = local.enable_autopilot
workload_identity_service_account = local.jupyter_service_account
Expand Down Expand Up @@ -228,6 +228,7 @@ module "kuberay-cluster" {
grafana_host = module.kuberay-monitoring.grafana_uri
disable_network_policy = var.disable_ray_cluster_network_policy
depends_on = [module.kuberay-operator]
use_custom_image = true

# IAP Auth parameters
add_auth = var.ray_dashboard_add_auth
Expand Down
10 changes: 5 additions & 5 deletions modules/jupyter/jupyter_config/config-selfauth-autopilot.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,8 @@ singleuser:
extraLabels:
${indent(4, chomp(jsonencode(additional_labels)))}
image:
name: jupyter/tensorflow-notebook
tag: python-3.10
name: ${notebook_image}
tag: ${notebook_image_tag}
startTimeout: 1000
extraAnnotations:
gke-gcsfuse/volumes: "true"
Expand Down Expand Up @@ -168,7 +168,7 @@ singleuser:
bucketName: gcsfuse-{username}
mountOptions: "uid=1000,gid=100,o=noexec,implicit-dirs,dir-mode=777,file-mode=777"
kubespawner_override:
image: jupyter/tensorflow-notebook:python-3.10
image: ${notebook_image}:${notebook_image_tag}
extra_resource_limits:
nvidia.com/gpu: "2"
extra_resource_guarantees:
Expand Down Expand Up @@ -199,7 +199,7 @@ singleuser:
bucketName: gcsfuse-{username}
mountOptions: "uid=1000,gid=100,o=noexec,implicit-dirs,dir-mode=777,file-mode=777"
kubespawner_override:
image: jupyter/tensorflow-notebook:python-3.10
image: ${notebook_image}:${notebook_image_tag}
extra_resource_limits:
nvidia.com/gpu: "2"
extra_resource_guarantees:
Expand Down Expand Up @@ -232,7 +232,7 @@ singleuser:
bucketName: gcsfuse-{username}
mountOptions: "uid=1000,gid=100,o=noexec,implicit-dirs,dir-mode=777,file-mode=777"
kubespawner_override:
image: jupyter/tensorflow-notebook:python-3.10
image: ${notebook_image}:${notebook_image_tag}
extra_resource_limits:
nvidia.com/gpu: "2"
extra_resource_guarantees:
Expand Down
8 changes: 4 additions & 4 deletions modules/jupyter/jupyter_config/config-selfauth.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,8 @@ singleuser:
extraLabels:
${indent(4, chomp(jsonencode(additional_labels)))}
image:
name: jupyter/tensorflow-notebook
tag: python-3.10
name: ${notebook_image}
tag: ${notebook_image_tag}
startTimeout: 1000
extraAnnotations:
gke-gcsfuse/volumes: "true"
Expand Down Expand Up @@ -209,7 +209,7 @@ singleuser:
bucketName: gcsfuse-{username}
mountOptions: "uid=1000,gid=100,o=noexec,implicit-dirs,dir-mode=777,file-mode=777"
kubespawner_override:
image: jupyter/tensorflow-notebook:python-3.10
image: ${notebook_image}:${notebook_image_tag}
extra_resource_limits:
# number of gpus needed on the node
nvidia.com/gpu: "2"
Expand Down Expand Up @@ -240,7 +240,7 @@ singleuser:
bucketName: gcsfuse-{username}
mountOptions: "uid=1000,gid=100,o=noexec,implicit-dirs,dir-mode=777,file-mode=777"
kubespawner_override:
image: jupyter/tensorflow-notebook:python-3.10
image: ${notebook_image}:${notebook_image_tag}
extra_resource_limits:
nvidia.com/gpu: "2"
# GPU requests
Expand Down
3 changes: 3 additions & 0 deletions modules/jupyter/jupyter_image/notebook_image/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Notebook image: the upstream TensorFlow notebook image with the Python
# packages pinned in requirements.txt installed on top.
FROM jupyter/tensorflow-notebook:python-3.10

# Place the pinned dependency list in the image's working directory.
COPY requirements.txt ./

# Install the pinned packages; --no-cache-dir keeps pip's download cache out
# of the resulting layer.
RUN pip install --no-cache-dir --requirement requirements.txt
22 changes: 22 additions & 0 deletions modules/jupyter/jupyter_image/notebook_image/cloudbuild.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# To build the image, run the following from the directory containing this file:
#   gcloud builds submit --config cloudbuild.yaml .
# Before running, replace both '<Artifact Registry repo>/<image name>'
# placeholders below with the full path of the image to publish.
steps:
# Pre-pull the base image named in the Dockerfile's FROM line.
- name: 'gcr.io/cloud-builders/docker'
  args: [ 'pull', 'docker.io/jupyter/tensorflow-notebook:python-3.10' ]
# Build and tag the image from the Dockerfile in the submitted directory.
- name: 'gcr.io/cloud-builders/docker'
  args: [ 'build', '-t', '<Artifact Registry repo>/<image name>', '.' ]
# Images listed here are pushed by Cloud Build after the steps succeed.
images:
- '<Artifact Registry repo>/<image name>'
5 changes: 5 additions & 0 deletions modules/jupyter/jupyter_image/notebook_image/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Python packages installed into the notebook image by the Dockerfile in this
# directory (`pip install -r requirements.txt`). Every entry is pinned to an
# exact version.
langchain==0.1.9
# NOTE(review): presumably kept in step with the Ray version of the cluster
# image (rayproject/ray:2.9.3-*) — confirm before bumping either side.
ray==2.9.3
datasets==2.18.0
sentence-transformers==2.5.1
kaggle==1.6.6
4 changes: 4 additions & 0 deletions modules/jupyter/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ resource "helm_release" "jupyterhub" {
gcs_bucket = var.gcs_bucket
k8s_service_account = var.workload_identity_service_account
ephemeral_storage = var.ephemeral_storage
notebook_image = "jupyter/tensorflow-notebook"
notebook_image_tag = "python-3.10"
})
] : [templatefile("${path.module}/jupyter_config/config-selfauth.yaml", {
password = var.add_auth ? "dummy" : random_password.generated_password[0].result
Expand All @@ -135,6 +137,8 @@ resource "helm_release" "jupyterhub" {
gcs_bucket = var.gcs_bucket
k8s_service_account = var.workload_identity_service_account
ephemeral_storage = var.ephemeral_storage
notebook_image = "jupyter/tensorflow-notebook"
notebook_image_tag = "python-3.10"
})
]
depends_on = [module.jupyterhub-workload-identity]
Expand Down
2 changes: 1 addition & 1 deletion modules/jupyter/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -134,4 +134,4 @@ variable "ephemeral_storage" {

variable "autopilot_cluster" {
type = bool
}
}
3 changes: 3 additions & 0 deletions modules/kuberay-cluster/kuberay_image/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Ray image: the upstream GPU build of Ray 2.9.3 (Python 3.10) with the Python
# packages listed in requirements.txt installed on top.
FROM rayproject/ray:2.9.3-py310-gpu

# Place the dependency list in the image's working directory.
COPY requirements.txt ./

# Install the listed packages; --no-cache-dir keeps pip's download cache out
# of the resulting layer.
RUN pip install --no-cache-dir --requirement requirements.txt
22 changes: 22 additions & 0 deletions modules/kuberay-cluster/kuberay_image/cloudbuild.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# To build the image, run the following from the directory containing this file:
#   gcloud builds submit --config cloudbuild.yaml .
# Before running, replace both '<Artifact Registry repo>/<image name>'
# placeholders below with the full path of the image to publish.
steps:
# Pre-pull the base image named in the Dockerfile's FROM line.
- name: 'gcr.io/cloud-builders/docker'
  args: [ 'pull', 'docker.io/rayproject/ray:2.9.3-py310-gpu' ]
# Build and tag the image from the Dockerfile in the submitted directory.
- name: 'gcr.io/cloud-builders/docker'
  args: [ 'build', '-t', '<Artifact Registry repo>/<image name>', '.' ]
# Images listed here are pushed by Cloud Build after the steps succeed.
images:
- '<Artifact Registry repo>/<image name>'
9 changes: 9 additions & 0 deletions modules/kuberay-cluster/kuberay_image/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Python packages installed into the Ray image by the Dockerfile in this
# directory (`pip install -r requirements.txt`).
langchain==0.1.9
transformers==4.38.1
sentence-transformers==2.5.1
# NOTE(review): the only entry without a version pin — consider pinning it so
# image builds are reproducible like the rest of this list.
pyarrow
datasets==2.18.0
torch==2.0.1
cloud-sql-python-connector[pg8000]==1.7.0
SQLAlchemy==2.0.7
huggingface_hub==0.21.3
3 changes: 2 additions & 1 deletion modules/kuberay-cluster/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ resource "helm_release" "ray-cluster" {
security_context = local.security_context
secret_name = var.db_secret_name
cloudsql_instance_connection_name = local.cloudsql_instance_connection_name
image_tag = var.enable_gpu ? "2.9.3-py310-gpu" : "2.9.3-py310"
image = var.use_custom_image ? "us-central1-docker.pkg.dev/ai-on-gke/rag-on-gke/ray-image" : "rayproject/ray"
image_tag = var.enable_gpu ? "2.9.3-py310-gpu" : var.use_custom_image ? "2.9.3-py310-gpu" : "2.9.3-py310"
resource_requests = var.enable_gpu ? {
"cpu" = "8"
"memory" = "32G"
Expand Down
2 changes: 1 addition & 1 deletion modules/kuberay-cluster/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

image:
# Replace this with your own image if needed.
repository: rayproject/ray
repository: ${image}
tag: ${image_tag}
pullPolicy: IfNotPresent

Expand Down
6 changes: 6 additions & 0 deletions modules/kuberay-cluster/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -239,4 +239,10 @@ variable "members_allowlist" {
type = string
default = ""
## keeping it string type to support single field input for marketplace UI.
}

# Toggle for running the Ray cluster from a custom image instead of the stock
# upstream one. Defaults to false so existing callers keep the stock image.
variable "use_custom_image" {
  type        = bool
  description = "If running RAG, set this variable to true to use a custom image with pre-installed libraries."
  default     = false
}

0 comments on commit 8d3ff81

Please sign in to comment.