
overlay and readme changes custom CA and use env as verify in Elyra #669

Merged
26 changes: 26 additions & 0 deletions jupyterhub/README.md
@@ -21,6 +21,12 @@ JupyterHub component comes with 2 parameters exposed via KFDef.

HTTP endpoint exposed by your S3 object storage solution, which will be made available to JH users in the `S3_ENDPOINT_URL` env variable.

#### trusted_ca_bundle_path

Full path to a ca-bundle file containing non-publicly-trusted CAs. Used to fill the `TRUSTED_CA_BUNDLE_PATH` env variable, which contains the full path (including filename) to one or more trusted CAs in [Privacy-Enhanced Mail (PEM)](https://en.wikipedia.org/wiki/Privacy-Enhanced_Mail) format. Intended for air-gapped environments where SSL server authenticity can only be validated with certificates based on a private public key infrastructure (PKI) whose root and, optionally, intermediate certificate authorities (CAs) are not publicly trusted. Elyra, for example, uses it to establish the SSL chain of trust when downloading package catalog files from an air-gapped repository such as Artifactory or Nexus secured with a private PKI. See, for example, on-premise OpenShift installations, where the additionalTrustBundle setting can be injected into ConfigMaps in a namespace for use by containers:
https://docs.openshift.com/container-platform/4.8/networking/configuring-a-custom-pki.html
This requires `trusted-ca-bundle-path` **overlay** to be enabled as well to work.
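
As an illustration of how a client inside the notebook can consume this variable, the following sketch passes it as the `verify` argument of an HTTP request. The URL and the use of the `requests` library are examples only, not Elyra's exact implementation:

```python
import os

import requests  # illustrative HTTP client; Elyra's own download code may differ

# Fall back to the system trust store if the parameter/overlay is not configured.
ca_bundle = os.environ.get("TRUSTED_CA_BUNDLE_PATH") or True

# verify= accepts either a boolean or a path to a PEM bundle (roots plus intermediates).
response = requests.get(
    "https://artifactory.example.internal/elyra/airflow-provider-packages.json",  # hypothetical air-gapped catalog URL
    verify=ca_bundle,
)
response.raise_for_status()
```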

#### storage_class

Name of the storage class to be used for PVCs created by JupyterHub component. This requires `storage-class` **overlay** to be enabled as well to work.
@@ -49,6 +55,22 @@ A Secret containing configuration values like JupyterHub DB password or COOKIE_S
      path: jupyterhub/jupyterhub
  name: jupyterhub
```
#### trusted_ca_bundle_path in conjunction with S3 storage

Example combining S3 storage for Elyra pipeline metadata with the HTTP-location-based built-in catalog connectors (URL catalog, Apache Airflow package connector, Apache Airflow provider package connector). With this overlay, URL downloads can pass an optional `verify` parameter that points to the private PKI CA bundle file named in the `TRUSTED_CA_BUNDLE_PATH` env variable:

```yaml
- kustomizeConfig:
    overlays:
      - trusted-ca-bundle-path
    parameters:
      - name: trusted_ca_bundle_path
        value: "/opt/app-root/etc/jupyter/custom/cacerts/trustedcas.pem"
      - name: s3_endpoint_url
        value: "http://ceph-nano-0"
    repoRef:
      name: manifests
      path: jupyterhub/jupyterhub
  name: jupyterhub
```

### Overlays

@@ -62,6 +84,10 @@ Contains build manifests for JupyterHub images.

Customizes JupyterHub to use a specific `StorageClass` for PVCs, see `storage_class` parameter.

#### trusted-ca-bundle-path

Customizes JupyterHub to set the `TRUSTED_CA_BUNDLE_PATH` env variable in spawned notebook images, see `trusted_ca_bundle_path` parameter. If the parameter is defined, the env variable is set via the `jupyter-singleuser-profiles` ConfigMap.

## Notebook Images

Contains manifests for Jupyter notebook images compatible with JupyterHub on OpenShift.
9 changes: 8 additions & 1 deletion jupyterhub/jupyterhub/base/kustomization.yaml
@@ -38,7 +38,7 @@ commonLabels:

configMapGenerator:
- name: parameters
  envs:
  - params.env
generatorOptions:
  disableNameSuffixHash: true
@@ -64,6 +64,13 @@ vars:
    apiVersion: v1
  fieldref:
    fieldpath: data.s3_endpoint_url
- name: trusted_ca_bundle_path
  objref:
    kind: ConfigMap
    name: parameters
    apiVersion: v1
  fieldref:
    fieldpath: data.trusted_ca_bundle_path
- name: jupyterhub_groups_config
  objref:
    kind: ConfigMap
1 change: 1 addition & 0 deletions jupyterhub/jupyterhub/base/params.env
@@ -1,5 +1,6 @@
storage_class=
s3_endpoint_url=
trusted_ca_bundle_path=
jupyterhub_groups_config=jupyterhub-default-groups-config
jupyterhub_secret=jupyterhub
notebook_destination=
@@ -0,0 +1,59 @@
- op: replace
  path: /data/jupyterhub_config.py
  value: |-
    if "TRUSTED_CA_BUNDLE_PATH" in os.environ:
      TRUSTED_CA_BUNDLE_PATH = os.environ.get("TRUSTED_CA_BUNDLE_PATH")
@shalberd (Contributor, Author) commented on Aug 31, 2022:

I have tested this without the env variable TRUSTED_CA_BUNDLE_PATH, but with a hardcoded path in line 43. The result was the correctly mounted PEM file from the new configmap trusted-cabundle, into which OpenShift (with the central additional-trust-CA setting) auto-injects a ca-bundle.crt key that is referenced and used here. This worked very nicely, see the discussion at opendatahub-io-contrib/jupyterhub-odh#137 (comment).

This new jupyterhub_config.py section was added in the output of `kustomize build` as intended in my local MacBook tests.

      # Split the configured path into the mount directory (basepath) and the expected file name.
      basepath, filename = os.path.split(TRUSTED_CA_BUNDLE_PATH)
      spawner = c.OpenShiftSpawner

      def custom_apply_pod_profile(spawner, pod):
        """
        Example function for overriding JupyterHub server functionality to modify the user notebook Pod spec

        Should only be called via a function referenced by spawner.modify_pod_hook
        See https://jupyterhub-kubespawner.readthedocs.io/en/latest/spawner.html
        """
        # Apply profile from singleuser-profiles. REQUIRED since we want to extend the current pod spec
        # configs supported by the JH server
        apply_pod_profile(spawner, pod)

        print("custom apply pod profile ...")
        # make pod volume definition from optional CA configmap trusted-cabundle.
        trustedCAVolume = client.V1Volume(
          name="trusted-cas-volume",
          config_map=client.V1ConfigMapVolumeSource(
            name="trusted-cabundle",
            optional=True,
            items=[client.V1KeyToPath(key="ca-bundle.crt", path="trustedcas.pem")],
          )
        )

        print("existing container volume mounts ")
        print(str(pod.spec.containers[0].volume_mounts)[1:-1])
        newVolumesList = [trustedCAVolume]

        if pod.spec.volumes is None:
          print("pod def has no volumes yet")
          pod.spec.volumes = newVolumesList
        else:
          print("extending pod def volumes with configmap volume")
          pod.spec.volumes.extend(newVolumesList)

        print("extending container volume mounts for ca cert configmap")
        # Mount the bundle into the directory part of TRUSTED_CA_BUNDLE_PATH; the key-to-path mapping above names the file trustedcas.pem.
        newVolumeMount = client.V1VolumeMount(mount_path=basepath, name="trusted-cas-volume", read_only=True)
        newVolumeMountList = [newVolumeMount]

        # Inject extraVolumeMount
        if pod.spec.containers[0].volume_mounts is None:
          print("notebook container def has no volumes mounted yet")
          pod.spec.containers[0].volume_mounts = newVolumeMountList
        else:
          print("extending existing container def volume mounts section with configmap volume mount reference")
          pod.spec.containers[0].volume_mounts.extend(newVolumeMountList)

        print("new container volume mounts ")
        print(str(pod.spec.containers[0].volume_mounts)[1:-1])

        return pod

      spawner.modify_pod_hook = custom_apply_pod_profile
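
With the example KfDef value above (`/opt/app-root/etc/jupyter/custom/cacerts/trustedcas.pem`), the hook effectively adds the following volume and mount to each spawned notebook pod. This fragment is illustrative only, not a file in the overlay:

```yaml
# Resulting pod spec fragment (illustrative): the bundle is mounted into the
# directory part of TRUSTED_CA_BUNDLE_PATH, with the key renamed to trustedcas.pem.
volumes:
  - name: trusted-cas-volume
    configMap:
      name: trusted-cabundle
      optional: true
      items:
        - key: ca-bundle.crt
          path: trustedcas.pem
containers:
  - volumeMounts:
      - name: trusted-cas-volume
        mountPath: /opt/app-root/etc/jupyter/custom/cacerts
        readOnly: true
```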
@@ -0,0 +1,71 @@
apiVersion: v1
kind: ConfigMap
metadata:
  name: jupyter-singleuser-profiles
data:
  jupyterhub-singleuser-profiles.yaml: |
    profiles:
    - name: globals
      env:
      - name: S3_ENDPOINT_URL
        value: $(s3_endpoint_url)
      - name: TRUSTED_CA_BUNDLE_PATH
@shalberd (Contributor, Author) commented:

This is the new ENV for this PR.

@shalberd (Contributor, Author) commented on Sep 14, 2022:

As long as the parameter is given/filled out, either in params.env or in the KfDef kustomizeConfig, the resulting configmap has the new env plus value correctly in it. Tested locally with `kustomize build odh-manifests/jupyterhub/jupyterhub/overlays/trusted-ca-bundle-path`.

        value: $(trusted_ca_bundle_path)
      resources:
        requests:
          memory: 1Gi
          cpu: 500m
        limits:
          memory: 2Gi
          cpu: 1

    - name: Spark Notebook
      images:
      - 's2i-spark-minimal-notebook:3.6'
      - 's2i-spark-minimal-notebook:py36-spark2.4.5-hadoop2.7.3'
      env:
      - name: PYSPARK_SUBMIT_ARGS
        value: '--conf spark.cores.max=2 --conf spark.executor.instances=2 --conf spark.executor.memory=1G --conf spark.executor.cores=1 --conf spark.driver.memory=2G --packages com.amazonaws:aws-java-sdk:1.7.4 org.apache.hadoop:hadoop-aws:2.7.3 io.xskipper:xskipper-core_2.11:1.1.1 pyspark-shell'
@shalberd (Contributor, Author) commented on Sep 14, 2022:

I noticed that in the original file there was a mix of commas and spaces as separators in the --packages argument of PYSPARK_SUBMIT_ARGS. I used a consistent notation here, with spaces between the listed packages.

In any case, there should not be a mix of comma separators and spaces.

      - name: PYSPARK_DRIVER_PYTHON
        value: 'jupyter'
      - name: PYSPARK_DRIVER_PYTHON_OPTS
        value: 'notebook'
      - name: SPARK_HOME
        value: '/opt/app-root/lib/python3.6/site-packages/pyspark/'
      - name: PYTHONPATH
        value: '$PYTHONPATH:/opt/app-root/lib/python3.6/site-packages/:/opt/app-root/lib/python3.6/site-packages/pyspark/python/:/opt/app-root/lib/python3.6/site-packages/pyspark/python/lib/py4j-0.8.2.1-src.zip'
      services:
        spark:
          resources:
          - name: spark-cluster-template
            path: notebookPodServiceTemplate
          - name: spark-cluster-template
            path: sparkClusterTemplate
          configuration:
            worker_nodes: '2'
            master_nodes: '1'
            master_memory_limit: '2Gi'
            master_cpu_limit: '1'
            master_memory_request: '2Gi'
            master_cpu_request: '1'
            worker_memory_limit: '2Gi'
            worker_cpu_limit: '1'
            worker_memory_request: '2Gi'
            worker_cpu_request: '1'
            spark_image: 'quay.io/radanalyticsio/openshift-spark-py36:2.4.5-2'
          return:
            SPARK_CLUSTER: 'metadata.name'
    gpuTypes:
    - type: gpu_one
      node_tolerations:
      - key: provider
        operator: Equal
        value: gpu-node
        effect: NoSchedule
    # This is the default NoSchedule toleration that is recognized by the NVIDIA gpu operator
    - type: nvidia_gpu
      node_tolerations:
      - key: "nvidia.com/gpu"
        operator: Exists
        effect: NoSchedule

@@ -0,0 +1,21 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

commonLabels:
  app.kubernetes.io/part-of: jupyterhub

bases:
- ../../base

resources:
- trusted-cabundle-configmap.yaml
@shalberd (Contributor, Author) commented on Sep 1, 2022:

This is the new configmap containing the PEM CAs (roots and intermediates) in one all-in-one file, auto-injected by OpenShift under the key ca-bundle.crt: https://docs.openshift.com/container-platform/4.8/networking/configuring-a-custom-pki.html#certificate-injection-using-operators_configuring-a-custom-pki

This could potentially also be used as the golang env SSL_CERT_FILE in the operator, for downloading the manifests tar.gz from enterprise-internal locations secured with SSL certificates based on non-publicly-trusted CAs, since this file contains all system-level RHCOS publicly trusted CAs in addition to the additionally specified enterprise private PKI CAs. In Red Hat's words: "Once your custom CA certificate is added to the cluster via ConfigMap, the Cluster Network Operator merges the user-provided and system CA certificates into a single bundle and injects the merged bundle". That would solve the cert verify problem at the golang/operator level as well. https://go.dev/src/crypto/x509/root_unix.go

Back to the issue at hand: the configmap has a bunch of PEM-format certificates all in one file. The order is always root CAs first, then intermediate CAs.

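For illustration, the ConfigMap roughly takes the following shape once the Cluster Network Operator has injected the merged bundle; the actual certificate data is cluster-specific:

```yaml
# Illustrative shape only; the certificate content is cluster-specific.
apiVersion: v1
kind: ConfigMap
metadata:
  name: trusted-cabundle
  labels:
    config.openshift.io/inject-trusted-cabundle: 'true'
data:
  ca-bundle.crt: |
    -----BEGIN CERTIFICATE-----
    ...private root and intermediate CAs, merged with the system CAs...
    -----END CERTIFICATE-----
```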


patchesJson6902:
@shalberd (Contributor, Author) commented on Sep 1, 2022:

This notation should make sure the "replace" op works on the data/jupyterhub_config.py section of jupyterhub-configmap.yaml in base. In the past I have only worked with YAML-based patchesStrategicMerge and ArgoCD, but this approach seems fine too, as long as the JSON structure is well-defined.

Worked fine in the kustomize build output for jupyterhub-configmap.yaml / the jupyterhub-cfg ConfigMap.

- path: jupyterhub-configmap.yaml
  target:
    version: v1
    kind: ConfigMap
    name: jupyterhub-cfg

patchesStrategicMerge:
- jupyterhub-singleuser-profiles-configmap.yaml
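
For comparison with the patchesJson6902 approach discussed in the comment above, the same override could also be expressed as a strategic-merge patch (the mechanism already used here for the singleuser-profiles configmap). This is only an illustrative sketch, not part of the overlay:

```yaml
# Hypothetical patchesStrategicMerge alternative, not used by this PR:
# replaces the same data key of the jupyterhub-cfg ConfigMap.
apiVersion: v1
kind: ConfigMap
metadata:
  name: jupyterhub-cfg
data:
  jupyterhub_config.py: |-
    # ...replacement content as in the patch above...
```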
@@ -0,0 +1,6 @@
kind: ConfigMap
apiVersion: v1
metadata:
  name: trusted-cabundle
  labels:
    config.openshift.io/inject-trusted-cabundle: 'true'