From 1efda706336d03343c607b8de9de3b71dfab2a99 Mon Sep 17 00:00:00 2001 From: Patrick Titzler Date: Tue, 30 Aug 2022 12:58:23 +0200 Subject: [PATCH 1/3] Support custom ssl certificates in component catalog connectors --- .../airflow_package_catalog_connector.py | 2 ++ ...flow_provider_package_catalog_connector.py | 2 ++ elyra/pipeline/catalog_connector.py | 2 ++ elyra/tests/util/test_url.py | 36 +++++++++++++++++++ elyra/util/url.py | 15 ++++++++ 5 files changed, 57 insertions(+) diff --git a/elyra/pipeline/airflow/package_catalog_connector/airflow_package_catalog_connector.py b/elyra/pipeline/airflow/package_catalog_connector/airflow_package_catalog_connector.py index e2761dc93..7587d22da 100644 --- a/elyra/pipeline/airflow/package_catalog_connector/airflow_package_catalog_connector.py +++ b/elyra/pipeline/airflow/package_catalog_connector/airflow_package_catalog_connector.py @@ -33,6 +33,7 @@ from elyra.pipeline.catalog_connector import ComponentCatalogConnector from elyra.pipeline.catalog_connector import EntryData from elyra.util.url import FileTransportAdapter +from elyra.util.url import get_verify_parm class AirflowPackageCatalogConnector(ComponentCatalogConnector): @@ -111,6 +112,7 @@ def get_catalog_entries(self, catalog_metadata: Dict[str, Any]) -> List[Dict[str timeout=AirflowPackageCatalogConnector.REQUEST_TIMEOUT, allow_redirects=True, auth=auth, + verify=get_verify_parm(), ) except Exception as ex: self.log.error( diff --git a/elyra/pipeline/airflow/provider_package_catalog_connector/airflow_provider_package_catalog_connector.py b/elyra/pipeline/airflow/provider_package_catalog_connector/airflow_provider_package_catalog_connector.py index 8eb4dec7f..40b485876 100644 --- a/elyra/pipeline/airflow/provider_package_catalog_connector/airflow_provider_package_catalog_connector.py +++ b/elyra/pipeline/airflow/provider_package_catalog_connector/airflow_provider_package_catalog_connector.py @@ -35,6 +35,7 @@ from elyra.pipeline.catalog_connector import 
ComponentCatalogConnector from elyra.pipeline.catalog_connector import EntryData from elyra.util.url import FileTransportAdapter +from elyra.util.url import get_verify_parm class AirflowProviderPackageCatalogConnector(ComponentCatalogConnector): @@ -116,6 +117,7 @@ def get_catalog_entries(self, catalog_metadata: Dict[str, Any]) -> List[Dict[str timeout=AirflowProviderPackageCatalogConnector.REQUEST_TIMEOUT, allow_redirects=True, auth=auth, + verify=get_verify_parm(), ) except Exception as ex: self.log.error( diff --git a/elyra/pipeline/catalog_connector.py b/elyra/pipeline/catalog_connector.py index 19709db86..21dce3270 100644 --- a/elyra/pipeline/catalog_connector.py +++ b/elyra/pipeline/catalog_connector.py @@ -42,6 +42,7 @@ from elyra.pipeline.component import ComponentParameter from elyra.pipeline.runtime_type import RuntimeProcessorType from elyra.util.url import FileTransportAdapter +from elyra.util.url import get_verify_parm class EntryData(object): @@ -664,6 +665,7 @@ def get_entry_data( timeout=UrlComponentCatalogConnector.REQUEST_TIMEOUT, allow_redirects=True, auth=auth, + verify=get_verify_parm(), ) except Exception as e: self.log.error( diff --git a/elyra/tests/util/test_url.py b/elyra/tests/util/test_url.py index e93278452..1c7a3b9aa 100644 --- a/elyra/tests/util/test_url.py +++ b/elyra/tests/util/test_url.py @@ -13,11 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +import os from pathlib import Path +import pytest from requests import session from elyra.util.url import FileTransportAdapter +from elyra.util.url import get_verify_parm def test_valid_file_url(): @@ -78,3 +81,36 @@ def test_invalid_file_url(): res = unsupported_method(url) assert res.status_code == 405, url assert res.reason == "Method not allowed" + + +@pytest.fixture +def setup_env_vars(): + # runs before test (save the value of environment variable + # CA_CERT_BUNDLE_PATH to avoid any contamination by tests + # that modify it) + current_ca_cert_bundle_path_value = os.environ.get("CA_CERT_BUNDLE_PATH") + yield + # runs after test (restore the value of environment variable + # CA_CERT_BUNDLE_PATH, if it was defined) + if current_ca_cert_bundle_path_value is not None: + os.environ["CA_CERT_BUNDLE_PATH"] = current_ca_cert_bundle_path_value + + +@pytest.mark.usefixtures("setup_env_vars") +def test_get_verify_parm(): + """ + Verify that method get_verify_parm works as expected: + - env variable CA_CERT_BUNDLE_PATH is defined + - env variable CA_CERT_BUNDLE_PATH is not defined, but a default is specified + - env variable CA_CERT_BUNDLE_PATH is not defined and no default is specified + """ + test_ca_cert_bundle_path_value = "/path/to/cert/bundle" + os.environ["CA_CERT_BUNDLE_PATH"] = test_ca_cert_bundle_path_value + assert get_verify_parm() == test_ca_cert_bundle_path_value + del os.environ["CA_CERT_BUNDLE_PATH"] + # set explicit default + assert get_verify_parm(False) is False + # set explicit default + assert get_verify_parm(True) is True + # use implicit default + assert get_verify_parm() is True diff --git a/elyra/util/url.py b/elyra/util/url.py index 4039ba29f..defd9475a 100644 --- a/elyra/util/url.py +++ b/elyra/util/url.py @@ -13,7 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +import os from pathlib import Path +from typing import Union from urllib.request import url2pathname from requests import Response @@ -63,3 +65,16 @@ def send(self, req, **kwargs): def close(self): pass + + +def get_verify_parm(default: bool = True) -> Union[bool, str]: + """ + Returns a value for the 'verify' parameter of the requests.request + method (https://requests.readthedocs.io/en/latest/api/). The value + is determined as follows: if environment variable CA_CERT_BUNDLE_PATH + is defined, use its value, otherwise return the default value. + """ + if os.environ.get("CA_CERT_BUNDLE_PATH"): + return os.environ.get("CA_CERT_BUNDLE_PATH") + + return default From 52a5311b07c2eb009c79100b836e6f75c91ae6d9 Mon Sep 17 00:00:00 2001 From: Patrick Titzler Date: Wed, 31 Aug 2022 08:54:57 +0200 Subject: [PATCH 2/3] Incorporate review feedback and improve implementation and tests --- elyra/tests/util/test_url.py | 41 ++++++++++++++++++++++++------------ elyra/util/url.py | 6 +++--- 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/elyra/tests/util/test_url.py b/elyra/tests/util/test_url.py index 1c7a3b9aa..d1855a60e 100644 --- a/elyra/tests/util/test_url.py +++ b/elyra/tests/util/test_url.py @@ -86,31 +86,46 @@ def test_invalid_file_url(): @pytest.fixture def setup_env_vars(): # runs before test (save the value of environment variable - # CA_CERT_BUNDLE_PATH to avoid any contamination by tests + # TRUSTED_CA_BUNDLE_PATH to avoid any contamination by tests # that modify it) - current_ca_cert_bundle_path_value = os.environ.get("CA_CERT_BUNDLE_PATH") + current_TRUSTED_CA_BUNDLE_PATH_value = os.environ.get("TRUSTED_CA_BUNDLE_PATH") yield # runs after test (restore the value of environment variable - # CA_CERT_BUNDLE_PATH, if it was defined) - if current_ca_cert_bundle_path_value is not None: - os.environ["CA_CERT_BUNDLE_PATH"] = current_ca_cert_bundle_path_value + # TRUSTED_CA_BUNDLE_PATH, if it was defined) + if current_TRUSTED_CA_BUNDLE_PATH_value is not 
None: + os.environ["TRUSTED_CA_BUNDLE_PATH"] = current_TRUSTED_CA_BUNDLE_PATH_value @pytest.mark.usefixtures("setup_env_vars") -def test_get_verify_parm(): +def test_valid_get_verify_parm(): """ Verify that method get_verify_parm works as expected: - - env variable CA_CERT_BUNDLE_PATH is defined - - env variable CA_CERT_BUNDLE_PATH is not defined, but a default is specified - - env variable CA_CERT_BUNDLE_PATH is not defined and no default is specified + - env variable TRUSTED_CA_BUNDLE_PATH is defined + - env variable TRUSTED_CA_BUNDLE_PATH is not defined, but a default is specified + - env variable TRUSTED_CA_BUNDLE_PATH is not defined and no default is specified """ - test_ca_cert_bundle_path_value = "/path/to/cert/bundle" - os.environ["CA_CERT_BUNDLE_PATH"] = test_ca_cert_bundle_path_value - assert get_verify_parm() == test_ca_cert_bundle_path_value - del os.environ["CA_CERT_BUNDLE_PATH"] + test_TRUSTED_CA_BUNDLE_PATH_value = "/path/to/cert/bundle" + os.environ["TRUSTED_CA_BUNDLE_PATH"] = test_TRUSTED_CA_BUNDLE_PATH_value + assert get_verify_parm() == test_TRUSTED_CA_BUNDLE_PATH_value + del os.environ["TRUSTED_CA_BUNDLE_PATH"] # set explicit default assert get_verify_parm(False) is False # set explicit default assert get_verify_parm(True) is True # use implicit default assert get_verify_parm() is True + + +@pytest.mark.usefixtures("setup_env_vars") +def test_invalid_get_verify_parm(): + """ + Verify that method get_verify_parm works as expected if environment variable + TRUSTED_CA_BUNDLE_PATH contains an invalid value + """ + test_TRUSTED_CA_BUNDLE_PATH_value = "" + os.environ["TRUSTED_CA_BUNDLE_PATH"] = test_TRUSTED_CA_BUNDLE_PATH_value + assert get_verify_parm() is True + + test_TRUSTED_CA_BUNDLE_PATH_value = " " + os.environ["TRUSTED_CA_BUNDLE_PATH"] = test_TRUSTED_CA_BUNDLE_PATH_value + assert get_verify_parm() is True diff --git a/elyra/util/url.py b/elyra/util/url.py index defd9475a..32f50cce0 100644 --- a/elyra/util/url.py +++ b/elyra/util/url.py @@ -71,10 +71,10 
@@ def get_verify_parm(default: bool = True) -> Union[bool, str]: """ Returns a value for the 'verify' parameter of the requests.request method (https://requests.readthedocs.io/en/latest/api/). The value - is determined as follows: if environment variable CA_CERT_BUNDLE_PATH + is determined as follows: if environment variable TRUSTED_CA_BUNDLE_PATH is defined, use its value, otherwise return the default value. """ - if os.environ.get("CA_CERT_BUNDLE_PATH"): - return os.environ.get("CA_CERT_BUNDLE_PATH") + if len(os.environ.get("TRUSTED_CA_BUNDLE_PATH", "").strip()) > 0: + return os.environ.get("TRUSTED_CA_BUNDLE_PATH") return default From 378f6d5caa0d493919c042e01295b840e8335cac Mon Sep 17 00:00:00 2001 From: Patrick Titzler Date: Wed, 31 Aug 2022 09:49:44 +0200 Subject: [PATCH 3/3] Update documentation --- docs/source/user_guide/pipeline-components.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/source/user_guide/pipeline-components.md b/docs/source/user_guide/pipeline-components.md index 99f1c023b..12bca7cff 100644 --- a/docs/source/user_guide/pipeline-components.md +++ b/docs/source/user_guide/pipeline-components.md @@ -397,6 +397,7 @@ The URL component catalog connector provides access to components that are store - You can specify one or more URL resources. - The specified URLs must be retrievable using an HTTP `GET` request. `http`, `https`, and `file` [URI schemes](https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml) are supported. - If the resources are secured, provide credentials, such as a user id and password or API key. +- In secured environments where SSL server authenticity can only be validated using certificates based on private public key infrastructure (PKI) with root and optionally intermediate certificate authorities (CAs) that are not publicly trusted, you must define environment variable `TRUSTED_CA_BUNDLE_PATH` in the environment where JupyterLab/Elyra is running. 
The variable value must identify an existing [Privacy-Enhanced Mail (PEM) file](https://en.wikipedia.org/wiki/Privacy-Enhanced_Mail). Examples (GUI): - HTTPS URL @@ -428,6 +429,7 @@ Examples (CLI): The [Apache Airflow package catalog connector](https://github.com/elyra-ai/elyra/tree/main/elyra/pipeline/airflow/package_catalog_connector) provides access to operators that are stored in Apache Airflow [built distributions](https://packaging.python.org/en/latest/glossary/#term-built-distribution): - Only the [wheel distribution format](https://packaging.python.org/en/latest/glossary/#term-Wheel) is supported. - The specified URL must be retrievable using an HTTP `GET` request. `http`, `https`, and `file` [URI schemes](https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml) are supported. +- In secured environments where SSL server authenticity can only be validated using certificates based on private public key infrastructure (PKI) with root and optionally intermediate certificate authorities (CAs) that are not publicly trusted, you must define environment variable `TRUSTED_CA_BUNDLE_PATH` in the environment where JupyterLab/Elyra is running. The variable value must identify an existing [Privacy-Enhanced Mail (PEM) file](https://en.wikipedia.org/wiki/Privacy-Enhanced_Mail). Examples: - [Apache Airflow](https://pypi.org/project/apache-airflow/) (v1.10.15): @@ -443,6 +445,7 @@ Examples: The [Apache Airflow provider package catalog connector](https://github.com/elyra-ai/elyra/tree/main/elyra/pipeline/airflow/provider_package_catalog_connector) provides access to operators that are stored in [Apache Airflow provider packages](https://airflow.apache.org/docs/apache-airflow-providers/): - Only the [wheel distribution format](https://packaging.python.org/en/latest/glossary/#term-Wheel) is supported. - The specified URL must be retrievable using an HTTP `GET` request. 
`http`, `https`, and `file` [URI schemes](https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml) are supported. +- In secured environments where SSL server authenticity can only be validated using certificates based on private public key infrastructure (PKI) with root and optionally intermediate certificate authorities (CAs) that are not publicly trusted, you must define environment variable `TRUSTED_CA_BUNDLE_PATH` in the environment where JupyterLab/Elyra is running. The variable value must identify an existing [Privacy-Enhanced Mail (PEM) file](https://en.wikipedia.org/wiki/Privacy-Enhanced_Mail). Examples: - [apache-airflow-providers-http](https://airflow.apache.org/docs/apache-airflow-providers-http/stable/index.html) (v2.0.2):