diff --git a/docs/source/user_guide/pipeline-components.md b/docs/source/user_guide/pipeline-components.md index 99f1c023b..12bca7cff 100644 --- a/docs/source/user_guide/pipeline-components.md +++ b/docs/source/user_guide/pipeline-components.md @@ -397,6 +397,7 @@ The URL component catalog connector provides access to components that are store - You can specify one or more URL resources. - The specified URLs must be retrievable using an HTTP `GET` request. `http`, `https`, and `file` [URI schemes](https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml) are supported. - If the resources are secured, provide credentials, such as a user id and password or API key. +- In secured environments where SSL server authenticity can only be validated using certificates based on private public key infrastructure (PKI) with root and optionally intermediate certificate authorities (CAs) that are not publicly trusted, you must define environment variable `TRUSTED_CA_BUNDLE_PATH` in the environment where JupyterLab/Elyra is running. The variable value must identify an existing [Privacy-Enhanced Mail (PEM) file](https://en.wikipedia.org/wiki/Privacy-Enhanced_Mail). Examples (GUI): - HTTPS URL @@ -428,6 +429,7 @@ Examples (CLI): The [Apache Airflow package catalog connector](https://github.com/elyra-ai/elyra/tree/main/elyra/pipeline/airflow/package_catalog_connector) provides access to operators that are stored in Apache Airflow [built distributions](https://packaging.python.org/en/latest/glossary/#term-built-distribution): - Only the [wheel distribution format](https://packaging.python.org/en/latest/glossary/#term-Wheel) is supported. - The specified URL must be retrievable using an HTTP `GET` request. `http`, `https`, and `file` [URI schemes](https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml) are supported. 
+- In secured environments where SSL server authenticity can only be validated using certificates based on private public key infrastructure (PKI) with root and optionally intermediate certificate authorities (CAs) that are not publicly trusted, you must define environment variable `TRUSTED_CA_BUNDLE_PATH` in the environment where JupyterLab/Elyra is running. The variable value must identify an existing [Privacy-Enhanced Mail (PEM) file](https://en.wikipedia.org/wiki/Privacy-Enhanced_Mail). Examples: - [Apache Airflow](https://pypi.org/project/apache-airflow/) (v1.10.15): @@ -443,6 +445,7 @@ Examples: The [Apache Airflow provider package catalog connector](https://github.com/elyra-ai/elyra/tree/main/elyra/pipeline/airflow/provider_package_catalog_connector) provides access to operators that are stored in [Apache Airflow provider packages](https://airflow.apache.org/docs/apache-airflow-providers/): - Only the [wheel distribution format](https://packaging.python.org/en/latest/glossary/#term-Wheel) is supported. - The specified URL must be retrievable using an HTTP `GET` request. `http`, `https`, and `file` [URI schemes](https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml) are supported. +- In secured environments where SSL server authenticity can only be validated using certificates based on private public key infrastructure (PKI) with root and optionally intermediate certificate authorities (CAs) that are not publicly trusted, you must define environment variable `TRUSTED_CA_BUNDLE_PATH` in the environment where JupyterLab/Elyra is running. The variable value must identify an existing [Privacy-Enhanced Mail (PEM) file](https://en.wikipedia.org/wiki/Privacy-Enhanced_Mail). 
Examples: - [apache-airflow-providers-http](https://airflow.apache.org/docs/apache-airflow-providers-http/stable/index.html) (v2.0.2): diff --git a/elyra/pipeline/airflow/package_catalog_connector/airflow_package_catalog_connector.py b/elyra/pipeline/airflow/package_catalog_connector/airflow_package_catalog_connector.py index e2761dc93..7587d22da 100644 --- a/elyra/pipeline/airflow/package_catalog_connector/airflow_package_catalog_connector.py +++ b/elyra/pipeline/airflow/package_catalog_connector/airflow_package_catalog_connector.py @@ -33,6 +33,7 @@ from elyra.pipeline.catalog_connector import ComponentCatalogConnector from elyra.pipeline.catalog_connector import EntryData from elyra.util.url import FileTransportAdapter +from elyra.util.url import get_verify_parm class AirflowPackageCatalogConnector(ComponentCatalogConnector): @@ -111,6 +112,7 @@ def get_catalog_entries(self, catalog_metadata: Dict[str, Any]) -> List[Dict[str timeout=AirflowPackageCatalogConnector.REQUEST_TIMEOUT, allow_redirects=True, auth=auth, + verify=get_verify_parm(), ) except Exception as ex: self.log.error( diff --git a/elyra/pipeline/airflow/provider_package_catalog_connector/airflow_provider_package_catalog_connector.py b/elyra/pipeline/airflow/provider_package_catalog_connector/airflow_provider_package_catalog_connector.py index 8eb4dec7f..40b485876 100644 --- a/elyra/pipeline/airflow/provider_package_catalog_connector/airflow_provider_package_catalog_connector.py +++ b/elyra/pipeline/airflow/provider_package_catalog_connector/airflow_provider_package_catalog_connector.py @@ -35,6 +35,7 @@ from elyra.pipeline.catalog_connector import ComponentCatalogConnector from elyra.pipeline.catalog_connector import EntryData from elyra.util.url import FileTransportAdapter +from elyra.util.url import get_verify_parm class AirflowProviderPackageCatalogConnector(ComponentCatalogConnector): @@ -116,6 +117,7 @@ def get_catalog_entries(self, catalog_metadata: Dict[str, Any]) -> List[Dict[str 
timeout=AirflowProviderPackageCatalogConnector.REQUEST_TIMEOUT, allow_redirects=True, auth=auth, + verify=get_verify_parm(), ) except Exception as ex: self.log.error( diff --git a/elyra/pipeline/catalog_connector.py b/elyra/pipeline/catalog_connector.py index 19709db86..21dce3270 100644 --- a/elyra/pipeline/catalog_connector.py +++ b/elyra/pipeline/catalog_connector.py @@ -42,6 +42,7 @@ from elyra.pipeline.component import ComponentParameter from elyra.pipeline.runtime_type import RuntimeProcessorType from elyra.util.url import FileTransportAdapter +from elyra.util.url import get_verify_parm class EntryData(object): @@ -664,6 +665,7 @@ def get_entry_data( timeout=UrlComponentCatalogConnector.REQUEST_TIMEOUT, allow_redirects=True, auth=auth, + verify=get_verify_parm(), ) except Exception as e: self.log.error( diff --git a/elyra/tests/util/test_url.py b/elyra/tests/util/test_url.py index e93278452..d1855a60e 100644 --- a/elyra/tests/util/test_url.py +++ b/elyra/tests/util/test_url.py @@ -13,11 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import os
from typing import Union

import pytest


def get_verify_parm(default: bool = True) -> Union[bool, str]:
    """
    Return a value for the 'verify' parameter of the requests.request
    method (https://requests.readthedocs.io/en/latest/api/).

    The value is determined as follows: if environment variable
    TRUSTED_CA_BUNDLE_PATH is defined and contains anything other than
    whitespace, its value (with leading and trailing whitespace removed)
    is returned. Otherwise the default value is returned.

    :param default: value to return if TRUSTED_CA_BUNDLE_PATH is not
                    defined or blank
    :returns: the configured CA bundle path or the default
    """
    # Read the variable once and return the same (stripped) value that was
    # validated, so that accidental padding such as a trailing newline does
    # not produce a path that does not exist.
    ca_bundle_path = os.environ.get("TRUSTED_CA_BUNDLE_PATH", "").strip()
    if ca_bundle_path:
        return ca_bundle_path

    return default


@pytest.fixture
def setup_env_vars():
    """
    Preserve environment variable TRUSTED_CA_BUNDLE_PATH across a test that
    modifies it.
    """
    # runs before test (save the value of environment variable
    # TRUSTED_CA_BUNDLE_PATH to avoid any contamination by tests
    # that modify it)
    saved_value = os.environ.get("TRUSTED_CA_BUNDLE_PATH")
    yield
    # runs after test (restore the original state of environment variable
    # TRUSTED_CA_BUNDLE_PATH: put back the saved value, or remove the
    # variable if it was not defined before the test ran, so that a value
    # set by the test cannot leak into other tests)
    if saved_value is None:
        os.environ.pop("TRUSTED_CA_BUNDLE_PATH", None)
    else:
        os.environ["TRUSTED_CA_BUNDLE_PATH"] = saved_value


@pytest.mark.usefixtures("setup_env_vars")
def test_valid_get_verify_parm():
    """
    Verify that method get_verify_parm works as expected:
     - env variable TRUSTED_CA_BUNDLE_PATH is defined
     - env variable TRUSTED_CA_BUNDLE_PATH is defined with surrounding whitespace
     - env variable TRUSTED_CA_BUNDLE_PATH is not defined, but a default is specified
     - env variable TRUSTED_CA_BUNDLE_PATH is not defined and no default is specified
    """
    test_TRUSTED_CA_BUNDLE_PATH_value = "/path/to/cert/bundle"
    os.environ["TRUSTED_CA_BUNDLE_PATH"] = test_TRUSTED_CA_BUNDLE_PATH_value
    assert get_verify_parm() == test_TRUSTED_CA_BUNDLE_PATH_value
    # surrounding whitespace is not part of the path
    os.environ["TRUSTED_CA_BUNDLE_PATH"] = f"  {test_TRUSTED_CA_BUNDLE_PATH_value}\n"
    assert get_verify_parm() == test_TRUSTED_CA_BUNDLE_PATH_value
    del os.environ["TRUSTED_CA_BUNDLE_PATH"]
    # set explicit default
    assert get_verify_parm(False) is False
    # set explicit default
    assert get_verify_parm(True) is True
    # use implicit default
    assert get_verify_parm() is True


@pytest.mark.usefixtures("setup_env_vars")
def test_invalid_get_verify_parm():
    """
    Verify that method get_verify_parm works as if environment variable
    TRUSTED_CA_BUNDLE_PATH is not defined when it contains an invalid
    (empty or blank) value
    """
    for test_TRUSTED_CA_BUNDLE_PATH_value in ("", " "):
        os.environ["TRUSTED_CA_BUNDLE_PATH"] = test_TRUSTED_CA_BUNDLE_PATH_value
        assert get_verify_parm() is True