From 86a6d3747397fb28c31f92d4614fba39ed07d1f6 Mon Sep 17 00:00:00 2001 From: zhangxingzhi Date: Wed, 19 Oct 2022 14:42:56 +0800 Subject: [PATCH 1/5] fix: sdk.ml.azure-ai-ml.tests.datastore.e2etests.test_datastore --- sdk/ml/azure-ai-ml/tests/datastore/e2etests/test_datastore.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/sdk/ml/azure-ai-ml/tests/datastore/e2etests/test_datastore.py b/sdk/ml/azure-ai-ml/tests/datastore/e2etests/test_datastore.py index 32206de1a28a..db30b97d774b 100644 --- a/sdk/ml/azure-ai-ml/tests/datastore/e2etests/test_datastore.py +++ b/sdk/ml/azure-ai-ml/tests/datastore/e2etests/test_datastore.py @@ -7,8 +7,6 @@ from azure.ai.ml.entities._datastore._on_prem import HdfsDatastore from azure.ai.ml.entities._credentials import NoneCredentialConfiguration from azure.ai.ml.entities._datastore.datastore import Datastore -from azure.core.paging import ItemPaged -from azure.mgmt.storage import StorageManagementClient from devtools_testutils import AzureRecordedTestCase, is_live From 6f025e64dad0a94828c4083d67ca2447dc03d852 Mon Sep 17 00:00:00 2001 From: zhangxingzhi Date: Wed, 19 Oct 2022 14:43:28 +0800 Subject: [PATCH 2/5] feat: component with default label --- .../azure/ai/ml/constants/_common.py | 2 + .../ai/ml/operations/_component_operations.py | 18 +- .../component/e2etests/test_component.py | 25 + .../unittests/test_component_operations.py | 9 + ...nenttest_component_with_default_label.json | 671 ++++++++++++++++++ 5 files changed, 716 insertions(+), 9 deletions(-) create mode 100644 sdk/ml/azure-ai-ml/tests/recordings/component/e2etests/test_component.pyTestComponenttest_component_with_default_label.json diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/constants/_common.py b/sdk/ml/azure-ai-ml/azure/ai/ml/constants/_common.py index 4c09996db29e..31f900cd62fa 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/constants/_common.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/constants/_common.py @@ -132,6 +132,8 @@ "AzureFile": "https://{}.file.{}", } +DEFAULT_LABEL_NAME = "default" +DEFAULT_COMPONENT_VERSION = "azureml_default" ANONYMOUS_COMPONENT_NAME = "azureml_anonymous" GIT_PATH_PREFIX = "git+" SCHEMA_VALIDATION_ERROR_TEMPLATE = ( diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_component_operations.py b/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_component_operations.py index d99513386253..d527365f3072 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_component_operations.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_component_operations.py @@ -30,7 +30,8 @@ from azure.ai.ml._utils._azureml_polling import AzureMLPolling from azure.ai.ml._utils._endpoint_utils import polling_wait from azure.ai.ml._utils._logger_utils import OpsLogger -from azure.ai.ml.constants._common import AzureMLResourceType, LROConfigurations +from azure.ai.ml.constants._common import AzureMLResourceType, LROConfigurations, DEFAULT_LABEL_NAME, \ + DEFAULT_COMPONENT_VERSION from azure.ai.ml.entities import Component, ValidationResult from azure.ai.ml.entities._assets import Code from azure.ai.ml.exceptions import ComponentException, ErrorCategory, ErrorTarget, ValidationException @@ -172,17 +173,16 @@ def get(self, name: str, version: Optional[str] = None, label: Optional[str] = N error_category=ErrorCategory.USER_ERROR, ) + if not version and not label: + label = DEFAULT_LABEL_NAME + + if label == DEFAULT_LABEL_NAME: + label = None + version = DEFAULT_COMPONENT_VERSION + if label: return _resolve_label_to_asset(self, name, label) - if not version: - msg = "Must provide either version or 
label." - raise ValidationException( - message=msg, - target=ErrorTarget.COMPONENT, - no_personal_data_message=msg, - error_category=ErrorCategory.USER_ERROR, - ) result = ( self._version_operation.get( name=name, diff --git a/sdk/ml/azure-ai-ml/tests/component/e2etests/test_component.py b/sdk/ml/azure-ai-ml/tests/component/e2etests/test_component.py index 267f5b41557e..92d4af26c1ca 100644 --- a/sdk/ml/azure-ai-ml/tests/component/e2etests/test_component.py +++ b/sdk/ml/azure-ai-ml/tests/component/e2etests/test_component.py @@ -850,3 +850,28 @@ def test_create_pipeline_component_from_job(self, client: MLClient, randstr: Cal component = PipelineComponent(name=name, source_job_id=job.id) rest_component = client.components.create_or_update(component) assert rest_component.name == name + + def test_component_with_default_label( + self, + client: MLClient, + randstr: Callable[[str], str], + ) -> None: + yaml_path: str = "./tests/test_configs/components/helloworld_component.yml" + component_name = randstr("component_name") + + create_component(client, component_name, path=yaml_path) + + target_component = client.components.get(component_name, label="latest") + + for default_component in [ + client.components.get(component_name), + client.components.get(component_name, label="default"), + ]: + expected_component_dict = target_component._to_dict() + default_component_dict = default_component._to_dict() + assert pydash.omit(default_component_dict, "id") == pydash.omit(expected_component_dict, "id") + + assert default_component.id.endswith(f"/components/{component_name}/labels/default") + + node = default_component() + assert node._to_rest_object()["componentId"] == default_component.id diff --git a/sdk/ml/azure-ai-ml/tests/component/unittests/test_component_operations.py b/sdk/ml/azure-ai-ml/tests/component/unittests/test_component_operations.py index 19a75bc0b6dd..9606fc94cff4 100644 --- a/sdk/ml/azure-ai-ml/tests/component/unittests/test_component_operations.py +++ b/sdk/ml/azure-ai-ml/tests/component/unittests/test_component_operations.py @@ -110,6 +110,15 @@ def test_get(self, mock_component_operation: ComponentOperations) -> None: assert "version='1'" in create_call_args_str mock_component_entity._from_rest_object.assert_called_once() + def test_get_default(self, mock_component_operation: ComponentOperations) -> None: + with patch("azure.ai.ml.operations._component_operations.Component") as mock_component_entity: + mock_component_operation.get("mock_component") + + mock_component_operation._version_operation.get.assert_called_once() + create_call_args_str = str(mock_component_operation._version_operation.get.call_args) + assert "name='mock_component'" in create_call_args_str + mock_component_entity._from_rest_object.assert_called_once() + def test_archive_version(self, mock_component_operation: ComponentOperations): name = "random_name" component = Mock(ComponentVersionData(properties=Mock(ComponentVersionDetails()))) diff --git a/sdk/ml/azure-ai-ml/tests/recordings/component/e2etests/test_component.pyTestComponenttest_component_with_default_label.json b/sdk/ml/azure-ai-ml/tests/recordings/component/e2etests/test_component.pyTestComponenttest_component_with_default_label.json new file mode 100644 index 000000000000..b676c3437404 --- /dev/null +++ b/sdk/ml/azure-ai-ml/tests/recordings/component/e2etests/test_component.pyTestComponenttest_component_with_default_label.json @@ -0,0 +1,671 @@ +{ + "Entries": [ + { + "RequestUri": 
"https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/datastores/workspaceblobstore?api-version=2022-05-01", + "RequestMethod": "GET", + "RequestHeaders": { + "Accept": "application/json", + "Accept-Encoding": "gzip, deflate", + "Connection": "keep-alive", + "User-Agent": "azure-ai-ml/1.1.0 azsdk-python-mgmt-machinelearningservices/0.1.0 Python/3.9.10 (Windows-10-10.0.22621-SP0)" + }, + "RequestBody": null, + "StatusCode": 200, + "ResponseHeaders": { + "Cache-Control": "no-cache", + "Content-Encoding": "gzip", + "Content-Type": "application/json; charset=utf-8", + "Date": "Wed, 19 Oct 2022 06:32:18 GMT", + "Expires": "-1", + "Pragma": "no-cache", + "Request-Context": "appId=cid-v1:512cc15a-13b5-415b-bfd0-dce7accb6bb1", + "Server-Timing": "traceparent;desc=\u002200-2810d8cee3c5146cae136f2701bc04df-88ae714415bf1572-01\u0022", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains", + "Transfer-Encoding": "chunked", + "Vary": [ + "Accept-Encoding", + "Accept-Encoding" + ], + "x-aml-cluster": "vienna-test-westus2-02", + "X-Content-Type-Options": "nosniff", + "x-ms-correlation-request-id": "d702ed79-e74d-4ec9-89f6-fe1e858d234f", + "x-ms-ratelimit-remaining-subscription-reads": "11998", + "x-ms-response-type": "standard", + "x-ms-routing-request-id": "JAPANEAST:20221019T063218Z:d702ed79-e74d-4ec9-89f6-fe1e858d234f", + "x-request-time": "0.087" + }, + "ResponseBody": { + "id": "/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/datastores/workspaceblobstore", + "name": "workspaceblobstore", + "type": "Microsoft.MachineLearningServices/workspaces/datastores", + "properties": { + "description": null, + "tags": null, + "properties": null, + "isDefault": true, + "credentials": { + "credentialsType": "AccountKey" + }, + "datastoreType": "AzureBlob", + "accountName": "sagvgsoim6nmhbq", + "containerName": "azureml-blobstore-e61cd5e2-512f-475e-9842-5e2a973993b8", + "endpoint": "core.windows.net", + "protocol": "https", + "serviceDataAccessAuthIdentity": "WorkspaceSystemAssignedIdentity" + }, + "systemData": { + "createdAt": "2022-09-22T09:02:03.2629568\u002B00:00", + "createdBy": "779301c0-18b2-4cdc-801b-a0a3368fee0a", + "createdByType": "Application", + "lastModifiedAt": "2022-09-22T09:02:04.166989\u002B00:00", + "lastModifiedBy": "779301c0-18b2-4cdc-801b-a0a3368fee0a", + "lastModifiedByType": "Application" + } + } + }, + { + "RequestUri": "https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/datastores/workspaceblobstore/listSecrets?api-version=2022-05-01", + "RequestMethod": "POST", + "RequestHeaders": { + "Accept": "application/json", + "Accept-Encoding": "gzip, deflate", + "Connection": "keep-alive", + "Content-Length": "0", + "User-Agent": "azure-ai-ml/1.1.0 azsdk-python-mgmt-machinelearningservices/0.1.0 Python/3.9.10 (Windows-10-10.0.22621-SP0)" + }, + "RequestBody": null, + "StatusCode": 200, + "ResponseHeaders": { + "Cache-Control": "no-cache", + "Content-Encoding": "gzip", + "Content-Type": "application/json; charset=utf-8", + "Date": "Wed, 19 Oct 2022 06:32:19 GMT", + "Expires": "-1", + "Pragma": "no-cache", + "Request-Context": "appId=cid-v1:512cc15a-13b5-415b-bfd0-dce7accb6bb1", + "Server-Timing": "traceparent;desc=\u002200-f53653ab3cd22db1e538b348589ab6d0-d4b03ddeda26ebb4-01\u0022", + 
"Strict-Transport-Security": "max-age=31536000; includeSubDomains", + "Transfer-Encoding": "chunked", + "Vary": "Accept-Encoding", + "x-aml-cluster": "vienna-test-westus2-02", + "X-Content-Type-Options": "nosniff", + "x-ms-correlation-request-id": "fab44723-ae6d-497f-bc77-19407083edb7", + "x-ms-ratelimit-remaining-subscription-writes": "1199", + "x-ms-response-type": "standard", + "x-ms-routing-request-id": "JAPANEAST:20221019T063219Z:fab44723-ae6d-497f-bc77-19407083edb7", + "x-request-time": "0.093" + }, + "ResponseBody": { + "secretsType": "AccountKey", + "key": "dGhpcyBpcyBmYWtlIGtleQ==" + } + }, + { + "RequestUri": "https://sagvgsoim6nmhbq.blob.core.windows.net/azureml-blobstore-e61cd5e2-512f-475e-9842-5e2a973993b8/LocalUpload/00000000000000000000000000000000/COMPONENT_PLACEHOLDER", + "RequestMethod": "HEAD", + "RequestHeaders": { + "Accept": "application/xml", + "Accept-Encoding": "gzip, deflate", + "Connection": "keep-alive", + "User-Agent": "azsdk-python-storage-blob/12.14.0 Python/3.9.10 (Windows-10-10.0.22621-SP0)", + "x-ms-date": "Wed, 19 Oct 2022 06:32:19 GMT", + "x-ms-version": "2021-08-06" + }, + "RequestBody": null, + "StatusCode": 200, + "ResponseHeaders": { + "Accept-Ranges": "bytes", + "Content-Length": "35", + "Content-MD5": "L/DnSpFIn\u002BjaQWc\u002BsUQdcw==", + "Content-Type": "application/octet-stream", + "Date": "Wed, 19 Oct 2022 06:32:20 GMT", + "ETag": "\u00220x8DA9D48E17467D7\u0022", + "Last-Modified": "Fri, 23 Sep 2022 09:49:17 GMT", + "Server": [ + "Windows-Azure-Blob/1.0", + "Microsoft-HTTPAPI/2.0" + ], + "Vary": "Origin", + "x-ms-access-tier": "Hot", + "x-ms-access-tier-inferred": "true", + "x-ms-blob-type": "BlockBlob", + "x-ms-creation-time": "Fri, 23 Sep 2022 09:49:16 GMT", + "x-ms-lease-state": "available", + "x-ms-lease-status": "unlocked", + "x-ms-meta-name": "9c9cfba9-82bd-45db-ad06-07009d1d9672", + "x-ms-meta-upload_status": "completed", + "x-ms-meta-version": "1", + "x-ms-server-encrypted": "true", + "x-ms-version": "2021-08-06" + }, + "ResponseBody": null + }, + { + "RequestUri": "https://sagvgsoim6nmhbq.blob.core.windows.net/azureml-blobstore-e61cd5e2-512f-475e-9842-5e2a973993b8/az-ml-artifacts/00000000000000000000000000000000/COMPONENT_PLACEHOLDER", + "RequestMethod": "HEAD", + "RequestHeaders": { + "Accept": "application/xml", + "Accept-Encoding": "gzip, deflate", + "Connection": "keep-alive", + "User-Agent": "azsdk-python-storage-blob/12.14.0 Python/3.9.10 (Windows-10-10.0.22621-SP0)", + "x-ms-date": "Wed, 19 Oct 2022 06:32:20 GMT", + "x-ms-version": "2021-08-06" + }, + "RequestBody": null, + "StatusCode": 404, + "ResponseHeaders": { + "Date": "Wed, 19 Oct 2022 06:32:20 GMT", + "Server": [ + "Windows-Azure-Blob/1.0", + "Microsoft-HTTPAPI/2.0" + ], + "Transfer-Encoding": "chunked", + "Vary": "Origin", + "x-ms-error-code": "BlobNotFound", + "x-ms-version": "2021-08-06" + }, + "ResponseBody": null + }, + { + "RequestUri": "https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/codes/9c9cfba9-82bd-45db-ad06-07009d1d9672/versions/1?api-version=2022-05-01", + "RequestMethod": "PUT", + "RequestHeaders": { + "Accept": "application/json", + "Accept-Encoding": "gzip, deflate", + "Connection": "keep-alive", + "Content-Length": "288", + "Content-Type": "application/json", + "User-Agent": "azure-ai-ml/1.1.0 azsdk-python-mgmt-machinelearningservices/0.1.0 Python/3.9.10 (Windows-10-10.0.22621-SP0)" + }, + "RequestBody": { + "properties": { + "properties": { + 
"hash_sha256": "0000000000000", + "hash_version": "0000000000000" + }, + "isAnonymous": true, + "isArchived": false, + "codeUri": "https://sagvgsoim6nmhbq.blob.core.windows.net/azureml-blobstore-e61cd5e2-512f-475e-9842-5e2a973993b8/LocalUpload/00000000000000000000000000000000" + } + }, + "StatusCode": 200, + "ResponseHeaders": { + "Cache-Control": "no-cache", + "Content-Encoding": "gzip", + "Content-Type": "application/json; charset=utf-8", + "Date": "Wed, 19 Oct 2022 06:32:21 GMT", + "Expires": "-1", + "Pragma": "no-cache", + "Request-Context": "appId=cid-v1:512cc15a-13b5-415b-bfd0-dce7accb6bb1", + "Server-Timing": "traceparent;desc=\u002200-8b510a4ed8283bdd21fd852f1b0f9c03-6eed4f81e251986e-01\u0022", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains", + "Transfer-Encoding": "chunked", + "Vary": [ + "Accept-Encoding", + "Accept-Encoding" + ], + "x-aml-cluster": "vienna-test-westus2-02", + "X-Content-Type-Options": "nosniff", + "x-ms-correlation-request-id": "afb60c06-496f-484f-b4a0-99ba8bb73be5", + "x-ms-ratelimit-remaining-subscription-writes": "1199", + "x-ms-response-type": "standard", + "x-ms-routing-request-id": "JAPANEAST:20221019T063221Z:afb60c06-496f-484f-b4a0-99ba8bb73be5", + "x-request-time": "0.364" + }, + "ResponseBody": { + "id": "/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/codes/9c9cfba9-82bd-45db-ad06-07009d1d9672/versions/1", + "name": "1", + "type": "Microsoft.MachineLearningServices/workspaces/codes/versions", + "properties": { + "description": null, + "tags": {}, + "properties": { + "hash_sha256": "0000000000000", + "hash_version": "0000000000000" + }, + "isArchived": false, + "isAnonymous": false, + "codeUri": "https://sagvgsoim6nmhbq.blob.core.windows.net/azureml-blobstore-e61cd5e2-512f-475e-9842-5e2a973993b8/LocalUpload/00000000000000000000000000000000" + }, + "systemData": { + "createdAt": "2022-09-23T09:49:20.984936\u002B00:00", + "createdBy": "Ying Chen", + "createdByType": "User", + "lastModifiedAt": "2022-10-19T06:32:21.7273825\u002B00:00", + "lastModifiedBy": "Xingzhi Zhang", + "lastModifiedByType": "User" + } + } + }, + { + "RequestUri": "https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/components/test_789461188097/versions/0.0.1?api-version=2022-05-01", + "RequestMethod": "PUT", + "RequestHeaders": { + "Accept": "application/json", + "Accept-Encoding": "gzip, deflate", + "Connection": "keep-alive", + "Content-Length": "1286", + "Content-Type": "application/json", + "User-Agent": "azure-ai-ml/1.1.0 azsdk-python-mgmt-machinelearningservices/0.1.0 Python/3.9.10 (Windows-10-10.0.22621-SP0)" + }, + "RequestBody": { + "properties": { + "description": "This is the basic command component", + "properties": {}, + "tags": { + "tag": "tagvalue", + "owner": "sdkteam" + }, + "isAnonymous": false, + "isArchived": false, + "componentSpec": { + "command": "echo Hello World \u0026 echo $[[${{inputs.component_in_number}}]] \u0026 echo ${{inputs.component_in_path}} \u0026 echo ${{outputs.component_out_path}} \u003E ${{outputs.component_out_path}}/component_in_number", + "code": "azureml:/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/codes/9c9cfba9-82bd-45db-ad06-07009d1d9672/versions/1", + "environment": "azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:1", + "name": 
"test_789461188097", + "description": "This is the basic command component", + "tags": { + "tag": "tagvalue", + "owner": "sdkteam" + }, + "version": "0.0.1", + "$schema": "https://azuremlschemas.azureedge.net/development/commandComponent.schema.json", + "display_name": "CommandComponentBasic", + "is_deterministic": true, + "inputs": { + "component_in_number": { + "type": "number", + "optional": true, + "default": "10.99", + "description": "A number" + }, + "component_in_path": { + "type": "uri_folder", + "description": "A path" + } + }, + "outputs": { + "component_out_path": { + "type": "uri_folder" + } + }, + "type": "command", + "_source": "YAML.COMPONENT" + } + } + }, + "StatusCode": 201, + "ResponseHeaders": { + "Cache-Control": "no-cache", + "Content-Length": "2218", + "Content-Type": "application/json; charset=utf-8", + "Date": "Wed, 19 Oct 2022 06:32:24 GMT", + "Expires": "-1", + "Location": "https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/components/test_789461188097/versions/0.0.1?api-version=2022-05-01", + "Pragma": "no-cache", + "Request-Context": "appId=cid-v1:512cc15a-13b5-415b-bfd0-dce7accb6bb1", + "Server-Timing": "traceparent;desc=\u002200-6ac09887b6d366132012ba6d2ab9285b-3cc222c78cc72191-01\u0022", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains", + "x-aml-cluster": "vienna-test-westus2-02", + "X-Content-Type-Options": "nosniff", + "x-ms-correlation-request-id": "6199cc8a-62d6-48b6-ad75-c4c1dfa52048", + "x-ms-ratelimit-remaining-subscription-writes": "1198", + "x-ms-response-type": "standard", + "x-ms-routing-request-id": "JAPANEAST:20221019T063224Z:6199cc8a-62d6-48b6-ad75-c4c1dfa52048", + "x-request-time": "2.630" + }, + "ResponseBody": { + "id": "/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/components/test_789461188097/versions/0.0.1", + "name": "0.0.1", + "type": "Microsoft.MachineLearningServices/workspaces/components/versions", + "properties": { + "description": null, + "tags": { + "tag": "tagvalue", + "owner": "sdkteam" + }, + "properties": {}, + "isArchived": false, + "isAnonymous": false, + "componentSpec": { + "name": "test_789461188097", + "version": "0.0.1", + "display_name": "CommandComponentBasic", + "is_deterministic": "True", + "type": "command", + "description": "This is the basic command component", + "tags": { + "tag": "tagvalue", + "owner": "sdkteam" + }, + "inputs": { + "component_in_path": { + "type": "uri_folder", + "optional": "False", + "description": "A path" + }, + "component_in_number": { + "type": "number", + "optional": "True", + "default": "10.99", + "description": "A number" + } + }, + "outputs": { + "component_out_path": { + "type": "uri_folder" + } + }, + "code": "azureml:/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/codes/9c9cfba9-82bd-45db-ad06-07009d1d9672/versions/1", + "environment": "azureml://registries/azureml-dev/environments/AzureML-sklearn-0.24-ubuntu18.04-py37-cpu/versions/1", + "resources": { + "instance_count": "1" + }, + "command": "echo Hello World \u0026 echo $[[${{inputs.component_in_number}}]] \u0026 echo ${{inputs.component_in_path}} \u0026 echo ${{outputs.component_out_path}} \u003E ${{outputs.component_out_path}}/component_in_number", + "$schema": 
"https://azuremlschemas.azureedge.net/development/commandComponent.schema.json" + } + }, + "systemData": { + "createdAt": "2022-10-19T06:32:23.8162229\u002B00:00", + "createdBy": "Xingzhi Zhang", + "createdByType": "User", + "lastModifiedAt": "2022-10-19T06:32:24.4735383\u002B00:00", + "lastModifiedBy": "Xingzhi Zhang", + "lastModifiedByType": "User" + } + } + }, + { + "RequestUri": "https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/components/test_789461188097/versions?api-version=2022-05-01\u0026$orderBy=createdtime%20desc\u0026$top=1", + "RequestMethod": "GET", + "RequestHeaders": { + "Accept": "application/json", + "Accept-Encoding": "gzip, deflate", + "Connection": "keep-alive", + "User-Agent": "azure-ai-ml/1.1.0 azsdk-python-mgmt-machinelearningservices/0.1.0 Python/3.9.10 (Windows-10-10.0.22621-SP0)" + }, + "RequestBody": null, + "StatusCode": 200, + "ResponseHeaders": { + "Cache-Control": "no-cache", + "Content-Encoding": "gzip", + "Content-Type": "application/json; charset=utf-8", + "Date": "Wed, 19 Oct 2022 06:32:25 GMT", + "Expires": "-1", + "Pragma": "no-cache", + "Request-Context": "appId=cid-v1:512cc15a-13b5-415b-bfd0-dce7accb6bb1", + "Server-Timing": "traceparent;desc=\u002200-fca01b5a5647a415c7e99722863fe61f-0a1d6f268d295ea3-01\u0022", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains", + "Transfer-Encoding": "chunked", + "Vary": [ + "Accept-Encoding", + "Accept-Encoding" + ], + "x-aml-cluster": "vienna-test-westus2-02", + "X-Content-Type-Options": "nosniff", + "x-ms-correlation-request-id": "8d7f14b8-b4ac-4602-920b-f1452258b55e", + "x-ms-ratelimit-remaining-subscription-reads": "11997", + "x-ms-response-type": "standard", + "x-ms-routing-request-id": "JAPANEAST:20221019T063225Z:8d7f14b8-b4ac-4602-920b-f1452258b55e", + "x-request-time": "0.225" + }, + "ResponseBody": { + "value": [ + { + "id": "/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/components/test_789461188097/versions/0.0.1", + "name": "0.0.1", + "type": "Microsoft.MachineLearningServices/workspaces/components/versions", + "properties": { + "description": null, + "tags": { + "tag": "tagvalue", + "owner": "sdkteam" + }, + "properties": {}, + "isArchived": false, + "isAnonymous": false, + "componentSpec": { + "name": "test_789461188097", + "version": "0.0.1", + "display_name": "CommandComponentBasic", + "is_deterministic": "True", + "type": "command", + "description": "This is the basic command component", + "tags": { + "tag": "tagvalue", + "owner": "sdkteam" + }, + "inputs": { + "component_in_path": { + "type": "uri_folder", + "optional": "False", + "description": "A path" + }, + "component_in_number": { + "type": "number", + "optional": "True", + "default": "10.99", + "description": "A number" + } + }, + "outputs": { + "component_out_path": { + "type": "uri_folder" + } + }, + "code": "azureml:/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/codes/9c9cfba9-82bd-45db-ad06-07009d1d9672/versions/1", + "environment": "azureml://registries/azureml-dev/environments/AzureML-sklearn-0.24-ubuntu18.04-py37-cpu/versions/1", + "resources": { + "instance_count": "1" + }, + "command": "echo Hello World \u0026 echo $[[${{inputs.component_in_number}}]] \u0026 echo ${{inputs.component_in_path}} \u0026 echo ${{outputs.component_out_path}} \u003E 
${{outputs.component_out_path}}/component_in_number", + "$schema": "https://azuremlschemas.azureedge.net/development/commandComponent.schema.json" + } + }, + "systemData": { + "createdAt": "2022-10-19T06:32:23.8162229\u002B00:00", + "createdBy": "Xingzhi Zhang", + "createdByType": "User", + "lastModifiedAt": "2022-10-19T06:32:24.4735383\u002B00:00", + "lastModifiedBy": "Xingzhi Zhang", + "lastModifiedByType": "User" + } + } + ] + } + }, + { + "RequestUri": "https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/components/test_789461188097/versions/azureml_default?api-version=2022-05-01", + "RequestMethod": "GET", + "RequestHeaders": { + "Accept": "application/json", + "Accept-Encoding": "gzip, deflate", + "Connection": "keep-alive", + "User-Agent": "azure-ai-ml/1.1.0 azsdk-python-mgmt-machinelearningservices/0.1.0 Python/3.9.10 (Windows-10-10.0.22621-SP0)" + }, + "RequestBody": null, + "StatusCode": 200, + "ResponseHeaders": { + "Cache-Control": "no-cache", + "Content-Encoding": "gzip", + "Content-Type": "application/json; charset=utf-8", + "Date": "Wed, 19 Oct 2022 06:32:25 GMT", + "Expires": "-1", + "Pragma": "no-cache", + "Request-Context": "appId=cid-v1:512cc15a-13b5-415b-bfd0-dce7accb6bb1", + "Server-Timing": "traceparent;desc=\u002200-a421da73d3f5caa610c8cdeb90c04c02-cc51b36c4cf318c5-01\u0022", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains", + "Transfer-Encoding": "chunked", + "Vary": [ + "Accept-Encoding", + "Accept-Encoding" + ], + "x-aml-cluster": "vienna-test-westus2-02", + "X-Content-Type-Options": "nosniff", + "x-ms-correlation-request-id": "4d15419a-5e78-475d-9be8-be46e47f2ed4", + "x-ms-ratelimit-remaining-subscription-reads": "11996", + "x-ms-response-type": "standard", + "x-ms-routing-request-id": "JAPANEAST:20221019T063226Z:4d15419a-5e78-475d-9be8-be46e47f2ed4", + "x-request-time": "0.406" + }, + "ResponseBody": { + "id": "/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/components/test_789461188097/labels/default", + "name": "0.0.1", + "type": "Microsoft.MachineLearningServices/workspaces/components/versions", + "properties": { + "description": null, + "tags": { + "tag": "tagvalue", + "owner": "sdkteam" + }, + "properties": {}, + "isArchived": false, + "isAnonymous": false, + "componentSpec": { + "name": "test_789461188097", + "version": "0.0.1", + "display_name": "CommandComponentBasic", + "is_deterministic": "True", + "type": "command", + "description": "This is the basic command component", + "tags": { + "tag": "tagvalue", + "owner": "sdkteam" + }, + "inputs": { + "component_in_path": { + "type": "uri_folder", + "optional": "False", + "description": "A path" + }, + "component_in_number": { + "type": "number", + "optional": "True", + "default": "10.99", + "description": "A number" + } + }, + "outputs": { + "component_out_path": { + "type": "uri_folder" + } + }, + "code": "azureml:/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/codes/9c9cfba9-82bd-45db-ad06-07009d1d9672/versions/1", + "environment": "azureml://registries/azureml-dev/environments/AzureML-sklearn-0.24-ubuntu18.04-py37-cpu/versions/1", + "resources": { + "instance_count": "1" + }, + "command": "echo Hello World \u0026 echo $[[${{inputs.component_in_number}}]] \u0026 echo ${{inputs.component_in_path}} \u0026 echo 
${{outputs.component_out_path}} \u003E ${{outputs.component_out_path}}/component_in_number", + "$schema": "https://azuremlschemas.azureedge.net/development/commandComponent.schema.json" + } + }, + "systemData": { + "createdAt": "2022-10-19T06:32:23.8162229\u002B00:00", + "createdBy": "Xingzhi Zhang", + "createdByType": "User", + "lastModifiedAt": "2022-10-19T06:32:24.4735383\u002B00:00", + "lastModifiedBy": "Xingzhi Zhang", + "lastModifiedByType": "User" + } + } + }, + { + "RequestUri": "https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/components/test_789461188097/versions/azureml_default?api-version=2022-05-01", + "RequestMethod": "GET", + "RequestHeaders": { + "Accept": "application/json", + "Accept-Encoding": "gzip, deflate", + "Connection": "keep-alive", + "User-Agent": "azure-ai-ml/1.1.0 azsdk-python-mgmt-machinelearningservices/0.1.0 Python/3.9.10 (Windows-10-10.0.22621-SP0)" + }, + "RequestBody": null, + "StatusCode": 200, + "ResponseHeaders": { + "Cache-Control": "no-cache", + "Content-Encoding": "gzip", + "Content-Type": "application/json; charset=utf-8", + "Date": "Wed, 19 Oct 2022 06:32:26 GMT", + "Expires": "-1", + "Pragma": "no-cache", + "Request-Context": "appId=cid-v1:512cc15a-13b5-415b-bfd0-dce7accb6bb1", + "Server-Timing": "traceparent;desc=\u002200-edc3b0a767fcfb5aff5ab3070c5412b4-53b78632d09d9637-01\u0022", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains", + "Transfer-Encoding": "chunked", + "Vary": [ + "Accept-Encoding", + "Accept-Encoding" + ], + "x-aml-cluster": "vienna-test-westus2-02", + "X-Content-Type-Options": "nosniff", + "x-ms-correlation-request-id": "c79f2cde-78f3-461c-beb5-cf03f636612f", + "x-ms-ratelimit-remaining-subscription-reads": "11995", + "x-ms-response-type": "standard", + "x-ms-routing-request-id": "JAPANEAST:20221019T063227Z:c79f2cde-78f3-461c-beb5-cf03f636612f", + "x-request-time": "0.317" + }, + "ResponseBody": { + "id": "/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/components/test_789461188097/labels/default", + "name": "0.0.1", + "type": "Microsoft.MachineLearningServices/workspaces/components/versions", + "properties": { + "description": null, + "tags": { + "tag": "tagvalue", + "owner": "sdkteam" + }, + "properties": {}, + "isArchived": false, + "isAnonymous": false, + "componentSpec": { + "name": "test_789461188097", + "version": "0.0.1", + "display_name": "CommandComponentBasic", + "is_deterministic": "True", + "type": "command", + "description": "This is the basic command component", + "tags": { + "tag": "tagvalue", + "owner": "sdkteam" + }, + "inputs": { + "component_in_path": { + "type": "uri_folder", + "optional": "False", + "description": "A path" + }, + "component_in_number": { + "type": "number", + "optional": "True", + "default": "10.99", + "description": "A number" + } + }, + "outputs": { + "component_out_path": { + "type": "uri_folder" + } + }, + "code": "azureml:/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/codes/9c9cfba9-82bd-45db-ad06-07009d1d9672/versions/1", + "environment": "azureml://registries/azureml-dev/environments/AzureML-sklearn-0.24-ubuntu18.04-py37-cpu/versions/1", + "resources": { + "instance_count": "1" + }, + "command": "echo Hello World \u0026 echo $[[${{inputs.component_in_number}}]] \u0026 echo 
${{inputs.component_in_path}} \u0026 echo ${{outputs.component_out_path}} \u003E ${{outputs.component_out_path}}/component_in_number", + "$schema": "https://azuremlschemas.azureedge.net/development/commandComponent.schema.json" + } + }, + "systemData": { + "createdAt": "2022-10-19T06:32:23.8162229\u002B00:00", + "createdBy": "Xingzhi Zhang", + "createdByType": "User", + "lastModifiedAt": "2022-10-19T06:32:24.4735383\u002B00:00", + "lastModifiedBy": "Xingzhi Zhang", + "lastModifiedByType": "User" + } + } + } + ], + "Variables": { + "component_name": "test_789461188097" + } +} From 0de72df1877ef1c0fd26d15f36437a84a26c0eca Mon Sep 17 00:00:00 2001 From: zhangxingzhi Date: Wed, 19 Oct 2022 14:51:39 +0800 Subject: [PATCH 3/5] refactor: divide test_dsl_pipeline.py --- .../tests/dsl/unittests/test_dsl_pipeline.py | 2279 +---------------- .../test_dsl_pipeline_with_specific_nodes.py | 1928 ++++++++++++++ .../dsl/unittests/test_init_finalize_job.py | 283 ++ 3 files changed, 2264 insertions(+), 2226 deletions(-) create mode 100644 sdk/ml/azure-ai-ml/tests/dsl/unittests/test_dsl_pipeline_with_specific_nodes.py create mode 100644 sdk/ml/azure-ai-ml/tests/dsl/unittests/test_init_finalize_job.py diff --git a/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_dsl_pipeline.py b/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_dsl_pipeline.py index 15666d0d1f9f..ccd54924d550 100644 --- a/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_dsl_pipeline.py +++ b/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_dsl_pipeline.py @@ -1,5 +1,4 @@ import os -from functools import partial from io import StringIO from pathlib import Path from unittest import mock @@ -12,7 +11,6 @@ from azure.ai.ml import Input, MLClient, MpiDistribution, Output, command, dsl, load_component, load_job, spark from azure.ai.ml._restclient.v2022_05_01.models import ComponentContainerData, ComponentContainerDetails, SystemData -from azure.ai.ml.automl import classification, regression from azure.ai.ml.constants._common import ( AZUREML_PRIVATE_FEATURES_ENV_VAR, AZUREML_RESOURCE_PROVIDER, @@ -20,40 +18,17 @@ VERSIONED_RESOURCE_ID_FORMAT, AssetTypes, AzureMLResourceType, - InputOutputModes, ) -from azure.ai.ml.dsl._load_import import to_component from azure.ai.ml.entities import ( - CommandComponent, - CommandJob, Component, Data, JobResourceConfiguration, - ParallelTask, PipelineJob, - SparkJob, ) -from azure.ai.ml.entities._builders import Command, Parallel, Spark, Sweep -from azure.ai.ml.entities._component.parallel_component import ParallelComponent -from azure.ai.ml.entities._job.automl.tabular import ClassificationJob -from azure.ai.ml.entities._job.job_service import JobService +from azure.ai.ml.entities._builders import Command from azure.ai.ml.entities._job.pipeline._io import PipelineInput from azure.ai.ml.entities._job.pipeline._load_component import _generate_component_function from azure.ai.ml.exceptions import UserErrorException, ValidationException -from azure.ai.ml.parallel import ParallelJob, RunFunction, parallel_run_function -from azure.ai.ml.sweep import ( - BanditPolicy, - Choice, - LogNormal, - LogUniform, - Normal, - QLogNormal, - QLogUniform, - QNormal, - QUniform, - Randint, - Uniform, -) from .._util import _DSL_TIMEOUT_SECOND @@ -61,10 +36,6 @@ components_dir = tests_root_dir / "test_configs/components/" -def mock_create_job(job, *args, **kwargs): - return job - - @pytest.mark.usefixtures("enable_pipeline_private_preview_features") @pytest.mark.timeout(_DSL_TIMEOUT_SECOND) @pytest.mark.unittest @@ -192,301 +163,6 @@ def 
sample_pipeline_with_comment(): pipeline = sample_pipeline_with_comment() assert pipeline.jobs["node"].comment == "arbitrary string" - def test_dsl_pipeline_sweep_node(self) -> None: - yaml_file = "./tests/test_configs/components/helloworld_component.yml" - - @dsl.pipeline(name="train_with_sweep_in_pipeline", default_compute="cpu-cluster") - def train_with_sweep_in_pipeline(raw_data, primary_metric: str = "AUC", max_total_trials: int = 10): - component_to_sweep: CommandComponent = load_component(source=yaml_file) - cmd_node1: Command = component_to_sweep( - component_in_number=Choice([2, 3, 4, 5]), component_in_path=raw_data - ) - - sweep_job1: Sweep = cmd_node1.sweep( - primary_metric="AUC", # primary_metric, - goal="maximize", - sampling_algorithm="random", - ) - sweep_job1.compute = "gpu-cluster" - sweep_job1.set_limits(max_total_trials=10) # max_total_trials - - cmd_node2: Command = component_to_sweep( - component_in_number=Choice([2, 3, 4, 5]), component_in_path=raw_data - ) - sweep_job2: Sweep = cmd_node2.sweep( - primary_metric="AUC", - goal="minimize", - sampling_algorithm="random", - max_total_trials=10, - ) - sweep_job2.compute = "gpu-cluster" - - sweep_job3: Sweep = component_to_sweep( - component_in_number=Choice([2, 3, 4, 5]), component_in_path=raw_data - ).sweep( - primary_metric="accuracy", - goal="maximize", - sampling_algorithm="random", - max_total_trials=10, - ) - - component_to_link = load_component(source=yaml_file, params_override=[{"name": "node_to_link"}]) - link_node = component_to_link( - component_in_number=2, component_in_path=sweep_job1.outputs.component_out_path - ) - - return { - "pipeline_job_best_model1": sweep_job1.outputs.component_out_path, - "pipeline_job_best_model2": sweep_job2.outputs.component_out_path, - "pipeline_job_best_model3": sweep_job3.outputs.component_out_path, - "pipeline_model_test_result": link_node.outputs.component_out_path, - } - - pipeline: PipelineJob = train_with_sweep_in_pipeline( - raw_data=Input(path="/a/path/on/ds", mode="ro_mount"), max_total_trials=100, primary_metric="accuracy" - ) - assert len(pipeline.jobs) == 4, f"Expect 4 nodes are collected but got {len(pipeline.jobs)}" - assert pipeline.component._source == "DSL" - assert pipeline.component._job_types == {"sweep": 3, "command": 1} - assert pipeline.component._job_sources == {"YAML.COMPONENT": 4} - - sweep_node: Sweep = pipeline.jobs["sweep_job1"] - sweep_node.component._id = "azureml:test_component:1" - sweep_node_dict = sweep_node._to_dict() - assert pydash.get(sweep_node_dict, "limits.max_total_trials", None) == 10 - sweep_node_rest_obj = sweep_node._to_rest_object() - sweep_node_dict_from_rest = Sweep._from_rest_object(sweep_node_rest_obj)._to_dict() - omit_fields = ["trial"] - assert pydash.omit(sweep_node_dict, *omit_fields) == pydash.omit(sweep_node_dict_from_rest, *omit_fields) - - pipeline_dict = pipeline._to_dict() - for dot_key, expected_value in [ - ("jobs.sweep_job2.objective.goal", "minimize"), - ("jobs.sweep_job3.objective.goal", "maximize"), - ("jobs.sweep_job2.objective.primary_metric", "AUC"), - ("jobs.sweep_job3.objective.primary_metric", "accuracy"), - ("jobs.sweep_job2.compute", "azureml:gpu-cluster"), - ("jobs.sweep_job3.compute", None), - ]: - assert ( - pydash.get(pipeline_dict, dot_key) == expected_value - ), f"Expect {dot_key} to be {expected_value} but got {pydash.get(pipeline_dict, dot_key)}" - - pipeline_rest_obj = pipeline._to_rest_object() - pipeline_regenerated_from_rest = PipelineJob._load_from_rest(pipeline_rest_obj) - omit_fields = [ 
- "name", - "display_name", - "jobs.*.trial", - "outputs", # TODO: figure out why outputs can't be regenerated correctly - ] - # Change float to string to make dict from local and rest compatible - pipeline_dict["inputs"]["max_total_trials"] = str(pipeline_dict["inputs"]["max_total_trials"]) - pipeline_dict["jobs"]["link_node"]["inputs"]["component_in_number"] = str( - pipeline_dict["jobs"]["link_node"]["inputs"]["component_in_number"] - ) - assert omit_with_wildcard(pipeline_dict, *omit_fields) == omit_with_wildcard( - pipeline_regenerated_from_rest._to_dict(), *omit_fields - ) - - def test_dsl_pipeline_sweep_distributions(self) -> None: - yaml_file = "./tests/test_configs/components/helloworld_component_for_sweep.yml" - - @dsl.pipeline(name="OneJob_RuntimeSweepWithFullSearchSpaces") - def train_with_sweep_in_pipeline(): - component_to_sweep: CommandComponent = load_component(source=yaml_file) - cmd_node1: Command = component_to_sweep( - batch_size=Choice([25, 35]), - first_layer_neurons=Randint(upper=50), - second_layer_neurons=QUniform(min_value=10, max_value=50, q=5), - third_layer_neurons=QLogNormal(mu=5, sigma=1, q=5), - epochs=QLogUniform(min_value=1, max_value=5, q=5), - momentum=QNormal(mu=10, sigma=5, q=2), - weight_decay=LogNormal(mu=0, sigma=1), - learning_rate=LogUniform(min_value=-6, max_value=-1), - f1=Normal(mu=0, sigma=1), - f2=Uniform(min_value=10, max_value=20), - data_folder=Input( - type=AssetTypes.MLTABLE, - path="https://dprepdata.blob.core.windows.net/demo/", - mode=InputOutputModes.RO_MOUNT, - ), - ) - - hello_sweep: Sweep = cmd_node1.sweep( - primary_metric="validation_acc", - goal="maximize", - sampling_algorithm="random", - ) - hello_sweep.compute = "cpu-cluster" - hello_sweep.set_limits(max_total_trials=2, max_concurrent_trials=3, timeout=600) - hello_sweep.early_termination = BanditPolicy(evaluation_interval=2, slack_factor=0.1, delay_evaluation=1) - - dsl_pipeline: PipelineJob = train_with_sweep_in_pipeline() - dsl_pipeline.jobs["hello_sweep"].outputs.trained_model_dir = Output( - type=AssetTypes.MLFLOW_MODEL, mode=InputOutputModes.RW_MOUNT - ) - - sweep_node: Sweep = dsl_pipeline.jobs["hello_sweep"] - random_seed_input = sweep_node.inputs["random_seed"]._meta - assert random_seed_input - assert random_seed_input.default == 42 - sweep_node.component._id = "azureml:test_component:1" - sweep_node_dict = sweep_node._to_dict() - sweep_node_rest_obj = sweep_node._to_rest_object() - sweep_node_dict_from_rest = Sweep._from_rest_object(sweep_node_rest_obj)._to_dict() - omit_fields = ["trial"] - assert pydash.omit(sweep_node_dict, *omit_fields) == pydash.omit(sweep_node_dict_from_rest, *omit_fields) - - def test_dsl_pipeline_with_parallel(self) -> None: - yaml_file = "./tests/test_configs/dsl_pipeline/parallel_component_with_file_input/score.yml" - - @dsl.pipeline(default_compute="cpu-cluster") - def train_with_parallel_in_pipeline(): - parallel_component: ParallelComponent = load_component(source=yaml_file) - node1: Parallel = parallel_component( - job_data_path=Input(type=AssetTypes.MLTABLE, path="/a/path/on/ds", mode=InputOutputModes.EVAL_MOUNT), - ) - node1.resources = {"instance_count": 2} - - dsl_pipeline: PipelineJob = train_with_parallel_in_pipeline() - dsl_pipeline.jobs["node1"].outputs.job_output_path = Output( - type=AssetTypes.MLFLOW_MODEL, mode=InputOutputModes.RW_MOUNT - ) - - parallel_node: Parallel = dsl_pipeline.jobs["node1"] - job_data_path_input = parallel_node.inputs["job_data_path"]._meta - assert job_data_path_input - parallel_node.component._id 
= "azureml:test_component:1" - parallel_node_dict = parallel_node._to_dict() - - parallel_node_rest_obj = parallel_node._to_rest_object() - regenerated_parallel_node = Parallel._from_rest_object(parallel_node_rest_obj) - # entity load from rest object is based on current working directory, while task.code is a local path based - # on the yaml file in unit tests. - regenerated_parallel_node._base_path = Path(yaml_file).parent - parallel_node_dict_from_rest = regenerated_parallel_node._to_dict() - omit_fields = ["component"] - assert pydash.omit(parallel_node_dict, *omit_fields) == pydash.omit(parallel_node_dict_from_rest, *omit_fields) - - def test_dsl_pipeline_with_spark(self) -> None: - add_greeting_column_func = load_component( - "./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/add_greeting_column_component.yml" - ) - count_by_row_func = load_component( - "./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/count_by_row_component.yml" - ) - synapse_compute_name = "spark31" - - @dsl.pipeline(description="submit a pipeline with spark job") - def spark_pipeline_from_yaml(iris_data): - add_greeting_column = add_greeting_column_func(file_input=iris_data) - add_greeting_column.compute = synapse_compute_name - count_by_row = count_by_row_func(file_input=iris_data) - count_by_row.compute = synapse_compute_name - - return {"output": count_by_row.outputs.output} - - dsl_pipeline: PipelineJob = spark_pipeline_from_yaml( - iris_data=Input( - path="https://azuremlexamples.blob.core.windows.net/datasets/iris.csv", - type=AssetTypes.URI_FILE, - mode=InputOutputModes.DIRECT, - ), - ) - dsl_pipeline.outputs.output.mode = "Direct" - - spark_node = dsl_pipeline.jobs["add_greeting_column"] - job_data_path_input = spark_node.inputs["file_input"]._meta - assert job_data_path_input - # spark_node.component._id = "azureml:test_component:1" - spark_node_dict = spark_node._to_dict() - - spark_node_rest_obj = spark_node._to_rest_object() - regenerated_spark_node = Spark._from_rest_object(spark_node_rest_obj) - - spark_node_dict_from_rest = regenerated_spark_node._to_dict() - omit_fields = [] - assert pydash.omit(spark_node_dict, *omit_fields) == pydash.omit(spark_node_dict_from_rest, *omit_fields) - omit_fields = [ - "jobs.add_greeting_column.componentId", - "jobs.add_greeting_column.properties", - "jobs.count_by_row.componentId", - "jobs.count_by_row.properties", - ] - actual_job = pydash.omit(dsl_pipeline._to_rest_object().properties.as_dict(), *omit_fields) - assert actual_job == { - "description": "submit a pipeline with spark job", - "properties": {}, - "tags": {}, - "display_name": "spark_pipeline_from_yaml", - "is_archived": False, - "job_type": "Pipeline", - "inputs": { - "iris_data": { - "mode": "Direct", - "uri": "https://azuremlexamples.blob.core.windows.net/datasets/iris.csv", - "job_input_type": "uri_file", - } - }, - "jobs": { - "add_greeting_column": { - "type": "spark", - "resources": None, - "entry": {"file": "add_greeting_column.py", "spark_job_entry_type": "SparkJobPythonEntry"}, - "py_files": ["utils.zip"], - "files": ["my_files.txt"], - "archives": None, - "jars": None, - "identity": {"identity_type": "Managed"}, - "conf": { - "spark.driver.cores": 2, - "spark.driver.memory": "1g", - "spark.executor.cores": 1, - "spark.executor.memory": "1g", - "spark.executor.instances": 1, - }, - "args": "--file_input ${{inputs.file_input}}", - "name": "add_greeting_column", - "display_name": None, - "tags": {}, - "computeId": "spark31", - "inputs": { - "file_input": {"job_input_type": "literal", 
"value": "${{parent.inputs.iris_data}}"}, - }, - "outputs": {}, - "_source": "YAML.COMPONENT", - }, - "count_by_row": { - "_source": "YAML.COMPONENT", - "archives": None, - "args": "--file_input ${{inputs.file_input}} " "--output ${{outputs.output}}", - "computeId": "spark31", - "conf": { - "spark.driver.cores": 2, - "spark.driver.memory": "1g", - "spark.executor.cores": 1, - "spark.executor.instances": 1, - "spark.executor.memory": "1g", - }, - "display_name": None, - "entry": {"file": "count_by_row.py", "spark_job_entry_type": "SparkJobPythonEntry"}, - "files": ["my_files.txt"], - "identity": {"identity_type": "Managed"}, - "inputs": {"file_input": {"job_input_type": "literal", "value": "${{parent.inputs.iris_data}}"}}, - "jars": ["scalaproj.jar"], - "name": "count_by_row", - "outputs": {"output": {"type": "literal", "value": "${{parent.outputs.output}}"}}, - "py_files": None, - "resources": None, - "tags": {}, - "type": "spark", - }, - }, - "outputs": {"output": {"job_output_type": "uri_folder", "mode": "Direct"}}, - "settings": {"_source": "DSL"}, - } - def test_dsl_pipeline_input_output(self) -> None: yaml_file = "./tests/test_configs/components/helloworld_component.yml" @@ -1115,1121 +791,60 @@ def mock_add_to_builder(component): rest_component["componentId"] = "fake_arm_id" component_from_rest = Command._from_rest_object(rest_component) - # SDK - component_from_sdk = Command( - name="hello_world_component_1", - component=component_entity, - inputs={ - "component_in_number": "${{parent.inputs.job_in_number}}", - "component_in_path": "${{parent.inputs.job_in_path}}", - }, - outputs={"component_out_path": Output(mode="upload")}, - compute="cpu-cluster", - ) - - # component load from different sources are same type - assert isinstance(component_from_dsl, Command) - assert isinstance(component_from_sdk, Command) - assert isinstance(component_from_rest, Command) - assert isinstance(component_from_yaml, Command) - - # only Mldesigner component will be added to the stack - assert mocker.call_count == 1 - - # Node with component entity(DSL, SDK, YAML) inputs will have meta - assert component_from_dsl.inputs.component_in_number._meta is not None - assert component_from_sdk.inputs.component_in_number._meta is not None - assert component_from_yaml.inputs.component_in_number._meta is not None - - # Node without component entity(REST) component inputs won't - assert component_from_rest.inputs.component_in_number._meta is None - - # all components will have same format when passing to backend - expected_component = { - "_source": "YAML.COMPONENT", - "computeId": "cpu-cluster", - "display_name": None, - "distribution": None, - "environment_variables": {}, - "inputs": { - "component_in_number": {"job_input_type": "literal", "value": "${{parent.inputs.job_in_number}}"}, - "component_in_path": {"job_input_type": "literal", "value": "${{parent.inputs.job_in_path}}"}, - }, - "limits": None, - "name": "hello_world_component_1", - "outputs": {"component_out_path": {"job_output_type": "uri_folder", "mode": "Upload"}}, - "resources": None, - "tags": {}, - "type": "command", - } - omit_fields = ["componentId", "properties"] - assert pydash.omit(component_from_dsl._to_rest_object(), *omit_fields) == expected_component - assert pydash.omit(component_from_sdk._to_rest_object(), *omit_fields) == expected_component - expected_component.update({"_source": "REMOTE.WORKSPACE.COMPONENT"}) - assert pydash.omit(component_from_rest._to_rest_object(), *omit_fields) == expected_component - 
expected_component.update({"_source": "YAML.JOB"}) - assert pydash.omit(component_from_yaml._to_rest_object(), *omit_fields) == expected_component - - def test_pipeline_with_command_function(self): - # component func - yaml_file = "./tests/test_configs/components/helloworld_component.yml" - component_func = load_component(source=yaml_file) - - # command job with dict distribution - environment = "AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:5" - expected_resources = {"instance_count": 2} - expected_environment_variables = {"key": "val"} - inputs = { - "component_in_path": Input(type="uri_folder", path="https://my-blob/path/to/data", mode="ro_mount"), - "component_in_number": 0.01, - } - outputs = {"component_out_path": Output(type="mlflow_model", mode="rw_mount")} - - command_job = CommandJob( - display_name="my-evaluate-job", - environment=environment, - command='echo "hello world"', - distribution={"type": "Pytorch", "process_count_per_instance": 2}, - resources=expected_resources, - environment_variables=expected_environment_variables, - inputs=inputs, - outputs=outputs, - ) - command_job_func = to_component(job=command_job) - - # Command from command() function - command_function = command( - display_name="my-evaluate-job", - environment=environment, - command='echo "hello world"', - distribution={"type": "Pytorch", "process_count_per_instance": 2}, - resources=expected_resources, - environment_variables=expected_environment_variables, - inputs=inputs, - outputs=outputs, - ) - - data = Input(type=AssetTypes.URI_FOLDER, path="/a/path/on/ds", mode="ro_mount") - - @dsl.pipeline(experiment_name="test_pipeline_with_command_function") - def pipeline(number, path): - node1 = component_func(component_in_number=number, component_in_path=path) - node2 = command_job_func(component_in_number=number, component_in_path=node1.outputs.component_out_path) - node3 = command_function(component_in_number=number, component_in_path=node2.outputs.component_out_path) - return { - "pipeline_output1": node1.outputs.component_out_path, - "pipeline_output2": node2.outputs.component_out_path, - "pipeline_output3": node3.outputs.component_out_path, - } - - omit_fields = [ - "name", - "properties.jobs.*.componentId", - "properties.jobs.*.properties", - "properties.settings._source", - ] - - pipeline1 = pipeline(10, data) - pipeline_job1 = pipeline1._to_rest_object().as_dict() - pipeline_job1 = omit_with_wildcard(pipeline_job1, *omit_fields) - assert pipeline_job1 == { - "properties": { - "display_name": "pipeline", - "experiment_name": "test_pipeline_with_command_function", - "inputs": { - "number": {"job_input_type": "literal", "value": "10"}, - "path": {"job_input_type": "uri_folder", "mode": "ReadOnlyMount", "uri": "/a/path/on/ds"}, - }, - "is_archived": False, - "job_type": "Pipeline", - "jobs": { - "node1": { - "_source": "YAML.COMPONENT", - "computeId": None, - "display_name": None, - "distribution": None, - "environment_variables": {}, - "inputs": { - "component_in_number": {"job_input_type": "literal", "value": "${{parent.inputs.number}}"}, - "component_in_path": {"job_input_type": "literal", "value": "${{parent.inputs.path}}"}, - }, - "limits": None, - "name": "node1", - "outputs": { - "component_out_path": {"type": "literal", "value": "${{parent.outputs.pipeline_output1}}"} - }, - "resources": None, - "tags": {}, - "type": "command", - }, - "node2": { - "_source": "CLASS", - "computeId": None, - "display_name": None, - "distribution": {"distribution_type": "PyTorch", "process_count_per_instance": 2}, - 
"environment_variables": {}, - "inputs": { - "component_in_number": {"job_input_type": "literal", "value": "${{parent.inputs.number}}"}, - "component_in_path": { - "job_input_type": "literal", - "value": "${{parent.jobs.node1.outputs.component_out_path}}", - }, - }, - "limits": None, - "name": "node2", - "outputs": { - "component_out_path": {"type": "literal", "value": "${{parent.outputs.pipeline_output2}}"} - }, - "resources": {"instance_count": 2, "properties": {}}, - "tags": {}, - "type": "command", - }, - "node3": { - "_source": "BUILDER", - "computeId": None, - "display_name": "my-evaluate-job", - "distribution": {"distribution_type": "PyTorch", "process_count_per_instance": 2}, - "environment_variables": {"key": "val"}, - "inputs": { - "component_in_number": {"job_input_type": "literal", "value": "${{parent.inputs.number}}"}, - "component_in_path": { - "job_input_type": "literal", - "value": "${{parent.jobs.node2.outputs.component_out_path}}", - }, - }, - "limits": None, - "name": "node3", - "outputs": { - "component_out_path": {"type": "literal", "value": "${{parent.outputs.pipeline_output3}}"} - }, - "resources": {"instance_count": 2, "properties": {}}, - "tags": {}, - "type": "command", - }, - }, - "outputs": { - "pipeline_output1": {"job_output_type": "uri_folder"}, - "pipeline_output2": {"job_output_type": "uri_folder"}, - "pipeline_output3": {"job_output_type": "uri_folder"}, - }, - "properties": {}, - "settings": {}, - "tags": {}, - } - } - - def test_pipeline_with_spark_function(self): - # component func - yaml_file = "./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/sample_component.yml" - component_func = load_component(yaml_file) - - environment = "AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:5" - iris_data = Input( - path="./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/dataset/shakespeare.txt", - type=AssetTypes.URI_FILE, - mode=InputOutputModes.DIRECT, - ) - sample_rate = 0.01 - synapse_compute_name = "rezas-synapse-10" - inputs = { - "input1": iris_data, - "sample_rate": sample_rate, - } - outputs = {"output1": Output(type="uri_folder", mode=InputOutputModes.DIRECT)} - - spark_job = SparkJob( - code="./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/basic_src", - entry={"file": "sampleword.py"}, - driver_cores=2, - driver_memory="1g", - executor_cores=1, - executor_memory="1g", - executor_instances=1, - environment=environment, - inputs=inputs, - outputs=outputs, - args="--input1 ${{inputs.input1}} --output2 ${{outputs.output1}} --my_sample_rate ${{inputs.sample_rate}}", - compute=synapse_compute_name, - ) - spark_job_func = to_component(job=spark_job) - - # Spark from spark() function - spark_function = spark( - code="./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/basic_src", - entry={"file": "sampleword.py"}, - driver_cores=2, - driver_memory="1g", - executor_cores=1, - executor_memory="1g", - executor_instances=1, - environment=environment, - inputs=inputs, - outputs=outputs, - args="--input1 ${{inputs.input1}} --output2 ${{outputs.output1}} --my_sample_rate ${{inputs.sample_rate}}", - compute=synapse_compute_name, - # For HOBO spark, provide 'resources' - # resources={"instance_type": "Standard_E8S_V3", "runtime_version": "3.1.0"} - ) - - @dsl.pipeline(experiment_name="test_pipeline_with_spark_function") - def pipeline(iris_data, sample_rate): - node1 = component_func(input1=iris_data, sample_rate=sample_rate) - node1.compute = synapse_compute_name - node2 = spark_job_func(input1=node1.outputs.output1, sample_rate=sample_rate) - 
node2.compute = synapse_compute_name - node3 = spark_function(input1=node2.outputs.output1, sample_rate=sample_rate) - return { - "pipeline_output1": node1.outputs.output1, - "pipeline_output2": node2.outputs.output1, - "pipeline_output3": node3.outputs.output1, - } - - omit_fields = [ - "properties.jobs.*.componentId", - "properties.jobs.*.code", - "properties.jobs.*.properties", - "properties.settings._source", - ] - - pipeline1 = pipeline(iris_data, sample_rate) - pipeline_job1 = pipeline1._to_rest_object().as_dict() - pipeline_job1 = omit_with_wildcard(pipeline_job1, *omit_fields) - assert pipeline_job1 == { - "properties": { - "properties": {}, - "tags": {}, - "display_name": "pipeline", - "experiment_name": "test_pipeline_with_spark_function", - "is_archived": False, - "job_type": "Pipeline", - "inputs": { - "iris_data": { - "mode": "Direct", - "uri": "./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/dataset/shakespeare.txt", - "job_input_type": "uri_file", - }, - "sample_rate": {"job_input_type": "literal", "value": "0.01"}, - }, - "jobs": { - "node1": { - "type": "spark", - "resources": None, - "entry": {"file": "sampleword.py", "spark_job_entry_type": "SparkJobPythonEntry"}, - "py_files": None, - "jars": None, - "files": None, - "archives": None, - "identity": {"identity_type": "Managed"}, - "conf": { - "spark.driver.cores": 1, - "spark.driver.memory": "2g", - "spark.dynamicAllocation.enabled": True, - "spark.dynamicAllocation.maxExecutors": 4, - "spark.dynamicAllocation.minExecutors": 1, - "spark.executor.cores": 2, - "spark.executor.instances": 1, - "spark.executor.memory": "2g", - }, - "args": "--input1 ${{inputs.input1}} --output2 ${{outputs.output1}} --my_sample_rate ${{inputs.sample_rate}}", - "name": "node1", - "display_name": None, - "tags": {}, - "computeId": "rezas-synapse-10", - "inputs": { - "input1": {"job_input_type": "literal", "value": "${{parent.inputs.iris_data}}"}, - "sample_rate": {"job_input_type": "literal", "value": "${{parent.inputs.sample_rate}}"}, - }, - "outputs": {"output1": {"type": "literal", "value": "${{parent.outputs.pipeline_output1}}"}}, - "_source": "YAML.COMPONENT", - }, - "node2": { - "type": "spark", - "resources": None, - "entry": {"file": "sampleword.py", "spark_job_entry_type": "SparkJobPythonEntry"}, - "py_files": None, - "jars": None, - "files": None, - "archives": None, - "identity": {"identity_type": "Managed"}, - "conf": { - "spark.driver.cores": 2, - "spark.driver.memory": "1g", - "spark.executor.cores": 1, - "spark.executor.memory": "1g", - "spark.executor.instances": 1, - }, - "args": "--input1 ${{inputs.input1}} --output2 ${{outputs.output1}} --my_sample_rate ${{inputs.sample_rate}}", - "name": "node2", - "display_name": None, - "tags": {}, - "computeId": "rezas-synapse-10", - "inputs": { - "input1": {"job_input_type": "literal", "value": "${{parent.jobs.node1.outputs.output1}}"}, - "sample_rate": {"job_input_type": "literal", "value": "${{parent.inputs.sample_rate}}"}, - }, - "outputs": {"output1": {"value": "${{parent.outputs.pipeline_output2}}", "type": "literal"}}, - "_source": "CLASS", - }, - "node3": { - "type": "spark", - "resources": None, - "entry": {"file": "sampleword.py", "spark_job_entry_type": "SparkJobPythonEntry"}, - "py_files": None, - "jars": None, - "files": None, - "archives": None, - "identity": {"identity_type": "Managed"}, - "conf": { - "spark.driver.cores": 2, - "spark.driver.memory": "1g", - "spark.executor.cores": 1, - "spark.executor.memory": "1g", - "spark.executor.instances": 1, - }, - "args": 
"--input1 ${{inputs.input1}} --output2 ${{outputs.output1}} --my_sample_rate ${{inputs.sample_rate}}", - "name": "node3", - "display_name": None, - "tags": {}, - "computeId": "rezas-synapse-10", - "inputs": { - "input1": {"job_input_type": "literal", "value": "${{parent.jobs.node2.outputs.output1}}"}, - "sample_rate": {"job_input_type": "literal", "value": "${{parent.inputs.sample_rate}}"}, - }, - "outputs": {"output1": {"type": "literal", "value": "${{parent.outputs.pipeline_output3}}"}}, - "_source": "BUILDER", - }, - }, - "outputs": { - "pipeline_output1": {"job_output_type": "uri_folder"}, - "pipeline_output2": {"job_output_type": "uri_folder"}, - "pipeline_output3": {"job_output_type": "uri_folder"}, - }, - "settings": {}, - } - } - - def test_pipeline_with_spark_function_by_setting_conf(self, client): - # component func - yaml_file = "./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/sample_component.yml" - component_func = load_component(yaml_file) - - environment = "AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:5" - iris_data = Input( - path="./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/dataset/shakespeare.txt", - type=AssetTypes.URI_FILE, - mode=InputOutputModes.DIRECT, - ) - sample_rate = 0.01 - synapse_compute_name = "rezas-synapse-10" - inputs = { - "input1": iris_data, - "sample_rate": sample_rate, - } - outputs = {"output1": Output(type="uri_folder", mode=InputOutputModes.DIRECT)} - - spark_job = SparkJob( - code="./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/basic_src", - entry={"file": "sampleword.py"}, - conf={ - "spark.driver.cores": 2, - "spark.driver.memory": "1g", - "spark.executor.cores": 1, - "spark.executor.memory": "1g", - "spark.executor.instances": 1, - }, - environment=environment, - inputs=inputs, - outputs=outputs, - args="--input1 ${{inputs.input1}} --output2 ${{outputs.output1}} --my_sample_rate ${{inputs.sample_rate}}", - compute=synapse_compute_name, - ) - spark_job_func = to_component(job=spark_job) - - # Spark from spark() function - spark_function = spark( - code="./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/basic_src", - entry={"file": "sampleword.py"}, - conf={ - "spark.driver.cores": 2, - "spark.driver.memory": "1g", - "spark.executor.cores": 1, - "spark.executor.memory": "1g", - "spark.executor.instances": 1, - }, - environment=environment, - inputs=inputs, - outputs=outputs, - args="--input1 ${{inputs.input1}} --output2 ${{outputs.output1}} --my_sample_rate ${{inputs.sample_rate}}", - compute=synapse_compute_name, - # For HOBO spark, provide 'resources' - # resources={"instance_type": "Standard_E8S_V3", "runtime_version": "3.1.0"} - ) - - @dsl.pipeline(experiment_name="test_pipeline_with_spark_function") - def pipeline(iris_data, sample_rate): - node1 = component_func(input1=iris_data, sample_rate=sample_rate) - node1.compute = synapse_compute_name - node2 = spark_job_func(input1=node1.outputs.output1, sample_rate=sample_rate) - node2.compute = synapse_compute_name - node3 = spark_function(input1=node2.outputs.output1, sample_rate=sample_rate) - return { - "pipeline_output1": node1.outputs.output1, - "pipeline_output2": node2.outputs.output1, - "pipeline_output3": node3.outputs.output1, - } - - omit_fields = [ - "properties.jobs.*.componentId", - "properties.jobs.*.code", - "properties.jobs.*.properties", - "properties.settings._source", - ] - - pipeline1 = pipeline(iris_data, sample_rate) - pipeline_job1 = pipeline1._to_rest_object().as_dict() - pipeline_job1 = omit_with_wildcard(pipeline_job1, *omit_fields) - 
assert pipeline_job1 == { - "properties": { - "properties": {}, - "tags": {}, - "display_name": "pipeline", - "experiment_name": "test_pipeline_with_spark_function", - "is_archived": False, - "job_type": "Pipeline", - "inputs": { - "iris_data": { - "mode": "Direct", - "uri": "./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/dataset/shakespeare.txt", - "job_input_type": "uri_file", - }, - "sample_rate": {"job_input_type": "literal", "value": "0.01"}, - }, - "jobs": { - "node1": { - "type": "spark", - "resources": None, - "entry": {"file": "sampleword.py", "spark_job_entry_type": "SparkJobPythonEntry"}, - "py_files": None, - "jars": None, - "files": None, - "archives": None, - "identity": {"identity_type": "Managed"}, - "conf": { - "spark.driver.cores": 1, - "spark.driver.memory": "2g", - "spark.dynamicAllocation.enabled": True, - "spark.dynamicAllocation.maxExecutors": 4, - "spark.dynamicAllocation.minExecutors": 1, - "spark.executor.cores": 2, - "spark.executor.instances": 1, - "spark.executor.memory": "2g", - }, - "args": "--input1 ${{inputs.input1}} --output2 ${{outputs.output1}} --my_sample_rate ${{inputs.sample_rate}}", - "name": "node1", - "display_name": None, - "tags": {}, - "computeId": "rezas-synapse-10", - "inputs": { - "input1": {"job_input_type": "literal", "value": "${{parent.inputs.iris_data}}"}, - "sample_rate": {"job_input_type": "literal", "value": "${{parent.inputs.sample_rate}}"}, - }, - "outputs": {"output1": {"type": "literal", "value": "${{parent.outputs.pipeline_output1}}"}}, - "_source": "YAML.COMPONENT", - }, - "node2": { - "type": "spark", - "resources": None, - "entry": {"file": "sampleword.py", "spark_job_entry_type": "SparkJobPythonEntry"}, - "py_files": None, - "jars": None, - "files": None, - "archives": None, - "identity": {"identity_type": "Managed"}, - "conf": { - "spark.driver.cores": 2, - "spark.driver.memory": "1g", - "spark.executor.cores": 1, - "spark.executor.memory": "1g", - "spark.executor.instances": 1, - }, - "args": "--input1 ${{inputs.input1}} --output2 ${{outputs.output1}} --my_sample_rate ${{inputs.sample_rate}}", - "name": "node2", - "display_name": None, - "tags": {}, - "computeId": "rezas-synapse-10", - "inputs": { - "input1": {"job_input_type": "literal", "value": "${{parent.jobs.node1.outputs.output1}}"}, - "sample_rate": {"job_input_type": "literal", "value": "${{parent.inputs.sample_rate}}"}, - }, - "outputs": {"output1": {"value": "${{parent.outputs.pipeline_output2}}", "type": "literal"}}, - "_source": "CLASS", - }, - "node3": { - "type": "spark", - "resources": None, - "entry": {"file": "sampleword.py", "spark_job_entry_type": "SparkJobPythonEntry"}, - "py_files": None, - "jars": None, - "files": None, - "archives": None, - "identity": {"identity_type": "Managed"}, - "conf": { - "spark.driver.cores": 2, - "spark.driver.memory": "1g", - "spark.executor.cores": 1, - "spark.executor.memory": "1g", - "spark.executor.instances": 1, - }, - "args": "--input1 ${{inputs.input1}} --output2 ${{outputs.output1}} --my_sample_rate ${{inputs.sample_rate}}", - "name": "node3", - "display_name": None, - "tags": {}, - "computeId": "rezas-synapse-10", - "inputs": { - "input1": {"job_input_type": "literal", "value": "${{parent.jobs.node2.outputs.output1}}"}, - "sample_rate": {"job_input_type": "literal", "value": "${{parent.inputs.sample_rate}}"}, - }, - "outputs": {"output1": {"type": "literal", "value": "${{parent.outputs.pipeline_output3}}"}}, - "_source": "BUILDER", - }, - }, - "outputs": { - "pipeline_output1": {"job_output_type": 
"uri_folder"}, - "pipeline_output2": {"job_output_type": "uri_folder"}, - "pipeline_output3": {"job_output_type": "uri_folder"}, - }, - "settings": {}, - } - } - - def test_pipeline_with_spark_job_dynamic_allocation_disabled(self, client): - environment = "AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:5" - iris_data = Input( - path="https://azuremlexamples.blob.core.windows.net/datasets/iris.csv", - type=AssetTypes.URI_FILE, - mode=InputOutputModes.DIRECT, - ) - synapse_compute_name = "rezas-synapse-10" - inputs = { - "file_input1": iris_data, - "file_input2": iris_data, - } - outputs = {"output": Output(type="uri_folder", mode=InputOutputModes.DIRECT)} - - spark_job = SparkJob( - code="./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/basic_src", - entry={"file": "sampleword.py"}, - conf={ - "spark.driver.cores": 2, - "spark.driver.memory": "1g", - "spark.executor.cores": 1, - "spark.executor.memory": "1g", - "spark.executor.instances": 1, - "spark.dynamicAllocation.minExecutors": 1, - "spark.dynamicAllocation.maxExecutors": 2, - }, - environment=environment, - inputs=inputs, - outputs=outputs, - args="--input1 ${{inputs.input1}} --output2 ${{outputs.output1}} --my_sample_rate ${{inputs.sample_rate}}", - compute=synapse_compute_name, - ) - - spark_job_func = to_component(job=spark_job) - - @dsl.pipeline(experiment_name="test_pipeline_with_spark_function") - def pipeline(iris_data): - node = spark_job_func(file_input1=iris_data, file_input2=iris_data) - node.compute = synapse_compute_name - return { - "pipeline_output": node.outputs.output, - } - - pipeline1 = pipeline(iris_data) - with pytest.raises(ValidationException) as ve: - pipeline1._to_rest_object().as_dict() - assert ve.message == "Should not specify min or max executors when dynamic allocation is disabled." 
- - def test_pipeline_with_spark_job(self): - environment = "AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:5" - iris_data = Input( - path="./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/dataset/shakespeare.txt", - type=AssetTypes.URI_FILE, - mode=InputOutputModes.DIRECT, - ) - sample_rate = 0.01 - synapse_compute_name = "rezas-synapse-10" - inputs = { - "input1": iris_data, - "sample_rate": sample_rate, - } - outputs = {"output1": Output(type="uri_folder", mode=InputOutputModes.DIRECT)} - - spark_job = SparkJob( - code="./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/basic_src", - entry={"file": "sampleword.py"}, - conf={ - "spark.driver.cores": 2, - "spark.driver.memory": "1g", - "spark.executor.cores": 1, - "spark.executor.memory": "1g", - "spark.executor.instances": 1, - }, - environment=environment, - inputs=inputs, - outputs=outputs, - args="--input1 ${{inputs.input1}} --output2 ${{outputs.output1}} --my_sample_rate ${{inputs.sample_rate}}", - compute=synapse_compute_name, - ) - - spark_job_func = to_component(job=spark_job) - - @dsl.pipeline(experiment_name="test_pipeline_with_spark_job") - def pipeline(iris_data, sample_rate): - spark_node = spark_job_func(input1=iris_data, sample_rate=sample_rate) - spark_node.compute = synapse_compute_name - return { - "pipeline_output1": spark_node.outputs.output1, - } - - pipeline1 = pipeline(iris_data, sample_rate) - pipeline_rest_obj = pipeline1._to_rest_object() - pipeline_job1 = pipeline_rest_obj.as_dict() - - pipeline_regenerated_from_rest = PipelineJob._load_from_rest(pipeline_rest_obj) - omit_field = [ - "outputs", # TODO: figure out why outputs can't be regenerated correctly - ] - - pipeline1_dict = pipeline1._to_dict() - # Change float to string to make dict from local and rest compatible - pipeline1_dict["inputs"]["sample_rate"] = str(pipeline1_dict["inputs"]["sample_rate"]) - assert pydash.omit(pipeline1_dict, *omit_field) == pydash.omit( - pipeline_regenerated_from_rest._to_dict(), *omit_field - ) - omit_fields = [ - "properties.jobs.spark_node.componentId", - "properties.jobs.spark_node.properties", - ] - pipeline_job1 = pydash.omit(pipeline_job1, *omit_fields) - assert pipeline_job1 == { - "properties": { - "properties": {}, - "tags": {}, - "display_name": "pipeline", - "experiment_name": "test_pipeline_with_spark_job", - "is_archived": False, - "job_type": "Pipeline", - "inputs": { - "iris_data": { - "mode": "Direct", - "uri": "./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/dataset/shakespeare.txt", - "job_input_type": "uri_file", - }, - "sample_rate": {"job_input_type": "literal", "value": "0.01"}, - }, - "settings": {"_source": "DSL"}, - "jobs": { - "spark_node": { - "_source": "CLASS", - "type": "spark", - "resources": None, - "entry": {"file": "sampleword.py", "spark_job_entry_type": "SparkJobPythonEntry"}, - "py_files": None, - "jars": None, - "files": None, - "archives": None, - "identity": {"identity_type": "Managed"}, - "conf": { - "spark.driver.cores": 2, - "spark.driver.memory": "1g", - "spark.executor.cores": 1, - "spark.executor.memory": "1g", - "spark.executor.instances": 1, - }, - "args": "--input1 ${{inputs.input1}} --output2 ${{outputs.output1}} --my_sample_rate ${{inputs.sample_rate}}", - "name": "spark_node", - "display_name": None, - "tags": {}, - "computeId": "rezas-synapse-10", - "inputs": { - "input1": {"job_input_type": "literal", "value": "${{parent.inputs.iris_data}}"}, - "sample_rate": {"job_input_type": "literal", "value": "${{parent.inputs.sample_rate}}"}, - }, - "outputs": 
{"output1": {"type": "literal", "value": "${{parent.outputs.pipeline_output1}}"}}, - }, - }, - "outputs": {"pipeline_output1": {"job_output_type": "uri_folder"}}, - } - } - - def test_pipeline_with_parallel_job(self): - # command job with dict distribution - environment = "AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:5" - inputs = { - "job_data_path": Input(type=AssetTypes.MLTABLE, path="./tests/test_configs/data", mode="eval_mount"), - } - outputs = {"job_output_path": Output(type=AssetTypes.URI_FOLDER, mode="rw_mount")} - expected_resources = {"instance_count": 2} - expected_environment_variables = {"key": "val"} - - task = ParallelTask( - type="run_function", - code="./tests/test_configs/dsl_pipeline/parallel_component_with_file_input/src/", - entry_script="score.py", - program_arguments="--job_output_path ${{outputs.job_output_path}}", - environment=environment, - ) - logging_level = "DEBUG" - max_concurrency_per_instance = 1 - error_threshold = 1 - mini_batch_error_threshold = 1 - mini_batch_size = "5" - input_data = "${{inputs.job_data_path}}" - - parallel_job = ParallelJob( - display_name="my-evaluate-job", - resources=expected_resources, - mini_batch_size=mini_batch_size, - task=task, - input_data=input_data, - logging_level=logging_level, - max_concurrency_per_instance=max_concurrency_per_instance, - error_threshold=error_threshold, - mini_batch_error_threshold=mini_batch_error_threshold, - inputs=inputs, - outputs=outputs, - environment_variables=expected_environment_variables, - ) - - parallel_job_func = to_component(job=parallel_job) - data = Input(type=AssetTypes.MLTABLE, path="/a/path/on/ds", mode="eval_mount") - - @dsl.pipeline(experiment_name="test_pipeline_with_parallel_function") - def pipeline(job_data_path): - parallel_node = parallel_job_func(job_data_path=job_data_path) - return { - "pipeline_job_out": parallel_node.outputs.job_output_path, - } - - omit_fields = [ - "name", - "properties.jobs.parallel_node.componentId", - "properties.jobs.parallel_node.properties", - ] - - pipeline1 = pipeline(data) - pipeline_rest_obj = pipeline1._to_rest_object() - pipeline_job1 = pipeline_rest_obj.as_dict() - pipeline_regenerated_from_rest = PipelineJob._load_from_rest(pipeline_rest_obj) - omit_field = [ - "jobs.parallel_node.task", - "jobs.*.properties", - "outputs", # TODO: figure out why outputs can't be regenerated correctly - ] - - assert pydash.omit(pipeline1._to_dict(), *omit_field) == pydash.omit( - pipeline_regenerated_from_rest._to_dict(), *omit_field - ) - - pipeline_job1 = pydash.omit(pipeline_job1, *omit_fields) - assert pipeline_job1 == { - "properties": { - "display_name": "pipeline", - "experiment_name": "test_pipeline_with_parallel_function", - "inputs": { - "job_data_path": {"job_input_type": "mltable", "mode": "EvalMount", "uri": "/a/path/on/ds"}, - }, - "is_archived": False, - "job_type": "Pipeline", - "jobs": { - "parallel_node": { - "_source": "CLASS", - "type": "parallel", - "input_data": "${{inputs.job_data_path}}", - "computeId": None, - "display_name": None, - "inputs": { - "job_data_path": {"job_input_type": "literal", "value": "${{parent.inputs.job_data_path}}"}, - }, - "name": "parallel_node", - "outputs": { - "job_output_path": {"type": "literal", "value": "${{parent.outputs.pipeline_job_out}}"} - }, - "resources": {"instance_count": 2, "properties": {}}, - "mini_batch_size": 5, - "retry_settings": None, - "logging_level": None, - "max_concurrency_per_instance": 1, - "error_threshold": None, - "mini_batch_error_threshold": 1, - "tags": {}, - 
"environment_variables": {}, - "task": { - "program_arguments": "--job_output_path " "${{outputs.job_output_path}}", - "code": "./tests/test_configs/dsl_pipeline/parallel_component_with_file_input/src/", - "entry_script": "score.py", - "environment": "azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:5", - "type": "run_function", - }, - }, - }, - "outputs": {"pipeline_job_out": {"job_output_type": "uri_folder"}}, - "properties": {}, - "settings": {"_source": "DSL"}, - "tags": {}, - } - } - - def test_pipeline_with_parallel_function_inside(self): - environment = "AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:5" - expected_environment_variables = {"key": "val"} - expected_resources = {"instance_count": 2} - inputs = { - "job_data_path": Input(type=AssetTypes.MLTABLE, path="./tests/test_configs/data", mode="eval_mount"), - } - input_data = "${{inputs.job_data_path}}" - outputs = {"job_output_path": Output(type=AssetTypes.URI_FOLDER, mode="rw_mount")} - task = RunFunction( - code="./tests/test_configs/dsl_pipeline/parallel_component_with_file_input/src/", - entry_script="score.py", - program_arguments="--job_output_path ${{outputs.job_output_path}}", - environment=environment, - ) - logging_level = "DEBUG" - max_concurrency_per_instance = 1 - error_threshold = 1 - mini_batch_error_threshold = 1 - mini_batch_size = "5" - - # parallel job - @dsl.pipeline(experiment_name="test_pipeline_with_parallel_function_inside") - def pipeline(path): - # Parallel from parallel_run_function() - parallel_function = parallel_run_function( - display_name="my-evaluate-job", - inputs=inputs, - outputs=outputs, - mini_batch_size=mini_batch_size, - task=task, - logging_level=logging_level, - max_concurrency_per_instance=max_concurrency_per_instance, - error_threshold=error_threshold, - mini_batch_error_threshold=mini_batch_error_threshold, - resources=expected_resources, - input_data=input_data, - environment_variables=expected_environment_variables, - ) - node1 = parallel_function(job_data_path=path) - node2 = parallel_function(job_data_path=Input(type=AssetTypes.MLTABLE, path="new_path", mode="eval_mount")) - - return { - "pipeline_output1": node1.outputs.job_output_path, - "pipeline_output2": node2.outputs.job_output_path, - } - - omit_fields = [ - "name", - "properties.jobs.node1.componentId", - "properties.jobs.node2.componentId", - "properties.jobs.node1.properties", - "properties.jobs.node2.properties", - ] - - data = Input(type=AssetTypes.MLTABLE, path="/a/path/on/ds", mode="eval_mount") - pipeline1 = pipeline(data) - pipeline_job1 = pipeline1._to_rest_object().as_dict() - pipeline_job1 = pydash.omit(pipeline_job1, omit_fields) - assert pipeline_job1 == { - "properties": { - "display_name": "pipeline", - "experiment_name": "test_pipeline_with_parallel_function_inside", - "inputs": { - "path": {"job_input_type": "mltable", "mode": "EvalMount", "uri": "/a/path/on/ds"}, - }, - "is_archived": False, - "job_type": "Pipeline", - "jobs": { - "node1": { - "_source": "BUILDER", - "type": "parallel", - "input_data": "${{inputs.job_data_path}}", - "computeId": None, - "display_name": "my-evaluate-job", - "inputs": { - "job_data_path": {"job_input_type": "literal", "value": "${{parent.inputs.path}}"}, - }, - "name": "node1", - "outputs": { - "job_output_path": {"type": "literal", "value": "${{parent.outputs.pipeline_output1}}"} - }, - "resources": {"instance_count": 2, "properties": {}}, - "mini_batch_size": 5, - "task": { - "type": "run_function", - "code": 
"./tests/test_configs/dsl_pipeline/parallel_component_with_file_input/src/", - "entry_script": "score.py", - "program_arguments": "--job_output_path ${{outputs.job_output_path}}", - "environment": "azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:5", - }, - "retry_settings": None, - "logging_level": "DEBUG", - "max_concurrency_per_instance": 1, - "error_threshold": 1, - "mini_batch_error_threshold": 1, - "tags": {}, - "environment_variables": {"key": "val"}, - }, - "node2": { - "_source": "BUILDER", - "type": "parallel", - "input_data": "${{inputs.job_data_path}}", - "computeId": None, - "display_name": "my-evaluate-job", - "inputs": { - "job_data_path": { - "job_input_type": "mltable", - "mode": "EvalMount", - "uri": "new_path", - }, - }, - "name": "node2", - "outputs": { - "job_output_path": {"type": "literal", "value": "${{parent.outputs.pipeline_output2}}"} - }, - "resources": {"instance_count": 2, "properties": {}}, - "mini_batch_size": 5, - "task": { - "type": "run_function", - "code": "./tests/test_configs/dsl_pipeline/parallel_component_with_file_input/src/", - "entry_script": "score.py", - "program_arguments": "--job_output_path ${{outputs.job_output_path}}", - "environment": "azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:5", - }, - "retry_settings": None, - "logging_level": "DEBUG", - "max_concurrency_per_instance": 1, - "error_threshold": 1, - "mini_batch_error_threshold": 1, - "tags": {}, - "environment_variables": {"key": "val"}, - }, - }, - "outputs": { - "pipeline_output1": {"job_output_type": "uri_folder"}, - "pipeline_output2": {"job_output_type": "uri_folder"}, - }, - "properties": {}, - "settings": {"_source": "DSL"}, - "tags": {}, - } - } - - def test_pipeline_with_command_function_inside(self): - environment = "AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:5" - expected_resources = {"instance_count": 2} - expected_environment_variables = {"key": "val"} - inputs = { - "component_in_path": Input(type="uri_folder", path="https://my-blob/path/to/data", mode="ro_mount"), - "component_in_number": 0.01, - } - outputs = {"component_out_path": Output(type="mlflow_model", mode="rw_mount")} - - @dsl.pipeline(experiment_name="test_pipeline_with_command_function_inside") - def pipeline(number, path): - # Command from command() function - command_function = command( - display_name="my-evaluate-job", - environment=environment, - command='echo "hello world"', - distribution={"type": "Pytorch", "process_count_per_instance": 2}, - resources=expected_resources, - environment_variables=expected_environment_variables, - inputs=inputs, - outputs=outputs, - ) - node1 = command_function(component_in_number=number, component_in_path=path) - node2 = command_function(component_in_number=1, component_in_path=Input(path="new_path")) - - return { - "pipeline_output1": node1.outputs.component_out_path, - "pipeline_output2": node2.outputs.component_out_path, - } - - omit_fields = [ - "name", - "properties.jobs.node1.componentId", - "properties.jobs.node2.componentId", - "properties.jobs.node1.properties", - "properties.jobs.node2.properties", - ] - - data = Input(type=AssetTypes.URI_FOLDER, path="/a/path/on/ds") - pipeline1 = pipeline(10, data) - pipeline_job1 = pipeline1._to_rest_object().as_dict() - pipeline_job1 = pydash.omit(pipeline_job1, omit_fields) - assert pipeline_job1 == { - "properties": { - "display_name": "pipeline", - "experiment_name": "test_pipeline_with_command_function_inside", - "inputs": { - "number": {"job_input_type": "literal", "value": "10"}, - "path": {"job_input_type": 
"uri_folder", "uri": "/a/path/on/ds"}, - }, - "is_archived": False, - "job_type": "Pipeline", - "jobs": { - "node1": { - "type": "command", - "_source": "BUILDER", - "computeId": None, - "display_name": "my-evaluate-job", - "distribution": {"distribution_type": "PyTorch", "process_count_per_instance": 2}, - "environment_variables": {"key": "val"}, - "inputs": { - "component_in_number": {"job_input_type": "literal", "value": "${{parent.inputs.number}}"}, - "component_in_path": {"job_input_type": "literal", "value": "${{parent.inputs.path}}"}, - }, - "limits": None, - "name": "node1", - "outputs": { - "component_out_path": {"type": "literal", "value": "${{parent.outputs.pipeline_output1}}"} - }, - "resources": {"instance_count": 2, "properties": {}}, - "tags": {}, - }, - "node2": { - "type": "command", - "_source": "BUILDER", - "computeId": None, - "display_name": "my-evaluate-job", - "distribution": {"distribution_type": "PyTorch", "process_count_per_instance": 2}, - "environment_variables": {"key": "val"}, - "inputs": { - "component_in_number": {"job_input_type": "literal", "value": "1"}, - "component_in_path": { - "job_input_type": "uri_folder", - "uri": "new_path", - }, - }, - "limits": None, - "name": "node2", - "outputs": { - "component_out_path": {"type": "literal", "value": "${{parent.outputs.pipeline_output2}}"} - }, - "resources": {"instance_count": 2, "properties": {}}, - "tags": {}, - }, - }, - "outputs": { - "pipeline_output1": {"job_output_type": "uri_folder"}, - "pipeline_output2": {"job_output_type": "uri_folder"}, + # SDK + component_from_sdk = Command( + name="hello_world_component_1", + component=component_entity, + inputs={ + "component_in_number": "${{parent.inputs.job_in_number}}", + "component_in_path": "${{parent.inputs.job_in_path}}", }, - "properties": {}, - "settings": {"_source": "DSL"}, - "tags": {}, - } + outputs={"component_out_path": Output(mode="upload")}, + compute="cpu-cluster", + ) + + # component load from different sources are same type + assert isinstance(component_from_dsl, Command) + assert isinstance(component_from_sdk, Command) + assert isinstance(component_from_rest, Command) + assert isinstance(component_from_yaml, Command) + + # only Mldesigner component will be added to the stack + assert mocker.call_count == 1 + + # Node with component entity(DSL, SDK, YAML) inputs will have meta + assert component_from_dsl.inputs.component_in_number._meta is not None + assert component_from_sdk.inputs.component_in_number._meta is not None + assert component_from_yaml.inputs.component_in_number._meta is not None + + # Node without component entity(REST) component inputs won't + assert component_from_rest.inputs.component_in_number._meta is None + + # all components will have same format when passing to backend + expected_component = { + "_source": "YAML.COMPONENT", + "computeId": "cpu-cluster", + "display_name": None, + "distribution": None, + "environment_variables": {}, + "inputs": { + "component_in_number": {"job_input_type": "literal", "value": "${{parent.inputs.job_in_number}}"}, + "component_in_path": {"job_input_type": "literal", "value": "${{parent.inputs.job_in_path}}"}, + }, + "limits": None, + "name": "hello_world_component_1", + "outputs": {"component_out_path": {"job_output_type": "uri_folder", "mode": "Upload"}}, + "resources": None, + "tags": {}, + "type": "command", } + omit_fields = ["componentId", "properties"] + assert pydash.omit(component_from_dsl._to_rest_object(), *omit_fields) == expected_component + assert 
pydash.omit(component_from_sdk._to_rest_object(), *omit_fields) == expected_component + expected_component.update({"_source": "REMOTE.WORKSPACE.COMPONENT"}) + assert pydash.omit(component_from_rest._to_rest_object(), *omit_fields) == expected_component + expected_component.update({"_source": "YAML.JOB"}) + assert pydash.omit(component_from_yaml._to_rest_object(), *omit_fields) == expected_component def assert_component_reuse(self, pipeline, expected_component_num, mock_machinelearning_client: MLClient): def mock_arm_id(asset, azureml_type: str, *args, **kwargs): @@ -2438,389 +1053,6 @@ def pipeline(number, path): in std_out.getvalue() ) - def test_multi_parallel_components_with_file_input_pipeline_output(self) -> None: - components_dir = tests_root_dir / "test_configs/dsl_pipeline/parallel_component_with_file_input" - batch_inference1 = load_component(source=str(components_dir / "score.yml")) - batch_inference2 = load_component(source=str(components_dir / "score.yml")) - convert_data = load_component(source=str(components_dir / "convert_data.yml")) - - # Construct pipeline - @dsl.pipeline(default_compute="cpu-cluster", experiment_name="sdk-cli-v2") - def parallel_in_pipeline(job_data_path): - batch_inference_node1 = batch_inference1(job_data_path=job_data_path) - convert_data_node = convert_data(input_data=batch_inference_node1.outputs.job_output_path) - convert_data_node.outputs.file_output_data.type = AssetTypes.MLTABLE - batch_inference_node2 = batch_inference2(job_data_path=convert_data_node.outputs.file_output_data) - batch_inference_node2.inputs.job_data_path.mode = InputOutputModes.EVAL_MOUNT - - return {"job_out_data": batch_inference_node2.outputs.job_output_path} - - pipeline = parallel_in_pipeline( - job_data_path=Input( - type=AssetTypes.MLTABLE, - path="./tests/test_configs/dataset/mnist-data/", - mode=InputOutputModes.EVAL_MOUNT, - ), - ) - pipeline.outputs.job_out_data.mode = "upload" - omit_fields = [ - "jobs.batch_inference_node1.componentId", - "jobs.batch_inference_node1.properties", - "jobs.convert_data_node.componentId", - "jobs.convert_data_node.properties", - "jobs.batch_inference_node2.componentId", - "jobs.batch_inference_node2.properties", - ] - actual_job = pydash.omit(pipeline._to_rest_object().properties.as_dict(), *omit_fields) - assert actual_job == { - "properties": {}, - "tags": {}, - "display_name": "parallel_in_pipeline", - "experiment_name": "sdk-cli-v2", - "is_archived": False, - "job_type": "Pipeline", - "inputs": { - "job_data_path": { - "mode": "EvalMount", - "uri": "./tests/test_configs/dataset/mnist-data/", - "job_input_type": "mltable", - } - }, - "jobs": { - "batch_inference_node1": { - "_source": "YAML.COMPONENT", - "type": "parallel", - "name": "batch_inference_node1", - "display_name": None, - "tags": {}, - "computeId": None, - "inputs": { - "job_data_path": {"job_input_type": "literal", "value": "${{parent.inputs.job_data_path}}"} - }, - "outputs": {}, - "mini_batch_size": 1, - "task": { - "program_arguments": "--job_output_path " "${{outputs.job_output_path}}", - "code": "./src", - "entry_script": "score.py", - "environment": "azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:1", - "type": "run_function", - }, - "input_data": "${{inputs.job_data_path}}", - "retry_settings": None, - "logging_level": None, - "resources": {"instance_count": 2, "properties": {}}, - "max_concurrency_per_instance": 1, - "error_threshold": None, - "mini_batch_error_threshold": 1, - "environment_variables": {}, - }, - "convert_data_node": { - "_source": 
"YAML.COMPONENT", - "computeId": None, - "display_name": None, - "distribution": None, - "environment_variables": {}, - "inputs": { - "input_data": { - "job_input_type": "literal", - "value": "${{parent.jobs.batch_inference_node1.outputs.job_output_path}}", - } - }, - "limits": None, - "name": "convert_data_node", - "outputs": {"file_output_data": {"job_output_type": "mltable"}}, - "resources": None, - "tags": {}, - "type": "command", - }, - "batch_inference_node2": { - "_source": "YAML.COMPONENT", - "type": "parallel", - "name": "batch_inference_node2", - "display_name": None, - "tags": {}, - "computeId": None, - "inputs": { - "job_data_path": { - "job_input_type": "literal", - "value": "${{parent.jobs.convert_data_node.outputs.file_output_data}}", - "mode": "EvalMount", - } - }, - "outputs": {"job_output_path": {"value": "${{parent.outputs.job_out_data}}", "type": "literal"}}, - "mini_batch_size": 1, - "task": { - "program_arguments": "--job_output_path " "${{outputs.job_output_path}}", - "code": "./src", - "entry_script": "score.py", - "environment": "azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:1", - "type": "run_function", - }, - "input_data": "${{inputs.job_data_path}}", - "retry_settings": None, - "logging_level": None, - "resources": {"instance_count": 2, "properties": {}}, - "max_concurrency_per_instance": 1, - "error_threshold": None, - "mini_batch_error_threshold": 1, - "environment_variables": {}, - }, - }, - "outputs": {"job_out_data": {"mode": "Upload", "job_output_type": "uri_folder"}}, - "settings": {"_source": "DSL", "default_compute": "cpu-cluster"}, - } - - def test_automl_node_in_pipeline(self) -> None: - # create ClassificationJob with classification func inside pipeline is also supported - @dsl.pipeline(name="train_with_automl_in_pipeline", default_compute_target="cpu-cluster") - def train_with_automl_in_pipeline( - main_data_input, target_column_name_input: str, max_total_trials_input: int, validation_data_size: float - ): - automl_classif_job = classification( - training_data=main_data_input, - # validation_data_size="${{parent.inputs.validation_data_size}}", - target_column_name=target_column_name_input, - primary_metric="accuracy", - enable_model_explainability=True, - outputs={"best_model": Output(type="mlflow_model")}, - ) - - automl_classif_job.set_limits( - max_trials=max_total_trials_input, - max_concurrent_trials=4, # Matches number of cluster's nodes - enable_early_termination=True, - ) - - automl_classif_job.set_training(enable_onnx_compatible_models=True) - - job_input = Input( - type=AssetTypes.MLTABLE, - path="fake_path", - ) - pipeline1: PipelineJob = train_with_automl_in_pipeline(job_input, "target", 10, 0.2) - - pipeline_dict1 = pipeline1._to_rest_object().as_dict() - pipeline_dict1 = pydash.omit( - pipeline_dict1["properties"], ["jobs.automl_classif_job.display_name", "jobs.automl_classif_job.properties"] - ) - - expected_dict = { - "display_name": "train_with_automl_in_pipeline", - "inputs": { - "main_data_input": {"job_input_type": "mltable", "uri": "fake_path"}, - "max_total_trials_input": {"job_input_type": "literal", "value": "10"}, - "target_column_name_input": {"job_input_type": "literal", "value": "target"}, - "validation_data_size": {"job_input_type": "literal", "value": "0.2"}, - }, - "is_archived": False, - "job_type": "Pipeline", - "jobs": { - "automl_classif_job": { - "limits": { - "enable_early_termination": True, - "max_concurrent_trials": 4, - "max_trials": "${{parent.inputs.max_total_trials_input}}", - }, - "log_verbosity": 
"info", - "name": "automl_classif_job", - "outputs": {"best_model": {"job_output_type": "mlflow_model"}}, - "primary_metric": "accuracy", - "tags": {}, - "target_column_name": "${{parent.inputs.target_column_name_input}}", - "task": "classification", - "training": {"enable_model_explainability": True, "enable_onnx_compatible_models": True}, - "training_data": "${{parent.inputs.main_data_input}}", - "type": "automl", - } - }, - "outputs": {}, - "settings": {"_source": "DSL", "default_compute": "cpu-cluster"}, - "properties": {}, - "tags": {}, - } - assert pipeline_dict1 == expected_dict - - # create ClassificationJob inside pipeline is NOT supported - @dsl.pipeline(name="train_with_automl_in_pipeline", default_compute_target="cpu-cluster") - def train_with_automl_in_pipeline( - main_data_input, target_column_name_input: str, max_total_trials_input: int, validation_data_size: float - ): - automl_classif_job = ClassificationJob( - primary_metric="accuracy", - outputs={"best_model": Output(type="mlflow_model")}, - ) - automl_classif_job.set_data( - training_data=main_data_input, - target_column_name=target_column_name_input, - validation_data_size="${{parent.inputs.validation_data_size}}", - ) - - pipeline = train_with_automl_in_pipeline(job_input, "target", 10, 0.2) - # classification job defined with ClassificationJob won't be collected in pipeline job - assert pipeline.jobs == {} - - def test_automl_node_with_command_node(self): - path = "./tests/test_configs/components/helloworld_component.yml" - component_func1 = load_component(source=path) - - @dsl.pipeline(name="train_with_automl_in_pipeline", force_rerun=False) - def train_with_automl_in_pipeline(component_in_number, component_in_path, target_column_name_input: str): - node1 = component_func1(component_in_number=component_in_number, component_in_path=component_in_path) - - node2 = classification( - training_data=node1.outputs.component_out_path, - # validation_data_size="${{parent.inputs.validation_data_size}}", - target_column_name=target_column_name_input, - primary_metric="accuracy", - enable_model_explainability=True, - outputs=dict(best_model=Output(type="mlflow_model")), - ) - node2.set_limits(max_concurrent_trials=1) - - job_input = Input( - type=AssetTypes.MLTABLE, - path="fake_path", - ) - pipeline1: PipelineJob = train_with_automl_in_pipeline(10, job_input, "target") - pipeline1.compute = "cpu-cluster" - pipeline_dict1 = pipeline1._to_rest_object().as_dict() - pipeline_dict1 = pydash.omit( - pipeline_dict1["properties"], - "jobs.node1.componentId", - "jobs.node2.display_name", - "jobs.node1.properties", - "jobs.node2.properties", - ) - assert pipeline_dict1 == { - "compute_id": "cpu-cluster", - "display_name": "train_with_automl_in_pipeline", - "inputs": { - "component_in_number": {"job_input_type": "literal", "value": "10"}, - "component_in_path": {"job_input_type": "mltable", "uri": "fake_path"}, - "target_column_name_input": {"job_input_type": "literal", "value": "target"}, - }, - "is_archived": False, - "job_type": "Pipeline", - "jobs": { - "node1": { - "type": "command", - "_source": "YAML.COMPONENT", - "computeId": None, - "display_name": None, - "distribution": None, - "environment_variables": {}, - "inputs": { - "component_in_number": { - "job_input_type": "literal", - "value": "${{parent.inputs.component_in_number}}", - }, - "component_in_path": { - "job_input_type": "literal", - "value": "${{parent.inputs.component_in_path}}", - }, - }, - "limits": None, - "name": "node1", - "outputs": {}, - "resources": None, - 
"tags": {}, - }, - "node2": { - "limits": {"max_concurrent_trials": 1}, - "log_verbosity": "info", - "name": "node2", - "outputs": {"best_model": {"job_output_type": "mlflow_model"}}, - "primary_metric": "accuracy", - "tags": {}, - "target_column_name": "${{parent.inputs.target_column_name_input}}", - "task": "classification", - "training": {"enable_model_explainability": True}, - "training_data": "${{parent.jobs.node1.outputs.component_out_path}}", - "type": "automl", - }, - }, - "outputs": {}, - "properties": {}, - "settings": {"force_rerun": False, "_source": "DSL"}, - "tags": {}, - } - - def test_automl_node_with_pipeline_level_output(self): - @dsl.pipeline(name="train_with_automl_in_pipeline") - def train_with_automl_in_pipeline(training_data, target_column_name_input: str): - classification_node = classification( - training_data=training_data, - # validation_data_size="${{parent.inputs.validation_data_size}}", - target_column_name=target_column_name_input, - primary_metric="accuracy", - enable_model_explainability=True, - outputs=dict(best_model=Output(type="mlflow_model")), - ) - return {"pipeline_job_out_best_model": classification_node.outputs.best_model} - - job_input = Input( - type=AssetTypes.MLTABLE, - path="fake_path", - ) - pipeline1: PipelineJob = train_with_automl_in_pipeline(job_input, "target") - pipeline1.compute = "cpu-cluster" - - pipeline_dict1 = pipeline1._to_rest_object().as_dict() - pipeline_dict1 = pydash.omit( - pipeline_dict1["properties"], - ["jobs.classification_node.display_name", "jobs.classification_node.properties"], - ) - expected_dict = { - "compute_id": "cpu-cluster", - "display_name": "train_with_automl_in_pipeline", - "inputs": { - "target_column_name_input": {"job_input_type": "literal", "value": "target"}, - "training_data": {"job_input_type": "mltable", "uri": "fake_path"}, - }, - "is_archived": False, - "job_type": "Pipeline", - "jobs": { - "classification_node": { - "log_verbosity": "info", - "name": "classification_node", - "outputs": { - "best_model": {"type": "literal", "value": "${{parent.outputs.pipeline_job_out_best_model}}"} - }, - "primary_metric": "accuracy", - "tags": {}, - "target_column_name": "${{parent.inputs.target_column_name_input}}", - "task": "classification", - "training": {"enable_model_explainability": True}, - "training_data": "${{parent.inputs.training_data}}", - "type": "automl", - } - }, - # default to uri folder with rwmount - "outputs": {"pipeline_job_out_best_model": {"job_output_type": "uri_folder"}}, - "properties": {}, - "settings": {"_source": "DSL"}, - "tags": {}, - } - assert pipeline_dict1 == expected_dict - - # in order to get right type, user need to specify it on pipeline level - pipeline1.outputs.pipeline_job_out_best_model.type = "mlflow_model" - pipeline1.outputs.pipeline_job_out_best_model.mode = "rw_mount" - pipeline_dict2 = pipeline1._to_rest_object().as_dict() - pipeline_dict2 = pydash.omit( - pipeline_dict2["properties"], - ["jobs.classification_node.display_name", "jobs.classification_node.properties"], - ) - expected_dict.update( - { - "outputs": { - "pipeline_job_out_best_model": {"job_output_type": "mlflow_model", "mode": "ReadWriteMount"} - }, - } - ) - assert pipeline_dict2 == expected_dict - @pytest.mark.parametrize( "target_yml, target_dsl_pipeline", [ @@ -2895,50 +1127,6 @@ def test_dsl_pipeline_support_data_binding_for_fields(self) -> None: dumped = schema.dump(distribution) assert dumped == {"type": "mpi", "process_count_per_instance": "${{parent.inputs.test}}"} - def 
test_automl_node_without_variable_name(self) -> None: - @dsl.pipeline(name="train_with_automl_in_pipeline", default_compute_target="cpu-cluster") - def train_with_automl_in_pipeline(training_data, target_column_name_input: str): - classification( - training_data=training_data, - # validation_data_size="${{parent.inputs.validation_data_size}}", - target_column_name=target_column_name_input, - primary_metric="accuracy", - enable_model_explainability=True, - outputs=dict(best_model=Output(type="mlflow_model")), - ) - classification( - training_data=training_data, - # validation_data_size="${{parent.inputs.validation_data_size}}", - target_column_name=target_column_name_input, - primary_metric="accuracy", - enable_model_explainability=True, - outputs=dict(best_model=Output(type="mlflow_model")), - ) - regression( - training_data=training_data, - target_column_name="SalePrice", - primary_metric="r2_score", - outputs={"best_model": Output(type="mlflow_model")}, - ) - regression( - training_data=training_data, - target_column_name="SalePrice", - primary_metric="r2_score", - outputs={"best_model": Output(type="mlflow_model")}, - ) - - job_input = Input( - type=AssetTypes.MLTABLE, - path="fake_path", - ) - pipeline1: PipelineJob = train_with_automl_in_pipeline(job_input, "target") - pipeline_dict1 = pipeline1._to_rest_object().as_dict() - assert set(pipeline_dict1["properties"]["jobs"].keys()) == { - "regressionjob", - "regressionjob_1", - "classificationjob_1", - "classificationjob", - } def test_dsl_pipeline_without_setting_binding_node(self) -> None: from dsl_pipeline.pipeline_with_set_binding_output_input.pipeline import pipeline_without_setting_binding_node @@ -3698,74 +1886,6 @@ def root_pipeline(component_in_number: int, component_in_path: str): ) assert actual_dict == expected_root_dict - def test_pipeline_with_command_services(self): - services = { - "my_jupyter": {"job_service_type": "Jupyter"}, - "my_tensorboard": { - "job_service_type": "TensorBoard", - "properties": { - "logDir": "~/tblog", - }, - }, - "my_jupyterlab": {"job_service_type": "JupyterLab"}, - } - - command_func = command( - name="test_component_with_services", - display_name="command_with_services", - environment="AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:5", - command=('echo "hello world" & sleep 1h'), - environment_variables={"key": "val"}, - inputs={}, - outputs={"component_out_path": Output(type="uri_folder")}, - services=services, - ) - - @dsl.pipeline( - name="test_component_with_services_pipeline", - description="The command node with services", - tags={"owner": "sdkteam", "tag": "tagvalue"}, - compute="cpu-cluster", - ) - def sample_pipeline(): - node = command_func() - return {"pipeline_output": node.outputs.component_out_path} - - pipeline = sample_pipeline() - node_services = pipeline.jobs["node"].services - - assert len(node_services) == 3 - for name, service in node_services.items(): - assert isinstance(service, JobService) - - job_rest_obj = pipeline._to_rest_object() - assert job_rest_obj.properties.jobs["node"]["services"] == services - - recovered_obj = PipelineJob._from_rest_object(job_rest_obj) - node_services = recovered_obj.jobs["node"].services - - assert len(node_services) == 3 - for name, service in node_services.items(): - assert isinstance(service, JobService) - - # test set services in pipeline - new_services = {"my_jupyter": {"job_service_type": "Jupyter"}} - - @dsl.pipeline() - def sample_pipeline_with_new_services(): - node = command_func() - node.services = new_services - - pipeline = 
sample_pipeline_with_new_services() - node_services = pipeline.jobs["node"].services - - assert len(node_services) == 1 - for name, service in node_services.items(): - assert isinstance(service, JobService) - - job_rest_obj = pipeline._to_rest_object() - assert job_rest_obj.properties.jobs["node"]["services"] == new_services - def test_pipeline_decorator_without_brackets(self): path = "./tests/test_configs/components/helloworld_component.yml" component_func1 = load_component(path) @@ -3820,296 +1940,3 @@ def pipeline_func(component_in_path): pipeline_job = pipeline_func(component_in_path=Data(name="test", version="1", type=AssetTypes.MLTABLE)) result = pipeline_job._validate() assert result._to_dict() == {"result": "Succeeded"} - - def test_pipeline_with_pipeline_component_entity(self): - path = "./tests/test_configs/components/helloworld_component.yml" - component_func1 = load_component(path) - data = Data(name="test", version="1", type=AssetTypes.MLTABLE) - - @dsl.pipeline - def sub_pipeline(component_in_number, component_in_path): - node1 = component_func1(component_in_number=component_in_number, component_in_path=component_in_path) - return {"pipeline_out": node1.outputs.component_out_path} - - @dsl.pipeline - def root_pipeline(component_in_number, component_in_path): - node1 = sub_pipeline(component_in_number=component_in_number, component_in_path=component_in_path) - sub_pipeline(component_in_number=2, component_in_path=data) - return {"pipeline_out": node1.outputs.pipeline_out} - - pipeline = root_pipeline(1, data) - pipeline_dict = pipeline._to_dict() - assert pipeline_dict["jobs"]["node1"]["inputs"] == { - "component_in_number": {"path": "${{parent.inputs.component_in_number}}"}, - "component_in_path": {"path": "${{parent.inputs.component_in_path}}"}, - } - assert pipeline_dict["jobs"]["node1_1"]["inputs"] == { - "component_in_number": 2, - "component_in_path": {"type": "mltable", "path": "azureml:test:1"}, - } - - -@pytest.mark.usefixtures("enable_pipeline_private_preview_features") -@pytest.mark.timeout(_DSL_TIMEOUT_SECOND) -@pytest.mark.unittest -class TestInitFinalizeJob: - component_func = partial( - load_component(str(components_dir / "echo_string_component.yml")), - component_in_string="not important", - ) - hello_world_func = load_component(str(components_dir / "helloworld_component.yml")) - - def test_init_finalize_job(self) -> None: - from azure.ai.ml._internal.dsl import set_pipeline_settings - from azure.ai.ml.dsl import pipeline - - def assert_pipeline_job_init_finalize_job(pipeline_job: PipelineJob): - assert pipeline_job._validate_init_finalize_job().passed - assert pipeline_job.settings.on_init == "init_job" - assert pipeline_job.settings.on_finalize == "finalize_job" - pipeline_job_dict = pipeline_job._to_rest_object().as_dict() - assert pipeline_job_dict["properties"]["settings"]["on_init"] == "init_job" - assert pipeline_job_dict["properties"]["settings"]["on_finalize"] == "finalize_job" - - # pipeline.settings.on_init/on_finalize - @pipeline() - def job_settings_func(): - init_job = self.component_func() # noqa: F841 - work1 = self.component_func() # noqa: F841 - work2 = self.component_func() # noqa: F841 - finalize_job = self.component_func() # noqa: F841 - - pipeline1 = job_settings_func() - pipeline1.settings.on_init = "init_job" - pipeline1.settings.on_finalize = "finalize_job" - assert_pipeline_job_init_finalize_job(pipeline1) - - # dsl.settings() - @pipeline() - def dsl_settings_func(): - init_job = self.component_func() - work1 = self.component_func() # 
noqa: F841 - work2 = self.component_func() # noqa: F841 - finalize_job = self.component_func() # noqa: F841 - # `set_pipeline_settings` can receive either `BaseNode` or str, both should work - set_pipeline_settings(on_init=init_job, on_finalize="finalize_job") - - pipeline2 = dsl_settings_func() - assert_pipeline_job_init_finalize_job(pipeline2) - - # @pipeline(on_init, on_finalize) - @pipeline( - on_init="init_job", - on_finalize="finalize_job", - ) - def in_decorator_func(): - init_job = self.component_func() # noqa: F841 - work1 = self.component_func() # noqa: F841 - work2 = self.component_func() # noqa: F841 - finalize_job = self.component_func() # noqa: F841 - - pipeline3 = in_decorator_func() - assert_pipeline_job_init_finalize_job(pipeline3) - - def test_invalid_init_finalize_job(self) -> None: - # invalid case: job name not exists - @dsl.pipeline() - def invalid_init_finalize_job_func(): - self.component_func() - - invalid_pipeline1 = invalid_init_finalize_job_func() - invalid_pipeline1.settings.on_init = "init_job" - invalid_pipeline1.settings.on_finalize = "finalize_job" - validation_result1 = invalid_pipeline1._validate_init_finalize_job() - assert not validation_result1.passed - assert validation_result1.error_messages["settings.on_init"] == "On_init job name init_job not exists in jobs." - assert ( - validation_result1.error_messages["settings.on_finalize"] - == "On_finalize job name finalize_job not exists in jobs." - ) - - # invalid case: no normal node, on_init/on_finalize job is not isolated - @dsl.pipeline() - def init_finalize_with_invalid_connection_func(int_param: int, str_param: str): - node1 = self.hello_world_func(component_in_number=int_param, component_in_path=str_param) - node2 = self.hello_world_func( # noqa: F841 - component_in_number=int_param, - component_in_path=node1.outputs.component_out_path, - ) - - invalid_pipeline2 = init_finalize_with_invalid_connection_func(int_param=0, str_param="str") - invalid_pipeline2.settings.on_init = "node2" - invalid_pipeline2.settings.on_finalize = "node1" - validation_result2 = invalid_pipeline2._validate_init_finalize_job() - assert not validation_result2.passed - assert validation_result2.error_messages["jobs"] == "No other job except for on_init/on_finalize job." - assert ( - validation_result2.error_messages["settings.on_init"] - == "On_init job should not have connection to other execution node." - ) - assert ( - validation_result2.error_messages["settings.on_finalize"] - == "On_finalize job should not have connection to other execution node." - ) - - # invalid case: call `set_pipeline_settings` out of `pipeline` decorator - from azure.ai.ml._internal.dsl import set_pipeline_settings - from azure.ai.ml.exceptions import UserErrorException - - with pytest.raises(UserErrorException) as e: - set_pipeline_settings(on_init="init_job", on_finalize="finalize_job") - assert str(e.value) == "Please call `set_pipeline_settings` inside a `pipeline` decorated function." - - # invalid case: set on_init for pipeline component - @dsl.pipeline - def subgraph_func(): - node = self.component_func() - set_pipeline_settings(on_init=node) # set on_init for subgraph (pipeline component) - - @dsl.pipeline - def subgraph_with_init_func(): - subgraph_func() - self.component_func() - - with pytest.raises(UserErrorException) as e: - subgraph_with_init_func() - assert str(e.value) == "On_init/on_finalize is not supported for pipeline component." 
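(A compact sketch for context, not part of the patch: the invalid cases above imply what a passing configuration looks like; the on_init/on_finalize jobs must exist, must stay isolated from the execution graph, and at least one normal node must remain. It assumes the echo_string_component.yml fixture these tests load, and relies on node names following variable names, as in the job_settings_func case above.)

# Sketch of an on_init/on_finalize setup that the private
# _validate_init_finalize_job check above should accept.
from azure.ai.ml import load_component
from azure.ai.ml.dsl import pipeline

component_func = load_component("./tests/test_configs/components/echo_string_component.yml")

@pipeline()
def init_finalize_demo():
    init_job = component_func(component_in_string="init")          # isolated node
    work = component_func(component_in_string="not important")     # normal node
    finalize_job = component_func(component_in_string="finalize")  # isolated node

demo_job = init_finalize_demo()
demo_job.settings.on_init = "init_job"
demo_job.settings.on_finalize = "finalize_job"
assert demo_job._validate_init_finalize_job().passed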
- - def test_init_finalize_job_with_subgraph(self, caplog) -> None: - from azure.ai.ml._internal.dsl import set_pipeline_settings - - # happy path - @dsl.pipeline() - def subgraph_func(): - node = self.component_func() - node.compute = "cpu-cluster" - - @dsl.pipeline() - def subgraph_init_finalize_job_func(): - init_job = subgraph_func() - subgraph_work = subgraph_func() # noqa: F841 - finalize_job = subgraph_func() - set_pipeline_settings(on_init=init_job, on_finalize=finalize_job) - - valid_pipeline = subgraph_init_finalize_job_func() - assert valid_pipeline._validate().passed - assert valid_pipeline.settings.on_init == "init_job" - assert valid_pipeline.settings.on_finalize == "finalize_job" - - def test_dsl_pipeline_with_spark_hobo(self) -> None: - add_greeting_column_func = load_component( - "./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/add_greeting_column_component.yml" - ) - count_by_row_func = load_component( - "./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/count_by_row_component.yml" - ) - - @dsl.pipeline(description="submit a pipeline with spark job") - def spark_pipeline_from_yaml(iris_data): - add_greeting_column = add_greeting_column_func(file_input=iris_data) - add_greeting_column.resources = {"instance_type": "Standard_E8S_V3", "runtime_version": "3.1.0"} - count_by_row = count_by_row_func(file_input=iris_data) - count_by_row.resources = {"instance_type": "Standard_E8S_V3", "runtime_version": "3.1.0"} - count_by_row.identity = {"type": "managed"} - - return {"output": count_by_row.outputs.output} - - dsl_pipeline: PipelineJob = spark_pipeline_from_yaml( - iris_data=Input( - path="https://azuremlexamples.blob.core.windows.net/datasets/iris.csv", - type=AssetTypes.URI_FILE, - mode=InputOutputModes.DIRECT, - ), - ) - dsl_pipeline.outputs.output.mode = "Direct" - - spark_node = dsl_pipeline.jobs["add_greeting_column"] - job_data_path_input = spark_node.inputs["file_input"]._meta - assert job_data_path_input - # spark_node.component._id = "azureml:test_component:1" - spark_node_dict = spark_node._to_dict() - - spark_node_rest_obj = spark_node._to_rest_object() - regenerated_spark_node = Spark._from_rest_object(spark_node_rest_obj) - - spark_node_dict_from_rest = regenerated_spark_node._to_dict() - omit_fields = [] - assert pydash.omit(spark_node_dict, *omit_fields) == pydash.omit(spark_node_dict_from_rest, *omit_fields) - omit_fields = [ - "jobs.add_greeting_column.componentId", - "jobs.count_by_row.componentId", - "jobs.add_greeting_column.properties", - "jobs.count_by_row.properties", - ] - actual_job = pydash.omit(dsl_pipeline._to_rest_object().properties.as_dict(), *omit_fields) - assert actual_job == { - "description": "submit a pipeline with spark job", - "properties": {}, - "tags": {}, - "display_name": "spark_pipeline_from_yaml", - "is_archived": False, - "job_type": "Pipeline", - "inputs": { - "iris_data": { - "mode": "Direct", - "uri": "https://azuremlexamples.blob.core.windows.net/datasets/iris.csv", - "job_input_type": "uri_file", - } - }, - "jobs": { - "add_greeting_column": { - "type": "spark", - "resources": {"instance_type": "Standard_E8S_V3", "runtime_version": "3.1.0"}, - "entry": {"file": "add_greeting_column.py", "spark_job_entry_type": "SparkJobPythonEntry"}, - "py_files": ["utils.zip"], - "files": ["my_files.txt"], - "archives": None, - "jars": None, - "identity": {"identity_type": "UserIdentity"}, - "conf": { - "spark.driver.cores": 2, - "spark.driver.memory": "1g", - "spark.executor.cores": 1, - "spark.executor.memory": "1g", - 
"spark.executor.instances": 1, - }, - "args": "--file_input ${{inputs.file_input}}", - "name": "add_greeting_column", - "display_name": None, - "tags": {}, - "computeId": None, - "inputs": { - "file_input": {"job_input_type": "literal", "value": "${{parent.inputs.iris_data}}"}, - }, - "outputs": {}, - "_source": "YAML.COMPONENT", - }, - "count_by_row": { - "_source": "YAML.COMPONENT", - "archives": None, - "args": "--file_input ${{inputs.file_input}} " "--output ${{outputs.output}}", - "computeId": None, - "conf": { - "spark.driver.cores": 2, - "spark.driver.memory": "1g", - "spark.executor.cores": 1, - "spark.executor.instances": 1, - "spark.executor.memory": "1g", - }, - "display_name": None, - "entry": {"file": "count_by_row.py", "spark_job_entry_type": "SparkJobPythonEntry"}, - "files": ["my_files.txt"], - "identity": {"identity_type": "Managed"}, - "inputs": {"file_input": {"job_input_type": "literal", "value": "${{parent.inputs.iris_data}}"}}, - "jars": ["scalaproj.jar"], - "name": "count_by_row", - "outputs": {"output": {"type": "literal", "value": "${{parent.outputs.output}}"}}, - "py_files": None, - "resources": {"instance_type": "Standard_E8S_V3", "runtime_version": "3.1.0"}, - "tags": {}, - "type": "spark", - }, - }, - "outputs": {"output": {"job_output_type": "uri_folder", "mode": "Direct"}}, - "settings": {"_source": "DSL"}, - } diff --git a/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_dsl_pipeline_with_specific_nodes.py b/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_dsl_pipeline_with_specific_nodes.py new file mode 100644 index 000000000000..8b088199f1ac --- /dev/null +++ b/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_dsl_pipeline_with_specific_nodes.py @@ -0,0 +1,1928 @@ +from pathlib import Path + +import pydash +import pytest +from test_utilities.utils import omit_with_wildcard + +from azure.ai.ml import Input, Output, command, dsl, load_component, spark +from azure.ai.ml.automl import classification, regression +from azure.ai.ml.constants._common import ( + AssetTypes, + InputOutputModes, +) +from azure.ai.ml.dsl._load_import import to_component +from azure.ai.ml.entities import ( + CommandComponent, + CommandJob, + Data, + ParallelTask, + PipelineJob, + SparkJob, +) +from azure.ai.ml.entities._builders import Command, Parallel, Spark, Sweep +from azure.ai.ml.entities._component.parallel_component import ParallelComponent +from azure.ai.ml.entities._job.automl.tabular import ClassificationJob +from azure.ai.ml.entities._job.job_service import JobService +from azure.ai.ml.exceptions import ValidationException +from azure.ai.ml.parallel import ParallelJob, RunFunction, parallel_run_function +from azure.ai.ml.sweep import ( + BanditPolicy, + Choice, + LogNormal, + LogUniform, + Normal, + QLogNormal, + QLogUniform, + QNormal, + QUniform, + Randint, + Uniform, +) + +from .._util import _DSL_TIMEOUT_SECOND + +tests_root_dir = Path(__file__).parent.parent.parent +components_dir = tests_root_dir / "test_configs/components/" + + +@pytest.mark.usefixtures("enable_pipeline_private_preview_features") +@pytest.mark.timeout(_DSL_TIMEOUT_SECOND) +@pytest.mark.unittest +class TestDSLPipelineWithSpecificNodes: + def test_dsl_pipeline_sweep_node(self) -> None: + yaml_file = "./tests/test_configs/components/helloworld_component.yml" + + @dsl.pipeline(name="train_with_sweep_in_pipeline", default_compute="cpu-cluster") + def train_with_sweep_in_pipeline(raw_data, primary_metric: str = "AUC", max_total_trials: int = 10): + component_to_sweep: CommandComponent = load_component(source=yaml_file) + 
cmd_node1: Command = component_to_sweep( + component_in_number=Choice([2, 3, 4, 5]), component_in_path=raw_data + ) + + sweep_job1: Sweep = cmd_node1.sweep( + primary_metric="AUC", # primary_metric, + goal="maximize", + sampling_algorithm="random", + ) + sweep_job1.compute = "gpu-cluster" + sweep_job1.set_limits(max_total_trials=10) # max_total_trials + + cmd_node2: Command = component_to_sweep( + component_in_number=Choice([2, 3, 4, 5]), component_in_path=raw_data + ) + sweep_job2: Sweep = cmd_node2.sweep( + primary_metric="AUC", + goal="minimize", + sampling_algorithm="random", + max_total_trials=10, + ) + sweep_job2.compute = "gpu-cluster" + + sweep_job3: Sweep = component_to_sweep( + component_in_number=Choice([2, 3, 4, 5]), component_in_path=raw_data + ).sweep( + primary_metric="accuracy", + goal="maximize", + sampling_algorithm="random", + max_total_trials=10, + ) + + component_to_link = load_component(source=yaml_file, params_override=[{"name": "node_to_link"}]) + link_node = component_to_link( + component_in_number=2, component_in_path=sweep_job1.outputs.component_out_path + ) + + return { + "pipeline_job_best_model1": sweep_job1.outputs.component_out_path, + "pipeline_job_best_model2": sweep_job2.outputs.component_out_path, + "pipeline_job_best_model3": sweep_job3.outputs.component_out_path, + "pipeline_model_test_result": link_node.outputs.component_out_path, + } + + pipeline: PipelineJob = train_with_sweep_in_pipeline( + raw_data=Input(path="/a/path/on/ds", mode="ro_mount"), max_total_trials=100, primary_metric="accuracy" + ) + assert len(pipeline.jobs) == 4, f"Expect 4 nodes are collected but got {len(pipeline.jobs)}" + assert pipeline.component._source == "DSL" + assert pipeline.component._job_types == {"sweep": 3, "command": 1} + assert pipeline.component._job_sources == {"YAML.COMPONENT": 4} + + sweep_node: Sweep = pipeline.jobs["sweep_job1"] + sweep_node.component._id = "azureml:test_component:1" + sweep_node_dict = sweep_node._to_dict() + assert pydash.get(sweep_node_dict, "limits.max_total_trials", None) == 10 + sweep_node_rest_obj = sweep_node._to_rest_object() + sweep_node_dict_from_rest = Sweep._from_rest_object(sweep_node_rest_obj)._to_dict() + omit_fields = ["trial"] + assert pydash.omit(sweep_node_dict, *omit_fields) == pydash.omit(sweep_node_dict_from_rest, *omit_fields) + + pipeline_dict = pipeline._to_dict() + for dot_key, expected_value in [ + ("jobs.sweep_job2.objective.goal", "minimize"), + ("jobs.sweep_job3.objective.goal", "maximize"), + ("jobs.sweep_job2.objective.primary_metric", "AUC"), + ("jobs.sweep_job3.objective.primary_metric", "accuracy"), + ("jobs.sweep_job2.compute", "azureml:gpu-cluster"), + ("jobs.sweep_job3.compute", None), + ]: + assert ( + pydash.get(pipeline_dict, dot_key) == expected_value + ), f"Expect {dot_key} to be {expected_value} but got {pydash.get(pipeline_dict, dot_key)}" + + pipeline_rest_obj = pipeline._to_rest_object() + pipeline_regenerated_from_rest = PipelineJob._load_from_rest(pipeline_rest_obj) + omit_fields = [ + "name", + "display_name", + "jobs.*.trial", + "outputs", # TODO: figure out why outputs can't be regenerated correctly + ] + # Change float to string to make dict from local and rest compatible + pipeline_dict["inputs"]["max_total_trials"] = str(pipeline_dict["inputs"]["max_total_trials"]) + pipeline_dict["jobs"]["link_node"]["inputs"]["component_in_number"] = str( + pipeline_dict["jobs"]["link_node"]["inputs"]["component_in_number"] + ) + assert omit_with_wildcard(pipeline_dict, *omit_fields) == 
omit_with_wildcard( + pipeline_regenerated_from_rest._to_dict(), *omit_fields + ) + + def test_dsl_pipeline_sweep_distributions(self) -> None: + yaml_file = "./tests/test_configs/components/helloworld_component_for_sweep.yml" + + @dsl.pipeline(name="OneJob_RuntimeSweepWithFullSearchSpaces") + def train_with_sweep_in_pipeline(): + component_to_sweep: CommandComponent = load_component(source=yaml_file) + cmd_node1: Command = component_to_sweep( + batch_size=Choice([25, 35]), + first_layer_neurons=Randint(upper=50), + second_layer_neurons=QUniform(min_value=10, max_value=50, q=5), + third_layer_neurons=QLogNormal(mu=5, sigma=1, q=5), + epochs=QLogUniform(min_value=1, max_value=5, q=5), + momentum=QNormal(mu=10, sigma=5, q=2), + weight_decay=LogNormal(mu=0, sigma=1), + learning_rate=LogUniform(min_value=-6, max_value=-1), + f1=Normal(mu=0, sigma=1), + f2=Uniform(min_value=10, max_value=20), + data_folder=Input( + type=AssetTypes.MLTABLE, + path="https://dprepdata.blob.core.windows.net/demo/", + mode=InputOutputModes.RO_MOUNT, + ), + ) + + hello_sweep: Sweep = cmd_node1.sweep( + primary_metric="validation_acc", + goal="maximize", + sampling_algorithm="random", + ) + hello_sweep.compute = "cpu-cluster" + hello_sweep.set_limits(max_total_trials=2, max_concurrent_trials=3, timeout=600) + hello_sweep.early_termination = BanditPolicy(evaluation_interval=2, slack_factor=0.1, delay_evaluation=1) + + dsl_pipeline: PipelineJob = train_with_sweep_in_pipeline() + dsl_pipeline.jobs["hello_sweep"].outputs.trained_model_dir = Output( + type=AssetTypes.MLFLOW_MODEL, mode=InputOutputModes.RW_MOUNT + ) + + sweep_node: Sweep = dsl_pipeline.jobs["hello_sweep"] + random_seed_input = sweep_node.inputs["random_seed"]._meta + assert random_seed_input + assert random_seed_input.default == 42 + sweep_node.component._id = "azureml:test_component:1" + sweep_node_dict = sweep_node._to_dict() + sweep_node_rest_obj = sweep_node._to_rest_object() + sweep_node_dict_from_rest = Sweep._from_rest_object(sweep_node_rest_obj)._to_dict() + omit_fields = ["trial"] + assert pydash.omit(sweep_node_dict, *omit_fields) == pydash.omit(sweep_node_dict_from_rest, *omit_fields) + + def test_dsl_pipeline_with_parallel(self) -> None: + yaml_file = "./tests/test_configs/dsl_pipeline/parallel_component_with_file_input/score.yml" + + @dsl.pipeline(default_compute="cpu-cluster") + def train_with_parallel_in_pipeline(): + parallel_component: ParallelComponent = load_component(source=yaml_file) + node1: Parallel = parallel_component( + job_data_path=Input(type=AssetTypes.MLTABLE, path="/a/path/on/ds", mode=InputOutputModes.EVAL_MOUNT), + ) + node1.resources = {"instance_count": 2} + + dsl_pipeline: PipelineJob = train_with_parallel_in_pipeline() + dsl_pipeline.jobs["node1"].outputs.job_output_path = Output( + type=AssetTypes.MLFLOW_MODEL, mode=InputOutputModes.RW_MOUNT + ) + + parallel_node: Parallel = dsl_pipeline.jobs["node1"] + job_data_path_input = parallel_node.inputs["job_data_path"]._meta + assert job_data_path_input + parallel_node.component._id = "azureml:test_component:1" + parallel_node_dict = parallel_node._to_dict() + + parallel_node_rest_obj = parallel_node._to_rest_object() + regenerated_parallel_node = Parallel._from_rest_object(parallel_node_rest_obj) + # entity load from rest object is based on current working directory, while task.code is a local path based + # on the yaml file in unit tests. 
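+        # so rebase the regenerated node onto the YAML's directory before comparing the two dicts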
+ regenerated_parallel_node._base_path = Path(yaml_file).parent + parallel_node_dict_from_rest = regenerated_parallel_node._to_dict() + omit_fields = ["component"] + assert pydash.omit(parallel_node_dict, *omit_fields) == pydash.omit(parallel_node_dict_from_rest, *omit_fields) + + def test_dsl_pipeline_with_spark(self) -> None: + add_greeting_column_func = load_component( + "./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/add_greeting_column_component.yml" + ) + count_by_row_func = load_component( + "./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/count_by_row_component.yml" + ) + synapse_compute_name = "spark31" + + @dsl.pipeline(description="submit a pipeline with spark job") + def spark_pipeline_from_yaml(iris_data): + add_greeting_column = add_greeting_column_func(file_input=iris_data) + add_greeting_column.compute = synapse_compute_name + count_by_row = count_by_row_func(file_input=iris_data) + count_by_row.compute = synapse_compute_name + + return {"output": count_by_row.outputs.output} + + dsl_pipeline: PipelineJob = spark_pipeline_from_yaml( + iris_data=Input( + path="https://azuremlexamples.blob.core.windows.net/datasets/iris.csv", + type=AssetTypes.URI_FILE, + mode=InputOutputModes.DIRECT, + ), + ) + dsl_pipeline.outputs.output.mode = "Direct" + + spark_node = dsl_pipeline.jobs["add_greeting_column"] + job_data_path_input = spark_node.inputs["file_input"]._meta + assert job_data_path_input + # spark_node.component._id = "azureml:test_component:1" + spark_node_dict = spark_node._to_dict() + + spark_node_rest_obj = spark_node._to_rest_object() + regenerated_spark_node = Spark._from_rest_object(spark_node_rest_obj) + + spark_node_dict_from_rest = regenerated_spark_node._to_dict() + omit_fields = [] + assert pydash.omit(spark_node_dict, *omit_fields) == pydash.omit(spark_node_dict_from_rest, *omit_fields) + omit_fields = [ + "jobs.add_greeting_column.componentId", + "jobs.add_greeting_column.properties", + "jobs.count_by_row.componentId", + "jobs.count_by_row.properties", + ] + actual_job = pydash.omit(dsl_pipeline._to_rest_object().properties.as_dict(), *omit_fields) + assert actual_job == { + "description": "submit a pipeline with spark job", + "properties": {}, + "tags": {}, + "display_name": "spark_pipeline_from_yaml", + "is_archived": False, + "job_type": "Pipeline", + "inputs": { + "iris_data": { + "mode": "Direct", + "uri": "https://azuremlexamples.blob.core.windows.net/datasets/iris.csv", + "job_input_type": "uri_file", + } + }, + "jobs": { + "add_greeting_column": { + "type": "spark", + "resources": None, + "entry": {"file": "add_greeting_column.py", "spark_job_entry_type": "SparkJobPythonEntry"}, + "py_files": ["utils.zip"], + "files": ["my_files.txt"], + "archives": None, + "jars": None, + "identity": {"identity_type": "Managed"}, + "conf": { + "spark.driver.cores": 2, + "spark.driver.memory": "1g", + "spark.executor.cores": 1, + "spark.executor.memory": "1g", + "spark.executor.instances": 1, + }, + "args": "--file_input ${{inputs.file_input}}", + "name": "add_greeting_column", + "display_name": None, + "tags": {}, + "computeId": "spark31", + "inputs": { + "file_input": {"job_input_type": "literal", "value": "${{parent.inputs.iris_data}}"}, + }, + "outputs": {}, + "_source": "YAML.COMPONENT", + }, + "count_by_row": { + "_source": "YAML.COMPONENT", + "archives": None, + "args": "--file_input ${{inputs.file_input}} " "--output ${{outputs.output}}", + "computeId": "spark31", + "conf": { + "spark.driver.cores": 2, + "spark.driver.memory": "1g", + 
"spark.executor.cores": 1, + "spark.executor.instances": 1, + "spark.executor.memory": "1g", + }, + "display_name": None, + "entry": {"file": "count_by_row.py", "spark_job_entry_type": "SparkJobPythonEntry"}, + "files": ["my_files.txt"], + "identity": {"identity_type": "Managed"}, + "inputs": {"file_input": {"job_input_type": "literal", "value": "${{parent.inputs.iris_data}}"}}, + "jars": ["scalaproj.jar"], + "name": "count_by_row", + "outputs": {"output": {"type": "literal", "value": "${{parent.outputs.output}}"}}, + "py_files": None, + "resources": None, + "tags": {}, + "type": "spark", + }, + }, + "outputs": {"output": {"job_output_type": "uri_folder", "mode": "Direct"}}, + "settings": {"_source": "DSL"}, + } + + def test_pipeline_with_command_function(self): + # component func + yaml_file = "./tests/test_configs/components/helloworld_component.yml" + component_func = load_component(source=yaml_file) + + # command job with dict distribution + environment = "AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:5" + expected_resources = {"instance_count": 2} + expected_environment_variables = {"key": "val"} + inputs = { + "component_in_path": Input(type="uri_folder", path="https://my-blob/path/to/data", mode="ro_mount"), + "component_in_number": 0.01, + } + outputs = {"component_out_path": Output(type="mlflow_model", mode="rw_mount")} + + command_job = CommandJob( + display_name="my-evaluate-job", + environment=environment, + command='echo "hello world"', + distribution={"type": "Pytorch", "process_count_per_instance": 2}, + resources=expected_resources, + environment_variables=expected_environment_variables, + inputs=inputs, + outputs=outputs, + ) + command_job_func = to_component(job=command_job) + + # Command from command() function + command_function = command( + display_name="my-evaluate-job", + environment=environment, + command='echo "hello world"', + distribution={"type": "Pytorch", "process_count_per_instance": 2}, + resources=expected_resources, + environment_variables=expected_environment_variables, + inputs=inputs, + outputs=outputs, + ) + + data = Input(type=AssetTypes.URI_FOLDER, path="/a/path/on/ds", mode="ro_mount") + + @dsl.pipeline(experiment_name="test_pipeline_with_command_function") + def pipeline(number, path): + node1 = component_func(component_in_number=number, component_in_path=path) + node2 = command_job_func(component_in_number=number, component_in_path=node1.outputs.component_out_path) + node3 = command_function(component_in_number=number, component_in_path=node2.outputs.component_out_path) + return { + "pipeline_output1": node1.outputs.component_out_path, + "pipeline_output2": node2.outputs.component_out_path, + "pipeline_output3": node3.outputs.component_out_path, + } + + omit_fields = [ + "name", + "properties.jobs.*.componentId", + "properties.jobs.*.properties", + "properties.settings._source", + ] + + pipeline1 = pipeline(10, data) + pipeline_job1 = pipeline1._to_rest_object().as_dict() + pipeline_job1 = omit_with_wildcard(pipeline_job1, *omit_fields) + assert pipeline_job1 == { + "properties": { + "display_name": "pipeline", + "experiment_name": "test_pipeline_with_command_function", + "inputs": { + "number": {"job_input_type": "literal", "value": "10"}, + "path": {"job_input_type": "uri_folder", "mode": "ReadOnlyMount", "uri": "/a/path/on/ds"}, + }, + "is_archived": False, + "job_type": "Pipeline", + "jobs": { + "node1": { + "_source": "YAML.COMPONENT", + "computeId": None, + "display_name": None, + "distribution": None, + "environment_variables": {}, + "inputs": 
{ + "component_in_number": {"job_input_type": "literal", "value": "${{parent.inputs.number}}"}, + "component_in_path": {"job_input_type": "literal", "value": "${{parent.inputs.path}}"}, + }, + "limits": None, + "name": "node1", + "outputs": { + "component_out_path": {"type": "literal", "value": "${{parent.outputs.pipeline_output1}}"} + }, + "resources": None, + "tags": {}, + "type": "command", + }, + "node2": { + "_source": "CLASS", + "computeId": None, + "display_name": None, + "distribution": {"distribution_type": "PyTorch", "process_count_per_instance": 2}, + "environment_variables": {}, + "inputs": { + "component_in_number": {"job_input_type": "literal", "value": "${{parent.inputs.number}}"}, + "component_in_path": { + "job_input_type": "literal", + "value": "${{parent.jobs.node1.outputs.component_out_path}}", + }, + }, + "limits": None, + "name": "node2", + "outputs": { + "component_out_path": {"type": "literal", "value": "${{parent.outputs.pipeline_output2}}"} + }, + "resources": {"instance_count": 2, "properties": {}}, + "tags": {}, + "type": "command", + }, + "node3": { + "_source": "BUILDER", + "computeId": None, + "display_name": "my-evaluate-job", + "distribution": {"distribution_type": "PyTorch", "process_count_per_instance": 2}, + "environment_variables": {"key": "val"}, + "inputs": { + "component_in_number": {"job_input_type": "literal", "value": "${{parent.inputs.number}}"}, + "component_in_path": { + "job_input_type": "literal", + "value": "${{parent.jobs.node2.outputs.component_out_path}}", + }, + }, + "limits": None, + "name": "node3", + "outputs": { + "component_out_path": {"type": "literal", "value": "${{parent.outputs.pipeline_output3}}"} + }, + "resources": {"instance_count": 2, "properties": {}}, + "tags": {}, + "type": "command", + }, + }, + "outputs": { + "pipeline_output1": {"job_output_type": "uri_folder"}, + "pipeline_output2": {"job_output_type": "uri_folder"}, + "pipeline_output3": {"job_output_type": "uri_folder"}, + }, + "properties": {}, + "settings": {}, + "tags": {}, + } + } + + def test_pipeline_with_spark_function(self): + # component func + yaml_file = "./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/sample_component.yml" + component_func = load_component(yaml_file) + + environment = "AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:5" + iris_data = Input( + path="./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/dataset/shakespeare.txt", + type=AssetTypes.URI_FILE, + mode=InputOutputModes.DIRECT, + ) + sample_rate = 0.01 + synapse_compute_name = "rezas-synapse-10" + inputs = { + "input1": iris_data, + "sample_rate": sample_rate, + } + outputs = {"output1": Output(type="uri_folder", mode=InputOutputModes.DIRECT)} + + spark_job = SparkJob( + code="./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/basic_src", + entry={"file": "sampleword.py"}, + driver_cores=2, + driver_memory="1g", + executor_cores=1, + executor_memory="1g", + executor_instances=1, + environment=environment, + inputs=inputs, + outputs=outputs, + args="--input1 ${{inputs.input1}} --output2 ${{outputs.output1}} --my_sample_rate ${{inputs.sample_rate}}", + compute=synapse_compute_name, + ) + spark_job_func = to_component(job=spark_job) + + # Spark from spark() function + spark_function = spark( + code="./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/basic_src", + entry={"file": "sampleword.py"}, + driver_cores=2, + driver_memory="1g", + executor_cores=1, + executor_memory="1g", + executor_instances=1, + environment=environment, + inputs=inputs, + outputs=outputs, + 
args="--input1 ${{inputs.input1}} --output2 ${{outputs.output1}} --my_sample_rate ${{inputs.sample_rate}}", + compute=synapse_compute_name, + # For HOBO spark, provide 'resources' + # resources={"instance_type": "Standard_E8S_V3", "runtime_version": "3.1.0"} + ) + + @dsl.pipeline(experiment_name="test_pipeline_with_spark_function") + def pipeline(iris_data, sample_rate): + node1 = component_func(input1=iris_data, sample_rate=sample_rate) + node1.compute = synapse_compute_name + node2 = spark_job_func(input1=node1.outputs.output1, sample_rate=sample_rate) + node2.compute = synapse_compute_name + node3 = spark_function(input1=node2.outputs.output1, sample_rate=sample_rate) + return { + "pipeline_output1": node1.outputs.output1, + "pipeline_output2": node2.outputs.output1, + "pipeline_output3": node3.outputs.output1, + } + + omit_fields = [ + "properties.jobs.*.componentId", + "properties.jobs.*.code", + "properties.jobs.*.properties", + "properties.settings._source", + ] + + pipeline1 = pipeline(iris_data, sample_rate) + pipeline_job1 = pipeline1._to_rest_object().as_dict() + pipeline_job1 = omit_with_wildcard(pipeline_job1, *omit_fields) + assert pipeline_job1 == { + "properties": { + "properties": {}, + "tags": {}, + "display_name": "pipeline", + "experiment_name": "test_pipeline_with_spark_function", + "is_archived": False, + "job_type": "Pipeline", + "inputs": { + "iris_data": { + "mode": "Direct", + "uri": "./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/dataset/shakespeare.txt", + "job_input_type": "uri_file", + }, + "sample_rate": {"job_input_type": "literal", "value": "0.01"}, + }, + "jobs": { + "node1": { + "type": "spark", + "resources": None, + "entry": {"file": "sampleword.py", "spark_job_entry_type": "SparkJobPythonEntry"}, + "py_files": None, + "jars": None, + "files": None, + "archives": None, + "identity": {"identity_type": "Managed"}, + "conf": { + "spark.driver.cores": 1, + "spark.driver.memory": "2g", + "spark.dynamicAllocation.enabled": True, + "spark.dynamicAllocation.maxExecutors": 4, + "spark.dynamicAllocation.minExecutors": 1, + "spark.executor.cores": 2, + "spark.executor.instances": 1, + "spark.executor.memory": "2g", + }, + "args": "--input1 ${{inputs.input1}} --output2 ${{outputs.output1}} --my_sample_rate ${{inputs.sample_rate}}", + "name": "node1", + "display_name": None, + "tags": {}, + "computeId": "rezas-synapse-10", + "inputs": { + "input1": {"job_input_type": "literal", "value": "${{parent.inputs.iris_data}}"}, + "sample_rate": {"job_input_type": "literal", "value": "${{parent.inputs.sample_rate}}"}, + }, + "outputs": {"output1": {"type": "literal", "value": "${{parent.outputs.pipeline_output1}}"}}, + "_source": "YAML.COMPONENT", + }, + "node2": { + "type": "spark", + "resources": None, + "entry": {"file": "sampleword.py", "spark_job_entry_type": "SparkJobPythonEntry"}, + "py_files": None, + "jars": None, + "files": None, + "archives": None, + "identity": {"identity_type": "Managed"}, + "conf": { + "spark.driver.cores": 2, + "spark.driver.memory": "1g", + "spark.executor.cores": 1, + "spark.executor.memory": "1g", + "spark.executor.instances": 1, + }, + "args": "--input1 ${{inputs.input1}} --output2 ${{outputs.output1}} --my_sample_rate ${{inputs.sample_rate}}", + "name": "node2", + "display_name": None, + "tags": {}, + "computeId": "rezas-synapse-10", + "inputs": { + "input1": {"job_input_type": "literal", "value": "${{parent.jobs.node1.outputs.output1}}"}, + "sample_rate": {"job_input_type": "literal", "value": "${{parent.inputs.sample_rate}}"}, + 
}, + "outputs": {"output1": {"value": "${{parent.outputs.pipeline_output2}}", "type": "literal"}}, + "_source": "CLASS", + }, + "node3": { + "type": "spark", + "resources": None, + "entry": {"file": "sampleword.py", "spark_job_entry_type": "SparkJobPythonEntry"}, + "py_files": None, + "jars": None, + "files": None, + "archives": None, + "identity": {"identity_type": "Managed"}, + "conf": { + "spark.driver.cores": 2, + "spark.driver.memory": "1g", + "spark.executor.cores": 1, + "spark.executor.memory": "1g", + "spark.executor.instances": 1, + }, + "args": "--input1 ${{inputs.input1}} --output2 ${{outputs.output1}} --my_sample_rate ${{inputs.sample_rate}}", + "name": "node3", + "display_name": None, + "tags": {}, + "computeId": "rezas-synapse-10", + "inputs": { + "input1": {"job_input_type": "literal", "value": "${{parent.jobs.node2.outputs.output1}}"}, + "sample_rate": {"job_input_type": "literal", "value": "${{parent.inputs.sample_rate}}"}, + }, + "outputs": {"output1": {"type": "literal", "value": "${{parent.outputs.pipeline_output3}}"}}, + "_source": "BUILDER", + }, + }, + "outputs": { + "pipeline_output1": {"job_output_type": "uri_folder"}, + "pipeline_output2": {"job_output_type": "uri_folder"}, + "pipeline_output3": {"job_output_type": "uri_folder"}, + }, + "settings": {}, + } + } + + def test_pipeline_with_spark_function_by_setting_conf(self, client): + # component func + yaml_file = "./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/sample_component.yml" + component_func = load_component(yaml_file) + + environment = "AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:5" + iris_data = Input( + path="./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/dataset/shakespeare.txt", + type=AssetTypes.URI_FILE, + mode=InputOutputModes.DIRECT, + ) + sample_rate = 0.01 + synapse_compute_name = "rezas-synapse-10" + inputs = { + "input1": iris_data, + "sample_rate": sample_rate, + } + outputs = {"output1": Output(type="uri_folder", mode=InputOutputModes.DIRECT)} + + spark_job = SparkJob( + code="./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/basic_src", + entry={"file": "sampleword.py"}, + conf={ + "spark.driver.cores": 2, + "spark.driver.memory": "1g", + "spark.executor.cores": 1, + "spark.executor.memory": "1g", + "spark.executor.instances": 1, + }, + environment=environment, + inputs=inputs, + outputs=outputs, + args="--input1 ${{inputs.input1}} --output2 ${{outputs.output1}} --my_sample_rate ${{inputs.sample_rate}}", + compute=synapse_compute_name, + ) + spark_job_func = to_component(job=spark_job) + + # Spark from spark() function + spark_function = spark( + code="./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/basic_src", + entry={"file": "sampleword.py"}, + conf={ + "spark.driver.cores": 2, + "spark.driver.memory": "1g", + "spark.executor.cores": 1, + "spark.executor.memory": "1g", + "spark.executor.instances": 1, + }, + environment=environment, + inputs=inputs, + outputs=outputs, + args="--input1 ${{inputs.input1}} --output2 ${{outputs.output1}} --my_sample_rate ${{inputs.sample_rate}}", + compute=synapse_compute_name, + # For HOBO spark, provide 'resources' + # resources={"instance_type": "Standard_E8S_V3", "runtime_version": "3.1.0"} + ) + + @dsl.pipeline(experiment_name="test_pipeline_with_spark_function") + def pipeline(iris_data, sample_rate): + node1 = component_func(input1=iris_data, sample_rate=sample_rate) + node1.compute = synapse_compute_name + node2 = spark_job_func(input1=node1.outputs.output1, sample_rate=sample_rate) + node2.compute = synapse_compute_name 
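+            # node3 comes from the spark() builder, so it keeps the compute that was set at build time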
+ node3 = spark_function(input1=node2.outputs.output1, sample_rate=sample_rate) + return { + "pipeline_output1": node1.outputs.output1, + "pipeline_output2": node2.outputs.output1, + "pipeline_output3": node3.outputs.output1, + } + + omit_fields = [ + "properties.jobs.*.componentId", + "properties.jobs.*.code", + "properties.jobs.*.properties", + "properties.settings._source", + ] + + pipeline1 = pipeline(iris_data, sample_rate) + pipeline_job1 = pipeline1._to_rest_object().as_dict() + pipeline_job1 = omit_with_wildcard(pipeline_job1, *omit_fields) + assert pipeline_job1 == { + "properties": { + "properties": {}, + "tags": {}, + "display_name": "pipeline", + "experiment_name": "test_pipeline_with_spark_function", + "is_archived": False, + "job_type": "Pipeline", + "inputs": { + "iris_data": { + "mode": "Direct", + "uri": "./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/dataset/shakespeare.txt", + "job_input_type": "uri_file", + }, + "sample_rate": {"job_input_type": "literal", "value": "0.01"}, + }, + "jobs": { + "node1": { + "type": "spark", + "resources": None, + "entry": {"file": "sampleword.py", "spark_job_entry_type": "SparkJobPythonEntry"}, + "py_files": None, + "jars": None, + "files": None, + "archives": None, + "identity": {"identity_type": "Managed"}, + "conf": { + "spark.driver.cores": 1, + "spark.driver.memory": "2g", + "spark.dynamicAllocation.enabled": True, + "spark.dynamicAllocation.maxExecutors": 4, + "spark.dynamicAllocation.minExecutors": 1, + "spark.executor.cores": 2, + "spark.executor.instances": 1, + "spark.executor.memory": "2g", + }, + "args": "--input1 ${{inputs.input1}} --output2 ${{outputs.output1}} --my_sample_rate ${{inputs.sample_rate}}", + "name": "node1", + "display_name": None, + "tags": {}, + "computeId": "rezas-synapse-10", + "inputs": { + "input1": {"job_input_type": "literal", "value": "${{parent.inputs.iris_data}}"}, + "sample_rate": {"job_input_type": "literal", "value": "${{parent.inputs.sample_rate}}"}, + }, + "outputs": {"output1": {"type": "literal", "value": "${{parent.outputs.pipeline_output1}}"}}, + "_source": "YAML.COMPONENT", + }, + "node2": { + "type": "spark", + "resources": None, + "entry": {"file": "sampleword.py", "spark_job_entry_type": "SparkJobPythonEntry"}, + "py_files": None, + "jars": None, + "files": None, + "archives": None, + "identity": {"identity_type": "Managed"}, + "conf": { + "spark.driver.cores": 2, + "spark.driver.memory": "1g", + "spark.executor.cores": 1, + "spark.executor.memory": "1g", + "spark.executor.instances": 1, + }, + "args": "--input1 ${{inputs.input1}} --output2 ${{outputs.output1}} --my_sample_rate ${{inputs.sample_rate}}", + "name": "node2", + "display_name": None, + "tags": {}, + "computeId": "rezas-synapse-10", + "inputs": { + "input1": {"job_input_type": "literal", "value": "${{parent.jobs.node1.outputs.output1}}"}, + "sample_rate": {"job_input_type": "literal", "value": "${{parent.inputs.sample_rate}}"}, + }, + "outputs": {"output1": {"value": "${{parent.outputs.pipeline_output2}}", "type": "literal"}}, + "_source": "CLASS", + }, + "node3": { + "type": "spark", + "resources": None, + "entry": {"file": "sampleword.py", "spark_job_entry_type": "SparkJobPythonEntry"}, + "py_files": None, + "jars": None, + "files": None, + "archives": None, + "identity": {"identity_type": "Managed"}, + "conf": { + "spark.driver.cores": 2, + "spark.driver.memory": "1g", + "spark.executor.cores": 1, + "spark.executor.memory": "1g", + "spark.executor.instances": 1, + }, + "args": "--input1 ${{inputs.input1}} --output2 
${{outputs.output1}} --my_sample_rate ${{inputs.sample_rate}}",
+                        "name": "node3",
+                        "display_name": None,
+                        "tags": {},
+                        "computeId": "rezas-synapse-10",
+                        "inputs": {
+                            "input1": {"job_input_type": "literal", "value": "${{parent.jobs.node2.outputs.output1}}"},
+                            "sample_rate": {"job_input_type": "literal", "value": "${{parent.inputs.sample_rate}}"},
+                        },
+                        "outputs": {"output1": {"type": "literal", "value": "${{parent.outputs.pipeline_output3}}"}},
+                        "_source": "BUILDER",
+                    },
+                },
+                "outputs": {
+                    "pipeline_output1": {"job_output_type": "uri_folder"},
+                    "pipeline_output2": {"job_output_type": "uri_folder"},
+                    "pipeline_output3": {"job_output_type": "uri_folder"},
+                },
+                "settings": {},
+            }
+        }
+
+    def test_pipeline_with_spark_job_dynamic_allocation_disabled(self, client):
+        environment = "AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:5"
+        iris_data = Input(
+            path="https://azuremlexamples.blob.core.windows.net/datasets/iris.csv",
+            type=AssetTypes.URI_FILE,
+            mode=InputOutputModes.DIRECT,
+        )
+        synapse_compute_name = "rezas-synapse-10"
+        inputs = {
+            "file_input1": iris_data,
+            "file_input2": iris_data,
+        }
+        outputs = {"output": Output(type="uri_folder", mode=InputOutputModes.DIRECT)}
+
+        spark_job = SparkJob(
+            code="./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/basic_src",
+            entry={"file": "sampleword.py"},
+            conf={
+                "spark.driver.cores": 2,
+                "spark.driver.memory": "1g",
+                "spark.executor.cores": 1,
+                "spark.executor.memory": "1g",
+                "spark.executor.instances": 1,
+                "spark.dynamicAllocation.minExecutors": 1,
+                "spark.dynamicAllocation.maxExecutors": 2,
+            },
+            environment=environment,
+            inputs=inputs,
+            outputs=outputs,
+            args="--input1 ${{inputs.input1}} --output2 ${{outputs.output1}} --my_sample_rate ${{inputs.sample_rate}}",
+            compute=synapse_compute_name,
+        )
+
+        spark_job_func = to_component(job=spark_job)
+
+        @dsl.pipeline(experiment_name="test_pipeline_with_spark_function")
+        def pipeline(iris_data):
+            node = spark_job_func(file_input1=iris_data, file_input2=iris_data)
+            node.compute = synapse_compute_name
+            return {
+                "pipeline_output": node.outputs.output,
+            }
+
+        pipeline1 = pipeline(iris_data)
+        with pytest.raises(ValidationException) as ve:
+            pipeline1._to_rest_object().as_dict()
+        # pytest.raises yields an ExceptionInfo wrapper, so the raised SDK exception and its message live on ve.value
+        assert ve.value.message == "Should not specify min or max executors when dynamic allocation is disabled."
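+
+    # The next case round-trips a SparkJob through to_component(), a DSL pipeline, and the REST
+    # representation, verifying that the regenerated job matches the local definition.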
+ + def test_pipeline_with_spark_job(self): + environment = "AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:5" + iris_data = Input( + path="./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/dataset/shakespeare.txt", + type=AssetTypes.URI_FILE, + mode=InputOutputModes.DIRECT, + ) + sample_rate = 0.01 + synapse_compute_name = "rezas-synapse-10" + inputs = { + "input1": iris_data, + "sample_rate": sample_rate, + } + outputs = {"output1": Output(type="uri_folder", mode=InputOutputModes.DIRECT)} + + spark_job = SparkJob( + code="./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/basic_src", + entry={"file": "sampleword.py"}, + conf={ + "spark.driver.cores": 2, + "spark.driver.memory": "1g", + "spark.executor.cores": 1, + "spark.executor.memory": "1g", + "spark.executor.instances": 1, + }, + environment=environment, + inputs=inputs, + outputs=outputs, + args="--input1 ${{inputs.input1}} --output2 ${{outputs.output1}} --my_sample_rate ${{inputs.sample_rate}}", + compute=synapse_compute_name, + ) + + spark_job_func = to_component(job=spark_job) + + @dsl.pipeline(experiment_name="test_pipeline_with_spark_job") + def pipeline(iris_data, sample_rate): + spark_node = spark_job_func(input1=iris_data, sample_rate=sample_rate) + spark_node.compute = synapse_compute_name + return { + "pipeline_output1": spark_node.outputs.output1, + } + + pipeline1 = pipeline(iris_data, sample_rate) + pipeline_rest_obj = pipeline1._to_rest_object() + pipeline_job1 = pipeline_rest_obj.as_dict() + + pipeline_regenerated_from_rest = PipelineJob._load_from_rest(pipeline_rest_obj) + omit_field = [ + "outputs", # TODO: figure out why outputs can't be regenerated correctly + ] + + pipeline1_dict = pipeline1._to_dict() + # Change float to string to make dict from local and rest compatible + pipeline1_dict["inputs"]["sample_rate"] = str(pipeline1_dict["inputs"]["sample_rate"]) + assert pydash.omit(pipeline1_dict, *omit_field) == pydash.omit( + pipeline_regenerated_from_rest._to_dict(), *omit_field + ) + omit_fields = [ + "properties.jobs.spark_node.componentId", + "properties.jobs.spark_node.properties", + ] + pipeline_job1 = pydash.omit(pipeline_job1, *omit_fields) + assert pipeline_job1 == { + "properties": { + "properties": {}, + "tags": {}, + "display_name": "pipeline", + "experiment_name": "test_pipeline_with_spark_job", + "is_archived": False, + "job_type": "Pipeline", + "inputs": { + "iris_data": { + "mode": "Direct", + "uri": "./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/dataset/shakespeare.txt", + "job_input_type": "uri_file", + }, + "sample_rate": {"job_input_type": "literal", "value": "0.01"}, + }, + "settings": {"_source": "DSL"}, + "jobs": { + "spark_node": { + "_source": "CLASS", + "type": "spark", + "resources": None, + "entry": {"file": "sampleword.py", "spark_job_entry_type": "SparkJobPythonEntry"}, + "py_files": None, + "jars": None, + "files": None, + "archives": None, + "identity": {"identity_type": "Managed"}, + "conf": { + "spark.driver.cores": 2, + "spark.driver.memory": "1g", + "spark.executor.cores": 1, + "spark.executor.memory": "1g", + "spark.executor.instances": 1, + }, + "args": "--input1 ${{inputs.input1}} --output2 ${{outputs.output1}} --my_sample_rate ${{inputs.sample_rate}}", + "name": "spark_node", + "display_name": None, + "tags": {}, + "computeId": "rezas-synapse-10", + "inputs": { + "input1": {"job_input_type": "literal", "value": "${{parent.inputs.iris_data}}"}, + "sample_rate": {"job_input_type": "literal", "value": "${{parent.inputs.sample_rate}}"}, + }, + "outputs": 
{"output1": {"type": "literal", "value": "${{parent.outputs.pipeline_output1}}"}}, + }, + }, + "outputs": {"pipeline_output1": {"job_output_type": "uri_folder"}}, + } + } + + def test_pipeline_with_parallel_job(self): + # command job with dict distribution + environment = "AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:5" + inputs = { + "job_data_path": Input(type=AssetTypes.MLTABLE, path="./tests/test_configs/data", mode="eval_mount"), + } + outputs = {"job_output_path": Output(type=AssetTypes.URI_FOLDER, mode="rw_mount")} + expected_resources = {"instance_count": 2} + expected_environment_variables = {"key": "val"} + + task = ParallelTask( + type="run_function", + code="./tests/test_configs/dsl_pipeline/parallel_component_with_file_input/src/", + entry_script="score.py", + program_arguments="--job_output_path ${{outputs.job_output_path}}", + environment=environment, + ) + logging_level = "DEBUG" + max_concurrency_per_instance = 1 + error_threshold = 1 + mini_batch_error_threshold = 1 + mini_batch_size = "5" + input_data = "${{inputs.job_data_path}}" + + parallel_job = ParallelJob( + display_name="my-evaluate-job", + resources=expected_resources, + mini_batch_size=mini_batch_size, + task=task, + input_data=input_data, + logging_level=logging_level, + max_concurrency_per_instance=max_concurrency_per_instance, + error_threshold=error_threshold, + mini_batch_error_threshold=mini_batch_error_threshold, + inputs=inputs, + outputs=outputs, + environment_variables=expected_environment_variables, + ) + + parallel_job_func = to_component(job=parallel_job) + data = Input(type=AssetTypes.MLTABLE, path="/a/path/on/ds", mode="eval_mount") + + @dsl.pipeline(experiment_name="test_pipeline_with_parallel_function") + def pipeline(job_data_path): + parallel_node = parallel_job_func(job_data_path=job_data_path) + return { + "pipeline_job_out": parallel_node.outputs.job_output_path, + } + + omit_fields = [ + "name", + "properties.jobs.parallel_node.componentId", + "properties.jobs.parallel_node.properties", + ] + + pipeline1 = pipeline(data) + pipeline_rest_obj = pipeline1._to_rest_object() + pipeline_job1 = pipeline_rest_obj.as_dict() + pipeline_regenerated_from_rest = PipelineJob._load_from_rest(pipeline_rest_obj) + omit_field = [ + "jobs.parallel_node.task", + "jobs.*.properties", + "outputs", # TODO: figure out why outputs can't be regenerated correctly + ] + + assert pydash.omit(pipeline1._to_dict(), *omit_field) == pydash.omit( + pipeline_regenerated_from_rest._to_dict(), *omit_field + ) + + pipeline_job1 = pydash.omit(pipeline_job1, *omit_fields) + assert pipeline_job1 == { + "properties": { + "display_name": "pipeline", + "experiment_name": "test_pipeline_with_parallel_function", + "inputs": { + "job_data_path": {"job_input_type": "mltable", "mode": "EvalMount", "uri": "/a/path/on/ds"}, + }, + "is_archived": False, + "job_type": "Pipeline", + "jobs": { + "parallel_node": { + "_source": "CLASS", + "type": "parallel", + "input_data": "${{inputs.job_data_path}}", + "computeId": None, + "display_name": None, + "inputs": { + "job_data_path": {"job_input_type": "literal", "value": "${{parent.inputs.job_data_path}}"}, + }, + "name": "parallel_node", + "outputs": { + "job_output_path": {"type": "literal", "value": "${{parent.outputs.pipeline_job_out}}"} + }, + "resources": {"instance_count": 2, "properties": {}}, + "mini_batch_size": 5, + "retry_settings": None, + "logging_level": None, + "max_concurrency_per_instance": 1, + "error_threshold": None, + "mini_batch_error_threshold": 1, + "tags": {}, + 
"environment_variables": {}, + "task": { + "program_arguments": "--job_output_path " "${{outputs.job_output_path}}", + "code": "./tests/test_configs/dsl_pipeline/parallel_component_with_file_input/src/", + "entry_script": "score.py", + "environment": "azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:5", + "type": "run_function", + }, + }, + }, + "outputs": {"pipeline_job_out": {"job_output_type": "uri_folder"}}, + "properties": {}, + "settings": {"_source": "DSL"}, + "tags": {}, + } + } + + def test_pipeline_with_parallel_function_inside(self): + environment = "AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:5" + expected_environment_variables = {"key": "val"} + expected_resources = {"instance_count": 2} + inputs = { + "job_data_path": Input(type=AssetTypes.MLTABLE, path="./tests/test_configs/data", mode="eval_mount"), + } + input_data = "${{inputs.job_data_path}}" + outputs = {"job_output_path": Output(type=AssetTypes.URI_FOLDER, mode="rw_mount")} + task = RunFunction( + code="./tests/test_configs/dsl_pipeline/parallel_component_with_file_input/src/", + entry_script="score.py", + program_arguments="--job_output_path ${{outputs.job_output_path}}", + environment=environment, + ) + logging_level = "DEBUG" + max_concurrency_per_instance = 1 + error_threshold = 1 + mini_batch_error_threshold = 1 + mini_batch_size = "5" + + # parallel job + @dsl.pipeline(experiment_name="test_pipeline_with_parallel_function_inside") + def pipeline(path): + # Parallel from parallel_run_function() + parallel_function = parallel_run_function( + display_name="my-evaluate-job", + inputs=inputs, + outputs=outputs, + mini_batch_size=mini_batch_size, + task=task, + logging_level=logging_level, + max_concurrency_per_instance=max_concurrency_per_instance, + error_threshold=error_threshold, + mini_batch_error_threshold=mini_batch_error_threshold, + resources=expected_resources, + input_data=input_data, + environment_variables=expected_environment_variables, + ) + node1 = parallel_function(job_data_path=path) + node2 = parallel_function(job_data_path=Input(type=AssetTypes.MLTABLE, path="new_path", mode="eval_mount")) + + return { + "pipeline_output1": node1.outputs.job_output_path, + "pipeline_output2": node2.outputs.job_output_path, + } + + omit_fields = [ + "name", + "properties.jobs.node1.componentId", + "properties.jobs.node2.componentId", + "properties.jobs.node1.properties", + "properties.jobs.node2.properties", + ] + + data = Input(type=AssetTypes.MLTABLE, path="/a/path/on/ds", mode="eval_mount") + pipeline1 = pipeline(data) + pipeline_job1 = pipeline1._to_rest_object().as_dict() + pipeline_job1 = pydash.omit(pipeline_job1, omit_fields) + assert pipeline_job1 == { + "properties": { + "display_name": "pipeline", + "experiment_name": "test_pipeline_with_parallel_function_inside", + "inputs": { + "path": {"job_input_type": "mltable", "mode": "EvalMount", "uri": "/a/path/on/ds"}, + }, + "is_archived": False, + "job_type": "Pipeline", + "jobs": { + "node1": { + "_source": "BUILDER", + "type": "parallel", + "input_data": "${{inputs.job_data_path}}", + "computeId": None, + "display_name": "my-evaluate-job", + "inputs": { + "job_data_path": {"job_input_type": "literal", "value": "${{parent.inputs.path}}"}, + }, + "name": "node1", + "outputs": { + "job_output_path": {"type": "literal", "value": "${{parent.outputs.pipeline_output1}}"} + }, + "resources": {"instance_count": 2, "properties": {}}, + "mini_batch_size": 5, + "task": { + "type": "run_function", + "code": 
"./tests/test_configs/dsl_pipeline/parallel_component_with_file_input/src/", + "entry_script": "score.py", + "program_arguments": "--job_output_path ${{outputs.job_output_path}}", + "environment": "azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:5", + }, + "retry_settings": None, + "logging_level": "DEBUG", + "max_concurrency_per_instance": 1, + "error_threshold": 1, + "mini_batch_error_threshold": 1, + "tags": {}, + "environment_variables": {"key": "val"}, + }, + "node2": { + "_source": "BUILDER", + "type": "parallel", + "input_data": "${{inputs.job_data_path}}", + "computeId": None, + "display_name": "my-evaluate-job", + "inputs": { + "job_data_path": { + "job_input_type": "mltable", + "mode": "EvalMount", + "uri": "new_path", + }, + }, + "name": "node2", + "outputs": { + "job_output_path": {"type": "literal", "value": "${{parent.outputs.pipeline_output2}}"} + }, + "resources": {"instance_count": 2, "properties": {}}, + "mini_batch_size": 5, + "task": { + "type": "run_function", + "code": "./tests/test_configs/dsl_pipeline/parallel_component_with_file_input/src/", + "entry_script": "score.py", + "program_arguments": "--job_output_path ${{outputs.job_output_path}}", + "environment": "azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:5", + }, + "retry_settings": None, + "logging_level": "DEBUG", + "max_concurrency_per_instance": 1, + "error_threshold": 1, + "mini_batch_error_threshold": 1, + "tags": {}, + "environment_variables": {"key": "val"}, + }, + }, + "outputs": { + "pipeline_output1": {"job_output_type": "uri_folder"}, + "pipeline_output2": {"job_output_type": "uri_folder"}, + }, + "properties": {}, + "settings": {"_source": "DSL"}, + "tags": {}, + } + } + + def test_pipeline_with_command_function_inside(self): + environment = "AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:5" + expected_resources = {"instance_count": 2} + expected_environment_variables = {"key": "val"} + inputs = { + "component_in_path": Input(type="uri_folder", path="https://my-blob/path/to/data", mode="ro_mount"), + "component_in_number": 0.01, + } + outputs = {"component_out_path": Output(type="mlflow_model", mode="rw_mount")} + + @dsl.pipeline(experiment_name="test_pipeline_with_command_function_inside") + def pipeline(number, path): + # Command from command() function + command_function = command( + display_name="my-evaluate-job", + environment=environment, + command='echo "hello world"', + distribution={"type": "Pytorch", "process_count_per_instance": 2}, + resources=expected_resources, + environment_variables=expected_environment_variables, + inputs=inputs, + outputs=outputs, + ) + node1 = command_function(component_in_number=number, component_in_path=path) + node2 = command_function(component_in_number=1, component_in_path=Input(path="new_path")) + + return { + "pipeline_output1": node1.outputs.component_out_path, + "pipeline_output2": node2.outputs.component_out_path, + } + + omit_fields = [ + "name", + "properties.jobs.node1.componentId", + "properties.jobs.node2.componentId", + "properties.jobs.node1.properties", + "properties.jobs.node2.properties", + ] + + data = Input(type=AssetTypes.URI_FOLDER, path="/a/path/on/ds") + pipeline1 = pipeline(10, data) + pipeline_job1 = pipeline1._to_rest_object().as_dict() + pipeline_job1 = pydash.omit(pipeline_job1, omit_fields) + assert pipeline_job1 == { + "properties": { + "display_name": "pipeline", + "experiment_name": "test_pipeline_with_command_function_inside", + "inputs": { + "number": {"job_input_type": "literal", "value": "10"}, + "path": {"job_input_type": 
"uri_folder", "uri": "/a/path/on/ds"}, + }, + "is_archived": False, + "job_type": "Pipeline", + "jobs": { + "node1": { + "type": "command", + "_source": "BUILDER", + "computeId": None, + "display_name": "my-evaluate-job", + "distribution": {"distribution_type": "PyTorch", "process_count_per_instance": 2}, + "environment_variables": {"key": "val"}, + "inputs": { + "component_in_number": {"job_input_type": "literal", "value": "${{parent.inputs.number}}"}, + "component_in_path": {"job_input_type": "literal", "value": "${{parent.inputs.path}}"}, + }, + "limits": None, + "name": "node1", + "outputs": { + "component_out_path": {"type": "literal", "value": "${{parent.outputs.pipeline_output1}}"} + }, + "resources": {"instance_count": 2, "properties": {}}, + "tags": {}, + }, + "node2": { + "type": "command", + "_source": "BUILDER", + "computeId": None, + "display_name": "my-evaluate-job", + "distribution": {"distribution_type": "PyTorch", "process_count_per_instance": 2}, + "environment_variables": {"key": "val"}, + "inputs": { + "component_in_number": {"job_input_type": "literal", "value": "1"}, + "component_in_path": { + "job_input_type": "uri_folder", + "uri": "new_path", + }, + }, + "limits": None, + "name": "node2", + "outputs": { + "component_out_path": {"type": "literal", "value": "${{parent.outputs.pipeline_output2}}"} + }, + "resources": {"instance_count": 2, "properties": {}}, + "tags": {}, + }, + }, + "outputs": { + "pipeline_output1": {"job_output_type": "uri_folder"}, + "pipeline_output2": {"job_output_type": "uri_folder"}, + }, + "properties": {}, + "settings": {"_source": "DSL"}, + "tags": {}, + } + } + def test_multi_parallel_components_with_file_input_pipeline_output(self) -> None: + components_dir = tests_root_dir / "test_configs/dsl_pipeline/parallel_component_with_file_input" + batch_inference1 = load_component(source=str(components_dir / "score.yml")) + batch_inference2 = load_component(source=str(components_dir / "score.yml")) + convert_data = load_component(source=str(components_dir / "convert_data.yml")) + + # Construct pipeline + @dsl.pipeline(default_compute="cpu-cluster", experiment_name="sdk-cli-v2") + def parallel_in_pipeline(job_data_path): + batch_inference_node1 = batch_inference1(job_data_path=job_data_path) + convert_data_node = convert_data(input_data=batch_inference_node1.outputs.job_output_path) + convert_data_node.outputs.file_output_data.type = AssetTypes.MLTABLE + batch_inference_node2 = batch_inference2(job_data_path=convert_data_node.outputs.file_output_data) + batch_inference_node2.inputs.job_data_path.mode = InputOutputModes.EVAL_MOUNT + + return {"job_out_data": batch_inference_node2.outputs.job_output_path} + + pipeline = parallel_in_pipeline( + job_data_path=Input( + type=AssetTypes.MLTABLE, + path="./tests/test_configs/dataset/mnist-data/", + mode=InputOutputModes.EVAL_MOUNT, + ), + ) + pipeline.outputs.job_out_data.mode = "upload" + omit_fields = [ + "jobs.batch_inference_node1.componentId", + "jobs.batch_inference_node1.properties", + "jobs.convert_data_node.componentId", + "jobs.convert_data_node.properties", + "jobs.batch_inference_node2.componentId", + "jobs.batch_inference_node2.properties", + ] + actual_job = pydash.omit(pipeline._to_rest_object().properties.as_dict(), *omit_fields) + assert actual_job == { + "properties": {}, + "tags": {}, + "display_name": "parallel_in_pipeline", + "experiment_name": "sdk-cli-v2", + "is_archived": False, + "job_type": "Pipeline", + "inputs": { + "job_data_path": { + "mode": "EvalMount", + "uri": 
"./tests/test_configs/dataset/mnist-data/", + "job_input_type": "mltable", + } + }, + "jobs": { + "batch_inference_node1": { + "_source": "YAML.COMPONENT", + "type": "parallel", + "name": "batch_inference_node1", + "display_name": None, + "tags": {}, + "computeId": None, + "inputs": { + "job_data_path": {"job_input_type": "literal", "value": "${{parent.inputs.job_data_path}}"} + }, + "outputs": {}, + "mini_batch_size": 1, + "task": { + "program_arguments": "--job_output_path " "${{outputs.job_output_path}}", + "code": "./src", + "entry_script": "score.py", + "environment": "azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:1", + "type": "run_function", + }, + "input_data": "${{inputs.job_data_path}}", + "retry_settings": None, + "logging_level": None, + "resources": {"instance_count": 2, "properties": {}}, + "max_concurrency_per_instance": 1, + "error_threshold": None, + "mini_batch_error_threshold": 1, + "environment_variables": {}, + }, + "convert_data_node": { + "_source": "YAML.COMPONENT", + "computeId": None, + "display_name": None, + "distribution": None, + "environment_variables": {}, + "inputs": { + "input_data": { + "job_input_type": "literal", + "value": "${{parent.jobs.batch_inference_node1.outputs.job_output_path}}", + } + }, + "limits": None, + "name": "convert_data_node", + "outputs": {"file_output_data": {"job_output_type": "mltable"}}, + "resources": None, + "tags": {}, + "type": "command", + }, + "batch_inference_node2": { + "_source": "YAML.COMPONENT", + "type": "parallel", + "name": "batch_inference_node2", + "display_name": None, + "tags": {}, + "computeId": None, + "inputs": { + "job_data_path": { + "job_input_type": "literal", + "value": "${{parent.jobs.convert_data_node.outputs.file_output_data}}", + "mode": "EvalMount", + } + }, + "outputs": {"job_output_path": {"value": "${{parent.outputs.job_out_data}}", "type": "literal"}}, + "mini_batch_size": 1, + "task": { + "program_arguments": "--job_output_path " "${{outputs.job_output_path}}", + "code": "./src", + "entry_script": "score.py", + "environment": "azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:1", + "type": "run_function", + }, + "input_data": "${{inputs.job_data_path}}", + "retry_settings": None, + "logging_level": None, + "resources": {"instance_count": 2, "properties": {}}, + "max_concurrency_per_instance": 1, + "error_threshold": None, + "mini_batch_error_threshold": 1, + "environment_variables": {}, + }, + }, + "outputs": {"job_out_data": {"mode": "Upload", "job_output_type": "uri_folder"}}, + "settings": {"_source": "DSL", "default_compute": "cpu-cluster"}, + } + + def test_automl_node_in_pipeline(self) -> None: + # create ClassificationJob with classification func inside pipeline is also supported + @dsl.pipeline(name="train_with_automl_in_pipeline", default_compute_target="cpu-cluster") + def train_with_automl_in_pipeline( + main_data_input, target_column_name_input: str, max_total_trials_input: int, validation_data_size: float + ): + automl_classif_job = classification( + training_data=main_data_input, + # validation_data_size="${{parent.inputs.validation_data_size}}", + target_column_name=target_column_name_input, + primary_metric="accuracy", + enable_model_explainability=True, + outputs={"best_model": Output(type="mlflow_model")}, + ) + + automl_classif_job.set_limits( + max_trials=max_total_trials_input, + max_concurrent_trials=4, # Matches number of cluster's nodes + enable_early_termination=True, + ) + + automl_classif_job.set_training(enable_onnx_compatible_models=True) + + job_input = Input( 
+ type=AssetTypes.MLTABLE, + path="fake_path", + ) + pipeline1: PipelineJob = train_with_automl_in_pipeline(job_input, "target", 10, 0.2) + + pipeline_dict1 = pipeline1._to_rest_object().as_dict() + pipeline_dict1 = pydash.omit( + pipeline_dict1["properties"], ["jobs.automl_classif_job.display_name", "jobs.automl_classif_job.properties"] + ) + + expected_dict = { + "display_name": "train_with_automl_in_pipeline", + "inputs": { + "main_data_input": {"job_input_type": "mltable", "uri": "fake_path"}, + "max_total_trials_input": {"job_input_type": "literal", "value": "10"}, + "target_column_name_input": {"job_input_type": "literal", "value": "target"}, + "validation_data_size": {"job_input_type": "literal", "value": "0.2"}, + }, + "is_archived": False, + "job_type": "Pipeline", + "jobs": { + "automl_classif_job": { + "limits": { + "enable_early_termination": True, + "max_concurrent_trials": 4, + "max_trials": "${{parent.inputs.max_total_trials_input}}", + }, + "log_verbosity": "info", + "name": "automl_classif_job", + "outputs": {"best_model": {"job_output_type": "mlflow_model"}}, + "primary_metric": "accuracy", + "tags": {}, + "target_column_name": "${{parent.inputs.target_column_name_input}}", + "task": "classification", + "training": {"enable_model_explainability": True, "enable_onnx_compatible_models": True}, + "training_data": "${{parent.inputs.main_data_input}}", + "type": "automl", + } + }, + "outputs": {}, + "settings": {"_source": "DSL", "default_compute": "cpu-cluster"}, + "properties": {}, + "tags": {}, + } + assert pipeline_dict1 == expected_dict + + # create ClassificationJob inside pipeline is NOT supported + @dsl.pipeline(name="train_with_automl_in_pipeline", default_compute_target="cpu-cluster") + def train_with_automl_in_pipeline( + main_data_input, target_column_name_input: str, max_total_trials_input: int, validation_data_size: float + ): + automl_classif_job = ClassificationJob( + primary_metric="accuracy", + outputs={"best_model": Output(type="mlflow_model")}, + ) + automl_classif_job.set_data( + training_data=main_data_input, + target_column_name=target_column_name_input, + validation_data_size="${{parent.inputs.validation_data_size}}", + ) + + pipeline = train_with_automl_in_pipeline(job_input, "target", 10, 0.2) + # classification job defined with ClassificationJob won't be collected in pipeline job + assert pipeline.jobs == {} + + def test_automl_node_with_command_node(self): + path = "./tests/test_configs/components/helloworld_component.yml" + component_func1 = load_component(source=path) + + @dsl.pipeline(name="train_with_automl_in_pipeline", force_rerun=False) + def train_with_automl_in_pipeline(component_in_number, component_in_path, target_column_name_input: str): + node1 = component_func1(component_in_number=component_in_number, component_in_path=component_in_path) + + node2 = classification( + training_data=node1.outputs.component_out_path, + # validation_data_size="${{parent.inputs.validation_data_size}}", + target_column_name=target_column_name_input, + primary_metric="accuracy", + enable_model_explainability=True, + outputs=dict(best_model=Output(type="mlflow_model")), + ) + node2.set_limits(max_concurrent_trials=1) + + job_input = Input( + type=AssetTypes.MLTABLE, + path="fake_path", + ) + pipeline1: PipelineJob = train_with_automl_in_pipeline(10, job_input, "target") + pipeline1.compute = "cpu-cluster" + pipeline_dict1 = pipeline1._to_rest_object().as_dict() + pipeline_dict1 = pydash.omit( + pipeline_dict1["properties"], + "jobs.node1.componentId", + 
"jobs.node2.display_name", + "jobs.node1.properties", + "jobs.node2.properties", + ) + assert pipeline_dict1 == { + "compute_id": "cpu-cluster", + "display_name": "train_with_automl_in_pipeline", + "inputs": { + "component_in_number": {"job_input_type": "literal", "value": "10"}, + "component_in_path": {"job_input_type": "mltable", "uri": "fake_path"}, + "target_column_name_input": {"job_input_type": "literal", "value": "target"}, + }, + "is_archived": False, + "job_type": "Pipeline", + "jobs": { + "node1": { + "type": "command", + "_source": "YAML.COMPONENT", + "computeId": None, + "display_name": None, + "distribution": None, + "environment_variables": {}, + "inputs": { + "component_in_number": { + "job_input_type": "literal", + "value": "${{parent.inputs.component_in_number}}", + }, + "component_in_path": { + "job_input_type": "literal", + "value": "${{parent.inputs.component_in_path}}", + }, + }, + "limits": None, + "name": "node1", + "outputs": {}, + "resources": None, + "tags": {}, + }, + "node2": { + "limits": {"max_concurrent_trials": 1}, + "log_verbosity": "info", + "name": "node2", + "outputs": {"best_model": {"job_output_type": "mlflow_model"}}, + "primary_metric": "accuracy", + "tags": {}, + "target_column_name": "${{parent.inputs.target_column_name_input}}", + "task": "classification", + "training": {"enable_model_explainability": True}, + "training_data": "${{parent.jobs.node1.outputs.component_out_path}}", + "type": "automl", + }, + }, + "outputs": {}, + "properties": {}, + "settings": {"force_rerun": False, "_source": "DSL"}, + "tags": {}, + } + + def test_automl_node_with_pipeline_level_output(self): + @dsl.pipeline(name="train_with_automl_in_pipeline") + def train_with_automl_in_pipeline(training_data, target_column_name_input: str): + classification_node = classification( + training_data=training_data, + # validation_data_size="${{parent.inputs.validation_data_size}}", + target_column_name=target_column_name_input, + primary_metric="accuracy", + enable_model_explainability=True, + outputs=dict(best_model=Output(type="mlflow_model")), + ) + return {"pipeline_job_out_best_model": classification_node.outputs.best_model} + + job_input = Input( + type=AssetTypes.MLTABLE, + path="fake_path", + ) + pipeline1: PipelineJob = train_with_automl_in_pipeline(job_input, "target") + pipeline1.compute = "cpu-cluster" + + pipeline_dict1 = pipeline1._to_rest_object().as_dict() + pipeline_dict1 = pydash.omit( + pipeline_dict1["properties"], + ["jobs.classification_node.display_name", "jobs.classification_node.properties"], + ) + expected_dict = { + "compute_id": "cpu-cluster", + "display_name": "train_with_automl_in_pipeline", + "inputs": { + "target_column_name_input": {"job_input_type": "literal", "value": "target"}, + "training_data": {"job_input_type": "mltable", "uri": "fake_path"}, + }, + "is_archived": False, + "job_type": "Pipeline", + "jobs": { + "classification_node": { + "log_verbosity": "info", + "name": "classification_node", + "outputs": { + "best_model": {"type": "literal", "value": "${{parent.outputs.pipeline_job_out_best_model}}"} + }, + "primary_metric": "accuracy", + "tags": {}, + "target_column_name": "${{parent.inputs.target_column_name_input}}", + "task": "classification", + "training": {"enable_model_explainability": True}, + "training_data": "${{parent.inputs.training_data}}", + "type": "automl", + } + }, + # default to uri folder with rwmount + "outputs": {"pipeline_job_out_best_model": {"job_output_type": "uri_folder"}}, + "properties": {}, + "settings": 
{"_source": "DSL"}, + "tags": {}, + } + assert pipeline_dict1 == expected_dict + + # in order to get right type, user need to specify it on pipeline level + pipeline1.outputs.pipeline_job_out_best_model.type = "mlflow_model" + pipeline1.outputs.pipeline_job_out_best_model.mode = "rw_mount" + pipeline_dict2 = pipeline1._to_rest_object().as_dict() + pipeline_dict2 = pydash.omit( + pipeline_dict2["properties"], + ["jobs.classification_node.display_name", "jobs.classification_node.properties"], + ) + expected_dict.update( + { + "outputs": { + "pipeline_job_out_best_model": {"job_output_type": "mlflow_model", "mode": "ReadWriteMount"} + }, + } + ) + assert pipeline_dict2 == expected_dict + + def test_automl_node_without_variable_name(self) -> None: + @dsl.pipeline(name="train_with_automl_in_pipeline", default_compute_target="cpu-cluster") + def train_with_automl_in_pipeline(training_data, target_column_name_input: str): + classification( + training_data=training_data, + # validation_data_size="${{parent.inputs.validation_data_size}}", + target_column_name=target_column_name_input, + primary_metric="accuracy", + enable_model_explainability=True, + outputs=dict(best_model=Output(type="mlflow_model")), + ) + classification( + training_data=training_data, + # validation_data_size="${{parent.inputs.validation_data_size}}", + target_column_name=target_column_name_input, + primary_metric="accuracy", + enable_model_explainability=True, + outputs=dict(best_model=Output(type="mlflow_model")), + ) + regression( + training_data=training_data, + target_column_name="SalePrice", + primary_metric="r2_score", + outputs={"best_model": Output(type="mlflow_model")}, + ) + regression( + training_data=training_data, + target_column_name="SalePrice", + primary_metric="r2_score", + outputs={"best_model": Output(type="mlflow_model")}, + ) + + job_input = Input( + type=AssetTypes.MLTABLE, + path="fake_path", + ) + pipeline1: PipelineJob = train_with_automl_in_pipeline(job_input, "target") + pipeline_dict1 = pipeline1._to_rest_object().as_dict() + assert set(pipeline_dict1["properties"]["jobs"].keys()) == { + "regressionjob", + "regressionjob_1", + "classificationjob_1", + "classificationjob", + } + + def test_pipeline_with_command_services(self): + services = { + "my_jupyter": {"job_service_type": "Jupyter"}, + "my_tensorboard": { + "job_service_type": "TensorBoard", + "properties": { + "logDir": "~/tblog", + }, + }, + "my_jupyterlab": {"job_service_type": "JupyterLab"}, + } + + command_func = command( + name="test_component_with_services", + display_name="command_with_services", + environment="AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:5", + command=('echo "hello world" & sleep 1h'), + environment_variables={"key": "val"}, + inputs={}, + outputs={"component_out_path": Output(type="uri_folder")}, + services=services, + ) + + @dsl.pipeline( + name="test_component_with_services_pipeline", + description="The command node with services", + tags={"owner": "sdkteam", "tag": "tagvalue"}, + compute="cpu-cluster", + ) + def sample_pipeline(): + node = command_func() + return {"pipeline_output": node.outputs.component_out_path} + + pipeline = sample_pipeline() + node_services = pipeline.jobs["node"].services + + assert len(node_services) == 3 + for name, service in node_services.items(): + assert isinstance(service, JobService) + + job_rest_obj = pipeline._to_rest_object() + assert job_rest_obj.properties.jobs["node"]["services"] == services + + recovered_obj = PipelineJob._from_rest_object(job_rest_obj) + node_services = 
recovered_obj.jobs["node"].services + + assert len(node_services) == 3 + for name, service in node_services.items(): + assert isinstance(service, JobService) + + # test set services in pipeline + new_services = {"my_jupyter": {"job_service_type": "Jupyter"}} + + @dsl.pipeline() + def sample_pipeline_with_new_services(): + node = command_func() + node.services = new_services + + pipeline = sample_pipeline_with_new_services() + node_services = pipeline.jobs["node"].services + + assert len(node_services) == 1 + for name, service in node_services.items(): + assert isinstance(service, JobService) + + job_rest_obj = pipeline._to_rest_object() + assert job_rest_obj.properties.jobs["node"]["services"] == new_services + + def test_pipeline_with_pipeline_component_entity(self): + path = "./tests/test_configs/components/helloworld_component.yml" + component_func1 = load_component(path) + data = Data(name="test", version="1", type=AssetTypes.MLTABLE) + + @dsl.pipeline + def sub_pipeline(component_in_number, component_in_path): + node1 = component_func1(component_in_number=component_in_number, component_in_path=component_in_path) + return {"pipeline_out": node1.outputs.component_out_path} + + @dsl.pipeline + def root_pipeline(component_in_number, component_in_path): + node1 = sub_pipeline(component_in_number=component_in_number, component_in_path=component_in_path) + sub_pipeline(component_in_number=2, component_in_path=data) + return {"pipeline_out": node1.outputs.pipeline_out} + + pipeline = root_pipeline(1, data) + pipeline_dict = pipeline._to_dict() + assert pipeline_dict["jobs"]["node1"]["inputs"] == { + "component_in_number": {"path": "${{parent.inputs.component_in_number}}"}, + "component_in_path": {"path": "${{parent.inputs.component_in_path}}"}, + } + assert pipeline_dict["jobs"]["node1_1"]["inputs"] == { + "component_in_number": 2, + "component_in_path": {"type": "mltable", "path": "azureml:test:1"}, + } diff --git a/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_init_finalize_job.py b/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_init_finalize_job.py new file mode 100644 index 000000000000..2ae23a843417 --- /dev/null +++ b/sdk/ml/azure-ai-ml/tests/dsl/unittests/test_init_finalize_job.py @@ -0,0 +1,283 @@ +from functools import partial +from pathlib import Path + +import pydash +import pytest +from azure.ai.ml import Input, dsl, load_component +from azure.ai.ml.constants._common import ( + AssetTypes, + InputOutputModes, +) +from azure.ai.ml.entities import PipelineJob +from azure.ai.ml.entities._builders import Spark + +from .._util import _DSL_TIMEOUT_SECOND + +tests_root_dir = Path(__file__).parent.parent.parent +components_dir = tests_root_dir / "test_configs/components/" + + +@pytest.mark.usefixtures("enable_pipeline_private_preview_features") +@pytest.mark.timeout(_DSL_TIMEOUT_SECOND) +@pytest.mark.unittest +class TestInitFinalizeJob: + component_func = partial( + load_component(str(components_dir / "echo_string_component.yml")), + component_in_string="not important", + ) + hello_world_func = load_component(str(components_dir / "helloworld_component.yml")) + + def test_init_finalize_job(self) -> None: + from azure.ai.ml._internal.dsl import set_pipeline_settings + from azure.ai.ml.dsl import pipeline + + def assert_pipeline_job_init_finalize_job(pipeline_job: PipelineJob): + assert pipeline_job._validate_init_finalize_job().passed + assert pipeline_job.settings.on_init == "init_job" + assert pipeline_job.settings.on_finalize == "finalize_job" + pipeline_job_dict = 
pipeline_job._to_rest_object().as_dict() + assert pipeline_job_dict["properties"]["settings"]["on_init"] == "init_job" + assert pipeline_job_dict["properties"]["settings"]["on_finalize"] == "finalize_job" + + # pipeline.settings.on_init/on_finalize + @pipeline() + def job_settings_func(): + init_job = self.component_func() # noqa: F841 + work1 = self.component_func() # noqa: F841 + work2 = self.component_func() # noqa: F841 + finalize_job = self.component_func() # noqa: F841 + + pipeline1 = job_settings_func() + pipeline1.settings.on_init = "init_job" + pipeline1.settings.on_finalize = "finalize_job" + assert_pipeline_job_init_finalize_job(pipeline1) + + # dsl.settings() + @pipeline() + def dsl_settings_func(): + init_job = self.component_func() + work1 = self.component_func() # noqa: F841 + work2 = self.component_func() # noqa: F841 + finalize_job = self.component_func() # noqa: F841 + # `set_pipeline_settings` can receive either `BaseNode` or str, both should work + set_pipeline_settings(on_init=init_job, on_finalize="finalize_job") + + pipeline2 = dsl_settings_func() + assert_pipeline_job_init_finalize_job(pipeline2) + + # @pipeline(on_init, on_finalize) + @pipeline( + on_init="init_job", + on_finalize="finalize_job", + ) + def in_decorator_func(): + init_job = self.component_func() # noqa: F841 + work1 = self.component_func() # noqa: F841 + work2 = self.component_func() # noqa: F841 + finalize_job = self.component_func() # noqa: F841 + + pipeline3 = in_decorator_func() + assert_pipeline_job_init_finalize_job(pipeline3) + + def test_invalid_init_finalize_job(self) -> None: + # invalid case: job name not exists + @dsl.pipeline() + def invalid_init_finalize_job_func(): + self.component_func() + + invalid_pipeline1 = invalid_init_finalize_job_func() + invalid_pipeline1.settings.on_init = "init_job" + invalid_pipeline1.settings.on_finalize = "finalize_job" + validation_result1 = invalid_pipeline1._validate_init_finalize_job() + assert not validation_result1.passed + assert validation_result1.error_messages["settings.on_init"] == "On_init job name init_job not exists in jobs." + assert ( + validation_result1.error_messages["settings.on_finalize"] + == "On_finalize job name finalize_job not exists in jobs." + ) + + # invalid case: no normal node, on_init/on_finalize job is not isolated + @dsl.pipeline() + def init_finalize_with_invalid_connection_func(int_param: int, str_param: str): + node1 = self.hello_world_func(component_in_number=int_param, component_in_path=str_param) + node2 = self.hello_world_func( # noqa: F841 + component_in_number=int_param, + component_in_path=node1.outputs.component_out_path, + ) + + invalid_pipeline2 = init_finalize_with_invalid_connection_func(int_param=0, str_param="str") + invalid_pipeline2.settings.on_init = "node2" + invalid_pipeline2.settings.on_finalize = "node1" + validation_result2 = invalid_pipeline2._validate_init_finalize_job() + assert not validation_result2.passed + assert validation_result2.error_messages["jobs"] == "No other job except for on_init/on_finalize job." + assert ( + validation_result2.error_messages["settings.on_init"] + == "On_init job should not have connection to other execution node." + ) + assert ( + validation_result2.error_messages["settings.on_finalize"] + == "On_finalize job should not have connection to other execution node." 
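+ # i.e. an on_init/on_finalize job must be isolated: it may neither consume
+ # nor produce data for any other node in the pipeline graph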
+ ) + + # invalid case: call `set_pipeline_settings` out of `pipeline` decorator + from azure.ai.ml._internal.dsl import set_pipeline_settings + from azure.ai.ml.exceptions import UserErrorException + + with pytest.raises(UserErrorException) as e: + set_pipeline_settings(on_init="init_job", on_finalize="finalize_job") + assert str(e.value) == "Please call `set_pipeline_settings` inside a `pipeline` decorated function." + + # invalid case: set on_init for pipeline component + @dsl.pipeline + def subgraph_func(): + node = self.component_func() + set_pipeline_settings(on_init=node) # set on_init for subgraph (pipeline component) + + @dsl.pipeline + def subgraph_with_init_func(): + subgraph_func() + self.component_func() + + with pytest.raises(UserErrorException) as e: + subgraph_with_init_func() + assert str(e.value) == "On_init/on_finalize is not supported for pipeline component." + + def test_init_finalize_job_with_subgraph(self, caplog) -> None: + from azure.ai.ml._internal.dsl import set_pipeline_settings + + # happy path + @dsl.pipeline() + def subgraph_func(): + node = self.component_func() + node.compute = "cpu-cluster" + + @dsl.pipeline() + def subgraph_init_finalize_job_func(): + init_job = subgraph_func() + subgraph_work = subgraph_func() # noqa: F841 + finalize_job = subgraph_func() + set_pipeline_settings(on_init=init_job, on_finalize=finalize_job) + + valid_pipeline = subgraph_init_finalize_job_func() + assert valid_pipeline._validate().passed + assert valid_pipeline.settings.on_init == "init_job" + assert valid_pipeline.settings.on_finalize == "finalize_job" + + def test_dsl_pipeline_with_spark_hobo(self) -> None: + add_greeting_column_func = load_component( + "./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/add_greeting_column_component.yml" + ) + count_by_row_func = load_component( + "./tests/test_configs/dsl_pipeline/spark_job_in_pipeline/count_by_row_component.yml" + ) + + @dsl.pipeline(description="submit a pipeline with spark job") + def spark_pipeline_from_yaml(iris_data): + add_greeting_column = add_greeting_column_func(file_input=iris_data) + add_greeting_column.resources = {"instance_type": "Standard_E8S_V3", "runtime_version": "3.1.0"} + count_by_row = count_by_row_func(file_input=iris_data) + count_by_row.resources = {"instance_type": "Standard_E8S_V3", "runtime_version": "3.1.0"} + count_by_row.identity = {"type": "managed"} + + return {"output": count_by_row.outputs.output} + + dsl_pipeline: PipelineJob = spark_pipeline_from_yaml( + iris_data=Input( + path="https://azuremlexamples.blob.core.windows.net/datasets/iris.csv", + type=AssetTypes.URI_FILE, + mode=InputOutputModes.DIRECT, + ), + ) + dsl_pipeline.outputs.output.mode = "Direct" + + spark_node = dsl_pipeline.jobs["add_greeting_column"] + job_data_path_input = spark_node.inputs["file_input"]._meta + assert job_data_path_input + # spark_node.component._id = "azureml:test_component:1" + spark_node_dict = spark_node._to_dict() + + spark_node_rest_obj = spark_node._to_rest_object() + regenerated_spark_node = Spark._from_rest_object(spark_node_rest_obj) + + spark_node_dict_from_rest = regenerated_spark_node._to_dict() + omit_fields = [] + assert pydash.omit(spark_node_dict, *omit_fields) == pydash.omit(spark_node_dict_from_rest, *omit_fields) + omit_fields = [ + "jobs.add_greeting_column.componentId", + "jobs.count_by_row.componentId", + "jobs.add_greeting_column.properties", + "jobs.count_by_row.properties", + ] + actual_job = pydash.omit(dsl_pipeline._to_rest_object().properties.as_dict(), *omit_fields) + 
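+ # the expected payload below asserts both spark nodes verbatim, including
+ # conf, identity, resources and the bindings to parent pipeline inputs/outputs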
assert actual_job == {
+ "description": "submit a pipeline with spark job",
+ "properties": {},
+ "tags": {},
+ "display_name": "spark_pipeline_from_yaml",
+ "is_archived": False,
+ "job_type": "Pipeline",
+ "inputs": {
+ "iris_data": {
+ "mode": "Direct",
+ "uri": "https://azuremlexamples.blob.core.windows.net/datasets/iris.csv",
+ "job_input_type": "uri_file",
+ }
+ },
+ "jobs": {
+ "add_greeting_column": {
+ "type": "spark",
+ "resources": {"instance_type": "Standard_E8S_V3", "runtime_version": "3.1.0"},
+ "entry": {"file": "add_greeting_column.py", "spark_job_entry_type": "SparkJobPythonEntry"},
+ "py_files": ["utils.zip"],
+ "files": ["my_files.txt"],
+ "archives": None,
+ "jars": None,
+ "identity": {"identity_type": "UserIdentity"},
+ "conf": {
+ "spark.driver.cores": 2,
+ "spark.driver.memory": "1g",
+ "spark.executor.cores": 1,
+ "spark.executor.memory": "1g",
+ "spark.executor.instances": 1,
+ },
+ "args": "--file_input ${{inputs.file_input}}",
+ "name": "add_greeting_column",
+ "display_name": None,
+ "tags": {},
+ "computeId": None,
+ "inputs": {
+ "file_input": {"job_input_type": "literal", "value": "${{parent.inputs.iris_data}}"},
+ },
+ "outputs": {},
+ "_source": "YAML.COMPONENT",
+ },
+ "count_by_row": {
+ "_source": "YAML.COMPONENT",
+ "archives": None,
+ "args": "--file_input ${{inputs.file_input}} " "--output ${{outputs.output}}",
+ "computeId": None,
+ "conf": {
+ "spark.driver.cores": 2,
+ "spark.driver.memory": "1g",
+ "spark.executor.cores": 1,
+ "spark.executor.instances": 1,
+ "spark.executor.memory": "1g",
+ },
+ "display_name": None,
+ "entry": {"file": "count_by_row.py", "spark_job_entry_type": "SparkJobPythonEntry"},
+ "files": ["my_files.txt"],
+ "identity": {"identity_type": "Managed"},
+ "inputs": {"file_input": {"job_input_type": "literal", "value": "${{parent.inputs.iris_data}}"}},
+ "jars": ["scalaproj.jar"],
+ "name": "count_by_row",
+ "outputs": {"output": {"type": "literal", "value": "${{parent.outputs.output}}"}},
+ "py_files": None,
+ "resources": {"instance_type": "Standard_E8S_V3", "runtime_version": "3.1.0"},
+ "tags": {},
+ "type": "spark",
+ },
+ },
+ "outputs": {"output": {"job_output_type": "uri_folder", "mode": "Direct"}},
+ "settings": {"_source": "DSL"},
+ }

From 734f717c26bf378e9b8e8b3f4489f1fb7c9886c2 Mon Sep 17 00:00:00 2001
From: zhangxingzhi
Date: Wed, 19 Oct 2022 15:31:26 +0800
Subject: [PATCH 4/5] feat: load labelled arm id

---
 .../azure/ai/ml/_utils/_arm_id_utils.py | 43 +
 .../azure/ai/ml/constants/_common.py | 1 +
 .../ml/operations/_operation_orchestrator.py | 9 +-
 sdk/ml/azure-ai-ml/tests/conftest.py | 2 +-
 .../tests/dsl/e2etests/test_dsl_pipeline.py | 22 +
 ...t_dsl_pipeline_with_default_component.json | 837 ++++++++++++++++++
 6 files changed, 912 insertions(+), 2 deletions(-)
 create mode 100644 sdk/ml/azure-ai-ml/tests/recordings/dsl/e2etests/test_dsl_pipeline.pyTestDSLPipelinetest_dsl_pipeline_with_default_component.json

diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_utils/_arm_id_utils.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_utils/_arm_id_utils.py
index b6bf52bceaa0..90f77222a915 100644
--- a/sdk/ml/azure-ai-ml/azure/ai/ml/_utils/_arm_id_utils.py
+++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_utils/_arm_id_utils.py
@@ -81,6 +81,49 @@ def get_datastore_arm_id(datastore_name: str = None, operation_scope: OperationS
 )
+
+class AMLLabelledArmId(object):
+ """Parser for labelled arm id: e.g. /subscription/.../code/my-
+ code/labels/default.
+
+ :param arm_id: The labelled ARM id.
+ :type arm_id: str
+ :raises ~azure.ai.ml.exceptions.ValidationException: Raised if the ARM id is incorrectly formatted.
+ """
+
+ REGEX_PATTERN = (
+ "^/?subscriptions/([^/]+)/resourceGroups/(["
+ "^/]+)/providers/Microsoft.MachineLearningServices/workspaces/([^/]+)/([^/]+)/([^/]+)/labels/(["
+ "^/]+)"
+ )
+
+ def __init__(self, arm_id=None):
+ self.is_registry_id = None
+ if arm_id:
+ match = re.match(AMLLabelledArmId.REGEX_PATTERN, arm_id)
+ if match:
+ self.subscription_id = match.group(1)
+ self.resource_group_name = match.group(2)
+ self.workspace_name = match.group(3)
+ self.asset_type = match.group(4)
+ self.asset_name = match.group(5)
+ self.asset_label = match.group(6)
+ else:
+ match = re.match(REGISTRY_VERSION_PATTERN, arm_id)
+ if match:
+ self.asset_name = match.group(3)
+ self.asset_label = match.group(4)
+ self.is_registry_id = True
+ else:
+ msg = "Invalid AzureML ARM labelled Id {}"
+ raise ValidationException(
+ message=msg.format(arm_id),
+ no_personal_data_message=msg.format("[arm_id]"),
+ error_type=ValidationErrorType.INVALID_VALUE,
+ error_category=ErrorCategory.USER_ERROR,
+ target=ErrorTarget.ARM_RESOURCE,
+ )
+
+
 class AMLNamedArmId:
 """Parser for named arm id (no version): e.g. /subscription/.../compute/cpu-cluster.

diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/constants/_common.py b/sdk/ml/azure-ai-ml/azure/ai/ml/constants/_common.py
index 31f900cd62fa..74e80ff6cbc4 100644
--- a/sdk/ml/azure-ai-ml/azure/ai/ml/constants/_common.py
+++ b/sdk/ml/azure-ai-ml/azure/ai/ml/constants/_common.py
@@ -37,6 +37,7 @@
 )
 ASSET_ID_FORMAT = "azureml://locations/{}/workspaces/{}/{}/{}/versions/{}"
 VERSIONED_RESOURCE_NAME = "{}:{}"
+LABELLED_RESOURCE_NAME = "{}@{}"
 PYTHON = "python"
 AML_TOKEN_YAML = "aml_token"
 AAD_TOKEN_YAML = "aad_token"
diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_operation_orchestrator.py b/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_operation_orchestrator.py
index d04fcde18cae..081c17044fe9 100644
--- a/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_operation_orchestrator.py
+++ b/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_operation_orchestrator.py
@@ -15,6 +15,7 @@ from azure.ai.ml._utils._arm_id_utils import (
 AMLNamedArmId,
 AMLVersionedArmId,
+ AMLLabelledArmId,
 get_arm_id_with_version,
 is_ARM_id_for_resource,
 is_registry_id_for_resource,
@@ -23,7 +24,6 @@
 )
 from azure.ai.ml._utils._asset_utils import _resolve_label_to_asset
 from azure.ai.ml._utils._storage_utils import AzureMLDatastorePathUri
-from azure.ai.ml._utils.utils import is_private_preview_enabled # pylint: disable=unused-import
 from azure.ai.ml.constants._common import (
 ARM_ID_PREFIX,
 AZUREML_RESOURCE_PROVIDER,
@@ -36,6 +36,7 @@
 NAMED_RESOURCE_ID_FORMAT,
 VERSIONED_RESOURCE_ID_FORMAT,
 VERSIONED_RESOURCE_NAME,
+ LABELLED_RESOURCE_NAME,
 AzureMLResourceType,
 )
 from azure.ai.ml.entities import Component
@@ -396,6 +397,12 @@ def resolve_azureml_id(self, arm_id: str = None, **kwargs) -> str:
 return VERSIONED_RESOURCE_NAME.format(arm_id_obj.asset_name, arm_id_obj.asset_version)
 except ValidationException:
 pass # fall back to named arm id
+ try:
+ arm_id_obj = AMLLabelledArmId(arm_id)
+ if self._match(arm_id_obj):
+ return LABELLED_RESOURCE_NAME.format(arm_id_obj.asset_name, arm_id_obj.asset_label)
+ except ValidationException:
+ pass # fall back to named arm id
 try:
 arm_id_obj = AMLNamedArmId(arm_id)
 if self._match(arm_id_obj):
diff --git a/sdk/ml/azure-ai-ml/tests/conftest.py b/sdk/ml/azure-ai-ml/tests/conftest.py
index e6e82d433fb2..2bf7d45a3184 100644
--- 
a/sdk/ml/azure-ai-ml/tests/conftest.py +++ b/sdk/ml/azure-ai-ml/tests/conftest.py @@ -529,7 +529,7 @@ def enable_pipeline_private_preview_features(mocker: MockFixture): @pytest.fixture() def enable_environment_id_arm_expansion(mocker: MockFixture): - mocker.patch("azure.ai.ml.operations._operation_orchestrator.is_private_preview_enabled", return_value=False) + mocker.patch("azure.ai.ml._utils.utils.is_private_preview_enabled", return_value=False) @pytest.fixture(autouse=True) diff --git a/sdk/ml/azure-ai-ml/tests/dsl/e2etests/test_dsl_pipeline.py b/sdk/ml/azure-ai-ml/tests/dsl/e2etests/test_dsl_pipeline.py index 6e0e609d38db..e2e8c0107c30 100644 --- a/sdk/ml/azure-ai-ml/tests/dsl/e2etests/test_dsl_pipeline.py +++ b/sdk/ml/azure-ai-ml/tests/dsl/e2etests/test_dsl_pipeline.py @@ -2410,3 +2410,25 @@ def pipeline_with_group(group: ParamClass): } assert actual_job["inputs"] == expected_job_inputs assert actual_job["jobs"]["microsoft_samples_command_component_basic_inputs"]["inputs"] == expected_node_inputs + + def test_dsl_pipeline_with_default_component( + self, + client: MLClient, + randstr: Callable[[str], str], + ) -> None: + yaml_path: str = "./tests/test_configs/components/helloworld_component.yml" + component_name = randstr("component_name") + component: Component = load_component(source=yaml_path, params_override=[{"name": component_name}]) + client.components.create_or_update(component) + + default_component_func = client.components.get(component_name) + + @dsl.pipeline() + def pipeline_with_default_component(): + node1 = default_component_func(component_in_path=job_input) + node1.compute = "cpu-cluster" + + # component from client.components.get + pipeline_job = client.jobs.create_or_update(pipeline_with_default_component()) + created_pipeline_job: PipelineJob = client.jobs.get(pipeline_job.name) + assert created_pipeline_job.jobs["node1"].component == f"{component_name}@default" diff --git a/sdk/ml/azure-ai-ml/tests/recordings/dsl/e2etests/test_dsl_pipeline.pyTestDSLPipelinetest_dsl_pipeline_with_default_component.json b/sdk/ml/azure-ai-ml/tests/recordings/dsl/e2etests/test_dsl_pipeline.pyTestDSLPipelinetest_dsl_pipeline_with_default_component.json new file mode 100644 index 000000000000..72d669035a24 --- /dev/null +++ b/sdk/ml/azure-ai-ml/tests/recordings/dsl/e2etests/test_dsl_pipeline.pyTestDSLPipelinetest_dsl_pipeline_with_default_component.json @@ -0,0 +1,837 @@ +{ + "Entries": [ + { + "RequestUri": "https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/datastores/workspaceblobstore?api-version=2022-05-01", + "RequestMethod": "GET", + "RequestHeaders": { + "Accept": "application/json", + "Accept-Encoding": "gzip, deflate", + "Connection": "keep-alive", + "User-Agent": "azure-ai-ml/1.1.0 azsdk-python-mgmt-machinelearningservices/0.1.0 Python/3.9.10 (Windows-10-10.0.22621-SP0)" + }, + "RequestBody": null, + "StatusCode": 200, + "ResponseHeaders": { + "Cache-Control": "no-cache", + "Content-Encoding": "gzip", + "Content-Type": "application/json; charset=utf-8", + "Date": "Wed, 19 Oct 2022 06:59:06 GMT", + "Expires": "-1", + "Pragma": "no-cache", + "Request-Context": "appId=cid-v1:512cc15a-13b5-415b-bfd0-dce7accb6bb1", + "Server-Timing": "traceparent;desc=\u002200-8988bcbd5dce49a27023d2e8a6cd2458-3ff442b4228c8a9a-01\u0022", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains", + "Transfer-Encoding": "chunked", + "Vary": [ + "Accept-Encoding", + "Accept-Encoding" + 
], + "x-aml-cluster": "vienna-test-westus2-01", + "X-Content-Type-Options": "nosniff", + "x-ms-correlation-request-id": "9539b78b-0852-47f9-9925-ad1d853a4d39", + "x-ms-ratelimit-remaining-subscription-reads": "11996", + "x-ms-response-type": "standard", + "x-ms-routing-request-id": "JAPANEAST:20221019T065906Z:9539b78b-0852-47f9-9925-ad1d853a4d39", + "x-request-time": "0.951" + }, + "ResponseBody": { + "id": "/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/datastores/workspaceblobstore", + "name": "workspaceblobstore", + "type": "Microsoft.MachineLearningServices/workspaces/datastores", + "properties": { + "description": null, + "tags": null, + "properties": null, + "isDefault": true, + "credentials": { + "credentialsType": "AccountKey" + }, + "datastoreType": "AzureBlob", + "accountName": "sagvgsoim6nmhbq", + "containerName": "azureml-blobstore-e61cd5e2-512f-475e-9842-5e2a973993b8", + "endpoint": "core.windows.net", + "protocol": "https", + "serviceDataAccessAuthIdentity": "WorkspaceSystemAssignedIdentity" + }, + "systemData": { + "createdAt": "2022-09-22T09:02:03.2629568\u002B00:00", + "createdBy": "779301c0-18b2-4cdc-801b-a0a3368fee0a", + "createdByType": "Application", + "lastModifiedAt": "2022-09-22T09:02:04.166989\u002B00:00", + "lastModifiedBy": "779301c0-18b2-4cdc-801b-a0a3368fee0a", + "lastModifiedByType": "Application" + } + } + }, + { + "RequestUri": "https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/datastores/workspaceblobstore/listSecrets?api-version=2022-05-01", + "RequestMethod": "POST", + "RequestHeaders": { + "Accept": "application/json", + "Accept-Encoding": "gzip, deflate", + "Connection": "keep-alive", + "Content-Length": "0", + "User-Agent": "azure-ai-ml/1.1.0 azsdk-python-mgmt-machinelearningservices/0.1.0 Python/3.9.10 (Windows-10-10.0.22621-SP0)" + }, + "RequestBody": null, + "StatusCode": 200, + "ResponseHeaders": { + "Cache-Control": "no-cache", + "Content-Encoding": "gzip", + "Content-Type": "application/json; charset=utf-8", + "Date": "Wed, 19 Oct 2022 06:59:07 GMT", + "Expires": "-1", + "Pragma": "no-cache", + "Request-Context": "appId=cid-v1:512cc15a-13b5-415b-bfd0-dce7accb6bb1", + "Server-Timing": "traceparent;desc=\u002200-c59051a6fa42566960cd0292c3d17a89-296dbabd177fe6e9-01\u0022", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains", + "Transfer-Encoding": "chunked", + "Vary": "Accept-Encoding", + "x-aml-cluster": "vienna-test-westus2-01", + "X-Content-Type-Options": "nosniff", + "x-ms-correlation-request-id": "ec0238cb-676c-4a76-aabc-2d2ded86feaa", + "x-ms-ratelimit-remaining-subscription-writes": "1198", + "x-ms-response-type": "standard", + "x-ms-routing-request-id": "JAPANEAST:20221019T065907Z:ec0238cb-676c-4a76-aabc-2d2ded86feaa", + "x-request-time": "0.486" + }, + "ResponseBody": { + "secretsType": "AccountKey", + "key": "dGhpcyBpcyBmYWtlIGtleQ==" + } + }, + { + "RequestUri": "https://sagvgsoim6nmhbq.blob.core.windows.net/azureml-blobstore-e61cd5e2-512f-475e-9842-5e2a973993b8/LocalUpload/00000000000000000000000000000000/COMPONENT_PLACEHOLDER", + "RequestMethod": "HEAD", + "RequestHeaders": { + "Accept": "application/xml", + "Accept-Encoding": "gzip, deflate", + "Connection": "keep-alive", + "User-Agent": "azsdk-python-storage-blob/12.14.0 Python/3.9.10 (Windows-10-10.0.22621-SP0)", + "x-ms-date": "Wed, 19 Oct 2022 06:59:07 GMT", + "x-ms-version": 
"2021-08-06" + }, + "RequestBody": null, + "StatusCode": 200, + "ResponseHeaders": { + "Accept-Ranges": "bytes", + "Content-Length": "35", + "Content-MD5": "L/DnSpFIn\u002BjaQWc\u002BsUQdcw==", + "Content-Type": "application/octet-stream", + "Date": "Wed, 19 Oct 2022 06:59:08 GMT", + "ETag": "\u00220x8DA9D48E17467D7\u0022", + "Last-Modified": "Fri, 23 Sep 2022 09:49:17 GMT", + "Server": [ + "Windows-Azure-Blob/1.0", + "Microsoft-HTTPAPI/2.0" + ], + "Vary": "Origin", + "x-ms-access-tier": "Hot", + "x-ms-access-tier-inferred": "true", + "x-ms-blob-type": "BlockBlob", + "x-ms-creation-time": "Fri, 23 Sep 2022 09:49:16 GMT", + "x-ms-lease-state": "available", + "x-ms-lease-status": "unlocked", + "x-ms-meta-name": "9c9cfba9-82bd-45db-ad06-07009d1d9672", + "x-ms-meta-upload_status": "completed", + "x-ms-meta-version": "1", + "x-ms-server-encrypted": "true", + "x-ms-version": "2021-08-06" + }, + "ResponseBody": null + }, + { + "RequestUri": "https://sagvgsoim6nmhbq.blob.core.windows.net/azureml-blobstore-e61cd5e2-512f-475e-9842-5e2a973993b8/az-ml-artifacts/00000000000000000000000000000000/COMPONENT_PLACEHOLDER", + "RequestMethod": "HEAD", + "RequestHeaders": { + "Accept": "application/xml", + "Accept-Encoding": "gzip, deflate", + "Connection": "keep-alive", + "User-Agent": "azsdk-python-storage-blob/12.14.0 Python/3.9.10 (Windows-10-10.0.22621-SP0)", + "x-ms-date": "Wed, 19 Oct 2022 06:59:08 GMT", + "x-ms-version": "2021-08-06" + }, + "RequestBody": null, + "StatusCode": 404, + "ResponseHeaders": { + "Date": "Wed, 19 Oct 2022 06:59:08 GMT", + "Server": [ + "Windows-Azure-Blob/1.0", + "Microsoft-HTTPAPI/2.0" + ], + "Transfer-Encoding": "chunked", + "Vary": "Origin", + "x-ms-error-code": "BlobNotFound", + "x-ms-version": "2021-08-06" + }, + "ResponseBody": null + }, + { + "RequestUri": "https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/codes/9c9cfba9-82bd-45db-ad06-07009d1d9672/versions/1?api-version=2022-05-01", + "RequestMethod": "PUT", + "RequestHeaders": { + "Accept": "application/json", + "Accept-Encoding": "gzip, deflate", + "Connection": "keep-alive", + "Content-Length": "288", + "Content-Type": "application/json", + "User-Agent": "azure-ai-ml/1.1.0 azsdk-python-mgmt-machinelearningservices/0.1.0 Python/3.9.10 (Windows-10-10.0.22621-SP0)" + }, + "RequestBody": { + "properties": { + "properties": { + "hash_sha256": "0000000000000", + "hash_version": "0000000000000" + }, + "isAnonymous": true, + "isArchived": false, + "codeUri": "https://sagvgsoim6nmhbq.blob.core.windows.net/azureml-blobstore-e61cd5e2-512f-475e-9842-5e2a973993b8/LocalUpload/00000000000000000000000000000000" + } + }, + "StatusCode": 200, + "ResponseHeaders": { + "Cache-Control": "no-cache", + "Content-Encoding": "gzip", + "Content-Type": "application/json; charset=utf-8", + "Date": "Wed, 19 Oct 2022 06:59:09 GMT", + "Expires": "-1", + "Pragma": "no-cache", + "Request-Context": "appId=cid-v1:512cc15a-13b5-415b-bfd0-dce7accb6bb1", + "Server-Timing": "traceparent;desc=\u002200-95db65ac332ee29d02e0fb62cb03e49f-75a4383ca8c0cd59-01\u0022", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains", + "Transfer-Encoding": "chunked", + "Vary": [ + "Accept-Encoding", + "Accept-Encoding" + ], + "x-aml-cluster": "vienna-test-westus2-01", + "X-Content-Type-Options": "nosniff", + "x-ms-correlation-request-id": "61765ca3-aeb5-4d81-b5bd-16f426f05cde", + "x-ms-ratelimit-remaining-subscription-writes": "1198", + 
"x-ms-response-type": "standard", + "x-ms-routing-request-id": "JAPANEAST:20221019T065910Z:61765ca3-aeb5-4d81-b5bd-16f426f05cde", + "x-request-time": "0.915" + }, + "ResponseBody": { + "id": "/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/codes/9c9cfba9-82bd-45db-ad06-07009d1d9672/versions/1", + "name": "1", + "type": "Microsoft.MachineLearningServices/workspaces/codes/versions", + "properties": { + "description": null, + "tags": {}, + "properties": { + "hash_sha256": "0000000000000", + "hash_version": "0000000000000" + }, + "isArchived": false, + "isAnonymous": false, + "codeUri": "https://sagvgsoim6nmhbq.blob.core.windows.net/azureml-blobstore-e61cd5e2-512f-475e-9842-5e2a973993b8/LocalUpload/00000000000000000000000000000000" + }, + "systemData": { + "createdAt": "2022-09-23T09:49:20.984936\u002B00:00", + "createdBy": "Ying Chen", + "createdByType": "User", + "lastModifiedAt": "2022-10-19T06:59:10.1185461\u002B00:00", + "lastModifiedBy": "Xingzhi Zhang", + "lastModifiedByType": "User" + } + } + }, + { + "RequestUri": "https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/components/test_691263607553/versions/0.0.1?api-version=2022-05-01", + "RequestMethod": "PUT", + "RequestHeaders": { + "Accept": "application/json", + "Accept-Encoding": "gzip, deflate", + "Connection": "keep-alive", + "Content-Length": "1286", + "Content-Type": "application/json", + "User-Agent": "azure-ai-ml/1.1.0 azsdk-python-mgmt-machinelearningservices/0.1.0 Python/3.9.10 (Windows-10-10.0.22621-SP0)" + }, + "RequestBody": { + "properties": { + "description": "This is the basic command component", + "properties": {}, + "tags": { + "tag": "tagvalue", + "owner": "sdkteam" + }, + "isAnonymous": false, + "isArchived": false, + "componentSpec": { + "command": "echo Hello World \u0026 echo $[[${{inputs.component_in_number}}]] \u0026 echo ${{inputs.component_in_path}} \u0026 echo ${{outputs.component_out_path}} \u003E ${{outputs.component_out_path}}/component_in_number", + "code": "azureml:/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/codes/9c9cfba9-82bd-45db-ad06-07009d1d9672/versions/1", + "environment": "azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:1", + "name": "test_691263607553", + "description": "This is the basic command component", + "tags": { + "tag": "tagvalue", + "owner": "sdkteam" + }, + "version": "0.0.1", + "$schema": "https://azuremlschemas.azureedge.net/development/commandComponent.schema.json", + "display_name": "CommandComponentBasic", + "is_deterministic": true, + "inputs": { + "component_in_number": { + "type": "number", + "optional": true, + "default": "10.99", + "description": "A number" + }, + "component_in_path": { + "type": "uri_folder", + "description": "A path" + } + }, + "outputs": { + "component_out_path": { + "type": "uri_folder" + } + }, + "type": "command", + "_source": "YAML.COMPONENT" + } + } + }, + "StatusCode": 201, + "ResponseHeaders": { + "Cache-Control": "no-cache", + "Content-Length": "2217", + "Content-Type": "application/json; charset=utf-8", + "Date": "Wed, 19 Oct 2022 06:59:14 GMT", + "Expires": "-1", + "Location": 
"https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/components/test_691263607553/versions/0.0.1?api-version=2022-05-01", + "Pragma": "no-cache", + "Request-Context": "appId=cid-v1:512cc15a-13b5-415b-bfd0-dce7accb6bb1", + "Server-Timing": "traceparent;desc=\u002200-5fa3a1c92bfa8b0cafdbf672a6d428cf-e9463c1292994c18-01\u0022", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains", + "x-aml-cluster": "vienna-test-westus2-01", + "X-Content-Type-Options": "nosniff", + "x-ms-correlation-request-id": "91b71f1c-4be2-4d7d-8d28-da616d5c2c9c", + "x-ms-ratelimit-remaining-subscription-writes": "1197", + "x-ms-response-type": "standard", + "x-ms-routing-request-id": "JAPANEAST:20221019T065914Z:91b71f1c-4be2-4d7d-8d28-da616d5c2c9c", + "x-request-time": "3.668" + }, + "ResponseBody": { + "id": "/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/components/test_691263607553/versions/0.0.1", + "name": "0.0.1", + "type": "Microsoft.MachineLearningServices/workspaces/components/versions", + "properties": { + "description": null, + "tags": { + "tag": "tagvalue", + "owner": "sdkteam" + }, + "properties": {}, + "isArchived": false, + "isAnonymous": false, + "componentSpec": { + "name": "test_691263607553", + "version": "0.0.1", + "display_name": "CommandComponentBasic", + "is_deterministic": "True", + "type": "command", + "description": "This is the basic command component", + "tags": { + "tag": "tagvalue", + "owner": "sdkteam" + }, + "inputs": { + "component_in_path": { + "type": "uri_folder", + "optional": "False", + "description": "A path" + }, + "component_in_number": { + "type": "number", + "optional": "True", + "default": "10.99", + "description": "A number" + } + }, + "outputs": { + "component_out_path": { + "type": "uri_folder" + } + }, + "code": "azureml:/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/codes/9c9cfba9-82bd-45db-ad06-07009d1d9672/versions/1", + "environment": "azureml://registries/azureml-dev/environments/AzureML-sklearn-0.24-ubuntu18.04-py37-cpu/versions/1", + "resources": { + "instance_count": "1" + }, + "command": "echo Hello World \u0026 echo $[[${{inputs.component_in_number}}]] \u0026 echo ${{inputs.component_in_path}} \u0026 echo ${{outputs.component_out_path}} \u003E ${{outputs.component_out_path}}/component_in_number", + "$schema": "https://azuremlschemas.azureedge.net/development/commandComponent.schema.json" + } + }, + "systemData": { + "createdAt": "2022-10-19T06:59:13.3917039\u002B00:00", + "createdBy": "Xingzhi Zhang", + "createdByType": "User", + "lastModifiedAt": "2022-10-19T06:59:14.041114\u002B00:00", + "lastModifiedBy": "Xingzhi Zhang", + "lastModifiedByType": "User" + } + } + }, + { + "RequestUri": "https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/components/test_691263607553/versions/azureml_default?api-version=2022-05-01", + "RequestMethod": "GET", + "RequestHeaders": { + "Accept": "application/json", + "Accept-Encoding": "gzip, deflate", + "Connection": "keep-alive", + "User-Agent": "azure-ai-ml/1.1.0 azsdk-python-mgmt-machinelearningservices/0.1.0 Python/3.9.10 (Windows-10-10.0.22621-SP0)" + }, + "RequestBody": null, + "StatusCode": 200, + "ResponseHeaders": { + "Cache-Control": 
"no-cache", + "Content-Encoding": "gzip", + "Content-Type": "application/json; charset=utf-8", + "Date": "Wed, 19 Oct 2022 06:59:14 GMT", + "Expires": "-1", + "Pragma": "no-cache", + "Request-Context": "appId=cid-v1:512cc15a-13b5-415b-bfd0-dce7accb6bb1", + "Server-Timing": "traceparent;desc=\u002200-4601197b72d910404cc4488097110645-cac9279380ddcf04-01\u0022", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains", + "Transfer-Encoding": "chunked", + "Vary": [ + "Accept-Encoding", + "Accept-Encoding" + ], + "x-aml-cluster": "vienna-test-westus2-01", + "X-Content-Type-Options": "nosniff", + "x-ms-correlation-request-id": "643d1d30-4ac6-4f97-ab3f-566489848f55", + "x-ms-ratelimit-remaining-subscription-reads": "11995", + "x-ms-response-type": "standard", + "x-ms-routing-request-id": "JAPANEAST:20221019T065915Z:643d1d30-4ac6-4f97-ab3f-566489848f55", + "x-request-time": "0.443" + }, + "ResponseBody": { + "id": "/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/components/test_691263607553/labels/default", + "name": "0.0.1", + "type": "Microsoft.MachineLearningServices/workspaces/components/versions", + "properties": { + "description": null, + "tags": { + "tag": "tagvalue", + "owner": "sdkteam" + }, + "properties": {}, + "isArchived": false, + "isAnonymous": false, + "componentSpec": { + "name": "test_691263607553", + "version": "0.0.1", + "display_name": "CommandComponentBasic", + "is_deterministic": "True", + "type": "command", + "description": "This is the basic command component", + "tags": { + "tag": "tagvalue", + "owner": "sdkteam" + }, + "inputs": { + "component_in_path": { + "type": "uri_folder", + "optional": "False", + "description": "A path" + }, + "component_in_number": { + "type": "number", + "optional": "True", + "default": "10.99", + "description": "A number" + } + }, + "outputs": { + "component_out_path": { + "type": "uri_folder" + } + }, + "code": "azureml:/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/codes/9c9cfba9-82bd-45db-ad06-07009d1d9672/versions/1", + "environment": "azureml://registries/azureml-dev/environments/AzureML-sklearn-0.24-ubuntu18.04-py37-cpu/versions/1", + "resources": { + "instance_count": "1" + }, + "command": "echo Hello World \u0026 echo $[[${{inputs.component_in_number}}]] \u0026 echo ${{inputs.component_in_path}} \u0026 echo ${{outputs.component_out_path}} \u003E ${{outputs.component_out_path}}/component_in_number", + "$schema": "https://azuremlschemas.azureedge.net/development/commandComponent.schema.json" + } + }, + "systemData": { + "createdAt": "2022-10-19T06:59:13.3917039\u002B00:00", + "createdBy": "Xingzhi Zhang", + "createdByType": "User", + "lastModifiedAt": "2022-10-19T06:59:14.041114\u002B00:00", + "lastModifiedBy": "Xingzhi Zhang", + "lastModifiedByType": "User" + } + } + }, + { + "RequestUri": "https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/computes/cpu-cluster?api-version=2022-01-01-preview", + "RequestMethod": "GET", + "RequestHeaders": { + "Accept": "application/json", + "Accept-Encoding": "gzip, deflate", + "Connection": "keep-alive", + "User-Agent": "azure-ai-ml/1.1.0 azsdk-python-mgmt-machinelearningservices/0.1.0 Python/3.9.10 (Windows-10-10.0.22621-SP0)" + }, + "RequestBody": null, + "StatusCode": 200, + "ResponseHeaders": { + "Cache-Control": 
"no-cache", + "Content-Encoding": "gzip", + "Content-Type": "application/json; charset=utf-8", + "Date": "Wed, 19 Oct 2022 06:59:17 GMT", + "Expires": "-1", + "Pragma": "no-cache", + "Request-Context": "appId=cid-v1:512cc15a-13b5-415b-bfd0-dce7accb6bb1", + "Server-Timing": "traceparent;desc=\u002200-f6b76e73d7305da63ab4f232c946dfd1-cd6ae98e7a0bf90d-01\u0022", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains", + "Transfer-Encoding": "chunked", + "Vary": [ + "Accept-Encoding", + "Accept-Encoding" + ], + "x-aml-cluster": "vienna-test-westus2-01", + "X-Content-Type-Options": "nosniff", + "x-ms-correlation-request-id": "d48ecee0-b13f-4bb7-bd75-4967df2fca32", + "x-ms-ratelimit-remaining-subscription-reads": "11994", + "x-ms-response-type": "standard", + "x-ms-routing-request-id": "JAPANEAST:20221019T065917Z:d48ecee0-b13f-4bb7-bd75-4967df2fca32", + "x-request-time": "0.233" + }, + "ResponseBody": { + "id": "/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/computes/cpu-cluster", + "name": "cpu-cluster", + "type": "Microsoft.MachineLearningServices/workspaces/computes", + "location": "centraluseuap", + "tags": {}, + "properties": { + "createdOn": "2022-09-22T09:02:22.1899959\u002B00:00", + "modifiedOn": "2022-09-23T03:28:18.0066218\u002B00:00", + "disableLocalAuth": false, + "description": null, + "resourceId": null, + "computeType": "AmlCompute", + "computeLocation": "centraluseuap", + "provisioningState": "Succeeded", + "provisioningErrors": null, + "isAttachedCompute": false, + "properties": { + "vmSize": "STANDARD_DS2_V2", + "vmPriority": "Dedicated", + "scaleSettings": { + "maxNodeCount": 4, + "minNodeCount": 1, + "nodeIdleTimeBeforeScaleDown": "PT2M" + }, + "subnet": null, + "currentNodeCount": 3, + "targetNodeCount": 4, + "nodeStateCounts": { + "preparingNodeCount": 0, + "runningNodeCount": 3, + "idleNodeCount": 0, + "unusableNodeCount": 0, + "leavingNodeCount": 0, + "preemptedNodeCount": 0 + }, + "allocationState": "Resizing", + "allocationStateTransitionTime": "2022-10-19T06:57:37.473\u002B00:00", + "errors": null, + "remoteLoginPortPublicAccess": "Enabled", + "osType": "Linux", + "virtualMachineImage": null, + "isolatedNetwork": false, + "propertyBag": {} + } + } + } + }, + { + "RequestUri": "https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/jobs/000000000000000000000?api-version=2022-10-01-preview", + "RequestMethod": "PUT", + "RequestHeaders": { + "Accept": "application/json", + "Accept-Encoding": "gzip, deflate", + "Connection": "keep-alive", + "Content-Length": "1013", + "Content-Type": "application/json", + "User-Agent": "azure-ai-ml/1.1.0 azsdk-python-mgmt-machinelearningservices/0.1.0 Python/3.9.10 (Windows-10-10.0.22621-SP0)" + }, + "RequestBody": { + "properties": { + "properties": {}, + "tags": {}, + "displayName": "pipeline_with_default_component", + "experimentName": "azure-ai-ml", + "isArchived": false, + "jobType": "Pipeline", + "inputs": {}, + "jobs": { + "node1": { + "resources": { + "instance_count": 1, + "properties": {} + }, + "distribution": null, + "limits": null, + "environment_variables": {}, + "name": "node1", + "type": "command", + "display_name": null, + "tags": {}, + "computeId": "/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/computes/cpu-cluster", + "inputs": { + 
"component_in_path": { + "uri": "https://dprepdata.blob.core.windows.net/demo/Titanic.csv", + "job_input_type": "uri_file" + } + }, + "outputs": {}, + "properties": {}, + "_source": "REMOTE.WORKSPACE.COMPONENT", + "componentId": "/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/components/test_691263607553/labels/default" + } + }, + "outputs": {}, + "settings": { + "_source": "DSL" + } + } + }, + "StatusCode": 201, + "ResponseHeaders": { + "Cache-Control": "no-cache", + "Content-Length": "3009", + "Content-Type": "application/json; charset=utf-8", + "Date": "Wed, 19 Oct 2022 06:59:26 GMT", + "Expires": "-1", + "Location": "https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/jobs/000000000000000000000?api-version=2022-10-01-preview", + "Pragma": "no-cache", + "Request-Context": "appId=cid-v1:512cc15a-13b5-415b-bfd0-dce7accb6bb1", + "Server-Timing": "traceparent;desc=\u002200-7a3db1f037e0546793ec5f6122254499-fca0afdd2ae486ca-01\u0022", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains", + "x-aml-cluster": "vienna-test-westus2-01", + "X-Content-Type-Options": "nosniff", + "x-ms-correlation-request-id": "d06c8736-f92c-483e-9bb2-ec9819815c1c", + "x-ms-ratelimit-remaining-subscription-writes": "1196", + "x-ms-response-type": "standard", + "x-ms-routing-request-id": "JAPANEAST:20221019T065926Z:d06c8736-f92c-483e-9bb2-ec9819815c1c", + "x-request-time": "5.373" + }, + "ResponseBody": { + "id": "/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/jobs/000000000000000000000", + "name": "000000000000000000000", + "type": "Microsoft.MachineLearningServices/workspaces/jobs", + "properties": { + "description": null, + "tags": {}, + "properties": { + "azureml.DevPlatv2": "true", + "azureml.runsource": "azureml.PipelineRun", + "runSource": "MFE", + "runType": "HTTP", + "azureml.parameters": "{}", + "azureml.continue_on_step_failure": "False", + "azureml.continue_on_failed_optional_input": "True", + "azureml.defaultDataStoreName": "workspaceblobstore", + "azureml.pipelineComponent": "pipelinerun" + }, + "displayName": "pipeline_with_default_component", + "status": "Preparing", + "experimentName": "azure-ai-ml", + "services": { + "Tracking": { + "jobServiceType": "Tracking", + "port": null, + "endpoint": "azureml://master.api.azureml-test.ms/mlflow/v1.0/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000?", + "status": null, + "errorMessage": null, + "properties": null, + "nodes": null + }, + "Studio": { + "jobServiceType": "Studio", + "port": null, + "endpoint": "https://ml.azure.com/runs/000000000000000000000?wsid=/subscriptions/00000000-0000-0000-0000-000000000/resourcegroups/00000/workspaces/00000", + "status": null, + "errorMessage": null, + "properties": null, + "nodes": null + } + }, + "computeId": null, + "isArchived": false, + "identity": null, + "componentId": null, + "jobType": "Pipeline", + "settings": { + "_source": "DSL" + }, + "jobs": { + "node1": { + "resources": { + "instance_count": 1, + "properties": {} + }, + "distribution": null, + "limits": null, + "environment_variables": {}, + "name": "node1", + "type": "command", + "display_name": null, + "tags": {}, + "computeId": 
"/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/computes/cpu-cluster", + "inputs": { + "component_in_path": { + "uri": "https://dprepdata.blob.core.windows.net/demo/Titanic.csv", + "job_input_type": "uri_file" + } + }, + "outputs": {}, + "properties": {}, + "_source": "REMOTE.WORKSPACE.COMPONENT", + "componentId": "/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/components/test_691263607553/labels/default" + } + }, + "inputs": {}, + "outputs": {}, + "sourceJobId": null + }, + "systemData": { + "createdAt": "2022-10-19T06:59:25.138133\u002B00:00", + "createdBy": "Xingzhi Zhang", + "createdByType": "User" + } + } + }, + { + "RequestUri": "https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/jobs/000000000000000000000?api-version=2022-10-01-preview", + "RequestMethod": "GET", + "RequestHeaders": { + "Accept": "application/json", + "Accept-Encoding": "gzip, deflate", + "Connection": "keep-alive", + "User-Agent": "azure-ai-ml/1.1.0 azsdk-python-mgmt-machinelearningservices/0.1.0 Python/3.9.10 (Windows-10-10.0.22621-SP0)" + }, + "RequestBody": null, + "StatusCode": 200, + "ResponseHeaders": { + "Cache-Control": "no-cache", + "Content-Encoding": "gzip", + "Content-Type": "application/json; charset=utf-8", + "Date": "Wed, 19 Oct 2022 06:59:28 GMT", + "Expires": "-1", + "Pragma": "no-cache", + "Request-Context": "appId=cid-v1:512cc15a-13b5-415b-bfd0-dce7accb6bb1", + "Server-Timing": "traceparent;desc=\u002200-6c6f2fcbe84cf7a89cb20875992681e8-8520486b8373cb08-01\u0022", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains", + "Transfer-Encoding": "chunked", + "Vary": [ + "Accept-Encoding", + "Accept-Encoding" + ], + "x-aml-cluster": "vienna-test-westus2-01", + "X-Content-Type-Options": "nosniff", + "x-ms-correlation-request-id": "5baafb4d-9b26-4c22-af63-0d0169294f7b", + "x-ms-ratelimit-remaining-subscription-reads": "11993", + "x-ms-response-type": "standard", + "x-ms-routing-request-id": "JAPANEAST:20221019T065928Z:5baafb4d-9b26-4c22-af63-0d0169294f7b", + "x-request-time": "0.094" + }, + "ResponseBody": { + "id": "/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/jobs/000000000000000000000", + "name": "000000000000000000000", + "type": "Microsoft.MachineLearningServices/workspaces/jobs", + "properties": { + "description": null, + "tags": {}, + "properties": { + "azureml.DevPlatv2": "true", + "azureml.runsource": "azureml.PipelineRun", + "runSource": "MFE", + "runType": "HTTP", + "azureml.parameters": "{}", + "azureml.continue_on_step_failure": "False", + "azureml.continue_on_failed_optional_input": "True", + "azureml.defaultDataStoreName": "workspaceblobstore", + "azureml.pipelineComponent": "pipelinerun" + }, + "displayName": "pipeline_with_default_component", + "status": "Running", + "experimentName": "azure-ai-ml", + "services": { + "Tracking": { + "jobServiceType": "Tracking", + "port": null, + "endpoint": "azureml://master.api.azureml-test.ms/mlflow/v1.0/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000?", + "status": null, + "errorMessage": null, + "properties": null, + "nodes": null + }, + "Studio": { + "jobServiceType": "Studio", + "port": null, + 
"endpoint": "https://ml.azure.com/runs/000000000000000000000?wsid=/subscriptions/00000000-0000-0000-0000-000000000/resourcegroups/00000/workspaces/00000", + "status": null, + "errorMessage": null, + "properties": null, + "nodes": null + } + }, + "computeId": null, + "isArchived": false, + "identity": null, + "componentId": null, + "jobType": "Pipeline", + "settings": { + "_source": "DSL" + }, + "jobs": { + "node1": { + "resources": { + "instance_count": 1, + "properties": {} + }, + "distribution": null, + "limits": null, + "environment_variables": {}, + "name": "node1", + "type": "command", + "display_name": null, + "tags": {}, + "computeId": "/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/computes/cpu-cluster", + "inputs": { + "component_in_path": { + "uri": "https://dprepdata.blob.core.windows.net/demo/Titanic.csv", + "job_input_type": "uri_file" + } + }, + "outputs": {}, + "properties": {}, + "_source": "REMOTE.WORKSPACE.COMPONENT", + "componentId": "/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/components/test_691263607553/labels/default" + } + }, + "inputs": {}, + "outputs": {}, + "sourceJobId": null + }, + "systemData": { + "createdAt": "2022-10-19T06:59:25.138133\u002B00:00", + "createdBy": "Xingzhi Zhang", + "createdByType": "User" + } + } + } + ], + "Variables": { + "component_name": "test_691263607553" + } +} From cbf22bb2f69564bebe4396325bc00548e2b7c300 Mon Sep 17 00:00:00 2001 From: zhangxingzhi Date: Wed, 19 Oct 2022 20:19:46 +0800 Subject: [PATCH 5/5] feat: support name@default use name@label for node component in pipeline yaml --- .../azure/ai/ml/constants/_common.py | 1 + .../ml/operations/_operation_orchestrator.py | 15 + .../e2etests/test_pipeline_job.py | 11 + ..._pipeline_node_with_default_component.json | 539 ++++++++++++++++++ ...ld_pipeline_job_with_default_component.yml | 32 ++ 5 files changed, 598 insertions(+) create mode 100644 sdk/ml/azure-ai-ml/tests/recordings/pipeline_job/e2etests/test_pipeline_job.pyTestPipelineJobtest_pipeline_node_with_default_component.json create mode 100644 sdk/ml/azure-ai-ml/tests/test_configs/pipeline_jobs/helloworld_pipeline_job_with_default_component.yml diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/constants/_common.py b/sdk/ml/azure-ai-ml/azure/ai/ml/constants/_common.py index 74e80ff6cbc4..13e68037c07f 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/constants/_common.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/constants/_common.py @@ -29,6 +29,7 @@ NAMED_RESOURCE_ID_FORMAT = "/subscriptions/{}/resourceGroups/{}/providers/{}/workspaces/{}/{}/{}" LEVEL_ONE_NAMED_RESOURCE_ID_FORMAT = "/subscriptions/{}/resourceGroups/{}/providers/{}/{}/{}" VERSIONED_RESOURCE_ID_FORMAT = "/subscriptions/{}/resourceGroups/{}/providers/{}/workspaces/{}/{}/{}/versions/{}" +LABELLED_RESOURCE_ID_FORMAT = "/subscriptions/{}/resourceGroups/{}/providers/{}/workspaces/{}/{}/{}/labels/{}" DATASTORE_RESOURCE_ID = ( "/subscriptions/{}/resourceGroups/{}/providers/Microsoft.MachineLearningServices/workspaces/{}/datastores/{}" ) diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_operation_orchestrator.py b/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_operation_orchestrator.py index 081c17044fe9..1161bc6955f5 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_operation_orchestrator.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_operation_orchestrator.py @@ -38,6 +38,8 @@ VERSIONED_RESOURCE_NAME, 
LABELLED_RESOURCE_NAME, AzureMLResourceType, + LABELLED_RESOURCE_ID_FORMAT, + DEFAULT_LABEL_NAME, ) from azure.ai.ml.entities import Component from azure.ai.ml.entities._assets import Code, Data, Environment, Model @@ -150,6 +152,19 @@ def get_asset_arm_id( "CLI and SDK. Learn more at aka.ms/curatedenv" ) return f"azureml:{asset}" + + name, label = parse_name_label(asset) + # TODO: remove this condition after label is fully supported for all versioned resources + if label == DEFAULT_LABEL_NAME and azureml_type == AzureMLResourceType.COMPONENT: + return LABELLED_RESOURCE_ID_FORMAT.format( + self._operation_scope.subscription_id, + self._operation_scope.resource_group_name, + AZUREML_RESOURCE_PROVIDER, + self._operation_scope.workspace_name, + azureml_type, + name, + label, + ) name, version = self._resolve_name_version_from_name_label(asset, azureml_type) if not version: name, version = parse_prefixed_name_version(asset) diff --git a/sdk/ml/azure-ai-ml/tests/pipeline_job/e2etests/test_pipeline_job.py b/sdk/ml/azure-ai-ml/tests/pipeline_job/e2etests/test_pipeline_job.py index 0bc14d1fd684..c92ff57eeb1f 100644 --- a/sdk/ml/azure-ai-ml/tests/pipeline_job/e2etests/test_pipeline_job.py +++ b/sdk/ml/azure-ai-ml/tests/pipeline_job/e2etests/test_pipeline_job.py @@ -1542,6 +1542,17 @@ def test_remote_pipeline_component_job(self, client: MLClient, randstr: Callable # assert pipeline_dict["outputs"] == {"output_path": {"mode": "ReadWriteMount", "job_output_type": "uri_folder"}} assert pipeline_dict["settings"] == {"default_compute": "cpu-cluster", "_source": "REMOTE.WORKSPACE.COMPONENT"} + def test_pipeline_node_with_default_component(self, client: MLClient, randstr: Callable[[str], str]): + params_override = [{"name": randstr("job_name")}] + pipeline_job = load_job( + "./tests/test_configs/pipeline_jobs/helloworld_pipeline_job_with_default_component.yml", + params_override=params_override, + ) + + created_pipeline_job = client.jobs.create_or_update(pipeline_job) + assert created_pipeline_job.jobs["hello_world_component"].component == \ + "microsoftsamples_command_component_basic@default" + @pytest.mark.usefixtures( "recorded_test", diff --git a/sdk/ml/azure-ai-ml/tests/recordings/pipeline_job/e2etests/test_pipeline_job.pyTestPipelineJobtest_pipeline_node_with_default_component.json b/sdk/ml/azure-ai-ml/tests/recordings/pipeline_job/e2etests/test_pipeline_job.pyTestPipelineJobtest_pipeline_node_with_default_component.json new file mode 100644 index 000000000000..cd2a3dabc779 --- /dev/null +++ b/sdk/ml/azure-ai-ml/tests/recordings/pipeline_job/e2etests/test_pipeline_job.pyTestPipelineJobtest_pipeline_node_with_default_component.json @@ -0,0 +1,539 @@ +{ + "Entries": [ + { + "RequestUri": "https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/computes/cpu-cluster?api-version=2022-01-01-preview", + "RequestMethod": "GET", + "RequestHeaders": { + "Accept": "application/json", + "Accept-Encoding": "gzip, deflate", + "Connection": "keep-alive", + "User-Agent": "azure-ai-ml/1.1.0 azsdk-python-mgmt-machinelearningservices/0.1.0 Python/3.9.10 (Windows-10-10.0.22621-SP0)" + }, + "RequestBody": null, + "StatusCode": 200, + "ResponseHeaders": { + "Cache-Control": "no-cache", + "Content-Encoding": "gzip", + "Content-Type": "application/json; charset=utf-8", + "Date": "Wed, 19 Oct 2022 11:37:50 GMT", + "Expires": "-1", + "Pragma": "no-cache", + "Request-Context": 
"appId=cid-v1:512cc15a-13b5-415b-bfd0-dce7accb6bb1", + "Server-Timing": "traceparent;desc=\u002200-b957534e088009d425de46f31470b3d1-812e3a4b92f81bab-01\u0022", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains", + "Transfer-Encoding": "chunked", + "Vary": [ + "Accept-Encoding", + "Accept-Encoding" + ], + "x-aml-cluster": "vienna-test-westus2-01", + "X-Content-Type-Options": "nosniff", + "x-ms-correlation-request-id": "c436c071-f2e5-48c2-ba03-578dfdbe4d7a", + "x-ms-ratelimit-remaining-subscription-reads": "11997", + "x-ms-response-type": "standard", + "x-ms-routing-request-id": "JAPANEAST:20221019T113751Z:c436c071-f2e5-48c2-ba03-578dfdbe4d7a", + "x-request-time": "0.254" + }, + "ResponseBody": { + "id": "/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/computes/cpu-cluster", + "name": "cpu-cluster", + "type": "Microsoft.MachineLearningServices/workspaces/computes", + "location": "centraluseuap", + "tags": {}, + "properties": { + "createdOn": "2022-09-22T09:02:22.1899959\u002B00:00", + "modifiedOn": "2022-09-23T03:28:18.0066218\u002B00:00", + "disableLocalAuth": false, + "description": null, + "resourceId": null, + "computeType": "AmlCompute", + "computeLocation": "centraluseuap", + "provisioningState": "Succeeded", + "provisioningErrors": null, + "isAttachedCompute": false, + "properties": { + "vmSize": "STANDARD_DS2_V2", + "vmPriority": "Dedicated", + "scaleSettings": { + "maxNodeCount": 4, + "minNodeCount": 1, + "nodeIdleTimeBeforeScaleDown": "PT2M" + }, + "subnet": null, + "currentNodeCount": 2, + "targetNodeCount": 2, + "nodeStateCounts": { + "preparingNodeCount": 0, + "runningNodeCount": 2, + "idleNodeCount": 0, + "unusableNodeCount": 0, + "leavingNodeCount": 0, + "preemptedNodeCount": 0 + }, + "allocationState": "Steady", + "allocationStateTransitionTime": "2022-10-19T07:15:47.741\u002B00:00", + "errors": null, + "remoteLoginPortPublicAccess": "Enabled", + "osType": "Linux", + "virtualMachineImage": null, + "isolatedNetwork": false, + "propertyBag": {} + } + } + } + }, + { + "RequestUri": "https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/computes/cpu-cluster?api-version=2022-01-01-preview", + "RequestMethod": "GET", + "RequestHeaders": { + "Accept": "application/json", + "Accept-Encoding": "gzip, deflate", + "Connection": "keep-alive", + "User-Agent": "azure-ai-ml/1.1.0 azsdk-python-mgmt-machinelearningservices/0.1.0 Python/3.9.10 (Windows-10-10.0.22621-SP0)" + }, + "RequestBody": null, + "StatusCode": 200, + "ResponseHeaders": { + "Cache-Control": "no-cache", + "Content-Encoding": "gzip", + "Content-Type": "application/json; charset=utf-8", + "Date": "Wed, 19 Oct 2022 11:37:51 GMT", + "Expires": "-1", + "Pragma": "no-cache", + "Request-Context": "appId=cid-v1:512cc15a-13b5-415b-bfd0-dce7accb6bb1", + "Server-Timing": "traceparent;desc=\u002200-22cef4ebbd77016773f16803bd5b4cf8-ffa4aabd9c7286b1-01\u0022", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains", + "Transfer-Encoding": "chunked", + "Vary": [ + "Accept-Encoding", + "Accept-Encoding" + ], + "x-aml-cluster": "vienna-test-westus2-01", + "X-Content-Type-Options": "nosniff", + "x-ms-correlation-request-id": "91af9ddb-50a4-4c5a-af34-9fda9a68fcf6", + "x-ms-ratelimit-remaining-subscription-reads": "11996", + "x-ms-response-type": "standard", + "x-ms-routing-request-id": 
"JAPANEAST:20221019T113752Z:91af9ddb-50a4-4c5a-af34-9fda9a68fcf6", + "x-request-time": "0.231" + }, + "ResponseBody": { + "id": "/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/computes/cpu-cluster", + "name": "cpu-cluster", + "type": "Microsoft.MachineLearningServices/workspaces/computes", + "location": "centraluseuap", + "tags": {}, + "properties": { + "createdOn": "2022-09-22T09:02:22.1899959\u002B00:00", + "modifiedOn": "2022-09-23T03:28:18.0066218\u002B00:00", + "disableLocalAuth": false, + "description": null, + "resourceId": null, + "computeType": "AmlCompute", + "computeLocation": "centraluseuap", + "provisioningState": "Succeeded", + "provisioningErrors": null, + "isAttachedCompute": false, + "properties": { + "vmSize": "STANDARD_DS2_V2", + "vmPriority": "Dedicated", + "scaleSettings": { + "maxNodeCount": 4, + "minNodeCount": 1, + "nodeIdleTimeBeforeScaleDown": "PT2M" + }, + "subnet": null, + "currentNodeCount": 2, + "targetNodeCount": 2, + "nodeStateCounts": { + "preparingNodeCount": 0, + "runningNodeCount": 2, + "idleNodeCount": 0, + "unusableNodeCount": 0, + "leavingNodeCount": 0, + "preemptedNodeCount": 0 + }, + "allocationState": "Steady", + "allocationStateTransitionTime": "2022-10-19T07:15:47.741\u002B00:00", + "errors": null, + "remoteLoginPortPublicAccess": "Enabled", + "osType": "Linux", + "virtualMachineImage": null, + "isolatedNetwork": false, + "propertyBag": {} + } + } + } + }, + { + "RequestUri": "https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/datastores/workspaceblobstore?api-version=2022-05-01", + "RequestMethod": "GET", + "RequestHeaders": { + "Accept": "application/json", + "Accept-Encoding": "gzip, deflate", + "Connection": "keep-alive", + "User-Agent": "azure-ai-ml/1.1.0 azsdk-python-mgmt-machinelearningservices/0.1.0 Python/3.9.10 (Windows-10-10.0.22621-SP0)" + }, + "RequestBody": null, + "StatusCode": 200, + "ResponseHeaders": { + "Cache-Control": "no-cache", + "Content-Encoding": "gzip", + "Content-Type": "application/json; charset=utf-8", + "Date": "Wed, 19 Oct 2022 11:37:53 GMT", + "Expires": "-1", + "Pragma": "no-cache", + "Request-Context": "appId=cid-v1:512cc15a-13b5-415b-bfd0-dce7accb6bb1", + "Server-Timing": "traceparent;desc=\u002200-15d63bf9b032479a836cc5cf8d839e7d-b045a985c6304ef8-01\u0022", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains", + "Transfer-Encoding": "chunked", + "Vary": [ + "Accept-Encoding", + "Accept-Encoding" + ], + "x-aml-cluster": "vienna-test-westus2-01", + "X-Content-Type-Options": "nosniff", + "x-ms-correlation-request-id": "938f1d87-f4b2-4f73-bec2-a0208b16dc64", + "x-ms-ratelimit-remaining-subscription-reads": "11995", + "x-ms-response-type": "standard", + "x-ms-routing-request-id": "JAPANEAST:20221019T113754Z:938f1d87-f4b2-4f73-bec2-a0208b16dc64", + "x-request-time": "0.121" + }, + "ResponseBody": { + "id": "/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/datastores/workspaceblobstore", + "name": "workspaceblobstore", + "type": "Microsoft.MachineLearningServices/workspaces/datastores", + "properties": { + "description": null, + "tags": null, + "properties": null, + "isDefault": true, + "credentials": { + "credentialsType": "AccountKey" + }, + "datastoreType": "AzureBlob", + "accountName": "sagvgsoim6nmhbq", + "containerName": 
"azureml-blobstore-e61cd5e2-512f-475e-9842-5e2a973993b8", + "endpoint": "core.windows.net", + "protocol": "https", + "serviceDataAccessAuthIdentity": "WorkspaceSystemAssignedIdentity" + }, + "systemData": { + "createdAt": "2022-09-22T09:02:03.2629568\u002B00:00", + "createdBy": "779301c0-18b2-4cdc-801b-a0a3368fee0a", + "createdByType": "Application", + "lastModifiedAt": "2022-09-22T09:02:04.166989\u002B00:00", + "lastModifiedBy": "779301c0-18b2-4cdc-801b-a0a3368fee0a", + "lastModifiedByType": "Application" + } + } + }, + { + "RequestUri": "https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/datastores/workspaceblobstore/listSecrets?api-version=2022-05-01", + "RequestMethod": "POST", + "RequestHeaders": { + "Accept": "application/json", + "Accept-Encoding": "gzip, deflate", + "Connection": "keep-alive", + "Content-Length": "0", + "User-Agent": "azure-ai-ml/1.1.0 azsdk-python-mgmt-machinelearningservices/0.1.0 Python/3.9.10 (Windows-10-10.0.22621-SP0)" + }, + "RequestBody": null, + "StatusCode": 200, + "ResponseHeaders": { + "Cache-Control": "no-cache", + "Content-Encoding": "gzip", + "Content-Type": "application/json; charset=utf-8", + "Date": "Wed, 19 Oct 2022 11:37:54 GMT", + "Expires": "-1", + "Pragma": "no-cache", + "Request-Context": "appId=cid-v1:512cc15a-13b5-415b-bfd0-dce7accb6bb1", + "Server-Timing": "traceparent;desc=\u002200-0df175d1c657e53fbfe5fd845c3d2157-e0fd3e45d1626f54-01\u0022", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains", + "Transfer-Encoding": "chunked", + "Vary": "Accept-Encoding", + "x-aml-cluster": "vienna-test-westus2-01", + "X-Content-Type-Options": "nosniff", + "x-ms-correlation-request-id": "0fd4f51b-532d-4f42-93b3-31595a9b7f65", + "x-ms-ratelimit-remaining-subscription-writes": "1199", + "x-ms-response-type": "standard", + "x-ms-routing-request-id": "JAPANEAST:20221019T113755Z:0fd4f51b-532d-4f42-93b3-31595a9b7f65", + "x-request-time": "0.521" + }, + "ResponseBody": { + "secretsType": "AccountKey", + "key": "dGhpcyBpcyBmYWtlIGtleQ==" + } + }, + { + "RequestUri": "https://sagvgsoim6nmhbq.blob.core.windows.net/azureml-blobstore-e61cd5e2-512f-475e-9842-5e2a973993b8/LocalUpload/00000000000000000000000000000000/data/sample1.csv", + "RequestMethod": "HEAD", + "RequestHeaders": { + "Accept": "application/xml", + "Accept-Encoding": "gzip, deflate", + "Connection": "keep-alive", + "User-Agent": "azsdk-python-storage-blob/12.14.0 Python/3.9.10 (Windows-10-10.0.22621-SP0)", + "x-ms-date": "Wed, 19 Oct 2022 11:37:55 GMT", + "x-ms-version": "2021-08-06" + }, + "RequestBody": null, + "StatusCode": 200, + "ResponseHeaders": { + "Accept-Ranges": "bytes", + "Content-Length": "508", + "Content-MD5": "dUQjYq1qrTeqLOaZ4N2AUQ==", + "Content-Type": "application/octet-stream", + "Date": "Wed, 19 Oct 2022 11:37:55 GMT", + "ETag": "\u00220x8DA9D48AFBCE5A6\u0022", + "Last-Modified": "Fri, 23 Sep 2022 09:47:53 GMT", + "Server": [ + "Windows-Azure-Blob/1.0", + "Microsoft-HTTPAPI/2.0" + ], + "Vary": "Origin", + "x-ms-access-tier": "Hot", + "x-ms-access-tier-inferred": "true", + "x-ms-blob-type": "BlockBlob", + "x-ms-creation-time": "Fri, 23 Sep 2022 09:47:53 GMT", + "x-ms-lease-state": "available", + "x-ms-lease-status": "unlocked", + "x-ms-meta-name": "da405283-c0d4-42bf-9cd0-2d052c9da84b", + "x-ms-meta-upload_status": "completed", + "x-ms-meta-version": "bcdecfd5-08fc-40e1-af7f-364ca3525a76", + "x-ms-server-encrypted": "true", + "x-ms-version": "2021-08-06" + }, 
+ "ResponseBody": null + }, + { + "RequestUri": "https://sagvgsoim6nmhbq.blob.core.windows.net/azureml-blobstore-e61cd5e2-512f-475e-9842-5e2a973993b8/az-ml-artifacts/00000000000000000000000000000000/data/sample1.csv", + "RequestMethod": "HEAD", + "RequestHeaders": { + "Accept": "application/xml", + "Accept-Encoding": "gzip, deflate", + "Connection": "keep-alive", + "User-Agent": "azsdk-python-storage-blob/12.14.0 Python/3.9.10 (Windows-10-10.0.22621-SP0)", + "x-ms-date": "Wed, 19 Oct 2022 11:37:56 GMT", + "x-ms-version": "2021-08-06" + }, + "RequestBody": null, + "StatusCode": 404, + "ResponseHeaders": { + "Date": "Wed, 19 Oct 2022 11:37:55 GMT", + "Server": [ + "Windows-Azure-Blob/1.0", + "Microsoft-HTTPAPI/2.0" + ], + "Transfer-Encoding": "chunked", + "Vary": "Origin", + "x-ms-error-code": "BlobNotFound", + "x-ms-version": "2021-08-06" + }, + "ResponseBody": null + }, + { + "RequestUri": "https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/jobs/test_357389688051?api-version=2022-10-01-preview", + "RequestMethod": "PUT", + "RequestHeaders": { + "Accept": "application/json", + "Accept-Encoding": "gzip, deflate", + "Connection": "keep-alive", + "Content-Length": "1694", + "Content-Type": "application/json", + "User-Agent": "azure-ai-ml/1.1.0 azsdk-python-mgmt-machinelearningservices/0.1.0 Python/3.9.10 (Windows-10-10.0.22621-SP0)" + }, + "RequestBody": { + "properties": { + "description": "The hello world pipeline job", + "properties": {}, + "tags": { + "tag": "tagvalue", + "owner": "sdkteam" + }, + "computeId": "/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/computes/cpu-cluster", + "displayName": "test_357389688051", + "experimentName": "my_first_experiment", + "isArchived": false, + "jobType": "Pipeline", + "inputs": { + "job_in_number": { + "jobInputType": "literal", + "value": "10" + }, + "job_in_other_number": { + "jobInputType": "literal", + "value": "15" + }, + "job_in_path": { + "mode": "ReadOnlyMount", + "uri": "azureml://datastores/workspaceblobstore/paths/LocalUpload/00000000000000000000000000000000/data/", + "jobInputType": "uri_folder" + } + }, + "jobs": { + "hello_world_component": { + "resources": null, + "distribution": null, + "limits": null, + "environment_variables": {}, + "name": "hello_world_component", + "type": "command", + "display_name": null, + "tags": {}, + "computeId": "/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/computes/cpu-cluster", + "inputs": { + "component_in_number": { + "job_input_type": "literal", + "value": "${{parent.inputs.job_in_number}}" + }, + "component_in_path": { + "job_input_type": "literal", + "value": "${{parent.inputs.job_in_path}}" + } + }, + "outputs": {}, + "properties": {}, + "_source": "REMOTE.WORKSPACE.COMPONENT", + "comment": "arbitrary string", + "componentId": "/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/components/microsoftsamples_command_component_basic/labels/default" + } + }, + "outputs": {}, + "settings": { + "_source": "YAML.JOB" + } + } + }, + "StatusCode": 201, + "ResponseHeaders": { + "Cache-Control": "no-cache", + "Content-Length": "3978", + "Content-Type": "application/json; charset=utf-8", + "Date": "Wed, 19 Oct 2022 11:38:04 GMT", + "Expires": "-1", + 
"Location": "https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/jobs/test_357389688051?api-version=2022-10-01-preview", + "Pragma": "no-cache", + "Request-Context": "appId=cid-v1:512cc15a-13b5-415b-bfd0-dce7accb6bb1", + "Server-Timing": "traceparent;desc=\u002200-977cbdc014eb42a307c42620d93743e0-e5eac8f59cd29cc7-01\u0022", + "Strict-Transport-Security": "max-age=31536000; includeSubDomains", + "x-aml-cluster": "vienna-test-westus2-01", + "X-Content-Type-Options": "nosniff", + "x-ms-correlation-request-id": "8bd33c3b-9ebe-4c49-809b-16a9b7ff40a9", + "x-ms-ratelimit-remaining-subscription-writes": "1199", + "x-ms-response-type": "standard", + "x-ms-routing-request-id": "JAPANEAST:20221019T113805Z:8bd33c3b-9ebe-4c49-809b-16a9b7ff40a9", + "x-request-time": "5.032" + }, + "ResponseBody": { + "id": "/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/jobs/test_357389688051", + "name": "test_357389688051", + "type": "Microsoft.MachineLearningServices/workspaces/jobs", + "properties": { + "description": "The hello world pipeline job", + "tags": { + "tag": "tagvalue", + "owner": "sdkteam" + }, + "properties": { + "azureml.DevPlatv2": "true", + "azureml.runsource": "azureml.PipelineRun", + "runSource": "MFE", + "runType": "HTTP", + "azureml.parameters": "{\u0022job_in_number\u0022:\u002210\u0022,\u0022job_in_other_number\u0022:\u002215\u0022}", + "azureml.continue_on_step_failure": "False", + "azureml.continue_on_failed_optional_input": "True", + "azureml.defaultComputeName": "cpu-cluster", + "azureml.defaultDataStoreName": "workspaceblobstore", + "azureml.pipelineComponent": "pipelinerun" + }, + "displayName": "test_357389688051", + "status": "Preparing", + "experimentName": "my_first_experiment", + "services": { + "Tracking": { + "jobServiceType": "Tracking", + "port": null, + "endpoint": "azureml://master.api.azureml-test.ms/mlflow/v1.0/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000?", + "status": null, + "errorMessage": null, + "properties": null, + "nodes": null + }, + "Studio": { + "jobServiceType": "Studio", + "port": null, + "endpoint": "https://ml.azure.com/runs/test_357389688051?wsid=/subscriptions/00000000-0000-0000-0000-000000000/resourcegroups/00000/workspaces/00000", + "status": null, + "errorMessage": null, + "properties": null, + "nodes": null + } + }, + "computeId": "/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/computes/cpu-cluster", + "isArchived": false, + "identity": null, + "componentId": null, + "jobType": "Pipeline", + "settings": { + "_source": "YAML.JOB" + }, + "jobs": { + "hello_world_component": { + "resources": null, + "distribution": null, + "limits": null, + "environment_variables": {}, + "name": "hello_world_component", + "type": "command", + "display_name": null, + "tags": {}, + "computeId": "/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/computes/cpu-cluster", + "inputs": { + "component_in_number": { + "job_input_type": "literal", + "value": "${{parent.inputs.job_in_number}}" + }, + "component_in_path": { + "job_input_type": "literal", + "value": "${{parent.inputs.job_in_path}}" + } + }, + "outputs": {}, + "properties": {}, + "_source": 
"REMOTE.WORKSPACE.COMPONENT", + "comment": "arbitrary string", + "componentId": "/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/components/microsoftsamples_command_component_basic/labels/default" + } + }, + "inputs": { + "job_in_number": { + "description": null, + "jobInputType": "literal", + "value": "10" + }, + "job_in_other_number": { + "description": null, + "jobInputType": "literal", + "value": "15" + }, + "job_in_path": { + "description": null, + "uri": "azureml://datastores/workspaceblobstore/paths/LocalUpload/00000000000000000000000000000000/data/", + "mode": "ReadOnlyMount", + "jobInputType": "uri_folder" + } + }, + "outputs": {}, + "sourceJobId": null + }, + "systemData": { + "createdAt": "2022-10-19T11:38:03.0897111\u002B00:00", + "createdBy": "Xingzhi Zhang", + "createdByType": "User" + } + } + } + ], + "Variables": { + "job_name": "test_357389688051" + } +} diff --git a/sdk/ml/azure-ai-ml/tests/test_configs/pipeline_jobs/helloworld_pipeline_job_with_default_component.yml b/sdk/ml/azure-ai-ml/tests/test_configs/pipeline_jobs/helloworld_pipeline_job_with_default_component.yml new file mode 100644 index 000000000000..6cbab73ae134 --- /dev/null +++ b/sdk/ml/azure-ai-ml/tests/test_configs/pipeline_jobs/helloworld_pipeline_job_with_default_component.yml @@ -0,0 +1,32 @@ +type: pipeline + +# name: microsoft.samples.PipelineJobSampleToDefineScope #follow up on schema validation error +name: simplepipelinejob +description: The hello world pipeline job +tags: + tag: tagvalue + owner: sdkteam + +# Check if supported ... +experiment_name: my_first_experiment + +compute: azureml:cpu-cluster + +inputs: + # examples of inputs that take values such as int, string, etc. + job_in_number: 10 + job_in_other_number: + value: 15 + job_in_path: + path: ../data + mode: ro_mount + +jobs: + hello_world_component: + type: command + comment: arbitrary string + component: azureml:microsoftsamples_command_component_basic@default + compute: azureml:cpu-cluster + inputs: + component_in_number: ${{parent.inputs.job_in_number}} + component_in_path: ${{parent.inputs.job_in_path}}