feat: enable internal components in pipeline yaml #26800

Merged
2 changes: 1 addition & 1 deletion sdk/ml/azure-ai-ml/azure/ai/ml/_internal/_schema/node.py
@@ -40,7 +40,7 @@ def make(self, data, **kwargs):  # pylint: disable=unused-argument, no-self-use
         # dict to node object
         from azure.ai.ml.entities._job.pipeline._load_component import pipeline_node_factory

-        return pipeline_node_factory.load_from_dict(data)  # pylint: disable=E1125, too-many-function-args
+        return pipeline_node_factory.load_from_dict(data=data)

     @pre_dump
     def resolve_inputs_outputs(self, job, **kwargs):  # pylint: disable=unused-argument, no-self-use
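The two pylint suppressions were papering over a real signature mismatch: as the `_load_component.py` hunk below shows, `load_from_dict(self, *, data: dict, _type: str = None)` makes everything after the bare `*` keyword-only, so the positional call was genuinely invalid. A minimal sketch of that rule, using a hypothetical factory class rather than the SDK's:

```python
# Parameters after a bare `*` are keyword-only; positional calls raise TypeError.
class NodeFactory:  # hypothetical stand-in, not the SDK class
    def load_from_dict(self, *, data: dict, _type: str = None):
        return {"resolved_type": _type or data.get("type"), **data}

factory = NodeFactory()
print(factory.load_from_dict(data={"type": "command"}))  # OK
# factory.load_from_dict({"type": "command"})
# -> TypeError: load_from_dict() takes 1 positional argument but 2 were given
```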
@@ -264,9 +264,9 @@ def command(self, value: str) -> None:
         if isinstance(self.component, Component):
             self.component.command = value
         else:
-            msg = "Can't set command property for a registered component {}"
+            msg = "Can't set command property for a registered component {}. Tried to set it to {}."
             raise ValidationException(
-                message=msg.format(self.component),
+                message=msg.format(self.component, value),
                 no_personal_data_message=msg,
                 target=ErrorTarget.COMMAND_JOB,
                 error_category=ErrorCategory.USER_ERROR,
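Echoing the rejected value makes the error self-explanatory, while `no_personal_data_message` still receives the unformatted template, so the user-supplied command stays out of the sanitized variant. A distilled sketch of the guard and the message it now produces (the class and reference string here are hypothetical stand-ins, not the SDK's):

```python
# Distilled sketch of the guard above (hypothetical stand-ins, not the SDK classes).
class Component:
    """Stands in for an inline component, whose command is still editable."""
    command = None

def set_command(node_component, value):
    if isinstance(node_component, Component):
        node_component.command = value
    else:  # anything else is treated as a registered-component reference
        msg = "Can't set command property for a registered component {}. Tried to set it to {}."
        raise ValueError(msg.format(node_component, value))

set_command(Component(), "echo hello")  # fine
try:
    set_command("azureml:my_component:1", "echo hello")  # hypothetical registered reference
except ValueError as exc:
    print(exc)
# -> Can't set command property for a registered component azureml:my_component:1. Tried to set it to echo hello.
```

The registry test config further down drops exactly such a `command:` override from a node that references a registered component, presumably because this path now rejects it.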
@@ -20,6 +20,7 @@
 from azure.ai.ml.entities._builders.do_while import DoWhile
 from azure.ai.ml.entities._builders.pipeline import Pipeline
 from azure.ai.ml.entities._component.component import Component
+from azure.ai.ml.entities._component.component_factory import component_factory
 from azure.ai.ml.entities._job.automl.automl_job import AutoMLJob
 from azure.ai.ml.entities._util import extract_label
 from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationException
@@ -172,7 +173,19 @@ def load_from_dict(self, *, data: dict, _type: str = None) -> Union[BaseNode, AutoMLJob]:
         else:
             data[CommonYamlFields.TYPE] = _type

-        new_instance = self.get_create_instance_func(_type)()
+        new_instance: Union[BaseNode, AutoMLJob] = self.get_create_instance_func(_type)()
+
+        if isinstance(new_instance, BaseNode):
+            # parse component
+            component_key = new_instance._get_component_attr_name()
+            if component_key in data and isinstance(data[component_key], dict):
+                data[component_key] = component_factory.load_from_dict(
+                    data=data[component_key],
+                    context={
+                        BASE_PATH_CONTEXT_KEY: data[component_key].get(BASE_PATH_CONTEXT_KEY, None),
+                    }
+                )

         new_instance.__init__(**data)
         return new_instance
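This is the functional core of the PR: when a node's component field deserializes as a mapping (an inline definition) rather than a string reference, it is now materialized through `component_factory.load_from_dict` before `new_instance.__init__(**data)` runs, so the component's own `type` field, such as `CommandComponent`, decides what gets built. A toy sketch of that dispatch idea, with hypothetical names rather than the SDK's internals:

```python
# Toy sketch of inline-component materialization (all names hypothetical).
from typing import Any, Callable, Dict

COMPONENT_LOADERS: Dict[str, Callable[[dict], Any]] = {
    "CommandComponent": lambda d: ("CommandComponent", d.get("command")),
}

def load_node_from_dict(data: dict) -> dict:
    component = data.get("component")
    if isinstance(component, dict):  # inline definition, not an id or file reference
        data["component"] = COMPONENT_LOADERS[component["type"]](component)
    return data  # the real factory would now call new_instance.__init__(**data)

node = load_node_from_dict({
    "type": "CommandComponent",
    "component": {"type": "CommandComponent", "command": "echo hello"},
})
print(node["component"])  # -> ('CommandComponent', 'echo hello')
```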
@@ -8,7 +8,7 @@
 import pytest
 import yaml

-from azure.ai.ml import Input, load_component
+from azure.ai.ml import Input, load_component, load_job
 from azure.ai.ml._internal import (
     AISuperComputerConfiguration,
     AISuperComputerScalePolicy,
@@ -592,3 +592,18 @@ def test_pipeline_with_setting_node_output_directly(self) -> None:
         copy_file.outputs.output_dir.path = "path_on_datastore"
         assert copy_file.outputs.output_dir.path == "path_on_datastore"
         assert copy_file.outputs.output_dir.type == "path"
+
+    def test_job_properties(self):
+        pipeline_job: PipelineJob = load_job(
+            source="./tests/test_configs/internal/pipeline_jobs/pipeline_job_with_properties.yml"
+        )
+        pipeline_dict = pipeline_job._to_dict()
+        rest_pipeline_dict = pipeline_job._to_rest_object().as_dict()["properties"]
+        assert pipeline_dict["properties"] == {"AZURE_ML_PathOnCompute_input_data": "/tmp/test"}
+        assert rest_pipeline_dict["properties"] == pipeline_dict["properties"]
+        for name, node_dict in pipeline_dict["jobs"].items():
+            rest_node_dict = rest_pipeline_dict["jobs"][name]
+            assert len(node_dict["properties"]) == 1
+            assert "AZURE_ML_PathOnCompute_" in list(node_dict["properties"].keys())[0]
+            assert node_dict["properties"] == rest_node_dict["properties"]

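The same round trip can be reproduced interactively. A sketch mirroring the test's calls (run from the sdk/ml/azure-ai-ml directory; `_to_dict` and `_to_rest_object` are private helpers, used here only because the test uses them):

```python
# Mirrors what test_job_properties asserts; paths are relative to sdk/ml/azure-ai-ml.
import azure.ai.ml._internal  # noqa: F401  # the test module imports this too; it appears to register the internal node types
from azure.ai.ml import load_job

job = load_job(source="./tests/test_configs/internal/pipeline_jobs/pipeline_job_with_properties.yml")
print(job._to_dict()["properties"])
# per the test: {'AZURE_ML_PathOnCompute_input_data': '/tmp/test'}
for name, node in job._to_dict()["jobs"].items():
    print(name, node["properties"])  # one AZURE_ML_PathOnCompute_* entry per node
```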
@@ -26,139 +26,13 @@ properties:
   AZURE_ML_PathOnCompute_input_data: "/tmp/test"

 jobs:
-  node0: # inline command job with properties
-    command: echo hello ${{inputs.hello_string}}
-    environment: azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest
-    inputs:
-      hello_string: ${{parent.inputs.hello_string}}
-    properties:
-      AZURE_ML_PathOnCompute_hello_string: "/tmp/test"
-
-  node1: # inline parallel job with properties
-    type: parallel
-    compute: "azureml:cpu-cluster"
-    inputs:
-      test1: ${{parent.inputs.input_data}}
-    resources:
-      instance_count: 3
-    mini_batch_size: "100kb"
-    mini_batch_error_threshold: 5
-    logging_level: "DEBUG"
-    input_data: ${{inputs.input_data}}
-    max_concurrency_per_instance: 2
-    task:
-      type: run_function
-      code: "../python"
-      entry_script: pass_through.py
-      append_row_to: ${{outputs.scored_result}} # optional, If Null, equals to summary_only mode in v1.
-      environment: azureml:my-env:1
-    properties:
-      AZURE_ML_PathOnCompute_input_data: "/tmp/test"
-
-  node2: # inline import job with properties
-    type: import
-    source:
-      type: azuresqldb
-      query: >-
-        select * from REGION
-      connection: azureml:my_username_password
-    output:
-      type: mltable
-      path: azureml://datastores/workspaceblobstore/paths/output_dir/
-    properties:
-      AZURE_ML_PathOnCompute_output: "/tmp/test"
-
-  node3: # inline spark job with properties
-    type: spark
-    inputs:
-      test1: ${{parent.inputs.input_data}}
-      file_input2: ${{parent.inputs.input_data}}
-    code: ../dsl_pipeline/spark_job_in_pipeline/src
-    entry:
-      file: entry.py # file path of the entry file relative to the code root folder
-    py_files:
-      - utils.zip
-    jars:
-      - scalaproj.jar
-    files:
-      - my_files.txt
-    args: >-
-      --file_input1 ${{inputs.test1}}
-      --file_input2 ${{inputs.file_input2}}
-      --output ${{outputs.output}}
-    compute: azureml:rezas-synapse-10
-    conf:
-      spark.driver.cores: 2
-      spark.driver.memory: "1g"
-      spark.executor.cores: 1
-      spark.executor.memory: "1g"
-      spark.executor.instances: 1
-    properties:
-      AZURE_ML_PathOnCompute_input_data: "/tmp/test"
-
-  node4: # inline automl job with properties
-    type: automl
-    task: text_ner
-    log_verbosity: info
-    primary_metric: accuracy
-    limits:
-      max_trials: 1
-      timeout_minutes: 60
-    training_data: ${{parent.inputs.text_ner_training_data}}
-    validation_data: ${{parent.inputs.text_ner_validation_data}}
-    properties:
-      AZURE_ML_PathOnCompute_training_data: "/tmp/test"
-
-  node5: # inline sweep job with properties
-    type: sweep
-    search_space:
-      component_in_number:
-        type: choice
-        values:
-          - 25
-          - 35
-    limits:
-      max_total_trials: 3
-    sampling_algorithm: random
-    objective:
-      goal: maximize
-      primary_metric: accuracy
-    trial: azureml:microsoftsamplescommandcomponentbasic_nopaths_test:1
-    properties:
-      AZURE_ML_PathOnCompute_input: "/tmp/test"
-
-  node6: # parallel node with properties as a typical implement of base node.
-    type: parallel
+  node7: # internal command node with properties as a typical implement of internal base node.
+    type: CommandComponent
     compute: azureml:cpu-cluster
-    component: ../components/parallel_component_with_file_input.yml
+    component: file:../helloworld/helloworld_component_command.yml
     inputs:
-      job_data_path: ${{parent.inputs.pipeline_job_data_path}}
-    outputs:
-      job_output_path:
-    mini_batch_size: "1"
-    mini_batch_error_threshold: 1
-    max_concurrency_per_instance: 1
-    properties:
-      AZURE_ML_PathOnCompute_job_data_path: "/tmp/test"
-
-  # Comment these lines out as internal node is not well supported in yaml now.
-  # node7: # internal command node with properties as a typical implement of internal base node.
-  #   type: CommandComponent
-  #   compute: azureml:cpu-cluster
-  #   component: ../internal/helloworld/helloworld_component_command.yml
-  #   inputs:
-  #     training_data: ${{parent.inputs.input_data}}
-  #     max_epochs: 10
-  #     learning_rate: 0.01
-  #   properties:
-  #     AZURE_ML_PathOnCompute_job_training_data: "/tmp/test"
-
-  node8: # pipeline node with properties
-    type: pipeline
-    inputs:
-      component_in_number: 11
-      component_in_path: ${{parent.inputs.input_data}}
-
-    component: ../components/helloworld_pipeline_component.yml
+      training_data: ${{parent.inputs.input_data}}
+      max_epochs: 10
+      learning_rate: 0.01
     properties:
-      AZURE_ML_PathOnCompute_job_component_in_path: "/tmp/test"
+      AZURE_ML_PathOnCompute_job_training_data: "/tmp/test"
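Note the `file:` prefix on node7's component reference, where the old node6 used a bare relative path: a string-valued component may be a local file, a workspace asset, or a registry id, and the prefix disambiguates the first case. A toy illustration of that kind of disambiguation (the heuristics are hypothetical; only the three reference strings are taken from these test configs):

```python
# Toy disambiguation of string component references (hypothetical heuristics).
def classify_component_ref(ref: str) -> str:
    if ref.startswith("file:"):
        return "local file: " + ref[len("file:"):]
    if ref.startswith("azureml://registries/"):
        return "registry asset"
    if ref.startswith("azureml:"):
        return "workspace asset"
    return "relative path (legacy form)"

for ref in (
    "file:../helloworld/helloworld_component_command.yml",
    "azureml://registries/testFeed/components/my_hello_world_asset_2/versions/1",
    "azureml:microsoftsamplescommandcomponentbasic_nopaths_test:1",
):
    print(classify_component_ref(ref))
```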
@@ -8,7 +8,6 @@ inputs:
 jobs:
   a:
     component: azureml://registries/testFeed/components/my_hello_world_asset_2/versions/1
-    command: echo hello ${{inputs.hello_string}}
     environment: azureml://registries/testFeed/environments/sklearn-10-ubuntu2004-py38-cpu/versions/19.dev6
   b:
     command: echo "world" >> ${{outputs.world_output}}/world.txt
@@ -141,18 +141,6 @@ jobs:
     properties:
       AZURE_ML_PathOnCompute_job_data_path: "/tmp/test"

-  # Comment these lines out as internal node is not well supported in yaml now.
-  # node7: # internal command node with properties as a typical implement of internal base node.
-  #   type: CommandComponent
-  #   compute: azureml:cpu-cluster
-  #   component: ../internal/helloworld/helloworld_component_command.yml
-  #   inputs:
-  #     training_data: ${{parent.inputs.input_data}}
-  #     max_epochs: 10
-  #     learning_rate: 0.01
-  #   properties:
-  #     AZURE_ML_PathOnCompute_job_training_data: "/tmp/test"
-
   node8: # pipeline node with properties
     type: pipeline
     inputs: