From c72f4b211aa48d49fb210e515d54229511120546 Mon Sep 17 00:00:00 2001 From: zhangxingzhi Date: Mon, 17 Oct 2022 21:24:16 +0800 Subject: [PATCH] feat: skip path validation if with additional includes --- .../entities/_additional_includes.py | 32 +++++++++----- .../ai/ml/_internal/entities/component.py | 44 +++++++------------ .../ai/ml/_internal/entities/environment.py | 16 +++---- .../internal/unittests/test_component.py | 6 +++ .../component_with_code/component_spec.yaml | 38 ++++++++++++++++ .../scope-component/scope/convert2ss.script | 8 ++++ 6 files changed, 98 insertions(+), 46 deletions(-) create mode 100644 sdk/ml/azure-ai-ml/tests/test_configs/internal/component_with_code/component_spec.yaml create mode 100644 sdk/ml/azure-ai-ml/tests/test_configs/internal/scope-component/scope/convert2ss.script diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/entities/_additional_includes.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/entities/_additional_includes.py index 0712f66d318c..ad0d31e7c9a5 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/entities/_additional_includes.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/entities/_additional_includes.py @@ -16,23 +16,33 @@ class _AdditionalIncludes: def __init__(self, code_path: Union[None, str], yaml_path: str): - self.__yaml_path = Path(yaml_path) + self.__yaml_path = yaml_path self.__code_path = code_path self._tmp_code_path = None - self._includes = None - if self._additional_includes_file_path.is_file(): + self.__includes = None + + @property + def _includes(self): + if not self._additional_includes_file_path.is_file(): + return [] + if self.__includes is None: with open(self._additional_includes_file_path, "r") as f: lines = f.readlines() - self._includes = [line.strip() for line in lines if len(line.strip()) > 0] + self.__includes = [line.strip() for line in lines if len(line.strip()) > 0] + return self.__includes @property def with_includes(self): - return self._includes is not None + return len(self._includes) is not 0 @property def _yaml_path(self) -> Path: - return self.__yaml_path + if self.__yaml_path is None: + # if yaml path is not specified, use a not created + # temp file name + return Path(tempfile.mktemp()) + return Path(self.__yaml_path) @property def _code_path(self) -> Path: @@ -70,17 +80,17 @@ def _validate(self) -> MutableValidationResult: src_path = include_path.resolve() except OSError: error_msg = f"Failed to resolve additional include {additional_include} for {self._yaml_name}." - validation_result.append_error(error_msg) + validation_result.append_error(message=error_msg) continue if not src_path.exists(): error_msg = f"Unable to find additional include {additional_include} for {self._yaml_name}." - validation_result.append_error(error_msg) + validation_result.append_error(message=error_msg) continue if len(src_path.parents) == 0: error_msg = f"Root directory is not supported for additional includes for {self._yaml_name}." - validation_result.append_error(error_msg) + validation_result.append_error(message=error_msg) continue dst_path = Path(self._code_path) / src_path.name @@ -91,11 +101,11 @@ def _validate(self) -> MutableValidationResult: f"A symbolic link already exists for additional include {additional_include} " f"for {self._yaml_name}." ) - validation_result.append_error(error_msg) + validation_result.append_error(message=error_msg) continue elif dst_path.exists(): error_msg = f"A file already exists for additional include {additional_include} for {self._yaml_name}." - validation_result.append_error(error_msg) + validation_result.append_error(message=error_msg) return validation_result def resolve(self) -> None: diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/entities/component.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/entities/component.py index 25d2b633ba08..672f03cdc06b 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/entities/component.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/entities/component.py @@ -4,6 +4,7 @@ # pylint: disable=protected-access, redefined-builtin # disable redefined-builtin to use id/type as argument name from contextlib import contextmanager +from pathlib import Path from typing import Dict, Union import os @@ -177,30 +178,24 @@ def _additional_includes(self): def _create_schema_for_validation(cls, context) -> Union[PathAwareSchema, Schema]: return InternalBaseComponentSchema(context=context) - def _validate(self, raise_error=False) -> MutableValidationResult: - if not self._additional_includes.with_includes and self._additional_includes._validate().passed: - # update source path in case dependency file is in additional_includes - with self._resolve_local_code() as tmp_base_path: - origin_base_path, origin_source_path = self._base_path, self._source_path - - try: - self._base_path, self._source_path = \ - tmp_base_path, tmp_base_path / os.path.basename(self._source_path) - return super()._validate(raise_error=raise_error) - finally: - self._base_path, self._source_path = origin_base_path, origin_source_path - - return super()._validate(raise_error=raise_error) - def _customized_validate(self) -> MutableValidationResult: validation_result = super(InternalComponent, self)._customized_validate() + if self._additional_includes.with_includes: + validation_result.merge_with(self._additional_includes._validate()) + # resolving additional includes & update self._base_path can be dangerous, + # so we just skip path validation if additional_includes is used + # note that there will still be runtime error in submission or execution + skip_path_validation = True + else: + skip_path_validation = False if isinstance(self.environment, InternalEnvironment): validation_result.merge_with( - self.environment._validate(self._source_path), + self.environment._validate( + self._source_path, + skip_path_validation=skip_path_validation + ), field_name="environment", ) - if self._additional_includes.with_includes: - validation_result.merge_with(self._additional_includes._validate()) return validation_result @classmethod @@ -232,15 +227,10 @@ def _to_rest_object(self) -> ComponentVersionData: @contextmanager def _resolve_local_code(self): - # if `self._source_path` is None, component is not loaded from local yaml and - # no need to resolve - if self._source_path is None: - yield self.code - else: - self._additional_includes.resolve() - # use absolute path in case temp folder & work dir are in different drive - yield self._additional_includes.code.absolute() - self._additional_includes.cleanup() + self._additional_includes.resolve() + # use absolute path in case temp folder & work dir are in different drive + yield self._additional_includes.code.absolute() + self._additional_includes.cleanup() def __call__(self, *args, **kwargs) -> InternalBaseNode: # pylint: disable=useless-super-delegation return super(InternalComponent, self).__call__(*args, **kwargs) diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/entities/environment.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/entities/environment.py index 9250659f77db..69179ecc49e3 100644 --- a/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/entities/environment.py +++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_internal/entities/environment.py @@ -40,7 +40,7 @@ def __init__( def _parse_file_path(value: str) -> str: return value[len(FILE_PREFIX) :] if value.startswith(FILE_PREFIX) else value - def _validate_conda_section(self, source_path: str) -> MutableValidationResult: + def _validate_conda_section(self, source_path: str, skip_path_validation: bool) -> MutableValidationResult: validation_result = _ValidationResultBuilder.success() if not self.conda: return validation_result @@ -53,21 +53,21 @@ def _validate_conda_section(self, source_path: str) -> MutableValidationResult: ) if self.conda.get(self.CONDA_DEPENDENCIES_FILE): conda_dependencies_file = self.conda[self.CONDA_DEPENDENCIES_FILE] - if not (Path(source_path).parent / conda_dependencies_file).is_file(): + if not skip_path_validation and not (Path(source_path).parent / conda_dependencies_file).is_file(): validation_result.append_error( yaml_path=f"conda.{self.CONDA_DEPENDENCIES_FILE}", message=f"Cannot find conda dependencies file: {conda_dependencies_file!r}", ) if self.conda.get(self.PIP_REQUIREMENTS_FILE): pip_requirements_file = self.conda[self.PIP_REQUIREMENTS_FILE] - if not (Path(source_path).parent / pip_requirements_file).is_file(): + if not skip_path_validation and not (Path(source_path).parent / pip_requirements_file).is_file(): validation_result.append_error( yaml_path=f"conda.{self.PIP_REQUIREMENTS_FILE}", message=f"Cannot find pip requirements file: {pip_requirements_file!r}", ) return validation_result - def _validate_docker_section(self, source_path: str) -> MutableValidationResult: + def _validate_docker_section(self, source_path: str, skip_path_validation: bool) -> MutableValidationResult: validation_result = _ValidationResultBuilder.success() if not self.docker: return validation_result @@ -75,22 +75,22 @@ def _validate_docker_section(self, source_path: str) -> MutableValidationResult: return validation_result dockerfile_file = self.docker[self.BUILD][self.DOCKERFILE] dockerfile_file = self._parse_file_path(dockerfile_file) - if not (Path(source_path).parent / dockerfile_file).is_file(): + if not skip_path_validation and not (Path(source_path).parent / dockerfile_file).is_file(): validation_result.append_error( yaml_path=f"docker.{self.BUILD}.{self.DOCKERFILE}", message=f"Dockerfile not exists: {dockerfile_file}", ) return validation_result - def _validate(self, source_path: str) -> MutableValidationResult: + def _validate(self, source_path: str, skip_path_validation: bool = False) -> MutableValidationResult: validation_result = _ValidationResultBuilder.success() if self.os is not None and self.os not in {"Linux", "Windows"}: validation_result.append_error( yaml_path="os", message=f"Only support 'Linux' and 'Windows', but got {self.os!r}", ) - validation_result.merge_with(self._validate_conda_section(source_path)) - validation_result.merge_with(self._validate_docker_section(source_path)) + validation_result.merge_with(self._validate_conda_section(source_path, skip_path_validation)) + validation_result.merge_with(self._validate_docker_section(source_path, skip_path_validation)) return validation_result def _resolve_conda_section(self, source_path: str) -> None: diff --git a/sdk/ml/azure-ai-ml/tests/internal/unittests/test_component.py b/sdk/ml/azure-ai-ml/tests/internal/unittests/test_component.py index 54b6730ea01b..f6c4c546c8b8 100644 --- a/sdk/ml/azure-ai-ml/tests/internal/unittests/test_component.py +++ b/sdk/ml/azure-ai-ml/tests/internal/unittests/test_component.py @@ -29,6 +29,12 @@ def test_load_v2_component(self): yaml_path = "./tests/test_configs/components/helloworld_component.yml" load_component(yaml_path) + def test_validate_internal_component(self): + yaml_path = r"./tests/test_configs/internal/component_with_code/component_spec.yaml" + from azure.ai.ml.entities._validate_funcs import validate_component + validation_result = validate_component(yaml_path) + assert validation_result.passed, repr(validation_result) + def test_specific_error_message_on_load_from_dict(self): os.environ[AZUREML_INTERNAL_COMPONENTS_ENV_VAR] = "false" yaml_path = "./tests/test_configs/internal/helloworld/helloworld_component_command.yml" diff --git a/sdk/ml/azure-ai-ml/tests/test_configs/internal/component_with_code/component_spec.yaml b/sdk/ml/azure-ai-ml/tests/test_configs/internal/component_with_code/component_spec.yaml new file mode 100644 index 000000000000..e65b13a79136 --- /dev/null +++ b/sdk/ml/azure-ai-ml/tests/test_configs/internal/component_with_code/component_spec.yaml @@ -0,0 +1,38 @@ +$schema: https://componentsdk.azureedge.net/jsonschema/ScopeComponent.json + +name: convert2ss +version: 0.0.1 +display_name: Convert Text to StructureStream + +type: ScopeComponent + +is_deterministic: True + +tags: + org: bing + project: relevance + +description: Convert adls test data to SS format + +inputs: + TextData: + type: [AnyFile, AnyDirectory] + description: text file on ADLS storage + ExtractionClause: + type: string + description: the extraction clause, something like "column1:string, column2:int" +outputs: + SSPath: + type: CosmosStructuredStream + description: the converted structured stream + +code: ./scope + +scope: + script: convert2ss.script + # to reference the inputs/outputs in your script + # you must define the argument name of your intpus/outputs in args section + args: >- + Output_SSPath {outputs.SSPath} + Input_TextData {inputs.TextData} + ExtractionClause {inputs.ExtractionClause} diff --git a/sdk/ml/azure-ai-ml/tests/test_configs/internal/scope-component/scope/convert2ss.script b/sdk/ml/azure-ai-ml/tests/test_configs/internal/scope-component/scope/convert2ss.script new file mode 100644 index 000000000000..ef41fa4deb83 --- /dev/null +++ b/sdk/ml/azure-ai-ml/tests/test_configs/internal/scope-component/scope/convert2ss.script @@ -0,0 +1,8 @@ +#DECLARE Output_stream string = @@Output_SSPath@@; +#DECLARE In_Data string =@"@@Input_TextData@@"; + +RawData = EXTRACT @@ExtractionClause@@ FROM @In_Data +USING DefaultTextExtractor(); + + +OUTPUT RawData TO SSTREAM @Output_stream;