Skip to content

Commit

Permalink
feat: skip path validation if with additional includes
Browse files Browse the repository at this point in the history
  • Loading branch information
elliotzh committed Oct 17, 2022
1 parent 7f1114c commit c72f4b2
Show file tree
Hide file tree
Showing 6 changed files with 98 additions and 46 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,23 +16,33 @@

class _AdditionalIncludes:
def __init__(self, code_path: Union[None, str], yaml_path: str):
self.__yaml_path = Path(yaml_path)
self.__yaml_path = yaml_path
self.__code_path = code_path

self._tmp_code_path = None
self._includes = None
if self._additional_includes_file_path.is_file():
self.__includes = None

@property
def _includes(self):
if not self._additional_includes_file_path.is_file():
return []
if self.__includes is None:
with open(self._additional_includes_file_path, "r") as f:
lines = f.readlines()
self._includes = [line.strip() for line in lines if len(line.strip()) > 0]
self.__includes = [line.strip() for line in lines if len(line.strip()) > 0]
return self.__includes

@property
def with_includes(self):
return self._includes is not None
return len(self._includes) is not 0

@property
def _yaml_path(self) -> Path:
return self.__yaml_path
if self.__yaml_path is None:
# if yaml path is not specified, use a not created
# temp file name
return Path(tempfile.mktemp())
return Path(self.__yaml_path)

@property
def _code_path(self) -> Path:
Expand Down Expand Up @@ -70,17 +80,17 @@ def _validate(self) -> MutableValidationResult:
src_path = include_path.resolve()
except OSError:
error_msg = f"Failed to resolve additional include {additional_include} for {self._yaml_name}."
validation_result.append_error(error_msg)
validation_result.append_error(message=error_msg)
continue

if not src_path.exists():
error_msg = f"Unable to find additional include {additional_include} for {self._yaml_name}."
validation_result.append_error(error_msg)
validation_result.append_error(message=error_msg)
continue

if len(src_path.parents) == 0:
error_msg = f"Root directory is not supported for additional includes for {self._yaml_name}."
validation_result.append_error(error_msg)
validation_result.append_error(message=error_msg)
continue

dst_path = Path(self._code_path) / src_path.name
Expand All @@ -91,11 +101,11 @@ def _validate(self) -> MutableValidationResult:
f"A symbolic link already exists for additional include {additional_include} "
f"for {self._yaml_name}."
)
validation_result.append_error(error_msg)
validation_result.append_error(message=error_msg)
continue
elif dst_path.exists():
error_msg = f"A file already exists for additional include {additional_include} for {self._yaml_name}."
validation_result.append_error(error_msg)
validation_result.append_error(message=error_msg)
return validation_result

def resolve(self) -> None:
Expand Down
44 changes: 17 additions & 27 deletions sdk/ml/azure-ai-ml/azure/ai/ml/_internal/entities/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# pylint: disable=protected-access, redefined-builtin
# disable redefined-builtin to use id/type as argument name
from contextlib import contextmanager
from pathlib import Path
from typing import Dict, Union
import os

Expand Down Expand Up @@ -177,30 +178,24 @@ def _additional_includes(self):
def _create_schema_for_validation(cls, context) -> Union[PathAwareSchema, Schema]:
return InternalBaseComponentSchema(context=context)

def _validate(self, raise_error=False) -> MutableValidationResult:
if not self._additional_includes.with_includes and self._additional_includes._validate().passed:
# update source path in case dependency file is in additional_includes
with self._resolve_local_code() as tmp_base_path:
origin_base_path, origin_source_path = self._base_path, self._source_path

try:
self._base_path, self._source_path = \
tmp_base_path, tmp_base_path / os.path.basename(self._source_path)
return super()._validate(raise_error=raise_error)
finally:
self._base_path, self._source_path = origin_base_path, origin_source_path

return super()._validate(raise_error=raise_error)

def _customized_validate(self) -> MutableValidationResult:
validation_result = super(InternalComponent, self)._customized_validate()
if self._additional_includes.with_includes:
validation_result.merge_with(self._additional_includes._validate())
# resolving additional includes & update self._base_path can be dangerous,
# so we just skip path validation if additional_includes is used
# note that there will still be runtime error in submission or execution
skip_path_validation = True
else:
skip_path_validation = False
if isinstance(self.environment, InternalEnvironment):
validation_result.merge_with(
self.environment._validate(self._source_path),
self.environment._validate(
self._source_path,
skip_path_validation=skip_path_validation
),
field_name="environment",
)
if self._additional_includes.with_includes:
validation_result.merge_with(self._additional_includes._validate())
return validation_result

@classmethod
Expand Down Expand Up @@ -232,15 +227,10 @@ def _to_rest_object(self) -> ComponentVersionData:

@contextmanager
def _resolve_local_code(self):
# if `self._source_path` is None, component is not loaded from local yaml and
# no need to resolve
if self._source_path is None:
yield self.code
else:
self._additional_includes.resolve()
# use absolute path in case temp folder & work dir are in different drive
yield self._additional_includes.code.absolute()
self._additional_includes.cleanup()
self._additional_includes.resolve()
# use absolute path in case temp folder & work dir are in different drive
yield self._additional_includes.code.absolute()
self._additional_includes.cleanup()

def __call__(self, *args, **kwargs) -> InternalBaseNode: # pylint: disable=useless-super-delegation
return super(InternalComponent, self).__call__(*args, **kwargs)
16 changes: 8 additions & 8 deletions sdk/ml/azure-ai-ml/azure/ai/ml/_internal/entities/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def __init__(
def _parse_file_path(value: str) -> str:
return value[len(FILE_PREFIX) :] if value.startswith(FILE_PREFIX) else value

def _validate_conda_section(self, source_path: str) -> MutableValidationResult:
def _validate_conda_section(self, source_path: str, skip_path_validation: bool) -> MutableValidationResult:
validation_result = _ValidationResultBuilder.success()
if not self.conda:
return validation_result
Expand All @@ -53,44 +53,44 @@ def _validate_conda_section(self, source_path: str) -> MutableValidationResult:
)
if self.conda.get(self.CONDA_DEPENDENCIES_FILE):
conda_dependencies_file = self.conda[self.CONDA_DEPENDENCIES_FILE]
if not (Path(source_path).parent / conda_dependencies_file).is_file():
if not skip_path_validation and not (Path(source_path).parent / conda_dependencies_file).is_file():
validation_result.append_error(
yaml_path=f"conda.{self.CONDA_DEPENDENCIES_FILE}",
message=f"Cannot find conda dependencies file: {conda_dependencies_file!r}",
)
if self.conda.get(self.PIP_REQUIREMENTS_FILE):
pip_requirements_file = self.conda[self.PIP_REQUIREMENTS_FILE]
if not (Path(source_path).parent / pip_requirements_file).is_file():
if not skip_path_validation and not (Path(source_path).parent / pip_requirements_file).is_file():
validation_result.append_error(
yaml_path=f"conda.{self.PIP_REQUIREMENTS_FILE}",
message=f"Cannot find pip requirements file: {pip_requirements_file!r}",
)
return validation_result

def _validate_docker_section(self, source_path: str) -> MutableValidationResult:
def _validate_docker_section(self, source_path: str, skip_path_validation: bool) -> MutableValidationResult:
validation_result = _ValidationResultBuilder.success()
if not self.docker:
return validation_result
if not self.docker.get(self.BUILD) or not self.docker[self.BUILD].get(self.DOCKERFILE):
return validation_result
dockerfile_file = self.docker[self.BUILD][self.DOCKERFILE]
dockerfile_file = self._parse_file_path(dockerfile_file)
if not (Path(source_path).parent / dockerfile_file).is_file():
if not skip_path_validation and not (Path(source_path).parent / dockerfile_file).is_file():
validation_result.append_error(
yaml_path=f"docker.{self.BUILD}.{self.DOCKERFILE}",
message=f"Dockerfile not exists: {dockerfile_file}",
)
return validation_result

def _validate(self, source_path: str) -> MutableValidationResult:
def _validate(self, source_path: str, skip_path_validation: bool = False) -> MutableValidationResult:
validation_result = _ValidationResultBuilder.success()
if self.os is not None and self.os not in {"Linux", "Windows"}:
validation_result.append_error(
yaml_path="os",
message=f"Only support 'Linux' and 'Windows', but got {self.os!r}",
)
validation_result.merge_with(self._validate_conda_section(source_path))
validation_result.merge_with(self._validate_docker_section(source_path))
validation_result.merge_with(self._validate_conda_section(source_path, skip_path_validation))
validation_result.merge_with(self._validate_docker_section(source_path, skip_path_validation))
return validation_result

def _resolve_conda_section(self, source_path: str) -> None:
Expand Down
6 changes: 6 additions & 0 deletions sdk/ml/azure-ai-ml/tests/internal/unittests/test_component.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ def test_load_v2_component(self):
yaml_path = "./tests/test_configs/components/helloworld_component.yml"
load_component(yaml_path)

def test_validate_internal_component(self):
yaml_path = r"./tests/test_configs/internal/component_with_code/component_spec.yaml"
from azure.ai.ml.entities._validate_funcs import validate_component
validation_result = validate_component(yaml_path)
assert validation_result.passed, repr(validation_result)

def test_specific_error_message_on_load_from_dict(self):
os.environ[AZUREML_INTERNAL_COMPONENTS_ENV_VAR] = "false"
yaml_path = "./tests/test_configs/internal/helloworld/helloworld_component_command.yml"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
$schema: https://componentsdk.azureedge.net/jsonschema/ScopeComponent.json

name: convert2ss
version: 0.0.1
display_name: Convert Text to StructureStream

type: ScopeComponent

is_deterministic: True

tags:
org: bing
project: relevance

description: Convert adls test data to SS format

inputs:
TextData:
type: [AnyFile, AnyDirectory]
description: text file on ADLS storage
ExtractionClause:
type: string
description: the extraction clause, something like "column1:string, column2:int"
outputs:
SSPath:
type: CosmosStructuredStream
description: the converted structured stream

code: ./scope

scope:
script: convert2ss.script
# to reference the inputs/outputs in your script
# you must define the argument name of your intpus/outputs in args section
args: >-
Output_SSPath {outputs.SSPath}
Input_TextData {inputs.TextData}
ExtractionClause {inputs.ExtractionClause}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#DECLARE Output_stream string = @@Output_SSPath@@;
#DECLARE In_Data string =@"@@Input_TextData@@";

RawData = EXTRACT @@ExtractionClause@@ FROM @In_Data
USING DefaultTextExtractor();


OUTPUT RawData TO SSTREAM @Output_stream;

0 comments on commit c72f4b2

Please sign in to comment.