From a3f172f4df32999361f70f01ba158dd2c144fa5d Mon Sep 17 00:00:00 2001
From: Chen Sun
Date: Wed, 4 Aug 2021 23:18:53 +0000
Subject: [PATCH] add HTML and Markdown artifact types

---
 sdk/python/kfp/dsl/io_types.py | 40 ++++++++++++++-----
 sdk/python/kfp/dsl/type_utils.py | 2 +
 sdk/python/kfp/dsl/type_utils_test.py | 12 ++++++
 ...htweight_python_functions_v2_pipeline.json | 6 +--
 ...ight_python_functions_v2_with_outputs.json | 10 ++---
 .../test_data/pipeline_with_after.json | 2 +-
 .../pipeline_with_concat_placeholder.json | 2 +-
 .../test_data/pipeline_with_condition.json | 2 +-
 .../pipeline_with_custom_job_spec.json | 6 +--
 .../test_data/pipeline_with_exit_handler.json | 8 ++--
 .../pipeline_with_if_placeholder.json | 2 +-
 .../test_data/pipeline_with_importer.json | 6 +--
 .../test_data/pipeline_with_loop_output.json | 2 +-
 .../pipeline_with_loop_parameter.json | 2 +-
 .../test_data/pipeline_with_loop_static.json | 2 +-
 .../pipeline_with_loops_and_conditions.json | 2 +-
 .../pipeline_with_metrics_outputs.json | 6 +--
 .../pipeline_with_nested_conditions.json | 2 +-
 .../pipeline_with_nested_conditions_yaml.json | 2 +-
 .../test_data/pipeline_with_nested_loops.json | 2 +-
 .../test_data/pipeline_with_ontology.json | 2 +-
 ...ipeline_with_params_containing_format.json | 2 +-
 .../pipeline_with_resource_spec.json | 2 +-
 .../pipeline_with_reused_component.json | 2 +-
 .../pipeline_with_various_io_types.json | 24 +++++++++--
 .../pipeline_with_various_io_types.py | 5 +++
 .../test_data/two_step_pipeline.json | 2 +-
 .../test_data/xgboost_sample_pipeline.json | 2 +-
 28 files changed, 109 insertions(+), 50 deletions(-)

diff --git a/sdk/python/kfp/dsl/io_types.py b/sdk/python/kfp/dsl/io_types.py
index e32c0c86540..3e74b1f0574 100644
--- a/sdk/python/kfp/dsl/io_types.py
+++ b/sdk/python/kfp/dsl/io_types.py
@@ -19,7 +19,6 @@
 import os
 from typing import Dict, Generic, List, Optional, Type, TypeVar, Union
 
-
 _GCS_LOCAL_MOUNT_PREFIX = '/gcs/'
 _MINIO_LOCAL_MOUNT_PREFIX = '/minio/'
 _S3_LOCAL_MOUNT_PREFIX = '/s3/'
@@ -399,6 +398,28 @@ def load_confusion_matrix(self, slice: str, categories: List[str],
     self._update_metadata(slice)
 
 
+class HTML(Artifact):
+  """An artifact representing an HTML file."""
+  TYPE_NAME = 'system.HTML'
+
+  def __init__(self,
+               name: Optional[str] = None,
+               uri: Optional[str] = None,
+               metadata: Optional[Dict] = None):
+    super().__init__(uri=uri, name=name, metadata=metadata)
+
+
+class Markdown(Artifact):
+  """An artifact representing a Markdown file."""
+  TYPE_NAME = 'system.Markdown'
+
+  def __init__(self,
+               name: Optional[str] = None,
+               uri: Optional[str] = None,
+               metadata: Optional[Dict] = None):
+    super().__init__(uri=uri, name=name, metadata=metadata)
+
+
 T = TypeVar('T')
 
 
@@ -407,7 +428,6 @@ class InputAnnotation():
   pass
 
 
-
 class OutputAnnotation():
   """Marker type for output artifacts."""
   pass
@@ -419,7 +439,6 @@ class OutputAnnotation():
 # Input = typing.Annotated[T, InputAnnotation]
 # Output = typing.Annotated[T, OutputAnnotation]
 
-
 # Input represents an Input artifact of type T.
Input = Union[T, InputAnnotation] @@ -430,16 +449,16 @@ class OutputAnnotation(): def is_artifact_annotation(typ) -> bool: if hasattr(typ, '_subs_tree'): # Python 3.6 subs_tree = typ._subs_tree() - return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [InputAnnotation, OutputAnnotation] + return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [ + InputAnnotation, OutputAnnotation + ] if not hasattr(typ, '__origin__'): return False - if typ.__origin__ != Union and type(typ.__origin__) != type(Union): return False - if not hasattr(typ, '__args__') or len(typ.__args__) != 2: return False @@ -448,6 +467,7 @@ def is_artifact_annotation(typ) -> bool: return True + def is_input_artifact(typ) -> bool: """Returns True if typ is of type Input[T].""" if not is_artifact_annotation(typ): @@ -455,10 +475,11 @@ def is_input_artifact(typ) -> bool: if hasattr(typ, '_subs_tree'): # Python 3.6 subs_tree = typ._subs_tree() - return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation + return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation return typ.__args__[1] == InputAnnotation + def is_output_artifact(typ) -> bool: """Returns True if typ is of type Output[T].""" if not is_artifact_annotation(typ): @@ -466,10 +487,11 @@ def is_output_artifact(typ) -> bool: if hasattr(typ, '_subs_tree'): # Python 3.6 subs_tree = typ._subs_tree() - return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation + return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation return typ.__args__[1] == OutputAnnotation + def get_io_artifact_class(typ): if not is_artifact_annotation(typ): return None @@ -484,6 +506,7 @@ def get_io_artifact_class(typ): return typ.__args__[0] + def get_io_artifact_annotation(typ): if not is_artifact_annotation(typ): return None @@ -497,7 +520,6 @@ def get_io_artifact_annotation(typ): return typ.__args__[1] - _SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = { x.TYPE_NAME: x for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics] diff --git a/sdk/python/kfp/dsl/type_utils.py b/sdk/python/kfp/dsl/type_utils.py index bca5b4b2269..ca8deb99594 100644 --- a/sdk/python/kfp/dsl/type_utils.py +++ b/sdk/python/kfp/dsl/type_utils.py @@ -27,6 +27,8 @@ 'metrics': io_types.Metrics, 'classificationmetrics': io_types.ClassificationMetrics, 'slicedclassificationmetrics': io_types.SlicedClassificationMetrics, + 'html': io_types.HTML, + 'markdown': io_types.Markdown, } # ComponentSpec I/O types to (IR) PipelineTaskSpec I/O types mapping. 
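The two entries added above extend the dictionary that maps a component's lower-cased I/O type-name string to an io_types class. A minimal sketch of that lookup, assuming the mapping is named _ARTIFACT_CLASSES_MAPPING as shown here and using a hypothetical resolve_artifact_class helper that is not part of this patch:

    from kfp.dsl import io_types

    # Abridged to the entries this patch adds; the real mapping also covers
    # Artifact, Model, Dataset, Metrics, and the classification-metrics types.
    _ARTIFACT_CLASSES_MAPPING = {
        'html': io_types.HTML,
        'markdown': io_types.Markdown,
    }

    def resolve_artifact_class(type_name: str) -> type:
        # Keys are lower-cased, so the lookup is case-insensitive;
        # unknown names fall back to the generic Artifact class.
        return _ARTIFACT_CLASSES_MAPPING.get(type_name.lower(),
                                             io_types.Artifact)

    assert resolve_artifact_class('HTML').TYPE_NAME == 'system.HTML'
    assert resolve_artifact_class('Markdown').TYPE_NAME == 'system.Markdown'
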
diff --git a/sdk/python/kfp/dsl/type_utils_test.py b/sdk/python/kfp/dsl/type_utils_test.py index 160e603da38..ed145343fbd 100644 --- a/sdk/python/kfp/dsl/type_utils_test.py +++ b/sdk/python/kfp/dsl/type_utils_test.py @@ -129,6 +129,18 @@ def test_is_parameter_type(self): 'expected_result': pb.ArtifactTypeSchema(schema_title='system.Artifact') }, + { + 'artifact_class_or_type_name': + io_types.HTML, + 'expected_result': + pb.ArtifactTypeSchema(schema_title='system.HTML') + }, + { + 'artifact_class_or_type_name': + io_types.Markdown, + 'expected_result': + pb.ArtifactTypeSchema(schema_title='system.Markdown') + }, ) def test_get_artifact_type_schema(self, artifact_class_or_type_name, expected_result): diff --git a/sdk/python/kfp/v2/compiler_cli_tests/test_data/lightweight_python_functions_v2_pipeline.json b/sdk/python/kfp/v2/compiler_cli_tests/test_data/lightweight_python_functions_v2_pipeline.json index 6de0e17d8ca..be0afad4841 100644 --- a/sdk/python/kfp/v2/compiler_cli_tests/test_data/lightweight_python_functions_v2_pipeline.json +++ b/sdk/python/kfp/v2/compiler_cli_tests/test_data/lightweight_python_functions_v2_pipeline.json @@ -111,7 +111,7 @@ "sh", "-ec", "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n", - "\nimport json\nimport inspect\nfrom typing import *\n\n# Copyright 2021 The Kubeflow Authors\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Classes for input/output types in KFP SDK.\n\nThese are only compatible with v2 Pipelines.\n\"\"\"\n\nimport os\nfrom typing import Dict, Generic, List, Optional, Type, TypeVar, Union\n\n\n_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'\n_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'\n_S3_LOCAL_MOUNT_PREFIX = '/s3/'\n\n\nclass Artifact(object):\n \"\"\"Generic Artifact class.\n\n This class is meant to represent the metadata around an input or output\n machine-learning Artifact. 
Artifacts have URIs, which can either be a location\n on disk (or Cloud storage) or some other resource identifier such as\n an API resource name.\n\n Artifacts carry a `metadata` field, which is a dictionary for storing\n metadata related to this artifact.\n \"\"\"\n TYPE_NAME = 'system.Artifact'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n \"\"\"Initializes the Artifact with the given name, URI and metadata.\"\"\"\n self.uri = uri or ''\n self.name = name or ''\n self.metadata = metadata or {}\n\n @property\n def path(self):\n return self._get_path()\n\n @path.setter\n def path(self, path):\n self._set_path(path)\n\n def _get_path(self) -> Optional[str]:\n if self.uri.startswith('gs://'):\n return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]\n elif self.uri.startswith('minio://'):\n return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]\n elif self.uri.startswith('s3://'):\n return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]\n return None\n\n def _set_path(self, path):\n if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):\n path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):\n path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):\n path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]\n self.uri = path\n\n\nclass Model(Artifact):\n \"\"\"An artifact representing an ML Model.\"\"\"\n TYPE_NAME = 'system.Model'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n @property\n def framework(self) -> str:\n return self._get_framework()\n\n def _get_framework(self) -> str:\n return self.metadata.get('framework', '')\n\n @framework.setter\n def framework(self, framework: str):\n self._set_framework(framework)\n\n def _set_framework(self, framework: str):\n self.metadata['framework'] = framework\n\n\nclass Dataset(Artifact):\n \"\"\"An artifact representing an ML Dataset.\"\"\"\n TYPE_NAME = 'system.Dataset'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Metrics(Artifact):\n \"\"\"Represent a simple base Artifact type to store key-value scalar metrics.\"\"\"\n TYPE_NAME = 'system.Metrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_metric(self, metric: str, value: float):\n \"\"\"Sets a custom scalar metric.\n\n Args:\n metric: Metric key\n value: Value of the metric.\n \"\"\"\n self.metadata[metric] = value\n\n\nclass ClassificationMetrics(Artifact):\n \"\"\"Represents Artifact class to store Classification Metrics.\"\"\"\n TYPE_NAME = 'system.ClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):\n \"\"\"Logs a single data point in the ROC Curve.\n\n Args:\n fpr: False positive rate value of the data point.\n tpr: True positive rate value of the data point.\n threshold: Threshold value for the data point.\n \"\"\"\n\n roc_reading = {\n 'confidenceThreshold': threshold,\n 'recall': tpr,\n 
'falsePositiveRate': fpr\n }\n if 'confidenceMetrics' not in self.metadata.keys():\n self.metadata['confidenceMetrics'] = []\n\n self.metadata['confidenceMetrics'].append(roc_reading)\n\n def log_roc_curve(self, fpr: List[float], tpr: List[float],\n threshold: List[float]):\n \"\"\"Logs an ROC curve.\n\n The list length of fpr, tpr and threshold must be the same.\n\n Args:\n fpr: List of false positive rate values.\n tpr: List of true positive rate values.\n threshold: List of threshold values.\n \"\"\"\n if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(tpr) != len(\n threshold):\n raise ValueError('Length of fpr, tpr and threshold must be the same. '\n 'Got lengths {}, {} and {} respectively.'.format(\n len(fpr), len(tpr), len(threshold)))\n\n for i in range(len(fpr)):\n self.log_roc_data_point(fpr=fpr[i], tpr=tpr[i], threshold=threshold[i])\n\n def set_confusion_matrix_categories(self, categories: List[str]):\n \"\"\"Stores confusion matrix categories.\n\n Args:\n categories: List of strings specifying the categories.\n \"\"\"\n\n self._categories = []\n annotation_specs = []\n for category in categories:\n annotation_spec = {'displayName': category}\n self._categories.append(category)\n annotation_specs.append(annotation_spec)\n\n self._matrix = []\n for row in range(len(self._categories)):\n self._matrix.append({'row': [0] * len(self._categories)})\n\n self._confusion_matrix = {}\n self._confusion_matrix['annotationSpecs'] = annotation_specs\n self._confusion_matrix['rows'] = self._matrix\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_row(self, row_category: str, row: List[float]):\n \"\"\"Logs a confusion matrix row.\n\n Args:\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n\n Raises:\n ValueError: If row_category is not in the list of categories\n set in set_categories call.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if len(row) != len(self._categories):\n raise ValueError('Invalid row. Expected size: {} got: {}'.\\\n format(len(self._categories), len(row)))\n\n self._matrix[self._categories.index(row_category)] = {'row': row}\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_cell(self, row_category: str, col_category: str,\n value: int):\n \"\"\"Logs a cell in the confusion matrix.\n\n Args:\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n\n Raises:\n ValueError: If row_category or col_category is not in the list of\n categories set in set_categories.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if col_category not in self._categories:\n raise ValueError('Invalid category: {} passed. 
Expected one of: {}'.\\\n format(row_category, self._categories))\n\n self._matrix[self._categories.index(row_category)]['row'][\n self._categories.index(col_category)] = value\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix(self, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Logs a confusion matrix.\n\n Args:\n categories: List of the category names.\n matrix: Complete confusion matrix.\n\n Raises:\n ValueError: Length of categories does not match number of rows or columns.\n \"\"\"\n self.set_confusion_matrix_categories(categories)\n\n if len(matrix) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n for index in range(len(categories)):\n if len(matrix[index]) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n self.log_confusion_matrix_row(categories[index], matrix[index])\n\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n\nclass SlicedClassificationMetrics(Artifact):\n \"\"\"Metrics class representing Sliced Classification Metrics.\n\n Similar to ClassificationMetrics clients using this class are expected to use\n log methods of the class to log metrics with the difference being each log\n method takes a slice to associate the ClassificationMetrics.\n\n \"\"\"\n\n TYPE_NAME = 'system.SlicedClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def _upsert_classification_metrics_for_slice(self, slice: str):\n \"\"\"Upserts the classification metrics instance for a slice.\"\"\"\n if slice not in self._sliced_metrics:\n self._sliced_metrics[slice] = ClassificationMetrics()\n\n def _update_metadata(self, slice: str):\n \"\"\"Updates metadata to adhere to the metrics schema.\"\"\"\n self.metadata = {}\n self.metadata['evaluationSlices'] = []\n for slice in self._sliced_metrics.keys():\n slice_metrics = {\n 'slice': slice,\n 'sliceClassificationMetrics': self._sliced_metrics[slice].metadata\n }\n self.metadata['evaluationSlices'].append(slice_metrics)\n\n def log_roc_reading(self, slice: str, threshold: float, tpr: float,\n fpr: float):\n \"\"\"Logs a single data point in the ROC Curve of a slice.\n\n Args:\n slice: String representing slice label.\n threshold: Thresold value for the data point.\n tpr: True positive rate value of the data point.\n fpr: False positive rate value of the data point.\n \"\"\"\n\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_roc_reading(threshold, tpr, fpr)\n self._update_metadata(slice)\n\n def load_roc_readings(self, slice: str, readings: List[List[float]]):\n \"\"\"Supports bulk loading ROC Curve readings for a slice.\n\n Args:\n slice: String representing slice label.\n readings: A 2-D list providing ROC Curve data points.\n The expected order of the data points is: threshold,\n true_positive_rate, false_positive_rate.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].load_roc_readings(readings)\n self._update_metadata(slice)\n\n def set_confusion_matrix_categories(self, slice: str, categories: List[str]):\n \"\"\"Stores confusion matrix categories for a slice..\n\n Categories are stored in the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n categories: 
List of strings specifying the categories.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].set_confusion_matrix_categories(categories)\n self._update_metadata(slice)\n\n def log_confusion_matrix_row(self, slice: str, row_category: str,\n row: List[int]):\n \"\"\"Logs a confusion matrix row for a slice.\n\n Row is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)\n self._update_metadata(slice)\n\n def log_confusion_matrix_cell(self, slice: str, row_category: str,\n col_category: str, value: int):\n \"\"\"Logs a confusion matrix cell for a slice..\n\n Cell is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(\n row_category, col_category, value)\n self._update_metadata(slice)\n\n def load_confusion_matrix(self, slice: str, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Supports bulk loading the whole confusion matrix for a slice.\n\n Args:\n slice: String representing slice label.\n categories: List of the category names.\n matrix: Complete confusion matrix.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(categories, matrix)\n self._update_metadata(slice)\n\n\nT = TypeVar('T')\n\n\nclass InputAnnotation():\n \"\"\"Marker type for input artifacts.\"\"\"\n pass\n\n\n\nclass OutputAnnotation():\n \"\"\"Marker type for output artifacts.\"\"\"\n pass\n\n\n# TODO: Use typing.Annotated instead of this hack.\n# With typing.Annotated (Python 3.9+ or typing_extensions package), the\n# following would look like:\n# Input = typing.Annotated[T, InputAnnotation]\n# Output = typing.Annotated[T, OutputAnnotation]\n\n\n# Input represents an Input artifact of type T.\nInput = Union[T, InputAnnotation]\n\n# Output represents an Output artifact of type T.\nOutput = Union[T, OutputAnnotation]\n\n\ndef is_artifact_annotation(typ) -> bool:\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [InputAnnotation, OutputAnnotation]\n\n if not hasattr(typ, '__origin__'):\n return False\n\n\n if typ.__origin__ != Union and type(typ.__origin__) != type(Union):\n return False\n\n\n if not hasattr(typ, '__args__') or len(typ.__args__) != 2:\n return False\n\n if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:\n return False\n\n return True\n\ndef is_input_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Input[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation\n\n return typ.__args__[1] == InputAnnotation\n\ndef is_output_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Output[T].\"\"\"\n if not is_artifact_annotation(typ):\n 
return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation\n\n return typ.__args__[1] == OutputAnnotation\n\ndef get_io_artifact_class(typ):\n if not is_artifact_annotation(typ):\n return None\n if typ == Input or typ == Output:\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[1]\n\n return typ.__args__[0]\n\ndef get_io_artifact_annotation(typ):\n if not is_artifact_annotation(typ):\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[2]\n\n return typ.__args__[1]\n\n\n\n_SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = {\n x.TYPE_NAME: x\n for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]\n}\n\n\ndef create_runtime_artifact(runtime_artifact: Dict) -> Artifact:\n \"\"\"Creates an Artifact instance from the specified RuntimeArtifact.\n\n Args:\n runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.\n \"\"\"\n schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')\n\n artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)\n if not artifact_type:\n artifact_type = Artifact\n return artifact_type(\n uri=runtime_artifact.get('uri', ''),\n name=runtime_artifact.get('name', ''),\n metadata=runtime_artifact.get('metadata', {}),\n )\n\nclass InputPath:\n '''When creating component from function, :class:`.InputPath` should be used as function parameter annotation to tell the system to pass the *data file path* to the function instead of passing the actual data.'''\n def __init__(self, type=None):\n self.type = type\n\nclass OutputPath:\n '''When creating component from function, :class:`.OutputPath` should be used as function parameter annotation to tell the system that the function wants to output data by writing it into a file with the given path instead of returning the data from the function.'''\n def __init__(self, type=None):\n self.type = type\n\nclass Executor():\n \"\"\"Executor executes v2-based Python function components.\"\"\"\n\n def __init__(self, executor_input: Dict, function_to_execute: Callable):\n self._func = function_to_execute\n self._input = executor_input\n self._input_artifacts: Dict[str, Artifact] = {}\n self._output_artifacts: Dict[str, Artifact] = {}\n\n for name, artifacts in self._input.get('inputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._input_artifacts[name] = self._make_input_artifact(\n artifacts_list[0])\n\n for name, artifacts in self._input.get('outputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._output_artifacts[name] = self._make_output_artifact(\n artifacts_list[0])\n\n self._return_annotation = inspect.signature(self._func).return_annotation\n self._executor_output = {}\n\n @classmethod\n def _make_input_artifact(cls, runtime_artifact: Dict):\n return create_runtime_artifact(runtime_artifact)\n\n @classmethod\n def _make_output_artifact(cls, runtime_artifact: Dict):\n import os\n artifact = create_runtime_artifact(runtime_artifact)\n os.makedirs(os.path.dirname(artifact.path), exist_ok=True)\n return artifact\n\n def _get_input_artifact(self, name: str):\n return self._input_artifacts.get(name)\n\n def _get_output_artifact(self, name: str):\n return 
self._output_artifacts.get(name)\n\n def _get_input_parameter_value(self, parameter_name: str, parameter_type: Any):\n parameter = self._input.get('inputs', {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n if parameter.get('stringValue'):\n if parameter_type == str:\n return parameter['stringValue']\n elif parameter_type == bool:\n # Use `.lower()` so it can also handle 'True' and 'False' (resulted from\n # `str(True)` and `str(False)`, respectively.\n return json.loads(parameter['stringValue'].lower())\n else:\n return json.loads(parameter['stringValue'])\n elif parameter.get('intValue'):\n return int(parameter['intValue'])\n elif parameter.get('doubleValue'):\n return float(parameter['doubleValue'])\n\n def _get_output_parameter_path(self, parameter_name: str):\n parameter_name = self._maybe_strip_path_suffix(parameter_name)\n parameter = self._input.get('outputs',\n {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n import os\n path = parameter.get('outputFile', None)\n if path:\n os.makedirs(os.path.dirname(path), exist_ok=True)\n return path\n\n def _get_output_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n output_artifact = self._output_artifacts.get(artifact_name)\n if not output_artifact:\n raise ValueError(\n 'Failed to get output artifact path for artifact name {}'.format(\n artifact_name))\n return output_artifact.path\n\n def _get_input_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n input_artifact = self._input_artifacts.get(artifact_name)\n if not input_artifact:\n raise ValueError(\n 'Failed to get input artifact path for artifact name {}'.format(\n artifact_name))\n return input_artifact.path\n\n def _write_output_parameter_value(self, name: str,\n value: Union[str, int, float, bool, dict,\n list, Dict, List]):\n if type(value) == str:\n output = {'stringValue': value}\n elif type(value) == int:\n output = {'intValue': value}\n elif type(value) == float:\n output = {'doubleValue': value}\n else:\n # For bool, list, dict, List, Dict, json serialize the value.\n output = {'stringValue': json.dumps(value)}\n\n if not self._executor_output.get('parameters'):\n self._executor_output['parameters'] = {}\n\n self._executor_output['parameters'][name] = output\n\n def _write_output_artifact_payload(self, name: str, value: Any):\n path = self._get_output_artifact_path(name)\n with open(path, 'w') as f:\n f.write(str(value))\n\n # TODO: extract to a util\n @classmethod\n def _get_short_type_name(cls, type_name: str) -> str:\n \"\"\"Extracts the short form type name.\n\n This method is used for looking up serializer for a given type.\n\n For example:\n typing.List -> List\n typing.List[int] -> List\n typing.Dict[str, str] -> Dict\n List -> List\n str -> str\n\n Args:\n type_name: The original type name.\n\n Returns:\n The short form type name or the original name if pattern doesn't match.\n \"\"\"\n import re\n match = re.match('(typing\\.)?(?P\\w+)(?:\\[.+\\])?', type_name)\n if match:\n return match.group('type')\n else:\n return type_name\n\n # TODO: merge with type_utils.is_parameter_type\n @classmethod\n def _is_parameter(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return annotation in [str, int, float, bool, dict, list]\n\n # Annotation could be, for instance `typing.Dict[str, str]`, etc.\n return cls._get_short_type_name(str(annotation)) in ['Dict', 
'List']\n\n @classmethod\n def _is_artifact(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, Artifact)\n return False\n\n @classmethod\n def _is_named_tuple(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, tuple) and hasattr(\n annotation, '_fields') and hasattr(annotation, '__annotations__')\n return False\n\n def _handle_single_return_value(self, output_name: str, annotation_type: Any,\n return_value: Any):\n if self._is_parameter(annotation_type):\n if type(return_value) != annotation_type:\n raise ValueError(\n 'Function `{}` returned value of type {}; want type {}'.format(\n self._func.__name__, type(return_value), annotation_type))\n self._write_output_parameter_value(output_name, return_value)\n elif self._is_artifact(annotation_type):\n self._write_output_artifact_payload(output_name, return_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. Must be one of `str`, `int`, `float`, or a'\n ' subclass of `Artifact`'.format(annotation_type))\n\n def _write_executor_output(self, func_output: Optional[Any] = None):\n if self._output_artifacts:\n self._executor_output['artifacts'] = {}\n\n for name, artifact in self._output_artifacts.items():\n runtime_artifact = {\n 'name': artifact.name,\n 'uri': artifact.uri,\n 'metadata': artifact.metadata,\n }\n artifacts_list = {'artifacts': [runtime_artifact]}\n\n self._executor_output['artifacts'][name] = artifacts_list\n\n if func_output is not None:\n if self._is_parameter(self._return_annotation) or self._is_artifact(\n self._return_annotation):\n # Note: single output is named `Output` in component.yaml.\n self._handle_single_return_value('Output', self._return_annotation,\n func_output)\n elif self._is_named_tuple(self._return_annotation):\n if len(self._return_annotation._fields) != len(func_output):\n raise RuntimeError(\n 'Expected {} return values from function `{}`, got {}'.format(\n len(self._return_annotation._fields), self._func.__name__,\n len(func_output)))\n for i in range(len(self._return_annotation._fields)):\n field = self._return_annotation._fields[i]\n field_type = self._return_annotation.__annotations__[field]\n if type(func_output) == tuple:\n field_value = func_output[i]\n else:\n field_value = getattr(func_output, field)\n self._handle_single_return_value(field, field_type, field_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. 
Must be one of `str`, `int`, `float`, a'\n ' subclass of `Artifact`, or a NamedTuple collection of these types.'\n .format(self._return_annotation))\n\n import os\n os.makedirs(\n os.path.dirname(self._input['outputs']['outputFile']), exist_ok=True)\n with open(self._input['outputs']['outputFile'], 'w') as f:\n f.write(json.dumps(self._executor_output))\n\n def _maybe_strip_path_suffix(self, name) -> str:\n if name.endswith('_path'):\n name = name[0:-len('_path')]\n if name.endswith('_file'):\n name = name[0:-len('_file')]\n return name\n\n def execute(self):\n annotations = inspect.getfullargspec(self._func).annotations\n\n # Function arguments.\n func_kwargs = {}\n\n for k, v in annotations.items():\n if k == 'return':\n continue\n\n if self._is_parameter(v):\n func_kwargs[k] = self._get_input_parameter_value(k, v)\n\n if is_artifact_annotation(v):\n if is_input_artifact(v):\n func_kwargs[k] = self._get_input_artifact(k)\n if is_output_artifact(v):\n func_kwargs[k] = self._get_output_artifact(k)\n\n elif isinstance(v, OutputPath):\n if self._is_parameter(v.type):\n func_kwargs[k] = self._get_output_parameter_path(k)\n else:\n func_kwargs[k] = self._get_output_artifact_path(k)\n elif isinstance(v, InputPath):\n func_kwargs[k] = self._get_input_artifact_path(k)\n\n result = self._func(**func_kwargs)\n self._write_executor_output(result)\n\n\ndef preprocess(\n # An input parameter of type string.\n message: str,\n # Use Output[T] to get a metadata-rich handle to the output artifact\n # of type `Dataset`.\n output_dataset_one: Output[Dataset],\n # A locally accessible filepath for another output artifact of type\n # `Dataset`.\n output_dataset_two_path: OutputPath('Dataset'),\n # A locally accessible filepath for an output parameter of type string.\n output_parameter_path: OutputPath(str),\n # A locally accessible filepath for an output parameter of type bool.\n output_bool_parameter_path: OutputPath(bool),\n # A locally accessible filepath for an output parameter of type dict.\n output_dict_parameter_path: OutputPath(Dict[str, int]),\n # A locally accessible filepath for an output parameter of type list.\n output_list_parameter_path: OutputPath(List[str]),\n):\n \"\"\"Dummy preprocessing step\"\"\"\n\n # Use Dataset.path to access a local file path for writing.\n # One can also use Dataset.uri to access the actual URI file path.\n with open(output_dataset_one.path, 'w') as f:\n f.write(message)\n\n # OutputPath is used to just pass the local file path of the output artifact\n # to the function.\n with open(output_dataset_two_path, 'w') as f:\n f.write(message)\n\n with open(output_parameter_path, 'w') as f:\n f.write(message)\n\n with open(output_bool_parameter_path, 'w') as f:\n f.write(str(True)) # use either `str()` or `json.dumps()` for bool values.\n\n import json\n with open(output_dict_parameter_path, 'w') as f:\n f.write(json.dumps({'A': 1, 'B': 2}))\n\n with open(output_list_parameter_path, 'w') as f:\n f.write(json.dumps(['a', 'b', 'c']))\n\n\ndef executor_main():\n import argparse\n import json\n\n parser = argparse.ArgumentParser(description='Process some integers.')\n parser.add_argument('--executor_input', type=str)\n parser.add_argument('--function_to_execute', type=str)\n\n args, _ = parser.parse_known_args()\n executor_input = json.loads(args.executor_input)\n function_to_execute = globals()[args.function_to_execute]\n\n executor = Executor(executor_input=executor_input,\n function_to_execute=function_to_execute)\n\n executor.execute()\n\n\nif __name__ == '__main__':\n 
executor_main()\n" + "\nimport json\nimport inspect\nfrom typing import *\n\n# Copyright 2021 The Kubeflow Authors\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Classes for input/output types in KFP SDK.\n\nThese are only compatible with v2 Pipelines.\n\"\"\"\n\nimport os\nfrom typing import Dict, Generic, List, Optional, Type, TypeVar, Union\n\n_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'\n_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'\n_S3_LOCAL_MOUNT_PREFIX = '/s3/'\n\n\nclass Artifact(object):\n \"\"\"Generic Artifact class.\n\n This class is meant to represent the metadata around an input or output\n machine-learning Artifact. Artifacts have URIs, which can either be a location\n on disk (or Cloud storage) or some other resource identifier such as\n an API resource name.\n\n Artifacts carry a `metadata` field, which is a dictionary for storing\n metadata related to this artifact.\n \"\"\"\n TYPE_NAME = 'system.Artifact'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n \"\"\"Initializes the Artifact with the given name, URI and metadata.\"\"\"\n self.uri = uri or ''\n self.name = name or ''\n self.metadata = metadata or {}\n\n @property\n def path(self):\n return self._get_path()\n\n @path.setter\n def path(self, path):\n self._set_path(path)\n\n def _get_path(self) -> Optional[str]:\n if self.uri.startswith('gs://'):\n return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]\n elif self.uri.startswith('minio://'):\n return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]\n elif self.uri.startswith('s3://'):\n return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]\n return None\n\n def _set_path(self, path):\n if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):\n path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):\n path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):\n path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]\n self.uri = path\n\n\nclass Model(Artifact):\n \"\"\"An artifact representing an ML Model.\"\"\"\n TYPE_NAME = 'system.Model'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n @property\n def framework(self) -> str:\n return self._get_framework()\n\n def _get_framework(self) -> str:\n return self.metadata.get('framework', '')\n\n @framework.setter\n def framework(self, framework: str):\n self._set_framework(framework)\n\n def _set_framework(self, framework: str):\n self.metadata['framework'] = framework\n\n\nclass Dataset(Artifact):\n \"\"\"An artifact representing an ML Dataset.\"\"\"\n TYPE_NAME = 'system.Dataset'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Metrics(Artifact):\n \"\"\"Represent a simple base Artifact type to store 
key-value scalar metrics.\"\"\"\n TYPE_NAME = 'system.Metrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_metric(self, metric: str, value: float):\n \"\"\"Sets a custom scalar metric.\n\n Args:\n metric: Metric key\n value: Value of the metric.\n \"\"\"\n self.metadata[metric] = value\n\n\nclass ClassificationMetrics(Artifact):\n \"\"\"Represents Artifact class to store Classification Metrics.\"\"\"\n TYPE_NAME = 'system.ClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):\n \"\"\"Logs a single data point in the ROC Curve.\n\n Args:\n fpr: False positive rate value of the data point.\n tpr: True positive rate value of the data point.\n threshold: Threshold value for the data point.\n \"\"\"\n\n roc_reading = {\n 'confidenceThreshold': threshold,\n 'recall': tpr,\n 'falsePositiveRate': fpr\n }\n if 'confidenceMetrics' not in self.metadata.keys():\n self.metadata['confidenceMetrics'] = []\n\n self.metadata['confidenceMetrics'].append(roc_reading)\n\n def log_roc_curve(self, fpr: List[float], tpr: List[float],\n threshold: List[float]):\n \"\"\"Logs an ROC curve.\n\n The list length of fpr, tpr and threshold must be the same.\n\n Args:\n fpr: List of false positive rate values.\n tpr: List of true positive rate values.\n threshold: List of threshold values.\n \"\"\"\n if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(tpr) != len(\n threshold):\n raise ValueError('Length of fpr, tpr and threshold must be the same. '\n 'Got lengths {}, {} and {} respectively.'.format(\n len(fpr), len(tpr), len(threshold)))\n\n for i in range(len(fpr)):\n self.log_roc_data_point(fpr=fpr[i], tpr=tpr[i], threshold=threshold[i])\n\n def set_confusion_matrix_categories(self, categories: List[str]):\n \"\"\"Stores confusion matrix categories.\n\n Args:\n categories: List of strings specifying the categories.\n \"\"\"\n\n self._categories = []\n annotation_specs = []\n for category in categories:\n annotation_spec = {'displayName': category}\n self._categories.append(category)\n annotation_specs.append(annotation_spec)\n\n self._matrix = []\n for row in range(len(self._categories)):\n self._matrix.append({'row': [0] * len(self._categories)})\n\n self._confusion_matrix = {}\n self._confusion_matrix['annotationSpecs'] = annotation_specs\n self._confusion_matrix['rows'] = self._matrix\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_row(self, row_category: str, row: List[float]):\n \"\"\"Logs a confusion matrix row.\n\n Args:\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n\n Raises:\n ValueError: If row_category is not in the list of categories\n set in set_categories call.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if len(row) != len(self._categories):\n raise ValueError('Invalid row. 
Expected size: {} got: {}'.\\\n format(len(self._categories), len(row)))\n\n self._matrix[self._categories.index(row_category)] = {'row': row}\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_cell(self, row_category: str, col_category: str,\n value: int):\n \"\"\"Logs a cell in the confusion matrix.\n\n Args:\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n\n Raises:\n ValueError: If row_category or col_category is not in the list of\n categories set in set_categories.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if col_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n self._matrix[self._categories.index(row_category)]['row'][\n self._categories.index(col_category)] = value\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix(self, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Logs a confusion matrix.\n\n Args:\n categories: List of the category names.\n matrix: Complete confusion matrix.\n\n Raises:\n ValueError: Length of categories does not match number of rows or columns.\n \"\"\"\n self.set_confusion_matrix_categories(categories)\n\n if len(matrix) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n for index in range(len(categories)):\n if len(matrix[index]) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n self.log_confusion_matrix_row(categories[index], matrix[index])\n\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n\nclass SlicedClassificationMetrics(Artifact):\n \"\"\"Metrics class representing Sliced Classification Metrics.\n\n Similar to ClassificationMetrics clients using this class are expected to use\n log methods of the class to log metrics with the difference being each log\n method takes a slice to associate the ClassificationMetrics.\n\n \"\"\"\n\n TYPE_NAME = 'system.SlicedClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def _upsert_classification_metrics_for_slice(self, slice: str):\n \"\"\"Upserts the classification metrics instance for a slice.\"\"\"\n if slice not in self._sliced_metrics:\n self._sliced_metrics[slice] = ClassificationMetrics()\n\n def _update_metadata(self, slice: str):\n \"\"\"Updates metadata to adhere to the metrics schema.\"\"\"\n self.metadata = {}\n self.metadata['evaluationSlices'] = []\n for slice in self._sliced_metrics.keys():\n slice_metrics = {\n 'slice': slice,\n 'sliceClassificationMetrics': self._sliced_metrics[slice].metadata\n }\n self.metadata['evaluationSlices'].append(slice_metrics)\n\n def log_roc_reading(self, slice: str, threshold: float, tpr: float,\n fpr: float):\n \"\"\"Logs a single data point in the ROC Curve of a slice.\n\n Args:\n slice: String representing slice label.\n threshold: Thresold value for the data point.\n tpr: True positive rate value of the data point.\n fpr: False positive rate value of the data point.\n \"\"\"\n\n 
self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_roc_reading(threshold, tpr, fpr)\n self._update_metadata(slice)\n\n def load_roc_readings(self, slice: str, readings: List[List[float]]):\n \"\"\"Supports bulk loading ROC Curve readings for a slice.\n\n Args:\n slice: String representing slice label.\n readings: A 2-D list providing ROC Curve data points.\n The expected order of the data points is: threshold,\n true_positive_rate, false_positive_rate.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].load_roc_readings(readings)\n self._update_metadata(slice)\n\n def set_confusion_matrix_categories(self, slice: str, categories: List[str]):\n \"\"\"Stores confusion matrix categories for a slice..\n\n Categories are stored in the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n categories: List of strings specifying the categories.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].set_confusion_matrix_categories(categories)\n self._update_metadata(slice)\n\n def log_confusion_matrix_row(self, slice: str, row_category: str,\n row: List[int]):\n \"\"\"Logs a confusion matrix row for a slice.\n\n Row is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)\n self._update_metadata(slice)\n\n def log_confusion_matrix_cell(self, slice: str, row_category: str,\n col_category: str, value: int):\n \"\"\"Logs a confusion matrix cell for a slice..\n\n Cell is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(\n row_category, col_category, value)\n self._update_metadata(slice)\n\n def load_confusion_matrix(self, slice: str, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Supports bulk loading the whole confusion matrix for a slice.\n\n Args:\n slice: String representing slice label.\n categories: List of the category names.\n matrix: Complete confusion matrix.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(categories, matrix)\n self._update_metadata(slice)\n\n\nclass HTML(Artifact):\n \"\"\"An artifact representing an HTML file.\"\"\"\n TYPE_NAME = 'system.HTML'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Markdown(Artifact):\n \"\"\"An artifact representing an Markdown file.\"\"\"\n TYPE_NAME = 'system.Markdown'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nT = TypeVar('T')\n\n\nclass InputAnnotation():\n \"\"\"Marker type for input artifacts.\"\"\"\n pass\n\n\nclass 
OutputAnnotation():\n \"\"\"Marker type for output artifacts.\"\"\"\n pass\n\n\n# TODO: Use typing.Annotated instead of this hack.\n# With typing.Annotated (Python 3.9+ or typing_extensions package), the\n# following would look like:\n# Input = typing.Annotated[T, InputAnnotation]\n# Output = typing.Annotated[T, OutputAnnotation]\n\n# Input represents an Input artifact of type T.\nInput = Union[T, InputAnnotation]\n\n# Output represents an Output artifact of type T.\nOutput = Union[T, OutputAnnotation]\n\n\ndef is_artifact_annotation(typ) -> bool:\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [\n InputAnnotation, OutputAnnotation\n ]\n\n if not hasattr(typ, '__origin__'):\n return False\n\n if typ.__origin__ != Union and type(typ.__origin__) != type(Union):\n return False\n\n if not hasattr(typ, '__args__') or len(typ.__args__) != 2:\n return False\n\n if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:\n return False\n\n return True\n\n\ndef is_input_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Input[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation\n\n return typ.__args__[1] == InputAnnotation\n\n\ndef is_output_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Output[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation\n\n return typ.__args__[1] == OutputAnnotation\n\n\ndef get_io_artifact_class(typ):\n if not is_artifact_annotation(typ):\n return None\n if typ == Input or typ == Output:\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[1]\n\n return typ.__args__[0]\n\n\ndef get_io_artifact_annotation(typ):\n if not is_artifact_annotation(typ):\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[2]\n\n return typ.__args__[1]\n\n\n_SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = {\n x.TYPE_NAME: x\n for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]\n}\n\n\ndef create_runtime_artifact(runtime_artifact: Dict) -> Artifact:\n \"\"\"Creates an Artifact instance from the specified RuntimeArtifact.\n\n Args:\n runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.\n \"\"\"\n schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')\n\n artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)\n if not artifact_type:\n artifact_type = Artifact\n return artifact_type(\n uri=runtime_artifact.get('uri', ''),\n name=runtime_artifact.get('name', ''),\n metadata=runtime_artifact.get('metadata', {}),\n )\n\nclass InputPath:\n '''When creating component from function, :class:`.InputPath` should be used as function parameter annotation to tell the system to pass the *data file path* to the function instead of passing the actual data.'''\n def __init__(self, type=None):\n self.type = type\n\nclass OutputPath:\n '''When creating component from function, :class:`.OutputPath` should be used as function parameter annotation to tell the system that the function wants to output data by writing it into a file with the given 
path instead of returning the data from the function.'''\n def __init__(self, type=None):\n self.type = type\n\nclass Executor():\n \"\"\"Executor executes v2-based Python function components.\"\"\"\n\n def __init__(self, executor_input: Dict, function_to_execute: Callable):\n self._func = function_to_execute\n self._input = executor_input\n self._input_artifacts: Dict[str, Artifact] = {}\n self._output_artifacts: Dict[str, Artifact] = {}\n\n for name, artifacts in self._input.get('inputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._input_artifacts[name] = self._make_input_artifact(\n artifacts_list[0])\n\n for name, artifacts in self._input.get('outputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._output_artifacts[name] = self._make_output_artifact(\n artifacts_list[0])\n\n self._return_annotation = inspect.signature(self._func).return_annotation\n self._executor_output = {}\n\n @classmethod\n def _make_input_artifact(cls, runtime_artifact: Dict):\n return create_runtime_artifact(runtime_artifact)\n\n @classmethod\n def _make_output_artifact(cls, runtime_artifact: Dict):\n import os\n artifact = create_runtime_artifact(runtime_artifact)\n os.makedirs(os.path.dirname(artifact.path), exist_ok=True)\n return artifact\n\n def _get_input_artifact(self, name: str):\n return self._input_artifacts.get(name)\n\n def _get_output_artifact(self, name: str):\n return self._output_artifacts.get(name)\n\n def _get_input_parameter_value(self, parameter_name: str, parameter_type: Any):\n parameter = self._input.get('inputs', {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n if parameter.get('stringValue'):\n if parameter_type == str:\n return parameter['stringValue']\n elif parameter_type == bool:\n # Use `.lower()` so it can also handle 'True' and 'False' (resulted from\n # `str(True)` and `str(False)`, respectively.\n return json.loads(parameter['stringValue'].lower())\n else:\n return json.loads(parameter['stringValue'])\n elif parameter.get('intValue'):\n return int(parameter['intValue'])\n elif parameter.get('doubleValue'):\n return float(parameter['doubleValue'])\n\n def _get_output_parameter_path(self, parameter_name: str):\n parameter_name = self._maybe_strip_path_suffix(parameter_name)\n parameter = self._input.get('outputs',\n {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n import os\n path = parameter.get('outputFile', None)\n if path:\n os.makedirs(os.path.dirname(path), exist_ok=True)\n return path\n\n def _get_output_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n output_artifact = self._output_artifacts.get(artifact_name)\n if not output_artifact:\n raise ValueError(\n 'Failed to get output artifact path for artifact name {}'.format(\n artifact_name))\n return output_artifact.path\n\n def _get_input_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n input_artifact = self._input_artifacts.get(artifact_name)\n if not input_artifact:\n raise ValueError(\n 'Failed to get input artifact path for artifact name {}'.format(\n artifact_name))\n return input_artifact.path\n\n def _write_output_parameter_value(self, name: str,\n value: Union[str, int, float, bool, dict,\n list, Dict, List]):\n if type(value) == str:\n output = {'stringValue': value}\n elif 
type(value) == int:\n output = {'intValue': value}\n elif type(value) == float:\n output = {'doubleValue': value}\n else:\n # For bool, list, dict, List, Dict, json serialize the value.\n output = {'stringValue': json.dumps(value)}\n\n if not self._executor_output.get('parameters'):\n self._executor_output['parameters'] = {}\n\n self._executor_output['parameters'][name] = output\n\n def _write_output_artifact_payload(self, name: str, value: Any):\n path = self._get_output_artifact_path(name)\n with open(path, 'w') as f:\n f.write(str(value))\n\n # TODO: extract to a util\n @classmethod\n def _get_short_type_name(cls, type_name: str) -> str:\n \"\"\"Extracts the short form type name.\n\n This method is used for looking up serializer for a given type.\n\n For example:\n typing.List -> List\n typing.List[int] -> List\n typing.Dict[str, str] -> Dict\n List -> List\n str -> str\n\n Args:\n type_name: The original type name.\n\n Returns:\n The short form type name or the original name if pattern doesn't match.\n \"\"\"\n import re\n match = re.match('(typing\\.)?(?P\\w+)(?:\\[.+\\])?', type_name)\n if match:\n return match.group('type')\n else:\n return type_name\n\n # TODO: merge with type_utils.is_parameter_type\n @classmethod\n def _is_parameter(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return annotation in [str, int, float, bool, dict, list]\n\n # Annotation could be, for instance `typing.Dict[str, str]`, etc.\n return cls._get_short_type_name(str(annotation)) in ['Dict', 'List']\n\n @classmethod\n def _is_artifact(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, Artifact)\n return False\n\n @classmethod\n def _is_named_tuple(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, tuple) and hasattr(\n annotation, '_fields') and hasattr(annotation, '__annotations__')\n return False\n\n def _handle_single_return_value(self, output_name: str, annotation_type: Any,\n return_value: Any):\n if self._is_parameter(annotation_type):\n if type(return_value) != annotation_type:\n raise ValueError(\n 'Function `{}` returned value of type {}; want type {}'.format(\n self._func.__name__, type(return_value), annotation_type))\n self._write_output_parameter_value(output_name, return_value)\n elif self._is_artifact(annotation_type):\n self._write_output_artifact_payload(output_name, return_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. 
Must be one of `str`, `int`, `float`, or a'\n ' subclass of `Artifact`'.format(annotation_type))\n\n def _write_executor_output(self, func_output: Optional[Any] = None):\n if self._output_artifacts:\n self._executor_output['artifacts'] = {}\n\n for name, artifact in self._output_artifacts.items():\n runtime_artifact = {\n 'name': artifact.name,\n 'uri': artifact.uri,\n 'metadata': artifact.metadata,\n }\n artifacts_list = {'artifacts': [runtime_artifact]}\n\n self._executor_output['artifacts'][name] = artifacts_list\n\n if func_output is not None:\n if self._is_parameter(self._return_annotation) or self._is_artifact(\n self._return_annotation):\n # Note: single output is named `Output` in component.yaml.\n self._handle_single_return_value('Output', self._return_annotation,\n func_output)\n elif self._is_named_tuple(self._return_annotation):\n if len(self._return_annotation._fields) != len(func_output):\n raise RuntimeError(\n 'Expected {} return values from function `{}`, got {}'.format(\n len(self._return_annotation._fields), self._func.__name__,\n len(func_output)))\n for i in range(len(self._return_annotation._fields)):\n field = self._return_annotation._fields[i]\n field_type = self._return_annotation.__annotations__[field]\n if type(func_output) == tuple:\n field_value = func_output[i]\n else:\n field_value = getattr(func_output, field)\n self._handle_single_return_value(field, field_type, field_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. Must be one of `str`, `int`, `float`, a'\n ' subclass of `Artifact`, or a NamedTuple collection of these types.'\n .format(self._return_annotation))\n\n import os\n os.makedirs(\n os.path.dirname(self._input['outputs']['outputFile']), exist_ok=True)\n with open(self._input['outputs']['outputFile'], 'w') as f:\n f.write(json.dumps(self._executor_output))\n\n def _maybe_strip_path_suffix(self, name) -> str:\n if name.endswith('_path'):\n name = name[0:-len('_path')]\n if name.endswith('_file'):\n name = name[0:-len('_file')]\n return name\n\n def execute(self):\n annotations = inspect.getfullargspec(self._func).annotations\n\n # Function arguments.\n func_kwargs = {}\n\n for k, v in annotations.items():\n if k == 'return':\n continue\n\n if self._is_parameter(v):\n func_kwargs[k] = self._get_input_parameter_value(k, v)\n\n if is_artifact_annotation(v):\n if is_input_artifact(v):\n func_kwargs[k] = self._get_input_artifact(k)\n if is_output_artifact(v):\n func_kwargs[k] = self._get_output_artifact(k)\n\n elif isinstance(v, OutputPath):\n if self._is_parameter(v.type):\n func_kwargs[k] = self._get_output_parameter_path(k)\n else:\n func_kwargs[k] = self._get_output_artifact_path(k)\n elif isinstance(v, InputPath):\n func_kwargs[k] = self._get_input_artifact_path(k)\n\n result = self._func(**func_kwargs)\n self._write_executor_output(result)\n\n\ndef preprocess(\n # An input parameter of type string.\n message: str,\n # Use Output[T] to get a metadata-rich handle to the output artifact\n # of type `Dataset`.\n output_dataset_one: Output[Dataset],\n # A locally accessible filepath for another output artifact of type\n # `Dataset`.\n output_dataset_two_path: OutputPath('Dataset'),\n # A locally accessible filepath for an output parameter of type string.\n output_parameter_path: OutputPath(str),\n # A locally accessible filepath for an output parameter of type bool.\n output_bool_parameter_path: OutputPath(bool),\n # A locally accessible filepath for an output parameter of type dict.\n output_dict_parameter_path: OutputPath(Dict[str, 
int]),\n # A locally accessible filepath for an output parameter of type list.\n output_list_parameter_path: OutputPath(List[str]),\n):\n \"\"\"Dummy preprocessing step\"\"\"\n\n # Use Dataset.path to access a local file path for writing.\n # One can also use Dataset.uri to access the actual URI file path.\n with open(output_dataset_one.path, 'w') as f:\n f.write(message)\n\n # OutputPath is used to just pass the local file path of the output artifact\n # to the function.\n with open(output_dataset_two_path, 'w') as f:\n f.write(message)\n\n with open(output_parameter_path, 'w') as f:\n f.write(message)\n\n with open(output_bool_parameter_path, 'w') as f:\n f.write(str(True)) # use either `str()` or `json.dumps()` for bool values.\n\n import json\n with open(output_dict_parameter_path, 'w') as f:\n f.write(json.dumps({'A': 1, 'B': 2}))\n\n with open(output_list_parameter_path, 'w') as f:\n f.write(json.dumps(['a', 'b', 'c']))\n\n\ndef executor_main():\n import argparse\n import json\n\n parser = argparse.ArgumentParser(description='Process some integers.')\n parser.add_argument('--executor_input', type=str)\n parser.add_argument('--function_to_execute', type=str)\n\n args, _ = parser.parse_known_args()\n executor_input = json.loads(args.executor_input)\n function_to_execute = globals()[args.function_to_execute]\n\n executor = Executor(executor_input=executor_input,\n function_to_execute=function_to_execute)\n\n executor.execute()\n\n\nif __name__ == '__main__':\n executor_main()\n" ], "image": "python:3.7" } @@ -144,7 +144,7 @@ "sh", "-ec", "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n", - "\nimport json\nimport inspect\nfrom typing import *\n\n# Copyright 2021 The Kubeflow Authors\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Classes for input/output types in KFP SDK.\n\nThese are only compatible with v2 Pipelines.\n\"\"\"\n\nimport os\nfrom typing import Dict, Generic, List, Optional, Type, TypeVar, Union\n\n\n_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'\n_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'\n_S3_LOCAL_MOUNT_PREFIX = '/s3/'\n\n\nclass Artifact(object):\n \"\"\"Generic Artifact class.\n\n This class is meant to represent the metadata around an input or output\n machine-learning Artifact. 
Artifacts have URIs, which can either be a location\n on disk (or Cloud storage) or some other resource identifier such as\n an API resource name.\n\n Artifacts carry a `metadata` field, which is a dictionary for storing\n metadata related to this artifact.\n \"\"\"\n TYPE_NAME = 'system.Artifact'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n \"\"\"Initializes the Artifact with the given name, URI and metadata.\"\"\"\n self.uri = uri or ''\n self.name = name or ''\n self.metadata = metadata or {}\n\n @property\n def path(self):\n return self._get_path()\n\n @path.setter\n def path(self, path):\n self._set_path(path)\n\n def _get_path(self) -> Optional[str]:\n if self.uri.startswith('gs://'):\n return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]\n elif self.uri.startswith('minio://'):\n return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]\n elif self.uri.startswith('s3://'):\n return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]\n return None\n\n def _set_path(self, path):\n if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):\n path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):\n path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):\n path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]\n self.uri = path\n\n\nclass Model(Artifact):\n \"\"\"An artifact representing an ML Model.\"\"\"\n TYPE_NAME = 'system.Model'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n @property\n def framework(self) -> str:\n return self._get_framework()\n\n def _get_framework(self) -> str:\n return self.metadata.get('framework', '')\n\n @framework.setter\n def framework(self, framework: str):\n self._set_framework(framework)\n\n def _set_framework(self, framework: str):\n self.metadata['framework'] = framework\n\n\nclass Dataset(Artifact):\n \"\"\"An artifact representing an ML Dataset.\"\"\"\n TYPE_NAME = 'system.Dataset'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Metrics(Artifact):\n \"\"\"Represent a simple base Artifact type to store key-value scalar metrics.\"\"\"\n TYPE_NAME = 'system.Metrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_metric(self, metric: str, value: float):\n \"\"\"Sets a custom scalar metric.\n\n Args:\n metric: Metric key\n value: Value of the metric.\n \"\"\"\n self.metadata[metric] = value\n\n\nclass ClassificationMetrics(Artifact):\n \"\"\"Represents Artifact class to store Classification Metrics.\"\"\"\n TYPE_NAME = 'system.ClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):\n \"\"\"Logs a single data point in the ROC Curve.\n\n Args:\n fpr: False positive rate value of the data point.\n tpr: True positive rate value of the data point.\n threshold: Threshold value for the data point.\n \"\"\"\n\n roc_reading = {\n 'confidenceThreshold': threshold,\n 'recall': tpr,\n 
'falsePositiveRate': fpr\n }\n if 'confidenceMetrics' not in self.metadata.keys():\n self.metadata['confidenceMetrics'] = []\n\n self.metadata['confidenceMetrics'].append(roc_reading)\n\n def log_roc_curve(self, fpr: List[float], tpr: List[float],\n threshold: List[float]):\n \"\"\"Logs an ROC curve.\n\n The list length of fpr, tpr and threshold must be the same.\n\n Args:\n fpr: List of false positive rate values.\n tpr: List of true positive rate values.\n threshold: List of threshold values.\n \"\"\"\n if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(tpr) != len(\n threshold):\n raise ValueError('Length of fpr, tpr and threshold must be the same. '\n 'Got lengths {}, {} and {} respectively.'.format(\n len(fpr), len(tpr), len(threshold)))\n\n for i in range(len(fpr)):\n self.log_roc_data_point(fpr=fpr[i], tpr=tpr[i], threshold=threshold[i])\n\n def set_confusion_matrix_categories(self, categories: List[str]):\n \"\"\"Stores confusion matrix categories.\n\n Args:\n categories: List of strings specifying the categories.\n \"\"\"\n\n self._categories = []\n annotation_specs = []\n for category in categories:\n annotation_spec = {'displayName': category}\n self._categories.append(category)\n annotation_specs.append(annotation_spec)\n\n self._matrix = []\n for row in range(len(self._categories)):\n self._matrix.append({'row': [0] * len(self._categories)})\n\n self._confusion_matrix = {}\n self._confusion_matrix['annotationSpecs'] = annotation_specs\n self._confusion_matrix['rows'] = self._matrix\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_row(self, row_category: str, row: List[float]):\n \"\"\"Logs a confusion matrix row.\n\n Args:\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n\n Raises:\n ValueError: If row_category is not in the list of categories\n set in set_categories call.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if len(row) != len(self._categories):\n raise ValueError('Invalid row. Expected size: {} got: {}'.\\\n format(len(self._categories), len(row)))\n\n self._matrix[self._categories.index(row_category)] = {'row': row}\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_cell(self, row_category: str, col_category: str,\n value: int):\n \"\"\"Logs a cell in the confusion matrix.\n\n Args:\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n\n Raises:\n ValueError: If row_category or col_category is not in the list of\n categories set in set_categories.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if col_category not in self._categories:\n raise ValueError('Invalid category: {} passed. 
Expected one of: {}'.\\\n format(row_category, self._categories))\n\n self._matrix[self._categories.index(row_category)]['row'][\n self._categories.index(col_category)] = value\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix(self, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Logs a confusion matrix.\n\n Args:\n categories: List of the category names.\n matrix: Complete confusion matrix.\n\n Raises:\n ValueError: Length of categories does not match number of rows or columns.\n \"\"\"\n self.set_confusion_matrix_categories(categories)\n\n if len(matrix) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n for index in range(len(categories)):\n if len(matrix[index]) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n self.log_confusion_matrix_row(categories[index], matrix[index])\n\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n\nclass SlicedClassificationMetrics(Artifact):\n \"\"\"Metrics class representing Sliced Classification Metrics.\n\n Similar to ClassificationMetrics clients using this class are expected to use\n log methods of the class to log metrics with the difference being each log\n method takes a slice to associate the ClassificationMetrics.\n\n \"\"\"\n\n TYPE_NAME = 'system.SlicedClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def _upsert_classification_metrics_for_slice(self, slice: str):\n \"\"\"Upserts the classification metrics instance for a slice.\"\"\"\n if slice not in self._sliced_metrics:\n self._sliced_metrics[slice] = ClassificationMetrics()\n\n def _update_metadata(self, slice: str):\n \"\"\"Updates metadata to adhere to the metrics schema.\"\"\"\n self.metadata = {}\n self.metadata['evaluationSlices'] = []\n for slice in self._sliced_metrics.keys():\n slice_metrics = {\n 'slice': slice,\n 'sliceClassificationMetrics': self._sliced_metrics[slice].metadata\n }\n self.metadata['evaluationSlices'].append(slice_metrics)\n\n def log_roc_reading(self, slice: str, threshold: float, tpr: float,\n fpr: float):\n \"\"\"Logs a single data point in the ROC Curve of a slice.\n\n Args:\n slice: String representing slice label.\n threshold: Thresold value for the data point.\n tpr: True positive rate value of the data point.\n fpr: False positive rate value of the data point.\n \"\"\"\n\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_roc_reading(threshold, tpr, fpr)\n self._update_metadata(slice)\n\n def load_roc_readings(self, slice: str, readings: List[List[float]]):\n \"\"\"Supports bulk loading ROC Curve readings for a slice.\n\n Args:\n slice: String representing slice label.\n readings: A 2-D list providing ROC Curve data points.\n The expected order of the data points is: threshold,\n true_positive_rate, false_positive_rate.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].load_roc_readings(readings)\n self._update_metadata(slice)\n\n def set_confusion_matrix_categories(self, slice: str, categories: List[str]):\n \"\"\"Stores confusion matrix categories for a slice..\n\n Categories are stored in the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n categories: 
List of strings specifying the categories.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].set_confusion_matrix_categories(categories)\n self._update_metadata(slice)\n\n def log_confusion_matrix_row(self, slice: str, row_category: str,\n row: List[int]):\n \"\"\"Logs a confusion matrix row for a slice.\n\n Row is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)\n self._update_metadata(slice)\n\n def log_confusion_matrix_cell(self, slice: str, row_category: str,\n col_category: str, value: int):\n \"\"\"Logs a confusion matrix cell for a slice..\n\n Cell is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(\n row_category, col_category, value)\n self._update_metadata(slice)\n\n def load_confusion_matrix(self, slice: str, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Supports bulk loading the whole confusion matrix for a slice.\n\n Args:\n slice: String representing slice label.\n categories: List of the category names.\n matrix: Complete confusion matrix.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(categories, matrix)\n self._update_metadata(slice)\n\n\nT = TypeVar('T')\n\n\nclass InputAnnotation():\n \"\"\"Marker type for input artifacts.\"\"\"\n pass\n\n\n\nclass OutputAnnotation():\n \"\"\"Marker type for output artifacts.\"\"\"\n pass\n\n\n# TODO: Use typing.Annotated instead of this hack.\n# With typing.Annotated (Python 3.9+ or typing_extensions package), the\n# following would look like:\n# Input = typing.Annotated[T, InputAnnotation]\n# Output = typing.Annotated[T, OutputAnnotation]\n\n\n# Input represents an Input artifact of type T.\nInput = Union[T, InputAnnotation]\n\n# Output represents an Output artifact of type T.\nOutput = Union[T, OutputAnnotation]\n\n\ndef is_artifact_annotation(typ) -> bool:\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [InputAnnotation, OutputAnnotation]\n\n if not hasattr(typ, '__origin__'):\n return False\n\n\n if typ.__origin__ != Union and type(typ.__origin__) != type(Union):\n return False\n\n\n if not hasattr(typ, '__args__') or len(typ.__args__) != 2:\n return False\n\n if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:\n return False\n\n return True\n\ndef is_input_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Input[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation\n\n return typ.__args__[1] == InputAnnotation\n\ndef is_output_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Output[T].\"\"\"\n if not is_artifact_annotation(typ):\n 
return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation\n\n return typ.__args__[1] == OutputAnnotation\n\ndef get_io_artifact_class(typ):\n if not is_artifact_annotation(typ):\n return None\n if typ == Input or typ == Output:\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[1]\n\n return typ.__args__[0]\n\ndef get_io_artifact_annotation(typ):\n if not is_artifact_annotation(typ):\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[2]\n\n return typ.__args__[1]\n\n\n\n_SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = {\n x.TYPE_NAME: x\n for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]\n}\n\n\ndef create_runtime_artifact(runtime_artifact: Dict) -> Artifact:\n \"\"\"Creates an Artifact instance from the specified RuntimeArtifact.\n\n Args:\n runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.\n \"\"\"\n schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')\n\n artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)\n if not artifact_type:\n artifact_type = Artifact\n return artifact_type(\n uri=runtime_artifact.get('uri', ''),\n name=runtime_artifact.get('name', ''),\n metadata=runtime_artifact.get('metadata', {}),\n )\n\nclass InputPath:\n '''When creating component from function, :class:`.InputPath` should be used as function parameter annotation to tell the system to pass the *data file path* to the function instead of passing the actual data.'''\n def __init__(self, type=None):\n self.type = type\n\nclass OutputPath:\n '''When creating component from function, :class:`.OutputPath` should be used as function parameter annotation to tell the system that the function wants to output data by writing it into a file with the given path instead of returning the data from the function.'''\n def __init__(self, type=None):\n self.type = type\n\nclass Executor():\n \"\"\"Executor executes v2-based Python function components.\"\"\"\n\n def __init__(self, executor_input: Dict, function_to_execute: Callable):\n self._func = function_to_execute\n self._input = executor_input\n self._input_artifacts: Dict[str, Artifact] = {}\n self._output_artifacts: Dict[str, Artifact] = {}\n\n for name, artifacts in self._input.get('inputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._input_artifacts[name] = self._make_input_artifact(\n artifacts_list[0])\n\n for name, artifacts in self._input.get('outputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._output_artifacts[name] = self._make_output_artifact(\n artifacts_list[0])\n\n self._return_annotation = inspect.signature(self._func).return_annotation\n self._executor_output = {}\n\n @classmethod\n def _make_input_artifact(cls, runtime_artifact: Dict):\n return create_runtime_artifact(runtime_artifact)\n\n @classmethod\n def _make_output_artifact(cls, runtime_artifact: Dict):\n import os\n artifact = create_runtime_artifact(runtime_artifact)\n os.makedirs(os.path.dirname(artifact.path), exist_ok=True)\n return artifact\n\n def _get_input_artifact(self, name: str):\n return self._input_artifacts.get(name)\n\n def _get_output_artifact(self, name: str):\n return 
self._output_artifacts.get(name)\n\n  def _get_input_parameter_value(self, parameter_name: str, parameter_type: Any):\n    parameter = self._input.get('inputs', {}).get('parameters',\n                                                  {}).get(parameter_name, None)\n    if parameter is None:\n      return None\n\n    if parameter.get('stringValue'):\n      if parameter_type == str:\n        return parameter['stringValue']\n      elif parameter_type == bool:\n        # Use `.lower()` so it can also handle 'True' and 'False' (resulted from\n        # `str(True)` and `str(False)`, respectively.\n        return json.loads(parameter['stringValue'].lower())\n      else:\n        return json.loads(parameter['stringValue'])\n    elif parameter.get('intValue'):\n      return int(parameter['intValue'])\n    elif parameter.get('doubleValue'):\n      return float(parameter['doubleValue'])\n\n  def _get_output_parameter_path(self, parameter_name: str):\n    parameter_name = self._maybe_strip_path_suffix(parameter_name)\n    parameter = self._input.get('outputs',\n                                {}).get('parameters',\n                                        {}).get(parameter_name, None)\n    if parameter is None:\n      return None\n\n    import os\n    path = parameter.get('outputFile', None)\n    if path:\n      os.makedirs(os.path.dirname(path), exist_ok=True)\n    return path\n\n  def _get_output_artifact_path(self, artifact_name: str):\n    artifact_name = self._maybe_strip_path_suffix(artifact_name)\n    output_artifact = self._output_artifacts.get(artifact_name)\n    if not output_artifact:\n      raise ValueError(\n          'Failed to get output artifact path for artifact name {}'.format(\n              artifact_name))\n    return output_artifact.path\n\n  def _get_input_artifact_path(self, artifact_name: str):\n    artifact_name = self._maybe_strip_path_suffix(artifact_name)\n    input_artifact = self._input_artifacts.get(artifact_name)\n    if not input_artifact:\n      raise ValueError(\n          'Failed to get input artifact path for artifact name {}'.format(\n              artifact_name))\n    return input_artifact.path\n\n  def _write_output_parameter_value(self, name: str,\n                                    value: Union[str, int, float, bool, dict,\n                                                 list, Dict, List]):\n    if type(value) == str:\n      output = {'stringValue': value}\n    elif type(value) == int:\n      output = {'intValue': value}\n    elif type(value) == float:\n      output = {'doubleValue': value}\n    else:\n      # For bool, list, dict, List, Dict, json serialize the value.\n      output = {'stringValue': json.dumps(value)}\n\n    if not self._executor_output.get('parameters'):\n      self._executor_output['parameters'] = {}\n\n    self._executor_output['parameters'][name] = output\n\n  def _write_output_artifact_payload(self, name: str, value: Any):\n    path = self._get_output_artifact_path(name)\n    with open(path, 'w') as f:\n      f.write(str(value))\n\n  # TODO: extract to a util\n  @classmethod\n  def _get_short_type_name(cls, type_name: str) -> str:\n    \"\"\"Extracts the short form type name.\n\n    This method is used for looking up serializer for a given type.\n\n    For example:\n      typing.List -> List\n      typing.List[int] -> List\n      typing.Dict[str, str] -> Dict\n      List -> List\n      str -> str\n\n    Args:\n      type_name: The original type name.\n\n    Returns:\n      The short form type name or the original name if pattern doesn't match.\n    \"\"\"\n    import re\n    match = re.match('(typing\\.)?(?P<type>\\w+)(?:\\[.+\\])?', type_name)\n    if match:\n      return match.group('type')\n    else:\n      return type_name\n\n  # TODO: merge with type_utils.is_parameter_type\n  @classmethod\n  def _is_parameter(cls, annotation: Any) -> bool:\n    if type(annotation) == type:\n      return annotation in [str, int, float, bool, dict, list]\n\n    # Annotation could be, for instance `typing.Dict[str, str]`, etc.\n    return cls._get_short_type_name(str(annotation)) in ['Dict', 
'List']\n\n @classmethod\n def _is_artifact(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, Artifact)\n return False\n\n @classmethod\n def _is_named_tuple(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, tuple) and hasattr(\n annotation, '_fields') and hasattr(annotation, '__annotations__')\n return False\n\n def _handle_single_return_value(self, output_name: str, annotation_type: Any,\n return_value: Any):\n if self._is_parameter(annotation_type):\n if type(return_value) != annotation_type:\n raise ValueError(\n 'Function `{}` returned value of type {}; want type {}'.format(\n self._func.__name__, type(return_value), annotation_type))\n self._write_output_parameter_value(output_name, return_value)\n elif self._is_artifact(annotation_type):\n self._write_output_artifact_payload(output_name, return_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. Must be one of `str`, `int`, `float`, or a'\n ' subclass of `Artifact`'.format(annotation_type))\n\n def _write_executor_output(self, func_output: Optional[Any] = None):\n if self._output_artifacts:\n self._executor_output['artifacts'] = {}\n\n for name, artifact in self._output_artifacts.items():\n runtime_artifact = {\n 'name': artifact.name,\n 'uri': artifact.uri,\n 'metadata': artifact.metadata,\n }\n artifacts_list = {'artifacts': [runtime_artifact]}\n\n self._executor_output['artifacts'][name] = artifacts_list\n\n if func_output is not None:\n if self._is_parameter(self._return_annotation) or self._is_artifact(\n self._return_annotation):\n # Note: single output is named `Output` in component.yaml.\n self._handle_single_return_value('Output', self._return_annotation,\n func_output)\n elif self._is_named_tuple(self._return_annotation):\n if len(self._return_annotation._fields) != len(func_output):\n raise RuntimeError(\n 'Expected {} return values from function `{}`, got {}'.format(\n len(self._return_annotation._fields), self._func.__name__,\n len(func_output)))\n for i in range(len(self._return_annotation._fields)):\n field = self._return_annotation._fields[i]\n field_type = self._return_annotation.__annotations__[field]\n if type(func_output) == tuple:\n field_value = func_output[i]\n else:\n field_value = getattr(func_output, field)\n self._handle_single_return_value(field, field_type, field_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. 
Must be one of `str`, `int`, `float`, a'\n ' subclass of `Artifact`, or a NamedTuple collection of these types.'\n .format(self._return_annotation))\n\n import os\n os.makedirs(\n os.path.dirname(self._input['outputs']['outputFile']), exist_ok=True)\n with open(self._input['outputs']['outputFile'], 'w') as f:\n f.write(json.dumps(self._executor_output))\n\n def _maybe_strip_path_suffix(self, name) -> str:\n if name.endswith('_path'):\n name = name[0:-len('_path')]\n if name.endswith('_file'):\n name = name[0:-len('_file')]\n return name\n\n def execute(self):\n annotations = inspect.getfullargspec(self._func).annotations\n\n # Function arguments.\n func_kwargs = {}\n\n for k, v in annotations.items():\n if k == 'return':\n continue\n\n if self._is_parameter(v):\n func_kwargs[k] = self._get_input_parameter_value(k, v)\n\n if is_artifact_annotation(v):\n if is_input_artifact(v):\n func_kwargs[k] = self._get_input_artifact(k)\n if is_output_artifact(v):\n func_kwargs[k] = self._get_output_artifact(k)\n\n elif isinstance(v, OutputPath):\n if self._is_parameter(v.type):\n func_kwargs[k] = self._get_output_parameter_path(k)\n else:\n func_kwargs[k] = self._get_output_artifact_path(k)\n elif isinstance(v, InputPath):\n func_kwargs[k] = self._get_input_artifact_path(k)\n\n result = self._func(**func_kwargs)\n self._write_executor_output(result)\n\n\ndef train(\n # Use InputPath to get a locally accessible path for the input artifact\n # of type `Dataset`.\n dataset_one_path: InputPath('Dataset'),\n # Use Input[T] to get a metadata-rich handle to the input artifact\n # of type `Dataset`.\n dataset_two: Input[Dataset],\n # An input parameter of type string.\n message: str,\n # Use Output[T] to get a metadata-rich handle to the output artifact\n # of type `Dataset`.\n model: Output[Model],\n # An input parameter of type bool.\n input_bool: bool,\n # An input parameter of type dict.\n input_dict: Dict[str, int],\n # An input parameter of type List[str].\n input_list: List[str],\n # An input parameter of type int with a default value.\n num_steps: int = 100,\n):\n \"\"\"Dummy Training step\"\"\"\n with open(dataset_one_path, 'r') as input_file:\n dataset_one_contents = input_file.read()\n\n with open(dataset_two.path, 'r') as input_file:\n dataset_two_contents = input_file.read()\n\n line = (f'dataset_one_contents: {dataset_one_contents} || '\n f'dataset_two_contents: {dataset_two_contents} || '\n f'message: {message} || '\n f'input_bool: {input_bool}, type {type(input_bool)} || '\n f'input_dict: {input_dict}, type {type(input_dict)} || '\n f'input_list: {input_list}, type {type(input_list)} \\n')\n\n with open(model.path, 'w') as output_file:\n for i in range(num_steps):\n output_file.write('Step {}\\n{}\\n=====\\n'.format(i, line))\n\n # Use model.get() to get a Model artifact, which has a .metadata dictionary\n # to store arbitrary metadata for the output artifact.\n model.metadata['accuracy'] = 0.9\n\n\ndef executor_main():\n import argparse\n import json\n\n parser = argparse.ArgumentParser(description='Process some integers.')\n parser.add_argument('--executor_input', type=str)\n parser.add_argument('--function_to_execute', type=str)\n\n args, _ = parser.parse_known_args()\n executor_input = json.loads(args.executor_input)\n function_to_execute = globals()[args.function_to_execute]\n\n executor = Executor(executor_input=executor_input,\n function_to_execute=function_to_execute)\n\n executor.execute()\n\n\nif __name__ == '__main__':\n executor_main()\n" + "\nimport json\nimport inspect\nfrom 
typing import *\n\n# Copyright 2021 The Kubeflow Authors\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Classes for input/output types in KFP SDK.\n\nThese are only compatible with v2 Pipelines.\n\"\"\"\n\nimport os\nfrom typing import Dict, Generic, List, Optional, Type, TypeVar, Union\n\n_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'\n_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'\n_S3_LOCAL_MOUNT_PREFIX = '/s3/'\n\n\nclass Artifact(object):\n \"\"\"Generic Artifact class.\n\n This class is meant to represent the metadata around an input or output\n machine-learning Artifact. Artifacts have URIs, which can either be a location\n on disk (or Cloud storage) or some other resource identifier such as\n an API resource name.\n\n Artifacts carry a `metadata` field, which is a dictionary for storing\n metadata related to this artifact.\n \"\"\"\n TYPE_NAME = 'system.Artifact'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n \"\"\"Initializes the Artifact with the given name, URI and metadata.\"\"\"\n self.uri = uri or ''\n self.name = name or ''\n self.metadata = metadata or {}\n\n @property\n def path(self):\n return self._get_path()\n\n @path.setter\n def path(self, path):\n self._set_path(path)\n\n def _get_path(self) -> Optional[str]:\n if self.uri.startswith('gs://'):\n return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]\n elif self.uri.startswith('minio://'):\n return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]\n elif self.uri.startswith('s3://'):\n return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]\n return None\n\n def _set_path(self, path):\n if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):\n path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):\n path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):\n path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]\n self.uri = path\n\n\nclass Model(Artifact):\n \"\"\"An artifact representing an ML Model.\"\"\"\n TYPE_NAME = 'system.Model'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n @property\n def framework(self) -> str:\n return self._get_framework()\n\n def _get_framework(self) -> str:\n return self.metadata.get('framework', '')\n\n @framework.setter\n def framework(self, framework: str):\n self._set_framework(framework)\n\n def _set_framework(self, framework: str):\n self.metadata['framework'] = framework\n\n\nclass Dataset(Artifact):\n \"\"\"An artifact representing an ML Dataset.\"\"\"\n TYPE_NAME = 'system.Dataset'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Metrics(Artifact):\n \"\"\"Represent a simple base Artifact type to store key-value scalar metrics.\"\"\"\n TYPE_NAME = 
'system.Metrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_metric(self, metric: str, value: float):\n \"\"\"Sets a custom scalar metric.\n\n Args:\n metric: Metric key\n value: Value of the metric.\n \"\"\"\n self.metadata[metric] = value\n\n\nclass ClassificationMetrics(Artifact):\n \"\"\"Represents Artifact class to store Classification Metrics.\"\"\"\n TYPE_NAME = 'system.ClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):\n \"\"\"Logs a single data point in the ROC Curve.\n\n Args:\n fpr: False positive rate value of the data point.\n tpr: True positive rate value of the data point.\n threshold: Threshold value for the data point.\n \"\"\"\n\n roc_reading = {\n 'confidenceThreshold': threshold,\n 'recall': tpr,\n 'falsePositiveRate': fpr\n }\n if 'confidenceMetrics' not in self.metadata.keys():\n self.metadata['confidenceMetrics'] = []\n\n self.metadata['confidenceMetrics'].append(roc_reading)\n\n def log_roc_curve(self, fpr: List[float], tpr: List[float],\n threshold: List[float]):\n \"\"\"Logs an ROC curve.\n\n The list length of fpr, tpr and threshold must be the same.\n\n Args:\n fpr: List of false positive rate values.\n tpr: List of true positive rate values.\n threshold: List of threshold values.\n \"\"\"\n if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(tpr) != len(\n threshold):\n raise ValueError('Length of fpr, tpr and threshold must be the same. '\n 'Got lengths {}, {} and {} respectively.'.format(\n len(fpr), len(tpr), len(threshold)))\n\n for i in range(len(fpr)):\n self.log_roc_data_point(fpr=fpr[i], tpr=tpr[i], threshold=threshold[i])\n\n def set_confusion_matrix_categories(self, categories: List[str]):\n \"\"\"Stores confusion matrix categories.\n\n Args:\n categories: List of strings specifying the categories.\n \"\"\"\n\n self._categories = []\n annotation_specs = []\n for category in categories:\n annotation_spec = {'displayName': category}\n self._categories.append(category)\n annotation_specs.append(annotation_spec)\n\n self._matrix = []\n for row in range(len(self._categories)):\n self._matrix.append({'row': [0] * len(self._categories)})\n\n self._confusion_matrix = {}\n self._confusion_matrix['annotationSpecs'] = annotation_specs\n self._confusion_matrix['rows'] = self._matrix\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_row(self, row_category: str, row: List[float]):\n \"\"\"Logs a confusion matrix row.\n\n Args:\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n\n Raises:\n ValueError: If row_category is not in the list of categories\n set in set_categories call.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if len(row) != len(self._categories):\n raise ValueError('Invalid row. 
Expected size: {} got: {}'.\\\n format(len(self._categories), len(row)))\n\n self._matrix[self._categories.index(row_category)] = {'row': row}\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_cell(self, row_category: str, col_category: str,\n value: int):\n \"\"\"Logs a cell in the confusion matrix.\n\n Args:\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n\n Raises:\n ValueError: If row_category or col_category is not in the list of\n categories set in set_categories.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if col_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n self._matrix[self._categories.index(row_category)]['row'][\n self._categories.index(col_category)] = value\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix(self, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Logs a confusion matrix.\n\n Args:\n categories: List of the category names.\n matrix: Complete confusion matrix.\n\n Raises:\n ValueError: Length of categories does not match number of rows or columns.\n \"\"\"\n self.set_confusion_matrix_categories(categories)\n\n if len(matrix) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n for index in range(len(categories)):\n if len(matrix[index]) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n self.log_confusion_matrix_row(categories[index], matrix[index])\n\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n\nclass SlicedClassificationMetrics(Artifact):\n \"\"\"Metrics class representing Sliced Classification Metrics.\n\n Similar to ClassificationMetrics clients using this class are expected to use\n log methods of the class to log metrics with the difference being each log\n method takes a slice to associate the ClassificationMetrics.\n\n \"\"\"\n\n TYPE_NAME = 'system.SlicedClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def _upsert_classification_metrics_for_slice(self, slice: str):\n \"\"\"Upserts the classification metrics instance for a slice.\"\"\"\n if slice not in self._sliced_metrics:\n self._sliced_metrics[slice] = ClassificationMetrics()\n\n def _update_metadata(self, slice: str):\n \"\"\"Updates metadata to adhere to the metrics schema.\"\"\"\n self.metadata = {}\n self.metadata['evaluationSlices'] = []\n for slice in self._sliced_metrics.keys():\n slice_metrics = {\n 'slice': slice,\n 'sliceClassificationMetrics': self._sliced_metrics[slice].metadata\n }\n self.metadata['evaluationSlices'].append(slice_metrics)\n\n def log_roc_reading(self, slice: str, threshold: float, tpr: float,\n fpr: float):\n \"\"\"Logs a single data point in the ROC Curve of a slice.\n\n Args:\n slice: String representing slice label.\n threshold: Thresold value for the data point.\n tpr: True positive rate value of the data point.\n fpr: False positive rate value of the data point.\n \"\"\"\n\n 
self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_roc_reading(threshold, tpr, fpr)\n self._update_metadata(slice)\n\n def load_roc_readings(self, slice: str, readings: List[List[float]]):\n \"\"\"Supports bulk loading ROC Curve readings for a slice.\n\n Args:\n slice: String representing slice label.\n readings: A 2-D list providing ROC Curve data points.\n The expected order of the data points is: threshold,\n true_positive_rate, false_positive_rate.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].load_roc_readings(readings)\n self._update_metadata(slice)\n\n def set_confusion_matrix_categories(self, slice: str, categories: List[str]):\n \"\"\"Stores confusion matrix categories for a slice..\n\n Categories are stored in the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n categories: List of strings specifying the categories.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].set_confusion_matrix_categories(categories)\n self._update_metadata(slice)\n\n def log_confusion_matrix_row(self, slice: str, row_category: str,\n row: List[int]):\n \"\"\"Logs a confusion matrix row for a slice.\n\n Row is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)\n self._update_metadata(slice)\n\n def log_confusion_matrix_cell(self, slice: str, row_category: str,\n col_category: str, value: int):\n \"\"\"Logs a confusion matrix cell for a slice..\n\n Cell is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(\n row_category, col_category, value)\n self._update_metadata(slice)\n\n def load_confusion_matrix(self, slice: str, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Supports bulk loading the whole confusion matrix for a slice.\n\n Args:\n slice: String representing slice label.\n categories: List of the category names.\n matrix: Complete confusion matrix.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(categories, matrix)\n self._update_metadata(slice)\n\n\nclass HTML(Artifact):\n \"\"\"An artifact representing an HTML file.\"\"\"\n TYPE_NAME = 'system.HTML'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Markdown(Artifact):\n \"\"\"An artifact representing an Markdown file.\"\"\"\n TYPE_NAME = 'system.Markdown'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nT = TypeVar('T')\n\n\nclass InputAnnotation():\n \"\"\"Marker type for input artifacts.\"\"\"\n pass\n\n\nclass 
OutputAnnotation():\n \"\"\"Marker type for output artifacts.\"\"\"\n pass\n\n\n# TODO: Use typing.Annotated instead of this hack.\n# With typing.Annotated (Python 3.9+ or typing_extensions package), the\n# following would look like:\n# Input = typing.Annotated[T, InputAnnotation]\n# Output = typing.Annotated[T, OutputAnnotation]\n\n# Input represents an Input artifact of type T.\nInput = Union[T, InputAnnotation]\n\n# Output represents an Output artifact of type T.\nOutput = Union[T, OutputAnnotation]\n\n\ndef is_artifact_annotation(typ) -> bool:\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [\n InputAnnotation, OutputAnnotation\n ]\n\n if not hasattr(typ, '__origin__'):\n return False\n\n if typ.__origin__ != Union and type(typ.__origin__) != type(Union):\n return False\n\n if not hasattr(typ, '__args__') or len(typ.__args__) != 2:\n return False\n\n if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:\n return False\n\n return True\n\n\ndef is_input_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Input[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation\n\n return typ.__args__[1] == InputAnnotation\n\n\ndef is_output_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Output[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation\n\n return typ.__args__[1] == OutputAnnotation\n\n\ndef get_io_artifact_class(typ):\n if not is_artifact_annotation(typ):\n return None\n if typ == Input or typ == Output:\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[1]\n\n return typ.__args__[0]\n\n\ndef get_io_artifact_annotation(typ):\n if not is_artifact_annotation(typ):\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[2]\n\n return typ.__args__[1]\n\n\n_SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = {\n x.TYPE_NAME: x\n for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]\n}\n\n\ndef create_runtime_artifact(runtime_artifact: Dict) -> Artifact:\n \"\"\"Creates an Artifact instance from the specified RuntimeArtifact.\n\n Args:\n runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.\n \"\"\"\n schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')\n\n artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)\n if not artifact_type:\n artifact_type = Artifact\n return artifact_type(\n uri=runtime_artifact.get('uri', ''),\n name=runtime_artifact.get('name', ''),\n metadata=runtime_artifact.get('metadata', {}),\n )\n\nclass InputPath:\n '''When creating component from function, :class:`.InputPath` should be used as function parameter annotation to tell the system to pass the *data file path* to the function instead of passing the actual data.'''\n def __init__(self, type=None):\n self.type = type\n\nclass OutputPath:\n '''When creating component from function, :class:`.OutputPath` should be used as function parameter annotation to tell the system that the function wants to output data by writing it into a file with the given 
path instead of returning the data from the function.'''\n def __init__(self, type=None):\n self.type = type\n\nclass Executor():\n \"\"\"Executor executes v2-based Python function components.\"\"\"\n\n def __init__(self, executor_input: Dict, function_to_execute: Callable):\n self._func = function_to_execute\n self._input = executor_input\n self._input_artifacts: Dict[str, Artifact] = {}\n self._output_artifacts: Dict[str, Artifact] = {}\n\n for name, artifacts in self._input.get('inputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._input_artifacts[name] = self._make_input_artifact(\n artifacts_list[0])\n\n for name, artifacts in self._input.get('outputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._output_artifacts[name] = self._make_output_artifact(\n artifacts_list[0])\n\n self._return_annotation = inspect.signature(self._func).return_annotation\n self._executor_output = {}\n\n @classmethod\n def _make_input_artifact(cls, runtime_artifact: Dict):\n return create_runtime_artifact(runtime_artifact)\n\n @classmethod\n def _make_output_artifact(cls, runtime_artifact: Dict):\n import os\n artifact = create_runtime_artifact(runtime_artifact)\n os.makedirs(os.path.dirname(artifact.path), exist_ok=True)\n return artifact\n\n def _get_input_artifact(self, name: str):\n return self._input_artifacts.get(name)\n\n def _get_output_artifact(self, name: str):\n return self._output_artifacts.get(name)\n\n def _get_input_parameter_value(self, parameter_name: str, parameter_type: Any):\n parameter = self._input.get('inputs', {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n if parameter.get('stringValue'):\n if parameter_type == str:\n return parameter['stringValue']\n elif parameter_type == bool:\n # Use `.lower()` so it can also handle 'True' and 'False' (resulted from\n # `str(True)` and `str(False)`, respectively.\n return json.loads(parameter['stringValue'].lower())\n else:\n return json.loads(parameter['stringValue'])\n elif parameter.get('intValue'):\n return int(parameter['intValue'])\n elif parameter.get('doubleValue'):\n return float(parameter['doubleValue'])\n\n def _get_output_parameter_path(self, parameter_name: str):\n parameter_name = self._maybe_strip_path_suffix(parameter_name)\n parameter = self._input.get('outputs',\n {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n import os\n path = parameter.get('outputFile', None)\n if path:\n os.makedirs(os.path.dirname(path), exist_ok=True)\n return path\n\n def _get_output_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n output_artifact = self._output_artifacts.get(artifact_name)\n if not output_artifact:\n raise ValueError(\n 'Failed to get output artifact path for artifact name {}'.format(\n artifact_name))\n return output_artifact.path\n\n def _get_input_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n input_artifact = self._input_artifacts.get(artifact_name)\n if not input_artifact:\n raise ValueError(\n 'Failed to get input artifact path for artifact name {}'.format(\n artifact_name))\n return input_artifact.path\n\n def _write_output_parameter_value(self, name: str,\n value: Union[str, int, float, bool, dict,\n list, Dict, List]):\n if type(value) == str:\n output = {'stringValue': value}\n elif 
type(value) == int:\n      output = {'intValue': value}\n    elif type(value) == float:\n      output = {'doubleValue': value}\n    else:\n      # For bool, list, dict, List, Dict, json serialize the value.\n      output = {'stringValue': json.dumps(value)}\n\n    if not self._executor_output.get('parameters'):\n      self._executor_output['parameters'] = {}\n\n    self._executor_output['parameters'][name] = output\n\n  def _write_output_artifact_payload(self, name: str, value: Any):\n    path = self._get_output_artifact_path(name)\n    with open(path, 'w') as f:\n      f.write(str(value))\n\n  # TODO: extract to a util\n  @classmethod\n  def _get_short_type_name(cls, type_name: str) -> str:\n    \"\"\"Extracts the short form type name.\n\n    This method is used for looking up serializer for a given type.\n\n    For example:\n      typing.List -> List\n      typing.List[int] -> List\n      typing.Dict[str, str] -> Dict\n      List -> List\n      str -> str\n\n    Args:\n      type_name: The original type name.\n\n    Returns:\n      The short form type name or the original name if pattern doesn't match.\n    \"\"\"\n    import re\n    match = re.match('(typing\\.)?(?P<type>\\w+)(?:\\[.+\\])?', type_name)\n    if match:\n      return match.group('type')\n    else:\n      return type_name\n\n  # TODO: merge with type_utils.is_parameter_type\n  @classmethod\n  def _is_parameter(cls, annotation: Any) -> bool:\n    if type(annotation) == type:\n      return annotation in [str, int, float, bool, dict, list]\n\n    # Annotation could be, for instance `typing.Dict[str, str]`, etc.\n    return cls._get_short_type_name(str(annotation)) in ['Dict', 'List']\n\n  @classmethod\n  def _is_artifact(cls, annotation: Any) -> bool:\n    if type(annotation) == type:\n      return issubclass(annotation, Artifact)\n    return False\n\n  @classmethod\n  def _is_named_tuple(cls, annotation: Any) -> bool:\n    if type(annotation) == type:\n      return issubclass(annotation, tuple) and hasattr(\n          annotation, '_fields') and hasattr(annotation, '__annotations__')\n    return False\n\n  def _handle_single_return_value(self, output_name: str, annotation_type: Any,\n                                  return_value: Any):\n    if self._is_parameter(annotation_type):\n      if type(return_value) != annotation_type:\n        raise ValueError(\n            'Function `{}` returned value of type {}; want type {}'.format(\n                self._func.__name__, type(return_value), annotation_type))\n      self._write_output_parameter_value(output_name, return_value)\n    elif self._is_artifact(annotation_type):\n      self._write_output_artifact_payload(output_name, return_value)\n    else:\n      raise RuntimeError(\n          'Unknown return type: {}. 
Must be one of `str`, `int`, `float`, or a'\n ' subclass of `Artifact`'.format(annotation_type))\n\n def _write_executor_output(self, func_output: Optional[Any] = None):\n if self._output_artifacts:\n self._executor_output['artifacts'] = {}\n\n for name, artifact in self._output_artifacts.items():\n runtime_artifact = {\n 'name': artifact.name,\n 'uri': artifact.uri,\n 'metadata': artifact.metadata,\n }\n artifacts_list = {'artifacts': [runtime_artifact]}\n\n self._executor_output['artifacts'][name] = artifacts_list\n\n if func_output is not None:\n if self._is_parameter(self._return_annotation) or self._is_artifact(\n self._return_annotation):\n # Note: single output is named `Output` in component.yaml.\n self._handle_single_return_value('Output', self._return_annotation,\n func_output)\n elif self._is_named_tuple(self._return_annotation):\n if len(self._return_annotation._fields) != len(func_output):\n raise RuntimeError(\n 'Expected {} return values from function `{}`, got {}'.format(\n len(self._return_annotation._fields), self._func.__name__,\n len(func_output)))\n for i in range(len(self._return_annotation._fields)):\n field = self._return_annotation._fields[i]\n field_type = self._return_annotation.__annotations__[field]\n if type(func_output) == tuple:\n field_value = func_output[i]\n else:\n field_value = getattr(func_output, field)\n self._handle_single_return_value(field, field_type, field_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. Must be one of `str`, `int`, `float`, a'\n ' subclass of `Artifact`, or a NamedTuple collection of these types.'\n .format(self._return_annotation))\n\n import os\n os.makedirs(\n os.path.dirname(self._input['outputs']['outputFile']), exist_ok=True)\n with open(self._input['outputs']['outputFile'], 'w') as f:\n f.write(json.dumps(self._executor_output))\n\n def _maybe_strip_path_suffix(self, name) -> str:\n if name.endswith('_path'):\n name = name[0:-len('_path')]\n if name.endswith('_file'):\n name = name[0:-len('_file')]\n return name\n\n def execute(self):\n annotations = inspect.getfullargspec(self._func).annotations\n\n # Function arguments.\n func_kwargs = {}\n\n for k, v in annotations.items():\n if k == 'return':\n continue\n\n if self._is_parameter(v):\n func_kwargs[k] = self._get_input_parameter_value(k, v)\n\n if is_artifact_annotation(v):\n if is_input_artifact(v):\n func_kwargs[k] = self._get_input_artifact(k)\n if is_output_artifact(v):\n func_kwargs[k] = self._get_output_artifact(k)\n\n elif isinstance(v, OutputPath):\n if self._is_parameter(v.type):\n func_kwargs[k] = self._get_output_parameter_path(k)\n else:\n func_kwargs[k] = self._get_output_artifact_path(k)\n elif isinstance(v, InputPath):\n func_kwargs[k] = self._get_input_artifact_path(k)\n\n result = self._func(**func_kwargs)\n self._write_executor_output(result)\n\n\ndef train(\n # Use InputPath to get a locally accessible path for the input artifact\n # of type `Dataset`.\n dataset_one_path: InputPath('Dataset'),\n # Use Input[T] to get a metadata-rich handle to the input artifact\n # of type `Dataset`.\n dataset_two: Input[Dataset],\n # An input parameter of type string.\n message: str,\n # Use Output[T] to get a metadata-rich handle to the output artifact\n # of type `Dataset`.\n model: Output[Model],\n # An input parameter of type bool.\n input_bool: bool,\n # An input parameter of type dict.\n input_dict: Dict[str, int],\n # An input parameter of type List[str].\n input_list: List[str],\n # An input parameter of type int with a default value.\n 
num_steps: int = 100,\n):\n \"\"\"Dummy Training step\"\"\"\n with open(dataset_one_path, 'r') as input_file:\n dataset_one_contents = input_file.read()\n\n with open(dataset_two.path, 'r') as input_file:\n dataset_two_contents = input_file.read()\n\n line = (f'dataset_one_contents: {dataset_one_contents} || '\n f'dataset_two_contents: {dataset_two_contents} || '\n f'message: {message} || '\n f'input_bool: {input_bool}, type {type(input_bool)} || '\n f'input_dict: {input_dict}, type {type(input_dict)} || '\n f'input_list: {input_list}, type {type(input_list)} \\n')\n\n with open(model.path, 'w') as output_file:\n for i in range(num_steps):\n output_file.write('Step {}\\n{}\\n=====\\n'.format(i, line))\n\n # Use model.get() to get a Model artifact, which has a .metadata dictionary\n # to store arbitrary metadata for the output artifact.\n model.metadata['accuracy'] = 0.9\n\n\ndef executor_main():\n import argparse\n import json\n\n parser = argparse.ArgumentParser(description='Process some integers.')\n parser.add_argument('--executor_input', type=str)\n parser.add_argument('--function_to_execute', type=str)\n\n args, _ = parser.parse_known_args()\n executor_input = json.loads(args.executor_input)\n function_to_execute = globals()[args.function_to_execute]\n\n executor = Executor(executor_input=executor_input,\n function_to_execute=function_to_execute)\n\n executor.execute()\n\n\nif __name__ == '__main__':\n executor_main()\n" ], "image": "python:3.7" } @@ -249,7 +249,7 @@ } }, "schemaVersion": "2.0.0", - "sdkVersion": "kfp-1.6.4" + "sdkVersion": "kfp-1.6.6" }, "runtimeConfig": { "gcsOutputDirectory": "dummy_root" diff --git a/sdk/python/kfp/v2/compiler_cli_tests/test_data/lightweight_python_functions_v2_with_outputs.json b/sdk/python/kfp/v2/compiler_cli_tests/test_data/lightweight_python_functions_v2_with_outputs.json index 78af79e05cc..69febe0694e 100644 --- a/sdk/python/kfp/v2/compiler_cli_tests/test_data/lightweight_python_functions_v2_with_outputs.json +++ b/sdk/python/kfp/v2/compiler_cli_tests/test_data/lightweight_python_functions_v2_with_outputs.json @@ -115,7 +115,7 @@ "sh", "-ec", "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n", - "\nimport json\nimport inspect\nfrom typing import *\n\n# Copyright 2021 The Kubeflow Authors\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Classes for input/output types in KFP SDK.\n\nThese are only compatible with v2 Pipelines.\n\"\"\"\n\nimport os\nfrom typing import Dict, Generic, List, Optional, Type, TypeVar, Union\n\n\n_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'\n_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'\n_S3_LOCAL_MOUNT_PREFIX = '/s3/'\n\n\nclass Artifact(object):\n \"\"\"Generic Artifact class.\n\n This class is meant to represent the metadata around an input or output\n machine-learning Artifact. 
Artifacts have URIs, which can either be a location\n on disk (or Cloud storage) or some other resource identifier such as\n an API resource name.\n\n Artifacts carry a `metadata` field, which is a dictionary for storing\n metadata related to this artifact.\n \"\"\"\n TYPE_NAME = 'system.Artifact'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n \"\"\"Initializes the Artifact with the given name, URI and metadata.\"\"\"\n self.uri = uri or ''\n self.name = name or ''\n self.metadata = metadata or {}\n\n @property\n def path(self):\n return self._get_path()\n\n @path.setter\n def path(self, path):\n self._set_path(path)\n\n def _get_path(self) -> Optional[str]:\n if self.uri.startswith('gs://'):\n return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]\n elif self.uri.startswith('minio://'):\n return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]\n elif self.uri.startswith('s3://'):\n return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]\n return None\n\n def _set_path(self, path):\n if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):\n path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):\n path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):\n path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]\n self.uri = path\n\n\nclass Model(Artifact):\n \"\"\"An artifact representing an ML Model.\"\"\"\n TYPE_NAME = 'system.Model'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n @property\n def framework(self) -> str:\n return self._get_framework()\n\n def _get_framework(self) -> str:\n return self.metadata.get('framework', '')\n\n @framework.setter\n def framework(self, framework: str):\n self._set_framework(framework)\n\n def _set_framework(self, framework: str):\n self.metadata['framework'] = framework\n\n\nclass Dataset(Artifact):\n \"\"\"An artifact representing an ML Dataset.\"\"\"\n TYPE_NAME = 'system.Dataset'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Metrics(Artifact):\n \"\"\"Represent a simple base Artifact type to store key-value scalar metrics.\"\"\"\n TYPE_NAME = 'system.Metrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_metric(self, metric: str, value: float):\n \"\"\"Sets a custom scalar metric.\n\n Args:\n metric: Metric key\n value: Value of the metric.\n \"\"\"\n self.metadata[metric] = value\n\n\nclass ClassificationMetrics(Artifact):\n \"\"\"Represents Artifact class to store Classification Metrics.\"\"\"\n TYPE_NAME = 'system.ClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):\n \"\"\"Logs a single data point in the ROC Curve.\n\n Args:\n fpr: False positive rate value of the data point.\n tpr: True positive rate value of the data point.\n threshold: Threshold value for the data point.\n \"\"\"\n\n roc_reading = {\n 'confidenceThreshold': threshold,\n 'recall': tpr,\n 
'falsePositiveRate': fpr\n }\n if 'confidenceMetrics' not in self.metadata.keys():\n self.metadata['confidenceMetrics'] = []\n\n self.metadata['confidenceMetrics'].append(roc_reading)\n\n def log_roc_curve(self, fpr: List[float], tpr: List[float],\n threshold: List[float]):\n \"\"\"Logs an ROC curve.\n\n The list length of fpr, tpr and threshold must be the same.\n\n Args:\n fpr: List of false positive rate values.\n tpr: List of true positive rate values.\n threshold: List of threshold values.\n \"\"\"\n if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(tpr) != len(\n threshold):\n raise ValueError('Length of fpr, tpr and threshold must be the same. '\n 'Got lengths {}, {} and {} respectively.'.format(\n len(fpr), len(tpr), len(threshold)))\n\n for i in range(len(fpr)):\n self.log_roc_data_point(fpr=fpr[i], tpr=tpr[i], threshold=threshold[i])\n\n def set_confusion_matrix_categories(self, categories: List[str]):\n \"\"\"Stores confusion matrix categories.\n\n Args:\n categories: List of strings specifying the categories.\n \"\"\"\n\n self._categories = []\n annotation_specs = []\n for category in categories:\n annotation_spec = {'displayName': category}\n self._categories.append(category)\n annotation_specs.append(annotation_spec)\n\n self._matrix = []\n for row in range(len(self._categories)):\n self._matrix.append({'row': [0] * len(self._categories)})\n\n self._confusion_matrix = {}\n self._confusion_matrix['annotationSpecs'] = annotation_specs\n self._confusion_matrix['rows'] = self._matrix\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_row(self, row_category: str, row: List[float]):\n \"\"\"Logs a confusion matrix row.\n\n Args:\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n\n Raises:\n ValueError: If row_category is not in the list of categories\n set in set_categories call.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if len(row) != len(self._categories):\n raise ValueError('Invalid row. Expected size: {} got: {}'.\\\n format(len(self._categories), len(row)))\n\n self._matrix[self._categories.index(row_category)] = {'row': row}\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_cell(self, row_category: str, col_category: str,\n value: int):\n \"\"\"Logs a cell in the confusion matrix.\n\n Args:\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n\n Raises:\n ValueError: If row_category or col_category is not in the list of\n categories set in set_categories.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if col_category not in self._categories:\n raise ValueError('Invalid category: {} passed. 
Expected one of: {}'.\\\n format(col_category, self._categories))\n\n self._matrix[self._categories.index(row_category)]['row'][\n self._categories.index(col_category)] = value\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix(self, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Logs a confusion matrix.\n\n Args:\n categories: List of the category names.\n matrix: Complete confusion matrix.\n\n Raises:\n ValueError: Length of categories does not match number of rows or columns.\n \"\"\"\n self.set_confusion_matrix_categories(categories)\n\n if len(matrix) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n for index in range(len(categories)):\n if len(matrix[index]) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n self.log_confusion_matrix_row(categories[index], matrix[index])\n\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n\nclass SlicedClassificationMetrics(Artifact):\n \"\"\"Metrics class representing Sliced Classification Metrics.\n\n Similar to ClassificationMetrics, clients using this class are expected to use\n log methods of the class to log metrics with the difference being each log\n method takes a slice to associate the ClassificationMetrics.\n\n \"\"\"\n\n TYPE_NAME = 'system.SlicedClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def _upsert_classification_metrics_for_slice(self, slice: str):\n \"\"\"Upserts the classification metrics instance for a slice.\"\"\"\n if slice not in self._sliced_metrics:\n self._sliced_metrics[slice] = ClassificationMetrics()\n\n def _update_metadata(self, slice: str):\n \"\"\"Updates metadata to adhere to the metrics schema.\"\"\"\n self.metadata = {}\n self.metadata['evaluationSlices'] = []\n for slice in self._sliced_metrics.keys():\n slice_metrics = {\n 'slice': slice,\n 'sliceClassificationMetrics': self._sliced_metrics[slice].metadata\n }\n self.metadata['evaluationSlices'].append(slice_metrics)\n\n def log_roc_reading(self, slice: str, threshold: float, tpr: float,\n fpr: float):\n \"\"\"Logs a single data point in the ROC Curve of a slice.\n\n Args:\n slice: String representing slice label.\n threshold: Threshold value for the data point.\n tpr: True positive rate value of the data point.\n fpr: False positive rate value of the data point.\n \"\"\"\n\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_roc_reading(threshold, tpr, fpr)\n self._update_metadata(slice)\n\n def load_roc_readings(self, slice: str, readings: List[List[float]]):\n \"\"\"Supports bulk loading ROC Curve readings for a slice.\n\n Args:\n slice: String representing slice label.\n readings: A 2-D list providing ROC Curve data points.\n The expected order of the data points is: threshold,\n true_positive_rate, false_positive_rate.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].load_roc_readings(readings)\n self._update_metadata(slice)\n\n def set_confusion_matrix_categories(self, slice: str, categories: List[str]):\n \"\"\"Stores confusion matrix categories for a slice.\n\n Categories are stored in the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n categories: 
List of strings specifying the categories.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].set_confusion_matrix_categories(categories)\n self._update_metadata(slice)\n\n def log_confusion_matrix_row(self, slice: str, row_category: str,\n row: List[int]):\n \"\"\"Logs a confusion matrix row for a slice.\n\n Row is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)\n self._update_metadata(slice)\n\n def log_confusion_matrix_cell(self, slice: str, row_category: str,\n col_category: str, value: int):\n \"\"\"Logs a confusion matrix cell for a slice.\n\n Cell is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(\n row_category, col_category, value)\n self._update_metadata(slice)\n\n def load_confusion_matrix(self, slice: str, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Supports bulk loading the whole confusion matrix for a slice.\n\n Args:\n slice: String representing slice label.\n categories: List of the category names.\n matrix: Complete confusion matrix.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix(categories, matrix)\n self._update_metadata(slice)\n\n\nT = TypeVar('T')\n\n\nclass InputAnnotation():\n \"\"\"Marker type for input artifacts.\"\"\"\n pass\n\n\n\nclass OutputAnnotation():\n \"\"\"Marker type for output artifacts.\"\"\"\n pass\n\n\n# TODO: Use typing.Annotated instead of this hack.\n# With typing.Annotated (Python 3.9+ or typing_extensions package), the\n# following would look like:\n# Input = typing.Annotated[T, InputAnnotation]\n# Output = typing.Annotated[T, OutputAnnotation]\n\n\n# Input represents an Input artifact of type T.\nInput = Union[T, InputAnnotation]\n\n# Output represents an Output artifact of type T.\nOutput = Union[T, OutputAnnotation]\n\n\ndef is_artifact_annotation(typ) -> bool:\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [InputAnnotation, OutputAnnotation]\n\n if not hasattr(typ, '__origin__'):\n return False\n\n\n if typ.__origin__ != Union and type(typ.__origin__) != type(Union):\n return False\n\n\n if not hasattr(typ, '__args__') or len(typ.__args__) != 2:\n return False\n\n if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:\n return False\n\n return True\n\ndef is_input_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Input[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation\n\n return typ.__args__[1] == InputAnnotation\n\ndef is_output_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Output[T].\"\"\"\n if not is_artifact_annotation(typ):\n 
return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation\n\n return typ.__args__[1] == OutputAnnotation\n\ndef get_io_artifact_class(typ):\n if not is_artifact_annotation(typ):\n return None\n if typ == Input or typ == Output:\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[1]\n\n return typ.__args__[0]\n\ndef get_io_artifact_annotation(typ):\n if not is_artifact_annotation(typ):\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[2]\n\n return typ.__args__[1]\n\n\n\n_SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = {\n x.TYPE_NAME: x\n for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]\n}\n\n\ndef create_runtime_artifact(runtime_artifact: Dict) -> Artifact:\n \"\"\"Creates an Artifact instance from the specified RuntimeArtifact.\n\n Args:\n runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.\n \"\"\"\n schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')\n\n artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)\n if not artifact_type:\n artifact_type = Artifact\n return artifact_type(\n uri=runtime_artifact.get('uri', ''),\n name=runtime_artifact.get('name', ''),\n metadata=runtime_artifact.get('metadata', {}),\n )\n\nclass InputPath:\n '''When creating component from function, :class:`.InputPath` should be used as function parameter annotation to tell the system to pass the *data file path* to the function instead of passing the actual data.'''\n def __init__(self, type=None):\n self.type = type\n\nclass OutputPath:\n '''When creating component from function, :class:`.OutputPath` should be used as function parameter annotation to tell the system that the function wants to output data by writing it into a file with the given path instead of returning the data from the function.'''\n def __init__(self, type=None):\n self.type = type\n\nclass Executor():\n \"\"\"Executor executes v2-based Python function components.\"\"\"\n\n def __init__(self, executor_input: Dict, function_to_execute: Callable):\n self._func = function_to_execute\n self._input = executor_input\n self._input_artifacts: Dict[str, Artifact] = {}\n self._output_artifacts: Dict[str, Artifact] = {}\n\n for name, artifacts in self._input.get('inputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._input_artifacts[name] = self._make_input_artifact(\n artifacts_list[0])\n\n for name, artifacts in self._input.get('outputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._output_artifacts[name] = self._make_output_artifact(\n artifacts_list[0])\n\n self._return_annotation = inspect.signature(self._func).return_annotation\n self._executor_output = {}\n\n @classmethod\n def _make_input_artifact(cls, runtime_artifact: Dict):\n return create_runtime_artifact(runtime_artifact)\n\n @classmethod\n def _make_output_artifact(cls, runtime_artifact: Dict):\n import os\n artifact = create_runtime_artifact(runtime_artifact)\n os.makedirs(os.path.dirname(artifact.path), exist_ok=True)\n return artifact\n\n def _get_input_artifact(self, name: str):\n return self._input_artifacts.get(name)\n\n def _get_output_artifact(self, name: str):\n return 
self._output_artifacts.get(name)\n\n def _get_input_parameter_value(self, parameter_name: str, parameter_type: Any):\n parameter = self._input.get('inputs', {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n if parameter.get('stringValue'):\n if parameter_type == str:\n return parameter['stringValue']\n elif parameter_type == bool:\n # Use `.lower()` so it can also handle 'True' and 'False' (resulted from\n # `str(True)` and `str(False)`, respectively.\n return json.loads(parameter['stringValue'].lower())\n else:\n return json.loads(parameter['stringValue'])\n elif parameter.get('intValue'):\n return int(parameter['intValue'])\n elif parameter.get('doubleValue'):\n return float(parameter['doubleValue'])\n\n def _get_output_parameter_path(self, parameter_name: str):\n parameter_name = self._maybe_strip_path_suffix(parameter_name)\n parameter = self._input.get('outputs',\n {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n import os\n path = parameter.get('outputFile', None)\n if path:\n os.makedirs(os.path.dirname(path), exist_ok=True)\n return path\n\n def _get_output_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n output_artifact = self._output_artifacts.get(artifact_name)\n if not output_artifact:\n raise ValueError(\n 'Failed to get output artifact path for artifact name {}'.format(\n artifact_name))\n return output_artifact.path\n\n def _get_input_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n input_artifact = self._input_artifacts.get(artifact_name)\n if not input_artifact:\n raise ValueError(\n 'Failed to get input artifact path for artifact name {}'.format(\n artifact_name))\n return input_artifact.path\n\n def _write_output_parameter_value(self, name: str,\n value: Union[str, int, float, bool, dict,\n list, Dict, List]):\n if type(value) == str:\n output = {'stringValue': value}\n elif type(value) == int:\n output = {'intValue': value}\n elif type(value) == float:\n output = {'doubleValue': value}\n else:\n # For bool, list, dict, List, Dict, json serialize the value.\n output = {'stringValue': json.dumps(value)}\n\n if not self._executor_output.get('parameters'):\n self._executor_output['parameters'] = {}\n\n self._executor_output['parameters'][name] = output\n\n def _write_output_artifact_payload(self, name: str, value: Any):\n path = self._get_output_artifact_path(name)\n with open(path, 'w') as f:\n f.write(str(value))\n\n # TODO: extract to a util\n @classmethod\n def _get_short_type_name(cls, type_name: str) -> str:\n \"\"\"Extracts the short form type name.\n\n This method is used for looking up serializer for a given type.\n\n For example:\n typing.List -> List\n typing.List[int] -> List\n typing.Dict[str, str] -> Dict\n List -> List\n str -> str\n\n Args:\n type_name: The original type name.\n\n Returns:\n The short form type name or the original name if pattern doesn't match.\n \"\"\"\n import re\n match = re.match('(typing\\.)?(?P<type>\\w+)(?:\\[.+\\])?', type_name)\n if match:\n return match.group('type')\n else:\n return type_name\n\n # TODO: merge with type_utils.is_parameter_type\n @classmethod\n def _is_parameter(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return annotation in [str, int, float, bool, dict, list]\n\n # Annotation could be, for instance `typing.Dict[str, str]`, etc.\n return cls._get_short_type_name(str(annotation)) in ['Dict', 
'List']\n\n @classmethod\n def _is_artifact(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, Artifact)\n return False\n\n @classmethod\n def _is_named_tuple(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, tuple) and hasattr(\n annotation, '_fields') and hasattr(annotation, '__annotations__')\n return False\n\n def _handle_single_return_value(self, output_name: str, annotation_type: Any,\n return_value: Any):\n if self._is_parameter(annotation_type):\n if type(return_value) != annotation_type:\n raise ValueError(\n 'Function `{}` returned value of type {}; want type {}'.format(\n self._func.__name__, type(return_value), annotation_type))\n self._write_output_parameter_value(output_name, return_value)\n elif self._is_artifact(annotation_type):\n self._write_output_artifact_payload(output_name, return_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. Must be one of `str`, `int`, `float`, or a'\n ' subclass of `Artifact`'.format(annotation_type))\n\n def _write_executor_output(self, func_output: Optional[Any] = None):\n if self._output_artifacts:\n self._executor_output['artifacts'] = {}\n\n for name, artifact in self._output_artifacts.items():\n runtime_artifact = {\n 'name': artifact.name,\n 'uri': artifact.uri,\n 'metadata': artifact.metadata,\n }\n artifacts_list = {'artifacts': [runtime_artifact]}\n\n self._executor_output['artifacts'][name] = artifacts_list\n\n if func_output is not None:\n if self._is_parameter(self._return_annotation) or self._is_artifact(\n self._return_annotation):\n # Note: single output is named `Output` in component.yaml.\n self._handle_single_return_value('Output', self._return_annotation,\n func_output)\n elif self._is_named_tuple(self._return_annotation):\n if len(self._return_annotation._fields) != len(func_output):\n raise RuntimeError(\n 'Expected {} return values from function `{}`, got {}'.format(\n len(self._return_annotation._fields), self._func.__name__,\n len(func_output)))\n for i in range(len(self._return_annotation._fields)):\n field = self._return_annotation._fields[i]\n field_type = self._return_annotation.__annotations__[field]\n if type(func_output) == tuple:\n field_value = func_output[i]\n else:\n field_value = getattr(func_output, field)\n self._handle_single_return_value(field, field_type, field_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. 
Must be one of `str`, `int`, `float`, a'\n ' subclass of `Artifact`, or a NamedTuple collection of these types.'\n .format(self._return_annotation))\n\n import os\n os.makedirs(\n os.path.dirname(self._input['outputs']['outputFile']), exist_ok=True)\n with open(self._input['outputs']['outputFile'], 'w') as f:\n f.write(json.dumps(self._executor_output))\n\n def _maybe_strip_path_suffix(self, name) -> str:\n if name.endswith('_path'):\n name = name[0:-len('_path')]\n if name.endswith('_file'):\n name = name[0:-len('_file')]\n return name\n\n def execute(self):\n annotations = inspect.getfullargspec(self._func).annotations\n\n # Function arguments.\n func_kwargs = {}\n\n for k, v in annotations.items():\n if k == 'return':\n continue\n\n if self._is_parameter(v):\n func_kwargs[k] = self._get_input_parameter_value(k, v)\n\n if is_artifact_annotation(v):\n if is_input_artifact(v):\n func_kwargs[k] = self._get_input_artifact(k)\n if is_output_artifact(v):\n func_kwargs[k] = self._get_output_artifact(k)\n\n elif isinstance(v, OutputPath):\n if self._is_parameter(v.type):\n func_kwargs[k] = self._get_output_parameter_path(k)\n else:\n func_kwargs[k] = self._get_output_artifact_path(k)\n elif isinstance(v, InputPath):\n func_kwargs[k] = self._get_input_artifact_path(k)\n\n result = self._func(**func_kwargs)\n self._write_executor_output(result)\n\n\ndef add_numbers(first: int, second: int) -> int:\n return first + second\n\n\ndef executor_main():\n import argparse\n import json\n\n parser = argparse.ArgumentParser(description='Process some integers.')\n parser.add_argument('--executor_input', type=str)\n parser.add_argument('--function_to_execute', type=str)\n\n args, _ = parser.parse_known_args()\n executor_input = json.loads(args.executor_input)\n function_to_execute = globals()[args.function_to_execute]\n\n executor = Executor(executor_input=executor_input,\n function_to_execute=function_to_execute)\n\n executor.execute()\n\n\nif __name__ == '__main__':\n executor_main()\n" + "\nimport json\nimport inspect\nfrom typing import *\n\n# Copyright 2021 The Kubeflow Authors\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Classes for input/output types in KFP SDK.\n\nThese are only compatible with v2 Pipelines.\n\"\"\"\n\nimport os\nfrom typing import Dict, Generic, List, Optional, Type, TypeVar, Union\n\n_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'\n_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'\n_S3_LOCAL_MOUNT_PREFIX = '/s3/'\n\n\nclass Artifact(object):\n \"\"\"Generic Artifact class.\n\n This class is meant to represent the metadata around an input or output\n machine-learning Artifact. 
Artifacts have URIs, which can either be a location\n on disk (or Cloud storage) or some other resource identifier such as\n an API resource name.\n\n Artifacts carry a `metadata` field, which is a dictionary for storing\n metadata related to this artifact.\n \"\"\"\n TYPE_NAME = 'system.Artifact'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n \"\"\"Initializes the Artifact with the given name, URI and metadata.\"\"\"\n self.uri = uri or ''\n self.name = name or ''\n self.metadata = metadata or {}\n\n @property\n def path(self):\n return self._get_path()\n\n @path.setter\n def path(self, path):\n self._set_path(path)\n\n def _get_path(self) -> Optional[str]:\n if self.uri.startswith('gs://'):\n return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]\n elif self.uri.startswith('minio://'):\n return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]\n elif self.uri.startswith('s3://'):\n return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]\n return None\n\n def _set_path(self, path):\n if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):\n path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):\n path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):\n path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]\n self.uri = path\n\n\nclass Model(Artifact):\n \"\"\"An artifact representing an ML Model.\"\"\"\n TYPE_NAME = 'system.Model'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n @property\n def framework(self) -> str:\n return self._get_framework()\n\n def _get_framework(self) -> str:\n return self.metadata.get('framework', '')\n\n @framework.setter\n def framework(self, framework: str):\n self._set_framework(framework)\n\n def _set_framework(self, framework: str):\n self.metadata['framework'] = framework\n\n\nclass Dataset(Artifact):\n \"\"\"An artifact representing an ML Dataset.\"\"\"\n TYPE_NAME = 'system.Dataset'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Metrics(Artifact):\n \"\"\"Represent a simple base Artifact type to store key-value scalar metrics.\"\"\"\n TYPE_NAME = 'system.Metrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_metric(self, metric: str, value: float):\n \"\"\"Sets a custom scalar metric.\n\n Args:\n metric: Metric key\n value: Value of the metric.\n \"\"\"\n self.metadata[metric] = value\n\n\nclass ClassificationMetrics(Artifact):\n \"\"\"Represents Artifact class to store Classification Metrics.\"\"\"\n TYPE_NAME = 'system.ClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):\n \"\"\"Logs a single data point in the ROC Curve.\n\n Args:\n fpr: False positive rate value of the data point.\n tpr: True positive rate value of the data point.\n threshold: Threshold value for the data point.\n \"\"\"\n\n roc_reading = {\n 'confidenceThreshold': threshold,\n 'recall': tpr,\n 
'falsePositiveRate': fpr\n }\n if 'confidenceMetrics' not in self.metadata.keys():\n self.metadata['confidenceMetrics'] = []\n\n self.metadata['confidenceMetrics'].append(roc_reading)\n\n def log_roc_curve(self, fpr: List[float], tpr: List[float],\n threshold: List[float]):\n \"\"\"Logs an ROC curve.\n\n The list length of fpr, tpr and threshold must be the same.\n\n Args:\n fpr: List of false positive rate values.\n tpr: List of true positive rate values.\n threshold: List of threshold values.\n \"\"\"\n if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(tpr) != len(\n threshold):\n raise ValueError('Length of fpr, tpr and threshold must be the same. '\n 'Got lengths {}, {} and {} respectively.'.format(\n len(fpr), len(tpr), len(threshold)))\n\n for i in range(len(fpr)):\n self.log_roc_data_point(fpr=fpr[i], tpr=tpr[i], threshold=threshold[i])\n\n def set_confusion_matrix_categories(self, categories: List[str]):\n \"\"\"Stores confusion matrix categories.\n\n Args:\n categories: List of strings specifying the categories.\n \"\"\"\n\n self._categories = []\n annotation_specs = []\n for category in categories:\n annotation_spec = {'displayName': category}\n self._categories.append(category)\n annotation_specs.append(annotation_spec)\n\n self._matrix = []\n for row in range(len(self._categories)):\n self._matrix.append({'row': [0] * len(self._categories)})\n\n self._confusion_matrix = {}\n self._confusion_matrix['annotationSpecs'] = annotation_specs\n self._confusion_matrix['rows'] = self._matrix\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_row(self, row_category: str, row: List[float]):\n \"\"\"Logs a confusion matrix row.\n\n Args:\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n\n Raises:\n ValueError: If row_category is not in the list of categories\n set in set_categories call.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if len(row) != len(self._categories):\n raise ValueError('Invalid row. Expected size: {} got: {}'.\\\n format(len(self._categories), len(row)))\n\n self._matrix[self._categories.index(row_category)] = {'row': row}\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_cell(self, row_category: str, col_category: str,\n value: int):\n \"\"\"Logs a cell in the confusion matrix.\n\n Args:\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n\n Raises:\n ValueError: If row_category or col_category is not in the list of\n categories set in set_categories.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if col_category not in self._categories:\n raise ValueError('Invalid category: {} passed. 
Expected one of: {}'.\\\n format(col_category, self._categories))\n\n self._matrix[self._categories.index(row_category)]['row'][\n self._categories.index(col_category)] = value\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix(self, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Logs a confusion matrix.\n\n Args:\n categories: List of the category names.\n matrix: Complete confusion matrix.\n\n Raises:\n ValueError: Length of categories does not match number of rows or columns.\n \"\"\"\n self.set_confusion_matrix_categories(categories)\n\n if len(matrix) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n for index in range(len(categories)):\n if len(matrix[index]) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n self.log_confusion_matrix_row(categories[index], matrix[index])\n\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n\nclass SlicedClassificationMetrics(Artifact):\n \"\"\"Metrics class representing Sliced Classification Metrics.\n\n Similar to ClassificationMetrics, clients using this class are expected to use\n log methods of the class to log metrics with the difference being each log\n method takes a slice to associate the ClassificationMetrics.\n\n \"\"\"\n\n TYPE_NAME = 'system.SlicedClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def _upsert_classification_metrics_for_slice(self, slice: str):\n \"\"\"Upserts the classification metrics instance for a slice.\"\"\"\n if slice not in self._sliced_metrics:\n self._sliced_metrics[slice] = ClassificationMetrics()\n\n def _update_metadata(self, slice: str):\n \"\"\"Updates metadata to adhere to the metrics schema.\"\"\"\n self.metadata = {}\n self.metadata['evaluationSlices'] = []\n for slice in self._sliced_metrics.keys():\n slice_metrics = {\n 'slice': slice,\n 'sliceClassificationMetrics': self._sliced_metrics[slice].metadata\n }\n self.metadata['evaluationSlices'].append(slice_metrics)\n\n def log_roc_reading(self, slice: str, threshold: float, tpr: float,\n fpr: float):\n \"\"\"Logs a single data point in the ROC Curve of a slice.\n\n Args:\n slice: String representing slice label.\n threshold: Threshold value for the data point.\n tpr: True positive rate value of the data point.\n fpr: False positive rate value of the data point.\n \"\"\"\n\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_roc_reading(threshold, tpr, fpr)\n self._update_metadata(slice)\n\n def load_roc_readings(self, slice: str, readings: List[List[float]]):\n \"\"\"Supports bulk loading ROC Curve readings for a slice.\n\n Args:\n slice: String representing slice label.\n readings: A 2-D list providing ROC Curve data points.\n The expected order of the data points is: threshold,\n true_positive_rate, false_positive_rate.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].load_roc_readings(readings)\n self._update_metadata(slice)\n\n def set_confusion_matrix_categories(self, slice: str, categories: List[str]):\n \"\"\"Stores confusion matrix categories for a slice.\n\n Categories are stored in the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n categories: 
List of strings specifying the categories.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].set_confusion_matrix_categories(categories)\n self._update_metadata(slice)\n\n def log_confusion_matrix_row(self, slice: str, row_category: str,\n row: List[int]):\n \"\"\"Logs a confusion matrix row for a slice.\n\n Row is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)\n self._update_metadata(slice)\n\n def log_confusion_matrix_cell(self, slice: str, row_category: str,\n col_category: str, value: int):\n \"\"\"Logs a confusion matrix cell for a slice.\n\n Cell is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(\n row_category, col_category, value)\n self._update_metadata(slice)\n\n def load_confusion_matrix(self, slice: str, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Supports bulk loading the whole confusion matrix for a slice.\n\n Args:\n slice: String representing slice label.\n categories: List of the category names.\n matrix: Complete confusion matrix.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix(categories, matrix)\n self._update_metadata(slice)\n\n\nclass HTML(Artifact):\n \"\"\"An artifact representing an HTML file.\"\"\"\n TYPE_NAME = 'system.HTML'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Markdown(Artifact):\n \"\"\"An artifact representing a Markdown file.\"\"\"\n TYPE_NAME = 'system.Markdown'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nT = TypeVar('T')\n\n\nclass InputAnnotation():\n \"\"\"Marker type for input artifacts.\"\"\"\n pass\n\n\nclass OutputAnnotation():\n \"\"\"Marker type for output artifacts.\"\"\"\n pass\n\n\n# TODO: Use typing.Annotated instead of this hack.\n# With typing.Annotated (Python 3.9+ or typing_extensions package), the\n# following would look like:\n# Input = typing.Annotated[T, InputAnnotation]\n# Output = typing.Annotated[T, OutputAnnotation]\n\n# Input represents an Input artifact of type T.\nInput = Union[T, InputAnnotation]\n\n# Output represents an Output artifact of type T.\nOutput = Union[T, OutputAnnotation]\n\n\ndef is_artifact_annotation(typ) -> bool:\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [\n InputAnnotation, OutputAnnotation\n ]\n\n if not hasattr(typ, '__origin__'):\n return False\n\n if typ.__origin__ != Union and type(typ.__origin__) != type(Union):\n return False\n\n if not hasattr(typ, '__args__') or len(typ.__args__) != 2:\n return 
False\n\n if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:\n return False\n\n return True\n\n\ndef is_input_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Input[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation\n\n return typ.__args__[1] == InputAnnotation\n\n\ndef is_output_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Output[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation\n\n return typ.__args__[1] == OutputAnnotation\n\n\ndef get_io_artifact_class(typ):\n if not is_artifact_annotation(typ):\n return None\n if typ == Input or typ == Output:\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[1]\n\n return typ.__args__[0]\n\n\ndef get_io_artifact_annotation(typ):\n if not is_artifact_annotation(typ):\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[2]\n\n return typ.__args__[1]\n\n\n_SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = {\n x.TYPE_NAME: x\n for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]\n}\n\n\ndef create_runtime_artifact(runtime_artifact: Dict) -> Artifact:\n \"\"\"Creates an Artifact instance from the specified RuntimeArtifact.\n\n Args:\n runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.\n \"\"\"\n schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')\n\n artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)\n if not artifact_type:\n artifact_type = Artifact\n return artifact_type(\n uri=runtime_artifact.get('uri', ''),\n name=runtime_artifact.get('name', ''),\n metadata=runtime_artifact.get('metadata', {}),\n )\n\nclass InputPath:\n '''When creating component from function, :class:`.InputPath` should be used as function parameter annotation to tell the system to pass the *data file path* to the function instead of passing the actual data.'''\n def __init__(self, type=None):\n self.type = type\n\nclass OutputPath:\n '''When creating component from function, :class:`.OutputPath` should be used as function parameter annotation to tell the system that the function wants to output data by writing it into a file with the given path instead of returning the data from the function.'''\n def __init__(self, type=None):\n self.type = type\n\nclass Executor():\n \"\"\"Executor executes v2-based Python function components.\"\"\"\n\n def __init__(self, executor_input: Dict, function_to_execute: Callable):\n self._func = function_to_execute\n self._input = executor_input\n self._input_artifacts: Dict[str, Artifact] = {}\n self._output_artifacts: Dict[str, Artifact] = {}\n\n for name, artifacts in self._input.get('inputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._input_artifacts[name] = self._make_input_artifact(\n artifacts_list[0])\n\n for name, artifacts in self._input.get('outputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._output_artifacts[name] = self._make_output_artifact(\n artifacts_list[0])\n\n self._return_annotation = 
inspect.signature(self._func).return_annotation\n self._executor_output = {}\n\n @classmethod\n def _make_input_artifact(cls, runtime_artifact: Dict):\n return create_runtime_artifact(runtime_artifact)\n\n @classmethod\n def _make_output_artifact(cls, runtime_artifact: Dict):\n import os\n artifact = create_runtime_artifact(runtime_artifact)\n os.makedirs(os.path.dirname(artifact.path), exist_ok=True)\n return artifact\n\n def _get_input_artifact(self, name: str):\n return self._input_artifacts.get(name)\n\n def _get_output_artifact(self, name: str):\n return self._output_artifacts.get(name)\n\n def _get_input_parameter_value(self, parameter_name: str, parameter_type: Any):\n parameter = self._input.get('inputs', {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n if parameter.get('stringValue'):\n if parameter_type == str:\n return parameter['stringValue']\n elif parameter_type == bool:\n # Use `.lower()` so it can also handle 'True' and 'False' (resulted from\n # `str(True)` and `str(False)`, respectively.\n return json.loads(parameter['stringValue'].lower())\n else:\n return json.loads(parameter['stringValue'])\n elif parameter.get('intValue'):\n return int(parameter['intValue'])\n elif parameter.get('doubleValue'):\n return float(parameter['doubleValue'])\n\n def _get_output_parameter_path(self, parameter_name: str):\n parameter_name = self._maybe_strip_path_suffix(parameter_name)\n parameter = self._input.get('outputs',\n {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n import os\n path = parameter.get('outputFile', None)\n if path:\n os.makedirs(os.path.dirname(path), exist_ok=True)\n return path\n\n def _get_output_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n output_artifact = self._output_artifacts.get(artifact_name)\n if not output_artifact:\n raise ValueError(\n 'Failed to get output artifact path for artifact name {}'.format(\n artifact_name))\n return output_artifact.path\n\n def _get_input_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n input_artifact = self._input_artifacts.get(artifact_name)\n if not input_artifact:\n raise ValueError(\n 'Failed to get input artifact path for artifact name {}'.format(\n artifact_name))\n return input_artifact.path\n\n def _write_output_parameter_value(self, name: str,\n value: Union[str, int, float, bool, dict,\n list, Dict, List]):\n if type(value) == str:\n output = {'stringValue': value}\n elif type(value) == int:\n output = {'intValue': value}\n elif type(value) == float:\n output = {'doubleValue': value}\n else:\n # For bool, list, dict, List, Dict, json serialize the value.\n output = {'stringValue': json.dumps(value)}\n\n if not self._executor_output.get('parameters'):\n self._executor_output['parameters'] = {}\n\n self._executor_output['parameters'][name] = output\n\n def _write_output_artifact_payload(self, name: str, value: Any):\n path = self._get_output_artifact_path(name)\n with open(path, 'w') as f:\n f.write(str(value))\n\n # TODO: extract to a util\n @classmethod\n def _get_short_type_name(cls, type_name: str) -> str:\n \"\"\"Extracts the short form type name.\n\n This method is used for looking up serializer for a given type.\n\n For example:\n typing.List -> List\n typing.List[int] -> List\n typing.Dict[str, str] -> Dict\n List -> List\n str -> str\n\n Args:\n type_name: The original type name.\n\n Returns:\n The 
short form type name or the original name if pattern doesn't match.\n \"\"\"\n import re\n match = re.match('(typing\\.)?(?P<type>\\w+)(?:\\[.+\\])?', type_name)\n if match:\n return match.group('type')\n else:\n return type_name\n\n # TODO: merge with type_utils.is_parameter_type\n @classmethod\n def _is_parameter(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return annotation in [str, int, float, bool, dict, list]\n\n # Annotation could be, for instance `typing.Dict[str, str]`, etc.\n return cls._get_short_type_name(str(annotation)) in ['Dict', 'List']\n\n @classmethod\n def _is_artifact(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, Artifact)\n return False\n\n @classmethod\n def _is_named_tuple(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, tuple) and hasattr(\n annotation, '_fields') and hasattr(annotation, '__annotations__')\n return False\n\n def _handle_single_return_value(self, output_name: str, annotation_type: Any,\n return_value: Any):\n if self._is_parameter(annotation_type):\n if type(return_value) != annotation_type:\n raise ValueError(\n 'Function `{}` returned value of type {}; want type {}'.format(\n self._func.__name__, type(return_value), annotation_type))\n self._write_output_parameter_value(output_name, return_value)\n elif self._is_artifact(annotation_type):\n self._write_output_artifact_payload(output_name, return_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. Must be one of `str`, `int`, `float`, or a'\n ' subclass of `Artifact`'.format(annotation_type))\n\n def _write_executor_output(self, func_output: Optional[Any] = None):\n if self._output_artifacts:\n self._executor_output['artifacts'] = {}\n\n for name, artifact in self._output_artifacts.items():\n runtime_artifact = {\n 'name': artifact.name,\n 'uri': artifact.uri,\n 'metadata': artifact.metadata,\n }\n artifacts_list = {'artifacts': [runtime_artifact]}\n\n self._executor_output['artifacts'][name] = artifacts_list\n\n if func_output is not None:\n if self._is_parameter(self._return_annotation) or self._is_artifact(\n self._return_annotation):\n # Note: single output is named `Output` in component.yaml.\n self._handle_single_return_value('Output', self._return_annotation,\n func_output)\n elif self._is_named_tuple(self._return_annotation):\n if len(self._return_annotation._fields) != len(func_output):\n raise RuntimeError(\n 'Expected {} return values from function `{}`, got {}'.format(\n len(self._return_annotation._fields), self._func.__name__,\n len(func_output)))\n for i in range(len(self._return_annotation._fields)):\n field = self._return_annotation._fields[i]\n field_type = self._return_annotation.__annotations__[field]\n if type(func_output) == tuple:\n field_value = func_output[i]\n else:\n field_value = getattr(func_output, field)\n self._handle_single_return_value(field, field_type, field_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. 
Must be one of `str`, `int`, `float`, a'\n ' subclass of `Artifact`, or a NamedTuple collection of these types.'\n .format(self._return_annotation))\n\n import os\n os.makedirs(\n os.path.dirname(self._input['outputs']['outputFile']), exist_ok=True)\n with open(self._input['outputs']['outputFile'], 'w') as f:\n f.write(json.dumps(self._executor_output))\n\n def _maybe_strip_path_suffix(self, name) -> str:\n if name.endswith('_path'):\n name = name[0:-len('_path')]\n if name.endswith('_file'):\n name = name[0:-len('_file')]\n return name\n\n def execute(self):\n annotations = inspect.getfullargspec(self._func).annotations\n\n # Function arguments.\n func_kwargs = {}\n\n for k, v in annotations.items():\n if k == 'return':\n continue\n\n if self._is_parameter(v):\n func_kwargs[k] = self._get_input_parameter_value(k, v)\n\n if is_artifact_annotation(v):\n if is_input_artifact(v):\n func_kwargs[k] = self._get_input_artifact(k)\n if is_output_artifact(v):\n func_kwargs[k] = self._get_output_artifact(k)\n\n elif isinstance(v, OutputPath):\n if self._is_parameter(v.type):\n func_kwargs[k] = self._get_output_parameter_path(k)\n else:\n func_kwargs[k] = self._get_output_artifact_path(k)\n elif isinstance(v, InputPath):\n func_kwargs[k] = self._get_input_artifact_path(k)\n\n result = self._func(**func_kwargs)\n self._write_executor_output(result)\n\n\ndef add_numbers(first: int, second: int) -> int:\n return first + second\n\n\ndef executor_main():\n import argparse\n import json\n\n parser = argparse.ArgumentParser(description='Process some integers.')\n parser.add_argument('--executor_input', type=str)\n parser.add_argument('--function_to_execute', type=str)\n\n args, _ = parser.parse_known_args()\n executor_input = json.loads(args.executor_input)\n function_to_execute = globals()[args.function_to_execute]\n\n executor = Executor(executor_input=executor_input,\n function_to_execute=function_to_execute)\n\n executor.execute()\n\n\nif __name__ == '__main__':\n executor_main()\n" ], "image": "python:3.7" } @@ -138,7 +138,7 @@ "sh", "-ec", "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n", - "\nimport json\nimport inspect\nfrom typing import *\n\n# Copyright 2021 The Kubeflow Authors\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Classes for input/output types in KFP SDK.\n\nThese are only compatible with v2 Pipelines.\n\"\"\"\n\nimport os\nfrom typing import Dict, Generic, List, Optional, Type, TypeVar, Union\n\n\n_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'\n_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'\n_S3_LOCAL_MOUNT_PREFIX = '/s3/'\n\n\nclass Artifact(object):\n \"\"\"Generic Artifact class.\n\n This class is meant to represent the metadata around an input or output\n machine-learning Artifact. 
Artifacts have URIs, which can either be a location\n on disk (or Cloud storage) or some other resource identifier such as\n an API resource name.\n\n Artifacts carry a `metadata` field, which is a dictionary for storing\n metadata related to this artifact.\n \"\"\"\n TYPE_NAME = 'system.Artifact'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n \"\"\"Initializes the Artifact with the given name, URI and metadata.\"\"\"\n self.uri = uri or ''\n self.name = name or ''\n self.metadata = metadata or {}\n\n @property\n def path(self):\n return self._get_path()\n\n @path.setter\n def path(self, path):\n self._set_path(path)\n\n def _get_path(self) -> Optional[str]:\n if self.uri.startswith('gs://'):\n return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]\n elif self.uri.startswith('minio://'):\n return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]\n elif self.uri.startswith('s3://'):\n return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]\n return None\n\n def _set_path(self, path):\n if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):\n path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):\n path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):\n path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]\n self.uri = path\n\n\nclass Model(Artifact):\n \"\"\"An artifact representing an ML Model.\"\"\"\n TYPE_NAME = 'system.Model'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n @property\n def framework(self) -> str:\n return self._get_framework()\n\n def _get_framework(self) -> str:\n return self.metadata.get('framework', '')\n\n @framework.setter\n def framework(self, framework: str):\n self._set_framework(framework)\n\n def _set_framework(self, framework: str):\n self.metadata['framework'] = framework\n\n\nclass Dataset(Artifact):\n \"\"\"An artifact representing an ML Dataset.\"\"\"\n TYPE_NAME = 'system.Dataset'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Metrics(Artifact):\n \"\"\"Represent a simple base Artifact type to store key-value scalar metrics.\"\"\"\n TYPE_NAME = 'system.Metrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_metric(self, metric: str, value: float):\n \"\"\"Sets a custom scalar metric.\n\n Args:\n metric: Metric key\n value: Value of the metric.\n \"\"\"\n self.metadata[metric] = value\n\n\nclass ClassificationMetrics(Artifact):\n \"\"\"Represents Artifact class to store Classification Metrics.\"\"\"\n TYPE_NAME = 'system.ClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):\n \"\"\"Logs a single data point in the ROC Curve.\n\n Args:\n fpr: False positive rate value of the data point.\n tpr: True positive rate value of the data point.\n threshold: Threshold value for the data point.\n \"\"\"\n\n roc_reading = {\n 'confidenceThreshold': threshold,\n 'recall': tpr,\n 
'falsePositiveRate': fpr\n }\n if 'confidenceMetrics' not in self.metadata.keys():\n self.metadata['confidenceMetrics'] = []\n\n self.metadata['confidenceMetrics'].append(roc_reading)\n\n def log_roc_curve(self, fpr: List[float], tpr: List[float],\n threshold: List[float]):\n \"\"\"Logs an ROC curve.\n\n The list length of fpr, tpr and threshold must be the same.\n\n Args:\n fpr: List of false positive rate values.\n tpr: List of true positive rate values.\n threshold: List of threshold values.\n \"\"\"\n if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(tpr) != len(\n threshold):\n raise ValueError('Length of fpr, tpr and threshold must be the same. '\n 'Got lengths {}, {} and {} respectively.'.format(\n len(fpr), len(tpr), len(threshold)))\n\n for i in range(len(fpr)):\n self.log_roc_data_point(fpr=fpr[i], tpr=tpr[i], threshold=threshold[i])\n\n def set_confusion_matrix_categories(self, categories: List[str]):\n \"\"\"Stores confusion matrix categories.\n\n Args:\n categories: List of strings specifying the categories.\n \"\"\"\n\n self._categories = []\n annotation_specs = []\n for category in categories:\n annotation_spec = {'displayName': category}\n self._categories.append(category)\n annotation_specs.append(annotation_spec)\n\n self._matrix = []\n for row in range(len(self._categories)):\n self._matrix.append({'row': [0] * len(self._categories)})\n\n self._confusion_matrix = {}\n self._confusion_matrix['annotationSpecs'] = annotation_specs\n self._confusion_matrix['rows'] = self._matrix\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_row(self, row_category: str, row: List[float]):\n \"\"\"Logs a confusion matrix row.\n\n Args:\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n\n Raises:\n ValueError: If row_category is not in the list of categories\n set in set_categories call.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if len(row) != len(self._categories):\n raise ValueError('Invalid row. Expected size: {} got: {}'.\\\n format(len(self._categories), len(row)))\n\n self._matrix[self._categories.index(row_category)] = {'row': row}\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_cell(self, row_category: str, col_category: str,\n value: int):\n \"\"\"Logs a cell in the confusion matrix.\n\n Args:\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n\n Raises:\n ValueError: If row_category or col_category is not in the list of\n categories set in set_categories.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if col_category not in self._categories:\n raise ValueError('Invalid category: {} passed. 
Expected one of: {}'.\\\n format(row_category, self._categories))\n\n self._matrix[self._categories.index(row_category)]['row'][\n self._categories.index(col_category)] = value\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix(self, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Logs a confusion matrix.\n\n Args:\n categories: List of the category names.\n matrix: Complete confusion matrix.\n\n Raises:\n ValueError: Length of categories does not match number of rows or columns.\n \"\"\"\n self.set_confusion_matrix_categories(categories)\n\n if len(matrix) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n for index in range(len(categories)):\n if len(matrix[index]) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n self.log_confusion_matrix_row(categories[index], matrix[index])\n\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n\nclass SlicedClassificationMetrics(Artifact):\n \"\"\"Metrics class representing Sliced Classification Metrics.\n\n Similar to ClassificationMetrics clients using this class are expected to use\n log methods of the class to log metrics with the difference being each log\n method takes a slice to associate the ClassificationMetrics.\n\n \"\"\"\n\n TYPE_NAME = 'system.SlicedClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def _upsert_classification_metrics_for_slice(self, slice: str):\n \"\"\"Upserts the classification metrics instance for a slice.\"\"\"\n if slice not in self._sliced_metrics:\n self._sliced_metrics[slice] = ClassificationMetrics()\n\n def _update_metadata(self, slice: str):\n \"\"\"Updates metadata to adhere to the metrics schema.\"\"\"\n self.metadata = {}\n self.metadata['evaluationSlices'] = []\n for slice in self._sliced_metrics.keys():\n slice_metrics = {\n 'slice': slice,\n 'sliceClassificationMetrics': self._sliced_metrics[slice].metadata\n }\n self.metadata['evaluationSlices'].append(slice_metrics)\n\n def log_roc_reading(self, slice: str, threshold: float, tpr: float,\n fpr: float):\n \"\"\"Logs a single data point in the ROC Curve of a slice.\n\n Args:\n slice: String representing slice label.\n threshold: Thresold value for the data point.\n tpr: True positive rate value of the data point.\n fpr: False positive rate value of the data point.\n \"\"\"\n\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_roc_reading(threshold, tpr, fpr)\n self._update_metadata(slice)\n\n def load_roc_readings(self, slice: str, readings: List[List[float]]):\n \"\"\"Supports bulk loading ROC Curve readings for a slice.\n\n Args:\n slice: String representing slice label.\n readings: A 2-D list providing ROC Curve data points.\n The expected order of the data points is: threshold,\n true_positive_rate, false_positive_rate.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].load_roc_readings(readings)\n self._update_metadata(slice)\n\n def set_confusion_matrix_categories(self, slice: str, categories: List[str]):\n \"\"\"Stores confusion matrix categories for a slice..\n\n Categories are stored in the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n categories: 
List of strings specifying the categories.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].set_confusion_matrix_categories(categories)\n self._update_metadata(slice)\n\n def log_confusion_matrix_row(self, slice: str, row_category: str,\n row: List[int]):\n \"\"\"Logs a confusion matrix row for a slice.\n\n Row is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)\n self._update_metadata(slice)\n\n def log_confusion_matrix_cell(self, slice: str, row_category: str,\n col_category: str, value: int):\n \"\"\"Logs a confusion matrix cell for a slice..\n\n Cell is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(\n row_category, col_category, value)\n self._update_metadata(slice)\n\n def load_confusion_matrix(self, slice: str, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Supports bulk loading the whole confusion matrix for a slice.\n\n Args:\n slice: String representing slice label.\n categories: List of the category names.\n matrix: Complete confusion matrix.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(categories, matrix)\n self._update_metadata(slice)\n\n\nT = TypeVar('T')\n\n\nclass InputAnnotation():\n \"\"\"Marker type for input artifacts.\"\"\"\n pass\n\n\n\nclass OutputAnnotation():\n \"\"\"Marker type for output artifacts.\"\"\"\n pass\n\n\n# TODO: Use typing.Annotated instead of this hack.\n# With typing.Annotated (Python 3.9+ or typing_extensions package), the\n# following would look like:\n# Input = typing.Annotated[T, InputAnnotation]\n# Output = typing.Annotated[T, OutputAnnotation]\n\n\n# Input represents an Input artifact of type T.\nInput = Union[T, InputAnnotation]\n\n# Output represents an Output artifact of type T.\nOutput = Union[T, OutputAnnotation]\n\n\ndef is_artifact_annotation(typ) -> bool:\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [InputAnnotation, OutputAnnotation]\n\n if not hasattr(typ, '__origin__'):\n return False\n\n\n if typ.__origin__ != Union and type(typ.__origin__) != type(Union):\n return False\n\n\n if not hasattr(typ, '__args__') or len(typ.__args__) != 2:\n return False\n\n if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:\n return False\n\n return True\n\ndef is_input_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Input[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation\n\n return typ.__args__[1] == InputAnnotation\n\ndef is_output_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Output[T].\"\"\"\n if not is_artifact_annotation(typ):\n 
return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation\n\n return typ.__args__[1] == OutputAnnotation\n\ndef get_io_artifact_class(typ):\n if not is_artifact_annotation(typ):\n return None\n if typ == Input or typ == Output:\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[1]\n\n return typ.__args__[0]\n\ndef get_io_artifact_annotation(typ):\n if not is_artifact_annotation(typ):\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[2]\n\n return typ.__args__[1]\n\n\n\n_SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = {\n x.TYPE_NAME: x\n for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]\n}\n\n\ndef create_runtime_artifact(runtime_artifact: Dict) -> Artifact:\n \"\"\"Creates an Artifact instance from the specified RuntimeArtifact.\n\n Args:\n runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.\n \"\"\"\n schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')\n\n artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)\n if not artifact_type:\n artifact_type = Artifact\n return artifact_type(\n uri=runtime_artifact.get('uri', ''),\n name=runtime_artifact.get('name', ''),\n metadata=runtime_artifact.get('metadata', {}),\n )\n\nclass InputPath:\n '''When creating component from function, :class:`.InputPath` should be used as function parameter annotation to tell the system to pass the *data file path* to the function instead of passing the actual data.'''\n def __init__(self, type=None):\n self.type = type\n\nclass OutputPath:\n '''When creating component from function, :class:`.OutputPath` should be used as function parameter annotation to tell the system that the function wants to output data by writing it into a file with the given path instead of returning the data from the function.'''\n def __init__(self, type=None):\n self.type = type\n\nclass Executor():\n \"\"\"Executor executes v2-based Python function components.\"\"\"\n\n def __init__(self, executor_input: Dict, function_to_execute: Callable):\n self._func = function_to_execute\n self._input = executor_input\n self._input_artifacts: Dict[str, Artifact] = {}\n self._output_artifacts: Dict[str, Artifact] = {}\n\n for name, artifacts in self._input.get('inputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._input_artifacts[name] = self._make_input_artifact(\n artifacts_list[0])\n\n for name, artifacts in self._input.get('outputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._output_artifacts[name] = self._make_output_artifact(\n artifacts_list[0])\n\n self._return_annotation = inspect.signature(self._func).return_annotation\n self._executor_output = {}\n\n @classmethod\n def _make_input_artifact(cls, runtime_artifact: Dict):\n return create_runtime_artifact(runtime_artifact)\n\n @classmethod\n def _make_output_artifact(cls, runtime_artifact: Dict):\n import os\n artifact = create_runtime_artifact(runtime_artifact)\n os.makedirs(os.path.dirname(artifact.path), exist_ok=True)\n return artifact\n\n def _get_input_artifact(self, name: str):\n return self._input_artifacts.get(name)\n\n def _get_output_artifact(self, name: str):\n return 
self._output_artifacts.get(name)\n\n  def _get_input_parameter_value(self, parameter_name: str, parameter_type: Any):\n    parameter = self._input.get('inputs', {}).get('parameters',\n                                                  {}).get(parameter_name, None)\n    if parameter is None:\n      return None\n\n    if parameter.get('stringValue'):\n      if parameter_type == str:\n        return parameter['stringValue']\n      elif parameter_type == bool:\n        # Use `.lower()` so it can also handle 'True' and 'False' (resulted from\n        # `str(True)` and `str(False)`, respectively.\n        return json.loads(parameter['stringValue'].lower())\n      else:\n        return json.loads(parameter['stringValue'])\n    elif parameter.get('intValue'):\n      return int(parameter['intValue'])\n    elif parameter.get('doubleValue'):\n      return float(parameter['doubleValue'])\n\n  def _get_output_parameter_path(self, parameter_name: str):\n    parameter_name = self._maybe_strip_path_suffix(parameter_name)\n    parameter = self._input.get('outputs',\n                                {}).get('parameters',\n                                        {}).get(parameter_name, None)\n    if parameter is None:\n      return None\n\n    import os\n    path = parameter.get('outputFile', None)\n    if path:\n      os.makedirs(os.path.dirname(path), exist_ok=True)\n    return path\n\n  def _get_output_artifact_path(self, artifact_name: str):\n    artifact_name = self._maybe_strip_path_suffix(artifact_name)\n    output_artifact = self._output_artifacts.get(artifact_name)\n    if not output_artifact:\n      raise ValueError(\n          'Failed to get output artifact path for artifact name {}'.format(\n              artifact_name))\n    return output_artifact.path\n\n  def _get_input_artifact_path(self, artifact_name: str):\n    artifact_name = self._maybe_strip_path_suffix(artifact_name)\n    input_artifact = self._input_artifacts.get(artifact_name)\n    if not input_artifact:\n      raise ValueError(\n          'Failed to get input artifact path for artifact name {}'.format(\n              artifact_name))\n    return input_artifact.path\n\n  def _write_output_parameter_value(self, name: str,\n                                    value: Union[str, int, float, bool, dict,\n                                                 list, Dict, List]):\n    if type(value) == str:\n      output = {'stringValue': value}\n    elif type(value) == int:\n      output = {'intValue': value}\n    elif type(value) == float:\n      output = {'doubleValue': value}\n    else:\n      # For bool, list, dict, List, Dict, json serialize the value.\n      output = {'stringValue': json.dumps(value)}\n\n    if not self._executor_output.get('parameters'):\n      self._executor_output['parameters'] = {}\n\n    self._executor_output['parameters'][name] = output\n\n  def _write_output_artifact_payload(self, name: str, value: Any):\n    path = self._get_output_artifact_path(name)\n    with open(path, 'w') as f:\n      f.write(str(value))\n\n  # TODO: extract to a util\n  @classmethod\n  def _get_short_type_name(cls, type_name: str) -> str:\n    \"\"\"Extracts the short form type name.\n\n    This method is used for looking up serializer for a given type.\n\n    For example:\n      typing.List -> List\n      typing.List[int] -> List\n      typing.Dict[str, str] -> Dict\n      List -> List\n      str -> str\n\n    Args:\n      type_name: The original type name.\n\n    Returns:\n      The short form type name or the original name if pattern doesn't match.\n    \"\"\"\n    import re\n    match = re.match('(typing\\.)?(?P<type>\\w+)(?:\\[.+\\])?', type_name)\n    if match:\n      return match.group('type')\n    else:\n      return type_name\n\n  # TODO: merge with type_utils.is_parameter_type\n  @classmethod\n  def _is_parameter(cls, annotation: Any) -> bool:\n    if type(annotation) == type:\n      return annotation in [str, int, float, bool, dict, list]\n\n    # Annotation could be, for instance `typing.Dict[str, str]`, etc.\n    return cls._get_short_type_name(str(annotation)) in ['Dict', 
'List']\n\n @classmethod\n def _is_artifact(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, Artifact)\n return False\n\n @classmethod\n def _is_named_tuple(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, tuple) and hasattr(\n annotation, '_fields') and hasattr(annotation, '__annotations__')\n return False\n\n def _handle_single_return_value(self, output_name: str, annotation_type: Any,\n return_value: Any):\n if self._is_parameter(annotation_type):\n if type(return_value) != annotation_type:\n raise ValueError(\n 'Function `{}` returned value of type {}; want type {}'.format(\n self._func.__name__, type(return_value), annotation_type))\n self._write_output_parameter_value(output_name, return_value)\n elif self._is_artifact(annotation_type):\n self._write_output_artifact_payload(output_name, return_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. Must be one of `str`, `int`, `float`, or a'\n ' subclass of `Artifact`'.format(annotation_type))\n\n def _write_executor_output(self, func_output: Optional[Any] = None):\n if self._output_artifacts:\n self._executor_output['artifacts'] = {}\n\n for name, artifact in self._output_artifacts.items():\n runtime_artifact = {\n 'name': artifact.name,\n 'uri': artifact.uri,\n 'metadata': artifact.metadata,\n }\n artifacts_list = {'artifacts': [runtime_artifact]}\n\n self._executor_output['artifacts'][name] = artifacts_list\n\n if func_output is not None:\n if self._is_parameter(self._return_annotation) or self._is_artifact(\n self._return_annotation):\n # Note: single output is named `Output` in component.yaml.\n self._handle_single_return_value('Output', self._return_annotation,\n func_output)\n elif self._is_named_tuple(self._return_annotation):\n if len(self._return_annotation._fields) != len(func_output):\n raise RuntimeError(\n 'Expected {} return values from function `{}`, got {}'.format(\n len(self._return_annotation._fields), self._func.__name__,\n len(func_output)))\n for i in range(len(self._return_annotation._fields)):\n field = self._return_annotation._fields[i]\n field_type = self._return_annotation.__annotations__[field]\n if type(func_output) == tuple:\n field_value = func_output[i]\n else:\n field_value = getattr(func_output, field)\n self._handle_single_return_value(field, field_type, field_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. 
Must be one of `str`, `int`, `float`, a'\n ' subclass of `Artifact`, or a NamedTuple collection of these types.'\n .format(self._return_annotation))\n\n import os\n os.makedirs(\n os.path.dirname(self._input['outputs']['outputFile']), exist_ok=True)\n with open(self._input['outputs']['outputFile'], 'w') as f:\n f.write(json.dumps(self._executor_output))\n\n def _maybe_strip_path_suffix(self, name) -> str:\n if name.endswith('_path'):\n name = name[0:-len('_path')]\n if name.endswith('_file'):\n name = name[0:-len('_file')]\n return name\n\n def execute(self):\n annotations = inspect.getfullargspec(self._func).annotations\n\n # Function arguments.\n func_kwargs = {}\n\n for k, v in annotations.items():\n if k == 'return':\n continue\n\n if self._is_parameter(v):\n func_kwargs[k] = self._get_input_parameter_value(k, v)\n\n if is_artifact_annotation(v):\n if is_input_artifact(v):\n func_kwargs[k] = self._get_input_artifact(k)\n if is_output_artifact(v):\n func_kwargs[k] = self._get_output_artifact(k)\n\n elif isinstance(v, OutputPath):\n if self._is_parameter(v.type):\n func_kwargs[k] = self._get_output_parameter_path(k)\n else:\n func_kwargs[k] = self._get_output_artifact_path(k)\n elif isinstance(v, InputPath):\n func_kwargs[k] = self._get_input_artifact_path(k)\n\n result = self._func(**func_kwargs)\n self._write_executor_output(result)\n\n\ndef concat_message(first: str, second: str) -> str:\n return first + second\n\n\ndef executor_main():\n import argparse\n import json\n\n parser = argparse.ArgumentParser(description='Process some integers.')\n parser.add_argument('--executor_input', type=str)\n parser.add_argument('--function_to_execute', type=str)\n\n args, _ = parser.parse_known_args()\n executor_input = json.loads(args.executor_input)\n function_to_execute = globals()[args.function_to_execute]\n\n executor = Executor(executor_input=executor_input,\n function_to_execute=function_to_execute)\n\n executor.execute()\n\n\nif __name__ == '__main__':\n executor_main()\n" + "\nimport json\nimport inspect\nfrom typing import *\n\n# Copyright 2021 The Kubeflow Authors\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Classes for input/output types in KFP SDK.\n\nThese are only compatible with v2 Pipelines.\n\"\"\"\n\nimport os\nfrom typing import Dict, Generic, List, Optional, Type, TypeVar, Union\n\n_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'\n_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'\n_S3_LOCAL_MOUNT_PREFIX = '/s3/'\n\n\nclass Artifact(object):\n \"\"\"Generic Artifact class.\n\n This class is meant to represent the metadata around an input or output\n machine-learning Artifact. 
Artifacts have URIs, which can either be a location\n on disk (or Cloud storage) or some other resource identifier such as\n an API resource name.\n\n Artifacts carry a `metadata` field, which is a dictionary for storing\n metadata related to this artifact.\n \"\"\"\n TYPE_NAME = 'system.Artifact'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n \"\"\"Initializes the Artifact with the given name, URI and metadata.\"\"\"\n self.uri = uri or ''\n self.name = name or ''\n self.metadata = metadata or {}\n\n @property\n def path(self):\n return self._get_path()\n\n @path.setter\n def path(self, path):\n self._set_path(path)\n\n def _get_path(self) -> Optional[str]:\n if self.uri.startswith('gs://'):\n return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]\n elif self.uri.startswith('minio://'):\n return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]\n elif self.uri.startswith('s3://'):\n return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]\n return None\n\n def _set_path(self, path):\n if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):\n path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):\n path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):\n path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]\n self.uri = path\n\n\nclass Model(Artifact):\n \"\"\"An artifact representing an ML Model.\"\"\"\n TYPE_NAME = 'system.Model'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n @property\n def framework(self) -> str:\n return self._get_framework()\n\n def _get_framework(self) -> str:\n return self.metadata.get('framework', '')\n\n @framework.setter\n def framework(self, framework: str):\n self._set_framework(framework)\n\n def _set_framework(self, framework: str):\n self.metadata['framework'] = framework\n\n\nclass Dataset(Artifact):\n \"\"\"An artifact representing an ML Dataset.\"\"\"\n TYPE_NAME = 'system.Dataset'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Metrics(Artifact):\n \"\"\"Represent a simple base Artifact type to store key-value scalar metrics.\"\"\"\n TYPE_NAME = 'system.Metrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_metric(self, metric: str, value: float):\n \"\"\"Sets a custom scalar metric.\n\n Args:\n metric: Metric key\n value: Value of the metric.\n \"\"\"\n self.metadata[metric] = value\n\n\nclass ClassificationMetrics(Artifact):\n \"\"\"Represents Artifact class to store Classification Metrics.\"\"\"\n TYPE_NAME = 'system.ClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):\n \"\"\"Logs a single data point in the ROC Curve.\n\n Args:\n fpr: False positive rate value of the data point.\n tpr: True positive rate value of the data point.\n threshold: Threshold value for the data point.\n \"\"\"\n\n roc_reading = {\n 'confidenceThreshold': threshold,\n 'recall': tpr,\n 
'falsePositiveRate': fpr\n }\n if 'confidenceMetrics' not in self.metadata.keys():\n self.metadata['confidenceMetrics'] = []\n\n self.metadata['confidenceMetrics'].append(roc_reading)\n\n def log_roc_curve(self, fpr: List[float], tpr: List[float],\n threshold: List[float]):\n \"\"\"Logs an ROC curve.\n\n The list length of fpr, tpr and threshold must be the same.\n\n Args:\n fpr: List of false positive rate values.\n tpr: List of true positive rate values.\n threshold: List of threshold values.\n \"\"\"\n if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(tpr) != len(\n threshold):\n raise ValueError('Length of fpr, tpr and threshold must be the same. '\n 'Got lengths {}, {} and {} respectively.'.format(\n len(fpr), len(tpr), len(threshold)))\n\n for i in range(len(fpr)):\n self.log_roc_data_point(fpr=fpr[i], tpr=tpr[i], threshold=threshold[i])\n\n def set_confusion_matrix_categories(self, categories: List[str]):\n \"\"\"Stores confusion matrix categories.\n\n Args:\n categories: List of strings specifying the categories.\n \"\"\"\n\n self._categories = []\n annotation_specs = []\n for category in categories:\n annotation_spec = {'displayName': category}\n self._categories.append(category)\n annotation_specs.append(annotation_spec)\n\n self._matrix = []\n for row in range(len(self._categories)):\n self._matrix.append({'row': [0] * len(self._categories)})\n\n self._confusion_matrix = {}\n self._confusion_matrix['annotationSpecs'] = annotation_specs\n self._confusion_matrix['rows'] = self._matrix\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_row(self, row_category: str, row: List[float]):\n \"\"\"Logs a confusion matrix row.\n\n Args:\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n\n Raises:\n ValueError: If row_category is not in the list of categories\n set in set_categories call.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if len(row) != len(self._categories):\n raise ValueError('Invalid row. Expected size: {} got: {}'.\\\n format(len(self._categories), len(row)))\n\n self._matrix[self._categories.index(row_category)] = {'row': row}\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_cell(self, row_category: str, col_category: str,\n value: int):\n \"\"\"Logs a cell in the confusion matrix.\n\n Args:\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n\n Raises:\n ValueError: If row_category or col_category is not in the list of\n categories set in set_categories.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if col_category not in self._categories:\n raise ValueError('Invalid category: {} passed. 
Expected one of: {}'.\\\n                       format(col_category, self._categories))\n\n    self._matrix[self._categories.index(row_category)]['row'][\n        self._categories.index(col_category)] = value\n    self.metadata['confusionMatrix'] = self._confusion_matrix\n\n  def log_confusion_matrix(self, categories: List[str],\n                           matrix: List[List[int]]):\n    \"\"\"Logs a confusion matrix.\n\n    Args:\n      categories: List of the category names.\n      matrix: Complete confusion matrix.\n\n    Raises:\n      ValueError: Length of categories does not match number of rows or columns.\n    \"\"\"\n    self.set_confusion_matrix_categories(categories)\n\n    if len(matrix) != len(categories):\n      raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n        format(matrix, categories))\n\n    for index in range(len(categories)):\n      if len(matrix[index]) != len(categories):\n        raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n          format(matrix, categories))\n\n      self.log_confusion_matrix_row(categories[index], matrix[index])\n\n    self.metadata['confusionMatrix'] = self._confusion_matrix\n\n\nclass SlicedClassificationMetrics(Artifact):\n  \"\"\"Metrics class representing Sliced Classification Metrics.\n\n  Similar to ClassificationMetrics, clients using this class are expected to use\n  log methods of the class to log metrics with the difference being each log\n  method takes a slice to associate the ClassificationMetrics.\n\n  \"\"\"\n\n  TYPE_NAME = 'system.SlicedClassificationMetrics'\n\n  def __init__(self,\n               name: Optional[str] = None,\n               uri: Optional[str] = None,\n               metadata: Optional[Dict] = None):\n    super().__init__(uri=uri, name=name, metadata=metadata)\n\n  def _upsert_classification_metrics_for_slice(self, slice: str):\n    \"\"\"Upserts the classification metrics instance for a slice.\"\"\"\n    if slice not in self._sliced_metrics:\n      self._sliced_metrics[slice] = ClassificationMetrics()\n\n  def _update_metadata(self, slice: str):\n    \"\"\"Updates metadata to adhere to the metrics schema.\"\"\"\n    self.metadata = {}\n    self.metadata['evaluationSlices'] = []\n    for slice in self._sliced_metrics.keys():\n      slice_metrics = {\n          'slice': slice,\n          'sliceClassificationMetrics': self._sliced_metrics[slice].metadata\n      }\n      self.metadata['evaluationSlices'].append(slice_metrics)\n\n  def log_roc_reading(self, slice: str, threshold: float, tpr: float,\n                      fpr: float):\n    \"\"\"Logs a single data point in the ROC Curve of a slice.\n\n    Args:\n      slice: String representing slice label.\n      threshold: Threshold value for the data point.\n      tpr: True positive rate value of the data point.\n      fpr: False positive rate value of the data point.\n    \"\"\"\n\n    self._upsert_classification_metrics_for_slice(slice)\n    self._sliced_metrics[slice].log_roc_reading(threshold, tpr, fpr)\n    self._update_metadata(slice)\n\n  def load_roc_readings(self, slice: str, readings: List[List[float]]):\n    \"\"\"Supports bulk loading ROC Curve readings for a slice.\n\n    Args:\n      slice: String representing slice label.\n      readings: A 2-D list providing ROC Curve data points.\n                The expected order of the data points is: threshold,\n                true_positive_rate, false_positive_rate.\n    \"\"\"\n    self._upsert_classification_metrics_for_slice(slice)\n    self._sliced_metrics[slice].load_roc_readings(readings)\n    self._update_metadata(slice)\n\n  def set_confusion_matrix_categories(self, slice: str, categories: List[str]):\n    \"\"\"Stores confusion matrix categories for a slice.\n\n    Categories are stored in the internal metrics_utils.ConfusionMatrix\n    instance of the slice.\n\n    Args:\n      slice: String representing slice label.\n      categories: 
List of strings specifying the categories.\n    \"\"\"\n    self._upsert_classification_metrics_for_slice(slice)\n    self._sliced_metrics[slice].set_confusion_matrix_categories(categories)\n    self._update_metadata(slice)\n\n  def log_confusion_matrix_row(self, slice: str, row_category: str,\n                               row: List[int]):\n    \"\"\"Logs a confusion matrix row for a slice.\n\n    Row is updated on the internal metrics_utils.ConfusionMatrix\n    instance of the slice.\n\n    Args:\n      slice: String representing slice label.\n      row_category: Category to which the row belongs.\n      row: List of integers specifying the values for the row.\n    \"\"\"\n    self._upsert_classification_metrics_for_slice(slice)\n    self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)\n    self._update_metadata(slice)\n\n  def log_confusion_matrix_cell(self, slice: str, row_category: str,\n                                col_category: str, value: int):\n    \"\"\"Logs a confusion matrix cell for a slice.\n\n    Cell is updated on the internal metrics_utils.ConfusionMatrix\n    instance of the slice.\n\n    Args:\n      slice: String representing slice label.\n      row_category: String representing the name of the row category.\n      col_category: String representing the name of the column category.\n      value: Int value of the cell.\n    \"\"\"\n    self._upsert_classification_metrics_for_slice(slice)\n    self._sliced_metrics[slice].log_confusion_matrix_cell(\n        row_category, col_category, value)\n    self._update_metadata(slice)\n\n  def load_confusion_matrix(self, slice: str, categories: List[str],\n                            matrix: List[List[int]]):\n    \"\"\"Supports bulk loading the whole confusion matrix for a slice.\n\n    Args:\n      slice: String representing slice label.\n      categories: List of the category names.\n      matrix: Complete confusion matrix.\n    \"\"\"\n    self._upsert_classification_metrics_for_slice(slice)\n    self._sliced_metrics[slice].log_confusion_matrix(categories, matrix)\n    self._update_metadata(slice)\n\n\nclass HTML(Artifact):\n  \"\"\"An artifact representing an HTML file.\"\"\"\n  TYPE_NAME = 'system.HTML'\n\n  def __init__(self,\n               name: Optional[str] = None,\n               uri: Optional[str] = None,\n               metadata: Optional[Dict] = None):\n    super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Markdown(Artifact):\n  \"\"\"An artifact representing a Markdown file.\"\"\"\n  TYPE_NAME = 'system.Markdown'\n\n  def __init__(self,\n               name: Optional[str] = None,\n               uri: Optional[str] = None,\n               metadata: Optional[Dict] = None):\n    super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nT = TypeVar('T')\n\n\nclass InputAnnotation():\n  \"\"\"Marker type for input artifacts.\"\"\"\n  pass\n\n\nclass OutputAnnotation():\n  \"\"\"Marker type for output artifacts.\"\"\"\n  pass\n\n\n# TODO: Use typing.Annotated instead of this hack.\n# With typing.Annotated (Python 3.9+ or typing_extensions package), the\n# following would look like:\n# Input = typing.Annotated[T, InputAnnotation]\n# Output = typing.Annotated[T, OutputAnnotation]\n\n# Input represents an Input artifact of type T.\nInput = Union[T, InputAnnotation]\n\n# Output represents an Output artifact of type T.\nOutput = Union[T, OutputAnnotation]\n\n\ndef is_artifact_annotation(typ) -> bool:\n  if hasattr(typ, '_subs_tree'):  # Python 3.6\n    subs_tree = typ._subs_tree()\n    return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [\n        InputAnnotation, OutputAnnotation\n    ]\n\n  if not hasattr(typ, '__origin__'):\n    return False\n\n  if typ.__origin__ != Union and type(typ.__origin__) != type(Union):\n    return False\n\n  if not hasattr(typ, '__args__') or len(typ.__args__) != 2:\n    return 
False\n\n if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:\n return False\n\n return True\n\n\ndef is_input_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Input[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation\n\n return typ.__args__[1] == InputAnnotation\n\n\ndef is_output_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Output[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation\n\n return typ.__args__[1] == OutputAnnotation\n\n\ndef get_io_artifact_class(typ):\n if not is_artifact_annotation(typ):\n return None\n if typ == Input or typ == Output:\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[1]\n\n return typ.__args__[0]\n\n\ndef get_io_artifact_annotation(typ):\n if not is_artifact_annotation(typ):\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[2]\n\n return typ.__args__[1]\n\n\n_SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = {\n x.TYPE_NAME: x\n for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]\n}\n\n\ndef create_runtime_artifact(runtime_artifact: Dict) -> Artifact:\n \"\"\"Creates an Artifact instance from the specified RuntimeArtifact.\n\n Args:\n runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.\n \"\"\"\n schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')\n\n artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)\n if not artifact_type:\n artifact_type = Artifact\n return artifact_type(\n uri=runtime_artifact.get('uri', ''),\n name=runtime_artifact.get('name', ''),\n metadata=runtime_artifact.get('metadata', {}),\n )\n\nclass InputPath:\n '''When creating component from function, :class:`.InputPath` should be used as function parameter annotation to tell the system to pass the *data file path* to the function instead of passing the actual data.'''\n def __init__(self, type=None):\n self.type = type\n\nclass OutputPath:\n '''When creating component from function, :class:`.OutputPath` should be used as function parameter annotation to tell the system that the function wants to output data by writing it into a file with the given path instead of returning the data from the function.'''\n def __init__(self, type=None):\n self.type = type\n\nclass Executor():\n \"\"\"Executor executes v2-based Python function components.\"\"\"\n\n def __init__(self, executor_input: Dict, function_to_execute: Callable):\n self._func = function_to_execute\n self._input = executor_input\n self._input_artifacts: Dict[str, Artifact] = {}\n self._output_artifacts: Dict[str, Artifact] = {}\n\n for name, artifacts in self._input.get('inputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._input_artifacts[name] = self._make_input_artifact(\n artifacts_list[0])\n\n for name, artifacts in self._input.get('outputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._output_artifacts[name] = self._make_output_artifact(\n artifacts_list[0])\n\n self._return_annotation = 
inspect.signature(self._func).return_annotation\n    self._executor_output = {}\n\n  @classmethod\n  def _make_input_artifact(cls, runtime_artifact: Dict):\n    return create_runtime_artifact(runtime_artifact)\n\n  @classmethod\n  def _make_output_artifact(cls, runtime_artifact: Dict):\n    import os\n    artifact = create_runtime_artifact(runtime_artifact)\n    os.makedirs(os.path.dirname(artifact.path), exist_ok=True)\n    return artifact\n\n  def _get_input_artifact(self, name: str):\n    return self._input_artifacts.get(name)\n\n  def _get_output_artifact(self, name: str):\n    return self._output_artifacts.get(name)\n\n  def _get_input_parameter_value(self, parameter_name: str, parameter_type: Any):\n    parameter = self._input.get('inputs', {}).get('parameters',\n                                                  {}).get(parameter_name, None)\n    if parameter is None:\n      return None\n\n    if parameter.get('stringValue'):\n      if parameter_type == str:\n        return parameter['stringValue']\n      elif parameter_type == bool:\n        # Use `.lower()` so it can also handle 'True' and 'False' (resulting from\n        # `str(True)` and `str(False)`, respectively).\n        return json.loads(parameter['stringValue'].lower())\n      else:\n        return json.loads(parameter['stringValue'])\n    elif parameter.get('intValue'):\n      return int(parameter['intValue'])\n    elif parameter.get('doubleValue'):\n      return float(parameter['doubleValue'])\n\n  def _get_output_parameter_path(self, parameter_name: str):\n    parameter_name = self._maybe_strip_path_suffix(parameter_name)\n    parameter = self._input.get('outputs',\n                                {}).get('parameters',\n                                        {}).get(parameter_name, None)\n    if parameter is None:\n      return None\n\n    import os\n    path = parameter.get('outputFile', None)\n    if path:\n      os.makedirs(os.path.dirname(path), exist_ok=True)\n    return path\n\n  def _get_output_artifact_path(self, artifact_name: str):\n    artifact_name = self._maybe_strip_path_suffix(artifact_name)\n    output_artifact = self._output_artifacts.get(artifact_name)\n    if not output_artifact:\n      raise ValueError(\n          'Failed to get output artifact path for artifact name {}'.format(\n              artifact_name))\n    return output_artifact.path\n\n  def _get_input_artifact_path(self, artifact_name: str):\n    artifact_name = self._maybe_strip_path_suffix(artifact_name)\n    input_artifact = self._input_artifacts.get(artifact_name)\n    if not input_artifact:\n      raise ValueError(\n          'Failed to get input artifact path for artifact name {}'.format(\n              artifact_name))\n    return input_artifact.path\n\n  def _write_output_parameter_value(self, name: str,\n                                    value: Union[str, int, float, bool, dict,\n                                                 list, Dict, List]):\n    if type(value) == str:\n      output = {'stringValue': value}\n    elif type(value) == int:\n      output = {'intValue': value}\n    elif type(value) == float:\n      output = {'doubleValue': value}\n    else:\n      # For bool, list, dict, List, Dict, json serialize the value.\n      output = {'stringValue': json.dumps(value)}\n\n    if not self._executor_output.get('parameters'):\n      self._executor_output['parameters'] = {}\n\n    self._executor_output['parameters'][name] = output\n\n  def _write_output_artifact_payload(self, name: str, value: Any):\n    path = self._get_output_artifact_path(name)\n    with open(path, 'w') as f:\n      f.write(str(value))\n\n  # TODO: extract to a util\n  @classmethod\n  def _get_short_type_name(cls, type_name: str) -> str:\n    \"\"\"Extracts the short form type name.\n\n    This method is used for looking up the serializer for a given type.\n\n    For example:\n      typing.List -> List\n      typing.List[int] -> List\n      typing.Dict[str, str] -> Dict\n      List -> List\n      str -> str\n\n    Args:\n      type_name: The original type name.\n\n    Returns:\n      The 
short form type name or the original name if pattern doesn't match.\n    \"\"\"\n    import re\n    match = re.match('(typing\\.)?(?P<type>\\w+)(?:\\[.+\\])?', type_name)\n    if match:\n      return match.group('type')\n    else:\n      return type_name\n\n  # TODO: merge with type_utils.is_parameter_type\n  @classmethod\n  def _is_parameter(cls, annotation: Any) -> bool:\n    if type(annotation) == type:\n      return annotation in [str, int, float, bool, dict, list]\n\n    # Annotation could be, for instance `typing.Dict[str, str]`, etc.\n    return cls._get_short_type_name(str(annotation)) in ['Dict', 'List']\n\n  @classmethod\n  def _is_artifact(cls, annotation: Any) -> bool:\n    if type(annotation) == type:\n      return issubclass(annotation, Artifact)\n    return False\n\n  @classmethod\n  def _is_named_tuple(cls, annotation: Any) -> bool:\n    if type(annotation) == type:\n      return issubclass(annotation, tuple) and hasattr(\n          annotation, '_fields') and hasattr(annotation, '__annotations__')\n    return False\n\n  def _handle_single_return_value(self, output_name: str, annotation_type: Any,\n                                  return_value: Any):\n    if self._is_parameter(annotation_type):\n      if type(return_value) != annotation_type:\n        raise ValueError(\n            'Function `{}` returned value of type {}; want type {}'.format(\n                self._func.__name__, type(return_value), annotation_type))\n      self._write_output_parameter_value(output_name, return_value)\n    elif self._is_artifact(annotation_type):\n      self._write_output_artifact_payload(output_name, return_value)\n    else:\n      raise RuntimeError(\n          'Unknown return type: {}. Must be one of `str`, `int`, `float`, or a'\n          ' subclass of `Artifact`'.format(annotation_type))\n\n  def _write_executor_output(self, func_output: Optional[Any] = None):\n    if self._output_artifacts:\n      self._executor_output['artifacts'] = {}\n\n    for name, artifact in self._output_artifacts.items():\n      runtime_artifact = {\n          'name': artifact.name,\n          'uri': artifact.uri,\n          'metadata': artifact.metadata,\n      }\n      artifacts_list = {'artifacts': [runtime_artifact]}\n\n      self._executor_output['artifacts'][name] = artifacts_list\n\n    if func_output is not None:\n      if self._is_parameter(self._return_annotation) or self._is_artifact(\n          self._return_annotation):\n        # Note: single output is named `Output` in component.yaml.\n        self._handle_single_return_value('Output', self._return_annotation,\n                                         func_output)\n      elif self._is_named_tuple(self._return_annotation):\n        if len(self._return_annotation._fields) != len(func_output):\n          raise RuntimeError(\n              'Expected {} return values from function `{}`, got {}'.format(\n                  len(self._return_annotation._fields), self._func.__name__,\n                  len(func_output)))\n        for i in range(len(self._return_annotation._fields)):\n          field = self._return_annotation._fields[i]\n          field_type = self._return_annotation.__annotations__[field]\n          if type(func_output) == tuple:\n            field_value = func_output[i]\n          else:\n            field_value = getattr(func_output, field)\n          self._handle_single_return_value(field, field_type, field_value)\n      else:\n        raise RuntimeError(\n            'Unknown return type: {}. 
Must be one of `str`, `int`, `float`, a'\n ' subclass of `Artifact`, or a NamedTuple collection of these types.'\n .format(self._return_annotation))\n\n import os\n os.makedirs(\n os.path.dirname(self._input['outputs']['outputFile']), exist_ok=True)\n with open(self._input['outputs']['outputFile'], 'w') as f:\n f.write(json.dumps(self._executor_output))\n\n def _maybe_strip_path_suffix(self, name) -> str:\n if name.endswith('_path'):\n name = name[0:-len('_path')]\n if name.endswith('_file'):\n name = name[0:-len('_file')]\n return name\n\n def execute(self):\n annotations = inspect.getfullargspec(self._func).annotations\n\n # Function arguments.\n func_kwargs = {}\n\n for k, v in annotations.items():\n if k == 'return':\n continue\n\n if self._is_parameter(v):\n func_kwargs[k] = self._get_input_parameter_value(k, v)\n\n if is_artifact_annotation(v):\n if is_input_artifact(v):\n func_kwargs[k] = self._get_input_artifact(k)\n if is_output_artifact(v):\n func_kwargs[k] = self._get_output_artifact(k)\n\n elif isinstance(v, OutputPath):\n if self._is_parameter(v.type):\n func_kwargs[k] = self._get_output_parameter_path(k)\n else:\n func_kwargs[k] = self._get_output_artifact_path(k)\n elif isinstance(v, InputPath):\n func_kwargs[k] = self._get_input_artifact_path(k)\n\n result = self._func(**func_kwargs)\n self._write_executor_output(result)\n\n\ndef concat_message(first: str, second: str) -> str:\n return first + second\n\n\ndef executor_main():\n import argparse\n import json\n\n parser = argparse.ArgumentParser(description='Process some integers.')\n parser.add_argument('--executor_input', type=str)\n parser.add_argument('--function_to_execute', type=str)\n\n args, _ = parser.parse_known_args()\n executor_input = json.loads(args.executor_input)\n function_to_execute = globals()[args.function_to_execute]\n\n executor = Executor(executor_input=executor_input,\n function_to_execute=function_to_execute)\n\n executor.execute()\n\n\nif __name__ == '__main__':\n executor_main()\n" ], "image": "python:3.7" } @@ -161,7 +161,7 @@ "sh", "-ec", "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n", - "\nimport json\nimport inspect\nfrom typing import *\n\n# Copyright 2021 The Kubeflow Authors\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Classes for input/output types in KFP SDK.\n\nThese are only compatible with v2 Pipelines.\n\"\"\"\n\nimport os\nfrom typing import Dict, Generic, List, Optional, Type, TypeVar, Union\n\n\n_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'\n_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'\n_S3_LOCAL_MOUNT_PREFIX = '/s3/'\n\n\nclass Artifact(object):\n \"\"\"Generic Artifact class.\n\n This class is meant to represent the metadata around an input or output\n machine-learning Artifact. 
Artifacts have URIs, which can either be a location\n on disk (or Cloud storage) or some other resource identifier such as\n an API resource name.\n\n Artifacts carry a `metadata` field, which is a dictionary for storing\n metadata related to this artifact.\n \"\"\"\n TYPE_NAME = 'system.Artifact'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n \"\"\"Initializes the Artifact with the given name, URI and metadata.\"\"\"\n self.uri = uri or ''\n self.name = name or ''\n self.metadata = metadata or {}\n\n @property\n def path(self):\n return self._get_path()\n\n @path.setter\n def path(self, path):\n self._set_path(path)\n\n def _get_path(self) -> Optional[str]:\n if self.uri.startswith('gs://'):\n return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]\n elif self.uri.startswith('minio://'):\n return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]\n elif self.uri.startswith('s3://'):\n return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]\n return None\n\n def _set_path(self, path):\n if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):\n path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):\n path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):\n path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]\n self.uri = path\n\n\nclass Model(Artifact):\n \"\"\"An artifact representing an ML Model.\"\"\"\n TYPE_NAME = 'system.Model'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n @property\n def framework(self) -> str:\n return self._get_framework()\n\n def _get_framework(self) -> str:\n return self.metadata.get('framework', '')\n\n @framework.setter\n def framework(self, framework: str):\n self._set_framework(framework)\n\n def _set_framework(self, framework: str):\n self.metadata['framework'] = framework\n\n\nclass Dataset(Artifact):\n \"\"\"An artifact representing an ML Dataset.\"\"\"\n TYPE_NAME = 'system.Dataset'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Metrics(Artifact):\n \"\"\"Represent a simple base Artifact type to store key-value scalar metrics.\"\"\"\n TYPE_NAME = 'system.Metrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_metric(self, metric: str, value: float):\n \"\"\"Sets a custom scalar metric.\n\n Args:\n metric: Metric key\n value: Value of the metric.\n \"\"\"\n self.metadata[metric] = value\n\n\nclass ClassificationMetrics(Artifact):\n \"\"\"Represents Artifact class to store Classification Metrics.\"\"\"\n TYPE_NAME = 'system.ClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):\n \"\"\"Logs a single data point in the ROC Curve.\n\n Args:\n fpr: False positive rate value of the data point.\n tpr: True positive rate value of the data point.\n threshold: Threshold value for the data point.\n \"\"\"\n\n roc_reading = {\n 'confidenceThreshold': threshold,\n 'recall': tpr,\n 
'falsePositiveRate': fpr\n }\n if 'confidenceMetrics' not in self.metadata.keys():\n self.metadata['confidenceMetrics'] = []\n\n self.metadata['confidenceMetrics'].append(roc_reading)\n\n def log_roc_curve(self, fpr: List[float], tpr: List[float],\n threshold: List[float]):\n \"\"\"Logs an ROC curve.\n\n The list length of fpr, tpr and threshold must be the same.\n\n Args:\n fpr: List of false positive rate values.\n tpr: List of true positive rate values.\n threshold: List of threshold values.\n \"\"\"\n if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(tpr) != len(\n threshold):\n raise ValueError('Length of fpr, tpr and threshold must be the same. '\n 'Got lengths {}, {} and {} respectively.'.format(\n len(fpr), len(tpr), len(threshold)))\n\n for i in range(len(fpr)):\n self.log_roc_data_point(fpr=fpr[i], tpr=tpr[i], threshold=threshold[i])\n\n def set_confusion_matrix_categories(self, categories: List[str]):\n \"\"\"Stores confusion matrix categories.\n\n Args:\n categories: List of strings specifying the categories.\n \"\"\"\n\n self._categories = []\n annotation_specs = []\n for category in categories:\n annotation_spec = {'displayName': category}\n self._categories.append(category)\n annotation_specs.append(annotation_spec)\n\n self._matrix = []\n for row in range(len(self._categories)):\n self._matrix.append({'row': [0] * len(self._categories)})\n\n self._confusion_matrix = {}\n self._confusion_matrix['annotationSpecs'] = annotation_specs\n self._confusion_matrix['rows'] = self._matrix\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_row(self, row_category: str, row: List[float]):\n \"\"\"Logs a confusion matrix row.\n\n Args:\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n\n Raises:\n ValueError: If row_category is not in the list of categories\n set in set_categories call.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if len(row) != len(self._categories):\n raise ValueError('Invalid row. Expected size: {} got: {}'.\\\n format(len(self._categories), len(row)))\n\n self._matrix[self._categories.index(row_category)] = {'row': row}\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_cell(self, row_category: str, col_category: str,\n value: int):\n \"\"\"Logs a cell in the confusion matrix.\n\n Args:\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n\n Raises:\n ValueError: If row_category or col_category is not in the list of\n categories set in set_categories.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if col_category not in self._categories:\n raise ValueError('Invalid category: {} passed. 
Expected one of: {}'.\\\n format(row_category, self._categories))\n\n self._matrix[self._categories.index(row_category)]['row'][\n self._categories.index(col_category)] = value\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix(self, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Logs a confusion matrix.\n\n Args:\n categories: List of the category names.\n matrix: Complete confusion matrix.\n\n Raises:\n ValueError: Length of categories does not match number of rows or columns.\n \"\"\"\n self.set_confusion_matrix_categories(categories)\n\n if len(matrix) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n for index in range(len(categories)):\n if len(matrix[index]) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n self.log_confusion_matrix_row(categories[index], matrix[index])\n\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n\nclass SlicedClassificationMetrics(Artifact):\n \"\"\"Metrics class representing Sliced Classification Metrics.\n\n Similar to ClassificationMetrics clients using this class are expected to use\n log methods of the class to log metrics with the difference being each log\n method takes a slice to associate the ClassificationMetrics.\n\n \"\"\"\n\n TYPE_NAME = 'system.SlicedClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def _upsert_classification_metrics_for_slice(self, slice: str):\n \"\"\"Upserts the classification metrics instance for a slice.\"\"\"\n if slice not in self._sliced_metrics:\n self._sliced_metrics[slice] = ClassificationMetrics()\n\n def _update_metadata(self, slice: str):\n \"\"\"Updates metadata to adhere to the metrics schema.\"\"\"\n self.metadata = {}\n self.metadata['evaluationSlices'] = []\n for slice in self._sliced_metrics.keys():\n slice_metrics = {\n 'slice': slice,\n 'sliceClassificationMetrics': self._sliced_metrics[slice].metadata\n }\n self.metadata['evaluationSlices'].append(slice_metrics)\n\n def log_roc_reading(self, slice: str, threshold: float, tpr: float,\n fpr: float):\n \"\"\"Logs a single data point in the ROC Curve of a slice.\n\n Args:\n slice: String representing slice label.\n threshold: Thresold value for the data point.\n tpr: True positive rate value of the data point.\n fpr: False positive rate value of the data point.\n \"\"\"\n\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_roc_reading(threshold, tpr, fpr)\n self._update_metadata(slice)\n\n def load_roc_readings(self, slice: str, readings: List[List[float]]):\n \"\"\"Supports bulk loading ROC Curve readings for a slice.\n\n Args:\n slice: String representing slice label.\n readings: A 2-D list providing ROC Curve data points.\n The expected order of the data points is: threshold,\n true_positive_rate, false_positive_rate.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].load_roc_readings(readings)\n self._update_metadata(slice)\n\n def set_confusion_matrix_categories(self, slice: str, categories: List[str]):\n \"\"\"Stores confusion matrix categories for a slice..\n\n Categories are stored in the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n categories: 
List of strings specifying the categories.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].set_confusion_matrix_categories(categories)\n self._update_metadata(slice)\n\n def log_confusion_matrix_row(self, slice: str, row_category: str,\n row: List[int]):\n \"\"\"Logs a confusion matrix row for a slice.\n\n Row is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)\n self._update_metadata(slice)\n\n def log_confusion_matrix_cell(self, slice: str, row_category: str,\n col_category: str, value: int):\n \"\"\"Logs a confusion matrix cell for a slice..\n\n Cell is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(\n row_category, col_category, value)\n self._update_metadata(slice)\n\n def load_confusion_matrix(self, slice: str, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Supports bulk loading the whole confusion matrix for a slice.\n\n Args:\n slice: String representing slice label.\n categories: List of the category names.\n matrix: Complete confusion matrix.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(categories, matrix)\n self._update_metadata(slice)\n\n\nT = TypeVar('T')\n\n\nclass InputAnnotation():\n \"\"\"Marker type for input artifacts.\"\"\"\n pass\n\n\n\nclass OutputAnnotation():\n \"\"\"Marker type for output artifacts.\"\"\"\n pass\n\n\n# TODO: Use typing.Annotated instead of this hack.\n# With typing.Annotated (Python 3.9+ or typing_extensions package), the\n# following would look like:\n# Input = typing.Annotated[T, InputAnnotation]\n# Output = typing.Annotated[T, OutputAnnotation]\n\n\n# Input represents an Input artifact of type T.\nInput = Union[T, InputAnnotation]\n\n# Output represents an Output artifact of type T.\nOutput = Union[T, OutputAnnotation]\n\n\ndef is_artifact_annotation(typ) -> bool:\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [InputAnnotation, OutputAnnotation]\n\n if not hasattr(typ, '__origin__'):\n return False\n\n\n if typ.__origin__ != Union and type(typ.__origin__) != type(Union):\n return False\n\n\n if not hasattr(typ, '__args__') or len(typ.__args__) != 2:\n return False\n\n if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:\n return False\n\n return True\n\ndef is_input_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Input[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation\n\n return typ.__args__[1] == InputAnnotation\n\ndef is_output_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Output[T].\"\"\"\n if not is_artifact_annotation(typ):\n 
return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation\n\n return typ.__args__[1] == OutputAnnotation\n\ndef get_io_artifact_class(typ):\n if not is_artifact_annotation(typ):\n return None\n if typ == Input or typ == Output:\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[1]\n\n return typ.__args__[0]\n\ndef get_io_artifact_annotation(typ):\n if not is_artifact_annotation(typ):\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[2]\n\n return typ.__args__[1]\n\n\n\n_SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = {\n x.TYPE_NAME: x\n for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]\n}\n\n\ndef create_runtime_artifact(runtime_artifact: Dict) -> Artifact:\n \"\"\"Creates an Artifact instance from the specified RuntimeArtifact.\n\n Args:\n runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.\n \"\"\"\n schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')\n\n artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)\n if not artifact_type:\n artifact_type = Artifact\n return artifact_type(\n uri=runtime_artifact.get('uri', ''),\n name=runtime_artifact.get('name', ''),\n metadata=runtime_artifact.get('metadata', {}),\n )\n\nclass InputPath:\n '''When creating component from function, :class:`.InputPath` should be used as function parameter annotation to tell the system to pass the *data file path* to the function instead of passing the actual data.'''\n def __init__(self, type=None):\n self.type = type\n\nclass OutputPath:\n '''When creating component from function, :class:`.OutputPath` should be used as function parameter annotation to tell the system that the function wants to output data by writing it into a file with the given path instead of returning the data from the function.'''\n def __init__(self, type=None):\n self.type = type\n\nclass Executor():\n \"\"\"Executor executes v2-based Python function components.\"\"\"\n\n def __init__(self, executor_input: Dict, function_to_execute: Callable):\n self._func = function_to_execute\n self._input = executor_input\n self._input_artifacts: Dict[str, Artifact] = {}\n self._output_artifacts: Dict[str, Artifact] = {}\n\n for name, artifacts in self._input.get('inputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._input_artifacts[name] = self._make_input_artifact(\n artifacts_list[0])\n\n for name, artifacts in self._input.get('outputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._output_artifacts[name] = self._make_output_artifact(\n artifacts_list[0])\n\n self._return_annotation = inspect.signature(self._func).return_annotation\n self._executor_output = {}\n\n @classmethod\n def _make_input_artifact(cls, runtime_artifact: Dict):\n return create_runtime_artifact(runtime_artifact)\n\n @classmethod\n def _make_output_artifact(cls, runtime_artifact: Dict):\n import os\n artifact = create_runtime_artifact(runtime_artifact)\n os.makedirs(os.path.dirname(artifact.path), exist_ok=True)\n return artifact\n\n def _get_input_artifact(self, name: str):\n return self._input_artifacts.get(name)\n\n def _get_output_artifact(self, name: str):\n return 
self._output_artifacts.get(name)\n\n  def _get_input_parameter_value(self, parameter_name: str, parameter_type: Any):\n    parameter = self._input.get('inputs', {}).get('parameters',\n                                                  {}).get(parameter_name, None)\n    if parameter is None:\n      return None\n\n    if parameter.get('stringValue'):\n      if parameter_type == str:\n        return parameter['stringValue']\n      elif parameter_type == bool:\n        # Use `.lower()` so it can also handle 'True' and 'False' (resulted from\n        # `str(True)` and `str(False)`, respectively.\n        return json.loads(parameter['stringValue'].lower())\n      else:\n        return json.loads(parameter['stringValue'])\n    elif parameter.get('intValue'):\n      return int(parameter['intValue'])\n    elif parameter.get('doubleValue'):\n      return float(parameter['doubleValue'])\n\n  def _get_output_parameter_path(self, parameter_name: str):\n    parameter_name = self._maybe_strip_path_suffix(parameter_name)\n    parameter = self._input.get('outputs',\n                                {}).get('parameters',\n                                        {}).get(parameter_name, None)\n    if parameter is None:\n      return None\n\n    import os\n    path = parameter.get('outputFile', None)\n    if path:\n      os.makedirs(os.path.dirname(path), exist_ok=True)\n    return path\n\n  def _get_output_artifact_path(self, artifact_name: str):\n    artifact_name = self._maybe_strip_path_suffix(artifact_name)\n    output_artifact = self._output_artifacts.get(artifact_name)\n    if not output_artifact:\n      raise ValueError(\n          'Failed to get output artifact path for artifact name {}'.format(\n              artifact_name))\n    return output_artifact.path\n\n  def _get_input_artifact_path(self, artifact_name: str):\n    artifact_name = self._maybe_strip_path_suffix(artifact_name)\n    input_artifact = self._input_artifacts.get(artifact_name)\n    if not input_artifact:\n      raise ValueError(\n          'Failed to get input artifact path for artifact name {}'.format(\n              artifact_name))\n    return input_artifact.path\n\n  def _write_output_parameter_value(self, name: str,\n                                    value: Union[str, int, float, bool, dict,\n                                                 list, Dict, List]):\n    if type(value) == str:\n      output = {'stringValue': value}\n    elif type(value) == int:\n      output = {'intValue': value}\n    elif type(value) == float:\n      output = {'doubleValue': value}\n    else:\n      # For bool, list, dict, List, Dict, json serialize the value.\n      output = {'stringValue': json.dumps(value)}\n\n    if not self._executor_output.get('parameters'):\n      self._executor_output['parameters'] = {}\n\n    self._executor_output['parameters'][name] = output\n\n  def _write_output_artifact_payload(self, name: str, value: Any):\n    path = self._get_output_artifact_path(name)\n    with open(path, 'w') as f:\n      f.write(str(value))\n\n  # TODO: extract to a util\n  @classmethod\n  def _get_short_type_name(cls, type_name: str) -> str:\n    \"\"\"Extracts the short form type name.\n\n    This method is used for looking up serializer for a given type.\n\n    For example:\n      typing.List -> List\n      typing.List[int] -> List\n      typing.Dict[str, str] -> Dict\n      List -> List\n      str -> str\n\n    Args:\n      type_name: The original type name.\n\n    Returns:\n      The short form type name or the original name if pattern doesn't match.\n    \"\"\"\n    import re\n    match = re.match('(typing\\.)?(?P<type>\\w+)(?:\\[.+\\])?', type_name)\n    if match:\n      return match.group('type')\n    else:\n      return type_name\n\n  # TODO: merge with type_utils.is_parameter_type\n  @classmethod\n  def _is_parameter(cls, annotation: Any) -> bool:\n    if type(annotation) == type:\n      return annotation in [str, int, float, bool, dict, list]\n\n    # Annotation could be, for instance `typing.Dict[str, str]`, etc.\n    return cls._get_short_type_name(str(annotation)) in ['Dict', 
'List']\n\n @classmethod\n def _is_artifact(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, Artifact)\n return False\n\n @classmethod\n def _is_named_tuple(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, tuple) and hasattr(\n annotation, '_fields') and hasattr(annotation, '__annotations__')\n return False\n\n def _handle_single_return_value(self, output_name: str, annotation_type: Any,\n return_value: Any):\n if self._is_parameter(annotation_type):\n if type(return_value) != annotation_type:\n raise ValueError(\n 'Function `{}` returned value of type {}; want type {}'.format(\n self._func.__name__, type(return_value), annotation_type))\n self._write_output_parameter_value(output_name, return_value)\n elif self._is_artifact(annotation_type):\n self._write_output_artifact_payload(output_name, return_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. Must be one of `str`, `int`, `float`, or a'\n ' subclass of `Artifact`'.format(annotation_type))\n\n def _write_executor_output(self, func_output: Optional[Any] = None):\n if self._output_artifacts:\n self._executor_output['artifacts'] = {}\n\n for name, artifact in self._output_artifacts.items():\n runtime_artifact = {\n 'name': artifact.name,\n 'uri': artifact.uri,\n 'metadata': artifact.metadata,\n }\n artifacts_list = {'artifacts': [runtime_artifact]}\n\n self._executor_output['artifacts'][name] = artifacts_list\n\n if func_output is not None:\n if self._is_parameter(self._return_annotation) or self._is_artifact(\n self._return_annotation):\n # Note: single output is named `Output` in component.yaml.\n self._handle_single_return_value('Output', self._return_annotation,\n func_output)\n elif self._is_named_tuple(self._return_annotation):\n if len(self._return_annotation._fields) != len(func_output):\n raise RuntimeError(\n 'Expected {} return values from function `{}`, got {}'.format(\n len(self._return_annotation._fields), self._func.__name__,\n len(func_output)))\n for i in range(len(self._return_annotation._fields)):\n field = self._return_annotation._fields[i]\n field_type = self._return_annotation.__annotations__[field]\n if type(func_output) == tuple:\n field_value = func_output[i]\n else:\n field_value = getattr(func_output, field)\n self._handle_single_return_value(field, field_type, field_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. 
Must be one of `str`, `int`, `float`, a'\n ' subclass of `Artifact`, or a NamedTuple collection of these types.'\n .format(self._return_annotation))\n\n import os\n os.makedirs(\n os.path.dirname(self._input['outputs']['outputFile']), exist_ok=True)\n with open(self._input['outputs']['outputFile'], 'w') as f:\n f.write(json.dumps(self._executor_output))\n\n def _maybe_strip_path_suffix(self, name) -> str:\n if name.endswith('_path'):\n name = name[0:-len('_path')]\n if name.endswith('_file'):\n name = name[0:-len('_file')]\n return name\n\n def execute(self):\n annotations = inspect.getfullargspec(self._func).annotations\n\n # Function arguments.\n func_kwargs = {}\n\n for k, v in annotations.items():\n if k == 'return':\n continue\n\n if self._is_parameter(v):\n func_kwargs[k] = self._get_input_parameter_value(k, v)\n\n if is_artifact_annotation(v):\n if is_input_artifact(v):\n func_kwargs[k] = self._get_input_artifact(k)\n if is_output_artifact(v):\n func_kwargs[k] = self._get_output_artifact(k)\n\n elif isinstance(v, OutputPath):\n if self._is_parameter(v.type):\n func_kwargs[k] = self._get_output_parameter_path(k)\n else:\n func_kwargs[k] = self._get_output_artifact_path(k)\n elif isinstance(v, InputPath):\n func_kwargs[k] = self._get_input_artifact_path(k)\n\n result = self._func(**func_kwargs)\n self._write_executor_output(result)\n\n\ndef output_artifact(number: int, message: str) -> Dataset:\n result = [message for _ in range(number)]\n return '\\n'.join(result)\n\n\ndef executor_main():\n import argparse\n import json\n\n parser = argparse.ArgumentParser(description='Process some integers.')\n parser.add_argument('--executor_input', type=str)\n parser.add_argument('--function_to_execute', type=str)\n\n args, _ = parser.parse_known_args()\n executor_input = json.loads(args.executor_input)\n function_to_execute = globals()[args.function_to_execute]\n\n executor = Executor(executor_input=executor_input,\n function_to_execute=function_to_execute)\n\n executor.execute()\n\n\nif __name__ == '__main__':\n executor_main()\n" + "\nimport json\nimport inspect\nfrom typing import *\n\n# Copyright 2021 The Kubeflow Authors\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Classes for input/output types in KFP SDK.\n\nThese are only compatible with v2 Pipelines.\n\"\"\"\n\nimport os\nfrom typing import Dict, Generic, List, Optional, Type, TypeVar, Union\n\n_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'\n_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'\n_S3_LOCAL_MOUNT_PREFIX = '/s3/'\n\n\nclass Artifact(object):\n \"\"\"Generic Artifact class.\n\n This class is meant to represent the metadata around an input or output\n machine-learning Artifact. 
Artifacts have URIs, which can either be a location\n on disk (or Cloud storage) or some other resource identifier such as\n an API resource name.\n\n Artifacts carry a `metadata` field, which is a dictionary for storing\n metadata related to this artifact.\n \"\"\"\n TYPE_NAME = 'system.Artifact'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n \"\"\"Initializes the Artifact with the given name, URI and metadata.\"\"\"\n self.uri = uri or ''\n self.name = name or ''\n self.metadata = metadata or {}\n\n @property\n def path(self):\n return self._get_path()\n\n @path.setter\n def path(self, path):\n self._set_path(path)\n\n def _get_path(self) -> Optional[str]:\n if self.uri.startswith('gs://'):\n return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]\n elif self.uri.startswith('minio://'):\n return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]\n elif self.uri.startswith('s3://'):\n return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]\n return None\n\n def _set_path(self, path):\n if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):\n path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):\n path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):\n path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]\n self.uri = path\n\n\nclass Model(Artifact):\n \"\"\"An artifact representing an ML Model.\"\"\"\n TYPE_NAME = 'system.Model'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n @property\n def framework(self) -> str:\n return self._get_framework()\n\n def _get_framework(self) -> str:\n return self.metadata.get('framework', '')\n\n @framework.setter\n def framework(self, framework: str):\n self._set_framework(framework)\n\n def _set_framework(self, framework: str):\n self.metadata['framework'] = framework\n\n\nclass Dataset(Artifact):\n \"\"\"An artifact representing an ML Dataset.\"\"\"\n TYPE_NAME = 'system.Dataset'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Metrics(Artifact):\n \"\"\"Represent a simple base Artifact type to store key-value scalar metrics.\"\"\"\n TYPE_NAME = 'system.Metrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_metric(self, metric: str, value: float):\n \"\"\"Sets a custom scalar metric.\n\n Args:\n metric: Metric key\n value: Value of the metric.\n \"\"\"\n self.metadata[metric] = value\n\n\nclass ClassificationMetrics(Artifact):\n \"\"\"Represents Artifact class to store Classification Metrics.\"\"\"\n TYPE_NAME = 'system.ClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):\n \"\"\"Logs a single data point in the ROC Curve.\n\n Args:\n fpr: False positive rate value of the data point.\n tpr: True positive rate value of the data point.\n threshold: Threshold value for the data point.\n \"\"\"\n\n roc_reading = {\n 'confidenceThreshold': threshold,\n 'recall': tpr,\n 
'falsePositiveRate': fpr\n }\n if 'confidenceMetrics' not in self.metadata.keys():\n self.metadata['confidenceMetrics'] = []\n\n self.metadata['confidenceMetrics'].append(roc_reading)\n\n def log_roc_curve(self, fpr: List[float], tpr: List[float],\n threshold: List[float]):\n \"\"\"Logs an ROC curve.\n\n The list length of fpr, tpr and threshold must be the same.\n\n Args:\n fpr: List of false positive rate values.\n tpr: List of true positive rate values.\n threshold: List of threshold values.\n \"\"\"\n if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(tpr) != len(\n threshold):\n raise ValueError('Length of fpr, tpr and threshold must be the same. '\n 'Got lengths {}, {} and {} respectively.'.format(\n len(fpr), len(tpr), len(threshold)))\n\n for i in range(len(fpr)):\n self.log_roc_data_point(fpr=fpr[i], tpr=tpr[i], threshold=threshold[i])\n\n def set_confusion_matrix_categories(self, categories: List[str]):\n \"\"\"Stores confusion matrix categories.\n\n Args:\n categories: List of strings specifying the categories.\n \"\"\"\n\n self._categories = []\n annotation_specs = []\n for category in categories:\n annotation_spec = {'displayName': category}\n self._categories.append(category)\n annotation_specs.append(annotation_spec)\n\n self._matrix = []\n for row in range(len(self._categories)):\n self._matrix.append({'row': [0] * len(self._categories)})\n\n self._confusion_matrix = {}\n self._confusion_matrix['annotationSpecs'] = annotation_specs\n self._confusion_matrix['rows'] = self._matrix\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_row(self, row_category: str, row: List[float]):\n \"\"\"Logs a confusion matrix row.\n\n Args:\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n\n Raises:\n ValueError: If row_category is not in the list of categories\n set in set_categories call.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if len(row) != len(self._categories):\n raise ValueError('Invalid row. Expected size: {} got: {}'.\\\n format(len(self._categories), len(row)))\n\n self._matrix[self._categories.index(row_category)] = {'row': row}\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_cell(self, row_category: str, col_category: str,\n value: int):\n \"\"\"Logs a cell in the confusion matrix.\n\n Args:\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n\n Raises:\n ValueError: If row_category or col_category is not in the list of\n categories set in set_categories.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if col_category not in self._categories:\n raise ValueError('Invalid category: {} passed. 
Expected one of: {}'.\\\n          format(row_category, self._categories))\n\n    self._matrix[self._categories.index(row_category)]['row'][\n        self._categories.index(col_category)] = value\n    self.metadata['confusionMatrix'] = self._confusion_matrix\n\n  def log_confusion_matrix(self, categories: List[str],\n                           matrix: List[List[int]]):\n    \"\"\"Logs a confusion matrix.\n\n    Args:\n      categories: List of the category names.\n      matrix: Complete confusion matrix.\n\n    Raises:\n      ValueError: Length of categories does not match number of rows or columns.\n    \"\"\"\n    self.set_confusion_matrix_categories(categories)\n\n    if len(matrix) != len(categories):\n      raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n        format(matrix, categories))\n\n    for index in range(len(categories)):\n      if len(matrix[index]) != len(categories):\n        raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n          format(matrix, categories))\n\n      self.log_confusion_matrix_row(categories[index], matrix[index])\n\n    self.metadata['confusionMatrix'] = self._confusion_matrix\n\n\nclass SlicedClassificationMetrics(Artifact):\n  \"\"\"Metrics class representing Sliced Classification Metrics.\n\n  Similar to ClassificationMetrics, clients using this class are expected to\n  use the log methods of the class to log metrics, with the difference being\n  that each log method takes a slice to associate the ClassificationMetrics.\n\n  \"\"\"\n\n  TYPE_NAME = 'system.SlicedClassificationMetrics'\n\n  def __init__(self,\n               name: Optional[str] = None,\n               uri: Optional[str] = None,\n               metadata: Optional[Dict] = None):\n    super().__init__(uri=uri, name=name, metadata=metadata)\n\n  def _upsert_classification_metrics_for_slice(self, slice: str):\n    \"\"\"Upserts the classification metrics instance for a slice.\"\"\"\n    if slice not in self._sliced_metrics:\n      self._sliced_metrics[slice] = ClassificationMetrics()\n\n  def _update_metadata(self, slice: str):\n    \"\"\"Updates metadata to adhere to the metrics schema.\"\"\"\n    self.metadata = {}\n    self.metadata['evaluationSlices'] = []\n    for slice in self._sliced_metrics.keys():\n      slice_metrics = {\n          'slice': slice,\n          'sliceClassificationMetrics': self._sliced_metrics[slice].metadata\n      }\n      self.metadata['evaluationSlices'].append(slice_metrics)\n\n  def log_roc_reading(self, slice: str, threshold: float, tpr: float,\n                      fpr: float):\n    \"\"\"Logs a single data point in the ROC Curve of a slice.\n\n    Args:\n      slice: String representing slice label.\n      threshold: Threshold value for the data point.\n      tpr: True positive rate value of the data point.\n      fpr: False positive rate value of the data point.\n    \"\"\"\n\n    self._upsert_classification_metrics_for_slice(slice)\n    self._sliced_metrics[slice].log_roc_reading(threshold, tpr, fpr)\n    self._update_metadata(slice)\n\n  def load_roc_readings(self, slice: str, readings: List[List[float]]):\n    \"\"\"Supports bulk loading ROC Curve readings for a slice.\n\n    Args:\n      slice: String representing slice label.\n      readings: A 2-D list providing ROC Curve data points.\n                The expected order of the data points is: threshold,\n                  true_positive_rate, false_positive_rate.\n    \"\"\"\n    self._upsert_classification_metrics_for_slice(slice)\n    self._sliced_metrics[slice].load_roc_readings(readings)\n    self._update_metadata(slice)\n\n  def set_confusion_matrix_categories(self, slice: str, categories: List[str]):\n    \"\"\"Stores confusion matrix categories for a slice.\n\n    Categories are stored in the internal metrics_utils.ConfusionMatrix\n    instance of the slice.\n\n    Args:\n      slice: String representing slice label.\n      categories: 
List of strings specifying the categories.\n    \"\"\"\n    self._upsert_classification_metrics_for_slice(slice)\n    self._sliced_metrics[slice].set_confusion_matrix_categories(categories)\n    self._update_metadata(slice)\n\n  def log_confusion_matrix_row(self, slice: str, row_category: str,\n                               row: List[int]):\n    \"\"\"Logs a confusion matrix row for a slice.\n\n    Row is updated on the internal metrics_utils.ConfusionMatrix\n    instance of the slice.\n\n    Args:\n      slice: String representing slice label.\n      row_category: Category to which the row belongs.\n      row: List of integers specifying the values for the row.\n    \"\"\"\n    self._upsert_classification_metrics_for_slice(slice)\n    self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)\n    self._update_metadata(slice)\n\n  def log_confusion_matrix_cell(self, slice: str, row_category: str,\n                                col_category: str, value: int):\n    \"\"\"Logs a confusion matrix cell for a slice.\n\n    Cell is updated on the internal metrics_utils.ConfusionMatrix\n    instance of the slice.\n\n    Args:\n      slice: String representing slice label.\n      row_category: String representing the name of the row category.\n      col_category: String representing the name of the column category.\n      value: Int value of the cell.\n    \"\"\"\n    self._upsert_classification_metrics_for_slice(slice)\n    self._sliced_metrics[slice].log_confusion_matrix_cell(\n        row_category, col_category, value)\n    self._update_metadata(slice)\n\n  def load_confusion_matrix(self, slice: str, categories: List[str],\n                            matrix: List[List[int]]):\n    \"\"\"Supports bulk loading the whole confusion matrix for a slice.\n\n    Args:\n      slice: String representing slice label.\n      categories: List of the category names.\n      matrix: Complete confusion matrix.\n    \"\"\"\n    self._upsert_classification_metrics_for_slice(slice)\n    self._sliced_metrics[slice].log_confusion_matrix(categories, matrix)\n    self._update_metadata(slice)\n\n\nclass HTML(Artifact):\n  \"\"\"An artifact representing an HTML file.\"\"\"\n  TYPE_NAME = 'system.HTML'\n\n  def __init__(self,\n               name: Optional[str] = None,\n               uri: Optional[str] = None,\n               metadata: Optional[Dict] = None):\n    super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Markdown(Artifact):\n  \"\"\"An artifact representing a Markdown file.\"\"\"\n  TYPE_NAME = 'system.Markdown'\n\n  def __init__(self,\n               name: Optional[str] = None,\n               uri: Optional[str] = None,\n               metadata: Optional[Dict] = None):\n    super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nT = TypeVar('T')\n\n\nclass InputAnnotation():\n  \"\"\"Marker type for input artifacts.\"\"\"\n  pass\n\n\nclass OutputAnnotation():\n  \"\"\"Marker type for output artifacts.\"\"\"\n  pass\n\n\n# TODO: Use typing.Annotated instead of this hack.\n# With typing.Annotated (Python 3.9+ or typing_extensions package), the\n# following would look like:\n# Input = typing.Annotated[T, InputAnnotation]\n# Output = typing.Annotated[T, OutputAnnotation]\n\n# Input represents an Input artifact of type T.\nInput = Union[T, InputAnnotation]\n\n# Output represents an Output artifact of type T.\nOutput = Union[T, OutputAnnotation]\n\n\ndef is_artifact_annotation(typ) -> bool:\n  if hasattr(typ, '_subs_tree'):  # Python 3.6\n    subs_tree = typ._subs_tree()\n    return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [\n        InputAnnotation, OutputAnnotation\n    ]\n\n  if not hasattr(typ, '__origin__'):\n    return False\n\n  if typ.__origin__ != Union and type(typ.__origin__) != type(Union):\n    return False\n\n  if not hasattr(typ, '__args__') or len(typ.__args__) != 2:\n    return 
False\n\n if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:\n return False\n\n return True\n\n\ndef is_input_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Input[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation\n\n return typ.__args__[1] == InputAnnotation\n\n\ndef is_output_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Output[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation\n\n return typ.__args__[1] == OutputAnnotation\n\n\ndef get_io_artifact_class(typ):\n if not is_artifact_annotation(typ):\n return None\n if typ == Input or typ == Output:\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[1]\n\n return typ.__args__[0]\n\n\ndef get_io_artifact_annotation(typ):\n if not is_artifact_annotation(typ):\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[2]\n\n return typ.__args__[1]\n\n\n_SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = {\n x.TYPE_NAME: x\n for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]\n}\n\n\ndef create_runtime_artifact(runtime_artifact: Dict) -> Artifact:\n \"\"\"Creates an Artifact instance from the specified RuntimeArtifact.\n\n Args:\n runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.\n \"\"\"\n schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')\n\n artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)\n if not artifact_type:\n artifact_type = Artifact\n return artifact_type(\n uri=runtime_artifact.get('uri', ''),\n name=runtime_artifact.get('name', ''),\n metadata=runtime_artifact.get('metadata', {}),\n )\n\nclass InputPath:\n '''When creating component from function, :class:`.InputPath` should be used as function parameter annotation to tell the system to pass the *data file path* to the function instead of passing the actual data.'''\n def __init__(self, type=None):\n self.type = type\n\nclass OutputPath:\n '''When creating component from function, :class:`.OutputPath` should be used as function parameter annotation to tell the system that the function wants to output data by writing it into a file with the given path instead of returning the data from the function.'''\n def __init__(self, type=None):\n self.type = type\n\nclass Executor():\n \"\"\"Executor executes v2-based Python function components.\"\"\"\n\n def __init__(self, executor_input: Dict, function_to_execute: Callable):\n self._func = function_to_execute\n self._input = executor_input\n self._input_artifacts: Dict[str, Artifact] = {}\n self._output_artifacts: Dict[str, Artifact] = {}\n\n for name, artifacts in self._input.get('inputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._input_artifacts[name] = self._make_input_artifact(\n artifacts_list[0])\n\n for name, artifacts in self._input.get('outputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._output_artifacts[name] = self._make_output_artifact(\n artifacts_list[0])\n\n self._return_annotation = 
inspect.signature(self._func).return_annotation\n self._executor_output = {}\n\n @classmethod\n def _make_input_artifact(cls, runtime_artifact: Dict):\n return create_runtime_artifact(runtime_artifact)\n\n @classmethod\n def _make_output_artifact(cls, runtime_artifact: Dict):\n import os\n artifact = create_runtime_artifact(runtime_artifact)\n os.makedirs(os.path.dirname(artifact.path), exist_ok=True)\n return artifact\n\n def _get_input_artifact(self, name: str):\n return self._input_artifacts.get(name)\n\n def _get_output_artifact(self, name: str):\n return self._output_artifacts.get(name)\n\n def _get_input_parameter_value(self, parameter_name: str, parameter_type: Any):\n parameter = self._input.get('inputs', {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n if parameter.get('stringValue'):\n if parameter_type == str:\n return parameter['stringValue']\n elif parameter_type == bool:\n # Use `.lower()` so it can also handle 'True' and 'False' (resulted from\n # `str(True)` and `str(False)`, respectively.\n return json.loads(parameter['stringValue'].lower())\n else:\n return json.loads(parameter['stringValue'])\n elif parameter.get('intValue'):\n return int(parameter['intValue'])\n elif parameter.get('doubleValue'):\n return float(parameter['doubleValue'])\n\n def _get_output_parameter_path(self, parameter_name: str):\n parameter_name = self._maybe_strip_path_suffix(parameter_name)\n parameter = self._input.get('outputs',\n {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n import os\n path = parameter.get('outputFile', None)\n if path:\n os.makedirs(os.path.dirname(path), exist_ok=True)\n return path\n\n def _get_output_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n output_artifact = self._output_artifacts.get(artifact_name)\n if not output_artifact:\n raise ValueError(\n 'Failed to get output artifact path for artifact name {}'.format(\n artifact_name))\n return output_artifact.path\n\n def _get_input_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n input_artifact = self._input_artifacts.get(artifact_name)\n if not input_artifact:\n raise ValueError(\n 'Failed to get input artifact path for artifact name {}'.format(\n artifact_name))\n return input_artifact.path\n\n def _write_output_parameter_value(self, name: str,\n value: Union[str, int, float, bool, dict,\n list, Dict, List]):\n if type(value) == str:\n output = {'stringValue': value}\n elif type(value) == int:\n output = {'intValue': value}\n elif type(value) == float:\n output = {'doubleValue': value}\n else:\n # For bool, list, dict, List, Dict, json serialize the value.\n output = {'stringValue': json.dumps(value)}\n\n if not self._executor_output.get('parameters'):\n self._executor_output['parameters'] = {}\n\n self._executor_output['parameters'][name] = output\n\n def _write_output_artifact_payload(self, name: str, value: Any):\n path = self._get_output_artifact_path(name)\n with open(path, 'w') as f:\n f.write(str(value))\n\n # TODO: extract to a util\n @classmethod\n def _get_short_type_name(cls, type_name: str) -> str:\n \"\"\"Extracts the short form type name.\n\n This method is used for looking up serializer for a given type.\n\n For example:\n typing.List -> List\n typing.List[int] -> List\n typing.Dict[str, str] -> Dict\n List -> List\n str -> str\n\n Args:\n type_name: The original type name.\n\n Returns:\n The 
short form type name or the original name if pattern doesn't match.\n    \"\"\"\n    import re\n    match = re.match('(typing\\.)?(?P<type>\\w+)(?:\\[.+\\])?', type_name)\n    if match:\n      return match.group('type')\n    else:\n      return type_name\n\n  # TODO: merge with type_utils.is_parameter_type\n  @classmethod\n  def _is_parameter(cls, annotation: Any) -> bool:\n    if type(annotation) == type:\n      return annotation in [str, int, float, bool, dict, list]\n\n    # Annotation could be, for instance `typing.Dict[str, str]`, etc.\n    return cls._get_short_type_name(str(annotation)) in ['Dict', 'List']\n\n  @classmethod\n  def _is_artifact(cls, annotation: Any) -> bool:\n    if type(annotation) == type:\n      return issubclass(annotation, Artifact)\n    return False\n\n  @classmethod\n  def _is_named_tuple(cls, annotation: Any) -> bool:\n    if type(annotation) == type:\n      return issubclass(annotation, tuple) and hasattr(\n          annotation, '_fields') and hasattr(annotation, '__annotations__')\n    return False\n\n  def _handle_single_return_value(self, output_name: str, annotation_type: Any,\n                                  return_value: Any):\n    if self._is_parameter(annotation_type):\n      if type(return_value) != annotation_type:\n        raise ValueError(\n            'Function `{}` returned value of type {}; want type {}'.format(\n                self._func.__name__, type(return_value), annotation_type))\n      self._write_output_parameter_value(output_name, return_value)\n    elif self._is_artifact(annotation_type):\n      self._write_output_artifact_payload(output_name, return_value)\n    else:\n      raise RuntimeError(\n          'Unknown return type: {}. Must be one of `str`, `int`, `float`, or a'\n          ' subclass of `Artifact`'.format(annotation_type))\n\n  def _write_executor_output(self, func_output: Optional[Any] = None):\n    if self._output_artifacts:\n      self._executor_output['artifacts'] = {}\n\n    for name, artifact in self._output_artifacts.items():\n      runtime_artifact = {\n          'name': artifact.name,\n          'uri': artifact.uri,\n          'metadata': artifact.metadata,\n      }\n      artifacts_list = {'artifacts': [runtime_artifact]}\n\n      self._executor_output['artifacts'][name] = artifacts_list\n\n    if func_output is not None:\n      if self._is_parameter(self._return_annotation) or self._is_artifact(\n          self._return_annotation):\n        # Note: single output is named `Output` in component.yaml.\n        self._handle_single_return_value('Output', self._return_annotation,\n                                         func_output)\n      elif self._is_named_tuple(self._return_annotation):\n        if len(self._return_annotation._fields) != len(func_output):\n          raise RuntimeError(\n              'Expected {} return values from function `{}`, got {}'.format(\n                  len(self._return_annotation._fields), self._func.__name__,\n                  len(func_output)))\n        for i in range(len(self._return_annotation._fields)):\n          field = self._return_annotation._fields[i]\n          field_type = self._return_annotation.__annotations__[field]\n          if type(func_output) == tuple:\n            field_value = func_output[i]\n          else:\n            field_value = getattr(func_output, field)\n          self._handle_single_return_value(field, field_type, field_value)\n      else:\n        raise RuntimeError(\n            'Unknown return type: {}. 
Must be one of `str`, `int`, `float`, a'\n ' subclass of `Artifact`, or a NamedTuple collection of these types.'\n .format(self._return_annotation))\n\n import os\n os.makedirs(\n os.path.dirname(self._input['outputs']['outputFile']), exist_ok=True)\n with open(self._input['outputs']['outputFile'], 'w') as f:\n f.write(json.dumps(self._executor_output))\n\n def _maybe_strip_path_suffix(self, name) -> str:\n if name.endswith('_path'):\n name = name[0:-len('_path')]\n if name.endswith('_file'):\n name = name[0:-len('_file')]\n return name\n\n def execute(self):\n annotations = inspect.getfullargspec(self._func).annotations\n\n # Function arguments.\n func_kwargs = {}\n\n for k, v in annotations.items():\n if k == 'return':\n continue\n\n if self._is_parameter(v):\n func_kwargs[k] = self._get_input_parameter_value(k, v)\n\n if is_artifact_annotation(v):\n if is_input_artifact(v):\n func_kwargs[k] = self._get_input_artifact(k)\n if is_output_artifact(v):\n func_kwargs[k] = self._get_output_artifact(k)\n\n elif isinstance(v, OutputPath):\n if self._is_parameter(v.type):\n func_kwargs[k] = self._get_output_parameter_path(k)\n else:\n func_kwargs[k] = self._get_output_artifact_path(k)\n elif isinstance(v, InputPath):\n func_kwargs[k] = self._get_input_artifact_path(k)\n\n result = self._func(**func_kwargs)\n self._write_executor_output(result)\n\n\ndef output_artifact(number: int, message: str) -> Dataset:\n result = [message for _ in range(number)]\n return '\\n'.join(result)\n\n\ndef executor_main():\n import argparse\n import json\n\n parser = argparse.ArgumentParser(description='Process some integers.')\n parser.add_argument('--executor_input', type=str)\n parser.add_argument('--function_to_execute', type=str)\n\n args, _ = parser.parse_known_args()\n executor_input = json.loads(args.executor_input)\n function_to_execute = globals()[args.function_to_execute]\n\n executor = Executor(executor_input=executor_input,\n function_to_execute=function_to_execute)\n\n executor.execute()\n\n\nif __name__ == '__main__':\n executor_main()\n" ], "image": "python:3.7" } @@ -186,7 +186,7 @@ "sh", "-ec", "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n", - "\nimport json\nimport inspect\nfrom typing import *\n\n# Copyright 2021 The Kubeflow Authors\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Classes for input/output types in KFP SDK.\n\nThese are only compatible with v2 Pipelines.\n\"\"\"\n\nimport os\nfrom typing import Dict, Generic, List, Optional, Type, TypeVar, Union\n\n\n_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'\n_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'\n_S3_LOCAL_MOUNT_PREFIX = '/s3/'\n\n\nclass Artifact(object):\n \"\"\"Generic Artifact class.\n\n This class is meant to represent the metadata around an input or output\n machine-learning Artifact. 
Artifacts have URIs, which can either be a location\n on disk (or Cloud storage) or some other resource identifier such as\n an API resource name.\n\n Artifacts carry a `metadata` field, which is a dictionary for storing\n metadata related to this artifact.\n \"\"\"\n TYPE_NAME = 'system.Artifact'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n \"\"\"Initializes the Artifact with the given name, URI and metadata.\"\"\"\n self.uri = uri or ''\n self.name = name or ''\n self.metadata = metadata or {}\n\n @property\n def path(self):\n return self._get_path()\n\n @path.setter\n def path(self, path):\n self._set_path(path)\n\n def _get_path(self) -> Optional[str]:\n if self.uri.startswith('gs://'):\n return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]\n elif self.uri.startswith('minio://'):\n return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]\n elif self.uri.startswith('s3://'):\n return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]\n return None\n\n def _set_path(self, path):\n if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):\n path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):\n path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):\n path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]\n self.uri = path\n\n\nclass Model(Artifact):\n \"\"\"An artifact representing an ML Model.\"\"\"\n TYPE_NAME = 'system.Model'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n @property\n def framework(self) -> str:\n return self._get_framework()\n\n def _get_framework(self) -> str:\n return self.metadata.get('framework', '')\n\n @framework.setter\n def framework(self, framework: str):\n self._set_framework(framework)\n\n def _set_framework(self, framework: str):\n self.metadata['framework'] = framework\n\n\nclass Dataset(Artifact):\n \"\"\"An artifact representing an ML Dataset.\"\"\"\n TYPE_NAME = 'system.Dataset'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Metrics(Artifact):\n \"\"\"Represent a simple base Artifact type to store key-value scalar metrics.\"\"\"\n TYPE_NAME = 'system.Metrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_metric(self, metric: str, value: float):\n \"\"\"Sets a custom scalar metric.\n\n Args:\n metric: Metric key\n value: Value of the metric.\n \"\"\"\n self.metadata[metric] = value\n\n\nclass ClassificationMetrics(Artifact):\n \"\"\"Represents Artifact class to store Classification Metrics.\"\"\"\n TYPE_NAME = 'system.ClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):\n \"\"\"Logs a single data point in the ROC Curve.\n\n Args:\n fpr: False positive rate value of the data point.\n tpr: True positive rate value of the data point.\n threshold: Threshold value for the data point.\n \"\"\"\n\n roc_reading = {\n 'confidenceThreshold': threshold,\n 'recall': tpr,\n 
'falsePositiveRate': fpr\n }\n if 'confidenceMetrics' not in self.metadata.keys():\n self.metadata['confidenceMetrics'] = []\n\n self.metadata['confidenceMetrics'].append(roc_reading)\n\n def log_roc_curve(self, fpr: List[float], tpr: List[float],\n threshold: List[float]):\n \"\"\"Logs an ROC curve.\n\n The list length of fpr, tpr and threshold must be the same.\n\n Args:\n fpr: List of false positive rate values.\n tpr: List of true positive rate values.\n threshold: List of threshold values.\n \"\"\"\n if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(tpr) != len(\n threshold):\n raise ValueError('Length of fpr, tpr and threshold must be the same. '\n 'Got lengths {}, {} and {} respectively.'.format(\n len(fpr), len(tpr), len(threshold)))\n\n for i in range(len(fpr)):\n self.log_roc_data_point(fpr=fpr[i], tpr=tpr[i], threshold=threshold[i])\n\n def set_confusion_matrix_categories(self, categories: List[str]):\n \"\"\"Stores confusion matrix categories.\n\n Args:\n categories: List of strings specifying the categories.\n \"\"\"\n\n self._categories = []\n annotation_specs = []\n for category in categories:\n annotation_spec = {'displayName': category}\n self._categories.append(category)\n annotation_specs.append(annotation_spec)\n\n self._matrix = []\n for row in range(len(self._categories)):\n self._matrix.append({'row': [0] * len(self._categories)})\n\n self._confusion_matrix = {}\n self._confusion_matrix['annotationSpecs'] = annotation_specs\n self._confusion_matrix['rows'] = self._matrix\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_row(self, row_category: str, row: List[float]):\n \"\"\"Logs a confusion matrix row.\n\n Args:\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n\n Raises:\n ValueError: If row_category is not in the list of categories\n set in set_categories call.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if len(row) != len(self._categories):\n raise ValueError('Invalid row. Expected size: {} got: {}'.\\\n format(len(self._categories), len(row)))\n\n self._matrix[self._categories.index(row_category)] = {'row': row}\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_cell(self, row_category: str, col_category: str,\n value: int):\n \"\"\"Logs a cell in the confusion matrix.\n\n Args:\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n\n Raises:\n ValueError: If row_category or col_category is not in the list of\n categories set in set_categories.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if col_category not in self._categories:\n raise ValueError('Invalid category: {} passed. 
Expected one of: {}'.\\\n format(row_category, self._categories))\n\n self._matrix[self._categories.index(row_category)]['row'][\n self._categories.index(col_category)] = value\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix(self, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Logs a confusion matrix.\n\n Args:\n categories: List of the category names.\n matrix: Complete confusion matrix.\n\n Raises:\n ValueError: Length of categories does not match number of rows or columns.\n \"\"\"\n self.set_confusion_matrix_categories(categories)\n\n if len(matrix) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n for index in range(len(categories)):\n if len(matrix[index]) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n self.log_confusion_matrix_row(categories[index], matrix[index])\n\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n\nclass SlicedClassificationMetrics(Artifact):\n \"\"\"Metrics class representing Sliced Classification Metrics.\n\n Similar to ClassificationMetrics clients using this class are expected to use\n log methods of the class to log metrics with the difference being each log\n method takes a slice to associate the ClassificationMetrics.\n\n \"\"\"\n\n TYPE_NAME = 'system.SlicedClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def _upsert_classification_metrics_for_slice(self, slice: str):\n \"\"\"Upserts the classification metrics instance for a slice.\"\"\"\n if slice not in self._sliced_metrics:\n self._sliced_metrics[slice] = ClassificationMetrics()\n\n def _update_metadata(self, slice: str):\n \"\"\"Updates metadata to adhere to the metrics schema.\"\"\"\n self.metadata = {}\n self.metadata['evaluationSlices'] = []\n for slice in self._sliced_metrics.keys():\n slice_metrics = {\n 'slice': slice,\n 'sliceClassificationMetrics': self._sliced_metrics[slice].metadata\n }\n self.metadata['evaluationSlices'].append(slice_metrics)\n\n def log_roc_reading(self, slice: str, threshold: float, tpr: float,\n fpr: float):\n \"\"\"Logs a single data point in the ROC Curve of a slice.\n\n Args:\n slice: String representing slice label.\n threshold: Thresold value for the data point.\n tpr: True positive rate value of the data point.\n fpr: False positive rate value of the data point.\n \"\"\"\n\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_roc_reading(threshold, tpr, fpr)\n self._update_metadata(slice)\n\n def load_roc_readings(self, slice: str, readings: List[List[float]]):\n \"\"\"Supports bulk loading ROC Curve readings for a slice.\n\n Args:\n slice: String representing slice label.\n readings: A 2-D list providing ROC Curve data points.\n The expected order of the data points is: threshold,\n true_positive_rate, false_positive_rate.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].load_roc_readings(readings)\n self._update_metadata(slice)\n\n def set_confusion_matrix_categories(self, slice: str, categories: List[str]):\n \"\"\"Stores confusion matrix categories for a slice..\n\n Categories are stored in the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n categories: 
List of strings specifying the categories.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].set_confusion_matrix_categories(categories)\n self._update_metadata(slice)\n\n def log_confusion_matrix_row(self, slice: str, row_category: str,\n row: List[int]):\n \"\"\"Logs a confusion matrix row for a slice.\n\n Row is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)\n self._update_metadata(slice)\n\n def log_confusion_matrix_cell(self, slice: str, row_category: str,\n col_category: str, value: int):\n \"\"\"Logs a confusion matrix cell for a slice..\n\n Cell is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(\n row_category, col_category, value)\n self._update_metadata(slice)\n\n def load_confusion_matrix(self, slice: str, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Supports bulk loading the whole confusion matrix for a slice.\n\n Args:\n slice: String representing slice label.\n categories: List of the category names.\n matrix: Complete confusion matrix.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(categories, matrix)\n self._update_metadata(slice)\n\n\nT = TypeVar('T')\n\n\nclass InputAnnotation():\n \"\"\"Marker type for input artifacts.\"\"\"\n pass\n\n\n\nclass OutputAnnotation():\n \"\"\"Marker type for output artifacts.\"\"\"\n pass\n\n\n# TODO: Use typing.Annotated instead of this hack.\n# With typing.Annotated (Python 3.9+ or typing_extensions package), the\n# following would look like:\n# Input = typing.Annotated[T, InputAnnotation]\n# Output = typing.Annotated[T, OutputAnnotation]\n\n\n# Input represents an Input artifact of type T.\nInput = Union[T, InputAnnotation]\n\n# Output represents an Output artifact of type T.\nOutput = Union[T, OutputAnnotation]\n\n\ndef is_artifact_annotation(typ) -> bool:\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [InputAnnotation, OutputAnnotation]\n\n if not hasattr(typ, '__origin__'):\n return False\n\n\n if typ.__origin__ != Union and type(typ.__origin__) != type(Union):\n return False\n\n\n if not hasattr(typ, '__args__') or len(typ.__args__) != 2:\n return False\n\n if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:\n return False\n\n return True\n\ndef is_input_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Input[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation\n\n return typ.__args__[1] == InputAnnotation\n\ndef is_output_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Output[T].\"\"\"\n if not is_artifact_annotation(typ):\n 
return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation\n\n return typ.__args__[1] == OutputAnnotation\n\ndef get_io_artifact_class(typ):\n if not is_artifact_annotation(typ):\n return None\n if typ == Input or typ == Output:\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[1]\n\n return typ.__args__[0]\n\ndef get_io_artifact_annotation(typ):\n if not is_artifact_annotation(typ):\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[2]\n\n return typ.__args__[1]\n\n\n\n_SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = {\n x.TYPE_NAME: x\n for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]\n}\n\n\ndef create_runtime_artifact(runtime_artifact: Dict) -> Artifact:\n \"\"\"Creates an Artifact instance from the specified RuntimeArtifact.\n\n Args:\n runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.\n \"\"\"\n schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')\n\n artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)\n if not artifact_type:\n artifact_type = Artifact\n return artifact_type(\n uri=runtime_artifact.get('uri', ''),\n name=runtime_artifact.get('name', ''),\n metadata=runtime_artifact.get('metadata', {}),\n )\n\nclass InputPath:\n '''When creating component from function, :class:`.InputPath` should be used as function parameter annotation to tell the system to pass the *data file path* to the function instead of passing the actual data.'''\n def __init__(self, type=None):\n self.type = type\n\nclass OutputPath:\n '''When creating component from function, :class:`.OutputPath` should be used as function parameter annotation to tell the system that the function wants to output data by writing it into a file with the given path instead of returning the data from the function.'''\n def __init__(self, type=None):\n self.type = type\n\nclass Executor():\n \"\"\"Executor executes v2-based Python function components.\"\"\"\n\n def __init__(self, executor_input: Dict, function_to_execute: Callable):\n self._func = function_to_execute\n self._input = executor_input\n self._input_artifacts: Dict[str, Artifact] = {}\n self._output_artifacts: Dict[str, Artifact] = {}\n\n for name, artifacts in self._input.get('inputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._input_artifacts[name] = self._make_input_artifact(\n artifacts_list[0])\n\n for name, artifacts in self._input.get('outputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._output_artifacts[name] = self._make_output_artifact(\n artifacts_list[0])\n\n self._return_annotation = inspect.signature(self._func).return_annotation\n self._executor_output = {}\n\n @classmethod\n def _make_input_artifact(cls, runtime_artifact: Dict):\n return create_runtime_artifact(runtime_artifact)\n\n @classmethod\n def _make_output_artifact(cls, runtime_artifact: Dict):\n import os\n artifact = create_runtime_artifact(runtime_artifact)\n os.makedirs(os.path.dirname(artifact.path), exist_ok=True)\n return artifact\n\n def _get_input_artifact(self, name: str):\n return self._input_artifacts.get(name)\n\n def _get_output_artifact(self, name: str):\n return 
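For orientation, the dictionary shape that `Executor.__init__` above walks with those chained `.get()` calls looks roughly like this; every concrete name and URI below is invented for illustration:

    executor_input = {
        'inputs': {
            'parameters': {'num_steps': {'intValue': '10'}},
            'artifacts': {
                'train_set': {'artifacts': [{
                    'name': 'train_set',
                    'uri': 'gs://some-bucket/train',
                    'type': {'schemaTitle': 'system.Dataset'},
                    'metadata': {},
                }]},
            },
        },
        'outputs': {
            'artifacts': {
                'model': {'artifacts': [{
                    'name': 'model',
                    'uri': 'gs://some-bucket/model',
                    'type': {'schemaTitle': 'system.Model'},
                    'metadata': {},
                }]},
            },
            'outputFile': '/tmp/kfp_outputs/output_metadata.json',
        },
    }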
self._output_artifacts.get(name)\n\n def _get_input_parameter_value(self, parameter_name: str, parameter_type: Any):\n parameter = self._input.get('inputs', {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n if parameter.get('stringValue'):\n if parameter_type == str:\n return parameter['stringValue']\n elif parameter_type == bool:\n # Use `.lower()` so it can also handle 'True' and 'False' (resulted from\n # `str(True)` and `str(False)`, respectively.\n return json.loads(parameter['stringValue'].lower())\n else:\n return json.loads(parameter['stringValue'])\n elif parameter.get('intValue'):\n return int(parameter['intValue'])\n elif parameter.get('doubleValue'):\n return float(parameter['doubleValue'])\n\n def _get_output_parameter_path(self, parameter_name: str):\n parameter_name = self._maybe_strip_path_suffix(parameter_name)\n parameter = self._input.get('outputs',\n {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n import os\n path = parameter.get('outputFile', None)\n if path:\n os.makedirs(os.path.dirname(path), exist_ok=True)\n return path\n\n def _get_output_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n output_artifact = self._output_artifacts.get(artifact_name)\n if not output_artifact:\n raise ValueError(\n 'Failed to get output artifact path for artifact name {}'.format(\n artifact_name))\n return output_artifact.path\n\n def _get_input_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n input_artifact = self._input_artifacts.get(artifact_name)\n if not input_artifact:\n raise ValueError(\n 'Failed to get input artifact path for artifact name {}'.format(\n artifact_name))\n return input_artifact.path\n\n def _write_output_parameter_value(self, name: str,\n value: Union[str, int, float, bool, dict,\n list, Dict, List]):\n if type(value) == str:\n output = {'stringValue': value}\n elif type(value) == int:\n output = {'intValue': value}\n elif type(value) == float:\n output = {'doubleValue': value}\n else:\n # For bool, list, dict, List, Dict, json serialize the value.\n output = {'stringValue': json.dumps(value)}\n\n if not self._executor_output.get('parameters'):\n self._executor_output['parameters'] = {}\n\n self._executor_output['parameters'][name] = output\n\n def _write_output_artifact_payload(self, name: str, value: Any):\n path = self._get_output_artifact_path(name)\n with open(path, 'w') as f:\n f.write(str(value))\n\n # TODO: extract to a util\n @classmethod\n def _get_short_type_name(cls, type_name: str) -> str:\n \"\"\"Extracts the short form type name.\n\n This method is used for looking up serializer for a given type.\n\n For example:\n typing.List -> List\n typing.List[int] -> List\n typing.Dict[str, str] -> Dict\n List -> List\n str -> str\n\n Args:\n type_name: The original type name.\n\n Returns:\n The short form type name or the original name if pattern doesn't match.\n \"\"\"\n import re\n match = re.match('(typing\\.)?(?P\\w+)(?:\\[.+\\])?', type_name)\n if match:\n return match.group('type')\n else:\n return type_name\n\n # TODO: merge with type_utils.is_parameter_type\n @classmethod\n def _is_parameter(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return annotation in [str, int, float, bool, dict, list]\n\n # Annotation could be, for instance `typing.Dict[str, str]`, etc.\n return cls._get_short_type_name(str(annotation)) in ['Dict', 
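One note on `_get_short_type_name` above: the embedded copy of the pattern reads `(?P\w+)`, which has lost the group name (it must be `(?P<type>\w+)` for the later `match.group('type')` call to work). A restored, runnable sketch:

    import re

    def get_short_type_name(type_name: str) -> str:
        # Named group restored relative to the copy above.
        match = re.match(r'(typing\.)?(?P<type>\w+)(?:\[.+\])?', type_name)
        return match.group('type') if match else type_name

    print(get_short_type_name('typing.Dict[str, str]'))  # Dict
    print(get_short_type_name('typing.List[int]'))       # List
    print(get_short_type_name('str'))                    # str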
'List']\n\n @classmethod\n def _is_artifact(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, Artifact)\n return False\n\n @classmethod\n def _is_named_tuple(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, tuple) and hasattr(\n annotation, '_fields') and hasattr(annotation, '__annotations__')\n return False\n\n def _handle_single_return_value(self, output_name: str, annotation_type: Any,\n return_value: Any):\n if self._is_parameter(annotation_type):\n if type(return_value) != annotation_type:\n raise ValueError(\n 'Function `{}` returned value of type {}; want type {}'.format(\n self._func.__name__, type(return_value), annotation_type))\n self._write_output_parameter_value(output_name, return_value)\n elif self._is_artifact(annotation_type):\n self._write_output_artifact_payload(output_name, return_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. Must be one of `str`, `int`, `float`, or a'\n ' subclass of `Artifact`'.format(annotation_type))\n\n def _write_executor_output(self, func_output: Optional[Any] = None):\n if self._output_artifacts:\n self._executor_output['artifacts'] = {}\n\n for name, artifact in self._output_artifacts.items():\n runtime_artifact = {\n 'name': artifact.name,\n 'uri': artifact.uri,\n 'metadata': artifact.metadata,\n }\n artifacts_list = {'artifacts': [runtime_artifact]}\n\n self._executor_output['artifacts'][name] = artifacts_list\n\n if func_output is not None:\n if self._is_parameter(self._return_annotation) or self._is_artifact(\n self._return_annotation):\n # Note: single output is named `Output` in component.yaml.\n self._handle_single_return_value('Output', self._return_annotation,\n func_output)\n elif self._is_named_tuple(self._return_annotation):\n if len(self._return_annotation._fields) != len(func_output):\n raise RuntimeError(\n 'Expected {} return values from function `{}`, got {}'.format(\n len(self._return_annotation._fields), self._func.__name__,\n len(func_output)))\n for i in range(len(self._return_annotation._fields)):\n field = self._return_annotation._fields[i]\n field_type = self._return_annotation.__annotations__[field]\n if type(func_output) == tuple:\n field_value = func_output[i]\n else:\n field_value = getattr(func_output, field)\n self._handle_single_return_value(field, field_type, field_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. 
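`_is_named_tuple` and the NamedTuple branch of `_write_executor_output` lean on two attributes that `typing.NamedTuple` classes always carry; a small self-contained illustration:

    from typing import NamedTuple

    Outputs = NamedTuple('Outputs', [('scalar', str), ('accuracy', float)])

    print(Outputs._fields)          # ('scalar', 'accuracy')
    print(Outputs.__annotations__)  # {'scalar': <class 'str'>, 'accuracy': <class 'float'>}

    result = Outputs('abc', 0.9)
    print(result[0])                    # positional access, as for plain tuples
    print(getattr(result, 'accuracy'))  # attribute access, as in the field loop above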
Must be one of `str`, `int`, `float`, a'\n ' subclass of `Artifact`, or a NamedTuple collection of these types.'\n .format(self._return_annotation))\n\n import os\n os.makedirs(\n os.path.dirname(self._input['outputs']['outputFile']), exist_ok=True)\n with open(self._input['outputs']['outputFile'], 'w') as f:\n f.write(json.dumps(self._executor_output))\n\n def _maybe_strip_path_suffix(self, name) -> str:\n if name.endswith('_path'):\n name = name[0:-len('_path')]\n if name.endswith('_file'):\n name = name[0:-len('_file')]\n return name\n\n def execute(self):\n annotations = inspect.getfullargspec(self._func).annotations\n\n # Function arguments.\n func_kwargs = {}\n\n for k, v in annotations.items():\n if k == 'return':\n continue\n\n if self._is_parameter(v):\n func_kwargs[k] = self._get_input_parameter_value(k, v)\n\n if is_artifact_annotation(v):\n if is_input_artifact(v):\n func_kwargs[k] = self._get_input_artifact(k)\n if is_output_artifact(v):\n func_kwargs[k] = self._get_output_artifact(k)\n\n elif isinstance(v, OutputPath):\n if self._is_parameter(v.type):\n func_kwargs[k] = self._get_output_parameter_path(k)\n else:\n func_kwargs[k] = self._get_output_artifact_path(k)\n elif isinstance(v, InputPath):\n func_kwargs[k] = self._get_input_artifact_path(k)\n\n result = self._func(**func_kwargs)\n self._write_executor_output(result)\n\n\ndef output_named_tuple(\n artifact: Input[Dataset]\n) -> NamedTuple('Outputs', [\n ('scalar', str),\n ('metrics', Metrics),\n ('model', Model),\n]):\n scalar = \"123\"\n\n import json\n metrics = json.dumps({\n 'metrics': [{\n 'name': 'accuracy',\n 'numberValue': 0.9,\n 'format': \"PERCENTAGE\",\n }]\n })\n\n with open(artifact.path, 'r') as f:\n artifact_contents = f.read()\n model = \"Model contents: \" + artifact_contents\n\n from collections import namedtuple\n output = namedtuple('Outputs', ['scalar', 'metrics', 'model'])\n return output(scalar, metrics, model)\n\n\ndef executor_main():\n import argparse\n import json\n\n parser = argparse.ArgumentParser(description='Process some integers.')\n parser.add_argument('--executor_input', type=str)\n parser.add_argument('--function_to_execute', type=str)\n\n args, _ = parser.parse_known_args()\n executor_input = json.loads(args.executor_input)\n function_to_execute = globals()[args.function_to_execute]\n\n executor = Executor(executor_input=executor_input,\n function_to_execute=function_to_execute)\n\n executor.execute()\n\n\nif __name__ == '__main__':\n executor_main()\n" + "\nimport json\nimport inspect\nfrom typing import *\n\n# Copyright 2021 The Kubeflow Authors\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Classes for input/output types in KFP SDK.\n\nThese are only compatible with v2 Pipelines.\n\"\"\"\n\nimport os\nfrom typing import Dict, Generic, List, Optional, Type, TypeVar, Union\n\n_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'\n_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'\n_S3_LOCAL_MOUNT_PREFIX = '/s3/'\n\n\nclass Artifact(object):\n \"\"\"Generic Artifact class.\n\n This class is meant to 
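To make the control flow concrete, a hedged sketch of driving `Executor` directly with the sample `output_named_tuple` component defined above; the payload is invented, and the gs:// input would need to exist under the /gcs/ fuse mount for the read to succeed:

    sample_input = {
        'inputs': {'artifacts': {'artifact': {'artifacts': [{
            'name': 'artifact', 'uri': 'gs://some-bucket/data.txt',
            'type': {'schemaTitle': 'system.Dataset'}, 'metadata': {}}]}}},
        'outputs': {
            'artifacts': {
                'metrics': {'artifacts': [{
                    'name': 'metrics', 'uri': 'gs://some-bucket/metrics',
                    'type': {'schemaTitle': 'system.Metrics'}, 'metadata': {}}]},
                'model': {'artifacts': [{
                    'name': 'model', 'uri': 'gs://some-bucket/model',
                    'type': {'schemaTitle': 'system.Model'}, 'metadata': {}}]},
            },
            'outputFile': '/tmp/kfp_outputs/output_metadata.json',
        },
    }

    executor = Executor(executor_input=sample_input,
                        function_to_execute=output_named_tuple)
    executor.execute()  # runs the function, then writes output_metadata.json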
represent the metadata around an input or output\n machine-learning Artifact. Artifacts have URIs, which can either be a location\n on disk (or Cloud storage) or some other resource identifier such as\n an API resource name.\n\n Artifacts carry a `metadata` field, which is a dictionary for storing\n metadata related to this artifact.\n \"\"\"\n TYPE_NAME = 'system.Artifact'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n \"\"\"Initializes the Artifact with the given name, URI and metadata.\"\"\"\n self.uri = uri or ''\n self.name = name or ''\n self.metadata = metadata or {}\n\n @property\n def path(self):\n return self._get_path()\n\n @path.setter\n def path(self, path):\n self._set_path(path)\n\n def _get_path(self) -> Optional[str]:\n if self.uri.startswith('gs://'):\n return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]\n elif self.uri.startswith('minio://'):\n return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]\n elif self.uri.startswith('s3://'):\n return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]\n return None\n\n def _set_path(self, path):\n if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):\n path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):\n path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):\n path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]\n self.uri = path\n\n\nclass Model(Artifact):\n \"\"\"An artifact representing an ML Model.\"\"\"\n TYPE_NAME = 'system.Model'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n @property\n def framework(self) -> str:\n return self._get_framework()\n\n def _get_framework(self) -> str:\n return self.metadata.get('framework', '')\n\n @framework.setter\n def framework(self, framework: str):\n self._set_framework(framework)\n\n def _set_framework(self, framework: str):\n self.metadata['framework'] = framework\n\n\nclass Dataset(Artifact):\n \"\"\"An artifact representing an ML Dataset.\"\"\"\n TYPE_NAME = 'system.Dataset'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Metrics(Artifact):\n \"\"\"Represent a simple base Artifact type to store key-value scalar metrics.\"\"\"\n TYPE_NAME = 'system.Metrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_metric(self, metric: str, value: float):\n \"\"\"Sets a custom scalar metric.\n\n Args:\n metric: Metric key\n value: Value of the metric.\n \"\"\"\n self.metadata[metric] = value\n\n\nclass ClassificationMetrics(Artifact):\n \"\"\"Represents Artifact class to store Classification Metrics.\"\"\"\n TYPE_NAME = 'system.ClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):\n \"\"\"Logs a single data point in the ROC Curve.\n\n Args:\n fpr: False positive rate value of the data point.\n tpr: True positive rate value of the data point.\n threshold: Threshold value for the data point.\n 
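The `path` property above gives a two-way translation between remote URIs and the local fuse-mount paths; a small usage sketch with this module's `Artifact` class:

    a = Artifact(name='model', uri='gs://my-bucket/models/m1')
    print(a.path)   # /gcs/my-bucket/models/m1

    a.path = '/minio/mlpipeline/artifacts/m2'
    print(a.uri)    # minio://mlpipeline/artifacts/m2

    a.uri = 'https://example.com/resource'
    print(a.path)   # None -- only gs://, minio:// and s3:// are mapped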
\"\"\"\n\n roc_reading = {\n 'confidenceThreshold': threshold,\n 'recall': tpr,\n 'falsePositiveRate': fpr\n }\n if 'confidenceMetrics' not in self.metadata.keys():\n self.metadata['confidenceMetrics'] = []\n\n self.metadata['confidenceMetrics'].append(roc_reading)\n\n def log_roc_curve(self, fpr: List[float], tpr: List[float],\n threshold: List[float]):\n \"\"\"Logs an ROC curve.\n\n The list length of fpr, tpr and threshold must be the same.\n\n Args:\n fpr: List of false positive rate values.\n tpr: List of true positive rate values.\n threshold: List of threshold values.\n \"\"\"\n if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(tpr) != len(\n threshold):\n raise ValueError('Length of fpr, tpr and threshold must be the same. '\n 'Got lengths {}, {} and {} respectively.'.format(\n len(fpr), len(tpr), len(threshold)))\n\n for i in range(len(fpr)):\n self.log_roc_data_point(fpr=fpr[i], tpr=tpr[i], threshold=threshold[i])\n\n def set_confusion_matrix_categories(self, categories: List[str]):\n \"\"\"Stores confusion matrix categories.\n\n Args:\n categories: List of strings specifying the categories.\n \"\"\"\n\n self._categories = []\n annotation_specs = []\n for category in categories:\n annotation_spec = {'displayName': category}\n self._categories.append(category)\n annotation_specs.append(annotation_spec)\n\n self._matrix = []\n for row in range(len(self._categories)):\n self._matrix.append({'row': [0] * len(self._categories)})\n\n self._confusion_matrix = {}\n self._confusion_matrix['annotationSpecs'] = annotation_specs\n self._confusion_matrix['rows'] = self._matrix\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_row(self, row_category: str, row: List[float]):\n \"\"\"Logs a confusion matrix row.\n\n Args:\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n\n Raises:\n ValueError: If row_category is not in the list of categories\n set in set_categories call.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if len(row) != len(self._categories):\n raise ValueError('Invalid row. Expected size: {} got: {}'.\\\n format(len(self._categories), len(row)))\n\n self._matrix[self._categories.index(row_category)] = {'row': row}\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_cell(self, row_category: str, col_category: str,\n value: int):\n \"\"\"Logs a cell in the confusion matrix.\n\n Args:\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n\n Raises:\n ValueError: If row_category or col_category is not in the list of\n categories set in set_categories.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if col_category not in self._categories:\n raise ValueError('Invalid category: {} passed. 
Expected one of: {}'.\\\n format(row_category, self._categories))\n\n self._matrix[self._categories.index(row_category)]['row'][\n self._categories.index(col_category)] = value\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix(self, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Logs a confusion matrix.\n\n Args:\n categories: List of the category names.\n matrix: Complete confusion matrix.\n\n Raises:\n ValueError: Length of categories does not match number of rows or columns.\n \"\"\"\n self.set_confusion_matrix_categories(categories)\n\n if len(matrix) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n for index in range(len(categories)):\n if len(matrix[index]) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n self.log_confusion_matrix_row(categories[index], matrix[index])\n\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n\nclass SlicedClassificationMetrics(Artifact):\n \"\"\"Metrics class representing Sliced Classification Metrics.\n\n Similar to ClassificationMetrics clients using this class are expected to use\n log methods of the class to log metrics with the difference being each log\n method takes a slice to associate the ClassificationMetrics.\n\n \"\"\"\n\n TYPE_NAME = 'system.SlicedClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def _upsert_classification_metrics_for_slice(self, slice: str):\n \"\"\"Upserts the classification metrics instance for a slice.\"\"\"\n if slice not in self._sliced_metrics:\n self._sliced_metrics[slice] = ClassificationMetrics()\n\n def _update_metadata(self, slice: str):\n \"\"\"Updates metadata to adhere to the metrics schema.\"\"\"\n self.metadata = {}\n self.metadata['evaluationSlices'] = []\n for slice in self._sliced_metrics.keys():\n slice_metrics = {\n 'slice': slice,\n 'sliceClassificationMetrics': self._sliced_metrics[slice].metadata\n }\n self.metadata['evaluationSlices'].append(slice_metrics)\n\n def log_roc_reading(self, slice: str, threshold: float, tpr: float,\n fpr: float):\n \"\"\"Logs a single data point in the ROC Curve of a slice.\n\n Args:\n slice: String representing slice label.\n threshold: Thresold value for the data point.\n tpr: True positive rate value of the data point.\n fpr: False positive rate value of the data point.\n \"\"\"\n\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_roc_reading(threshold, tpr, fpr)\n self._update_metadata(slice)\n\n def load_roc_readings(self, slice: str, readings: List[List[float]]):\n \"\"\"Supports bulk loading ROC Curve readings for a slice.\n\n Args:\n slice: String representing slice label.\n readings: A 2-D list providing ROC Curve data points.\n The expected order of the data points is: threshold,\n true_positive_rate, false_positive_rate.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].load_roc_readings(readings)\n self._update_metadata(slice)\n\n def set_confusion_matrix_categories(self, slice: str, categories: List[str]):\n \"\"\"Stores confusion matrix categories for a slice..\n\n Categories are stored in the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n categories: 
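A usage sketch for the per-slice wrapper above. Note this is hedged: `__init__` never creates `_sliced_metrics`, so the sketch seeds it explicitly before logging:

    sliced = SlicedClassificationMetrics()
    sliced._sliced_metrics = {}  # not initialized by __init__ above

    sliced.set_confusion_matrix_categories('us-east', ['cat', 'dog'])
    sliced.log_confusion_matrix_row('us-east', 'cat', [9, 1])
    print(sliced.metadata['evaluationSlices'][0]['slice'])  # us-east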
List of strings specifying the categories.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].set_confusion_matrix_categories(categories)\n self._update_metadata(slice)\n\n def log_confusion_matrix_row(self, slice: str, row_category: str,\n row: List[int]):\n \"\"\"Logs a confusion matrix row for a slice.\n\n Row is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)\n self._update_metadata(slice)\n\n def log_confusion_matrix_cell(self, slice: str, row_category: str,\n col_category: str, value: int):\n \"\"\"Logs a confusion matrix cell for a slice..\n\n Cell is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(\n row_category, col_category, value)\n self._update_metadata(slice)\n\n def load_confusion_matrix(self, slice: str, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Supports bulk loading the whole confusion matrix for a slice.\n\n Args:\n slice: String representing slice label.\n categories: List of the category names.\n matrix: Complete confusion matrix.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(categories, matrix)\n self._update_metadata(slice)\n\n\nclass HTML(Artifact):\n \"\"\"An artifact representing an HTML file.\"\"\"\n TYPE_NAME = 'system.HTML'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Markdown(Artifact):\n \"\"\"An artifact representing an Markdown file.\"\"\"\n TYPE_NAME = 'system.Markdown'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nT = TypeVar('T')\n\n\nclass InputAnnotation():\n \"\"\"Marker type for input artifacts.\"\"\"\n pass\n\n\nclass OutputAnnotation():\n \"\"\"Marker type for output artifacts.\"\"\"\n pass\n\n\n# TODO: Use typing.Annotated instead of this hack.\n# With typing.Annotated (Python 3.9+ or typing_extensions package), the\n# following would look like:\n# Input = typing.Annotated[T, InputAnnotation]\n# Output = typing.Annotated[T, OutputAnnotation]\n\n# Input represents an Input artifact of type T.\nInput = Union[T, InputAnnotation]\n\n# Output represents an Output artifact of type T.\nOutput = Union[T, OutputAnnotation]\n\n\ndef is_artifact_annotation(typ) -> bool:\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [\n InputAnnotation, OutputAnnotation\n ]\n\n if not hasattr(typ, '__origin__'):\n return False\n\n if typ.__origin__ != Union and type(typ.__origin__) != type(Union):\n return False\n\n if not hasattr(typ, '__args__') or len(typ.__args__) != 2:\n return 
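Since this patch is what introduces `HTML` and `Markdown`, a hedged sketch of how a v2 component would produce one; the `@component` decorator and the `kfp.v2.dsl` re-exports are assumptions here, not definitions from this file:

    from kfp.v2.dsl import component, Output, HTML

    @component
    def render_report(report: Output[HTML]):
        # Whatever is written to report.path is surfaced by the UI
        # as a rendered HTML visualization.
        with open(report.path, 'w') as f:
            f.write('<h1>Training report</h1>')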
False\n\n if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:\n return False\n\n return True\n\n\ndef is_input_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Input[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation\n\n return typ.__args__[1] == InputAnnotation\n\n\ndef is_output_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Output[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation\n\n return typ.__args__[1] == OutputAnnotation\n\n\ndef get_io_artifact_class(typ):\n if not is_artifact_annotation(typ):\n return None\n if typ == Input or typ == Output:\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[1]\n\n return typ.__args__[0]\n\n\ndef get_io_artifact_annotation(typ):\n if not is_artifact_annotation(typ):\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[2]\n\n return typ.__args__[1]\n\n\n_SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = {\n x.TYPE_NAME: x\n for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]\n}\n\n\ndef create_runtime_artifact(runtime_artifact: Dict) -> Artifact:\n \"\"\"Creates an Artifact instance from the specified RuntimeArtifact.\n\n Args:\n runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.\n \"\"\"\n schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')\n\n artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)\n if not artifact_type:\n artifact_type = Artifact\n return artifact_type(\n uri=runtime_artifact.get('uri', ''),\n name=runtime_artifact.get('name', ''),\n metadata=runtime_artifact.get('metadata', {}),\n )\n\nclass InputPath:\n '''When creating component from function, :class:`.InputPath` should be used as function parameter annotation to tell the system to pass the *data file path* to the function instead of passing the actual data.'''\n def __init__(self, type=None):\n self.type = type\n\nclass OutputPath:\n '''When creating component from function, :class:`.OutputPath` should be used as function parameter annotation to tell the system that the function wants to output data by writing it into a file with the given path instead of returning the data from the function.'''\n def __init__(self, type=None):\n self.type = type\n\nclass Executor():\n \"\"\"Executor executes v2-based Python function components.\"\"\"\n\n def __init__(self, executor_input: Dict, function_to_execute: Callable):\n self._func = function_to_execute\n self._input = executor_input\n self._input_artifacts: Dict[str, Artifact] = {}\n self._output_artifacts: Dict[str, Artifact] = {}\n\n for name, artifacts in self._input.get('inputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._input_artifacts[name] = self._make_input_artifact(\n artifacts_list[0])\n\n for name, artifacts in self._input.get('outputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._output_artifacts[name] = self._make_output_artifact(\n artifacts_list[0])\n\n self._return_annotation = 
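How the schema-title lookup above behaves for known and unknown titles, using this module's `create_runtime_artifact`; the dictionaries are invented:

    art = create_runtime_artifact({
        'name': 'm',
        'uri': 'gs://some-bucket/m',
        'type': {'schemaTitle': 'system.Model'},
    })
    print(type(art).__name__)  # Model

    unknown = create_runtime_artifact({'type': {'schemaTitle': 'acme.Custom'}})
    print(type(unknown).__name__)  # Artifact -- the fallback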
inspect.signature(self._func).return_annotation\n self._executor_output = {}\n\n @classmethod\n def _make_input_artifact(cls, runtime_artifact: Dict):\n return create_runtime_artifact(runtime_artifact)\n\n @classmethod\n def _make_output_artifact(cls, runtime_artifact: Dict):\n import os\n artifact = create_runtime_artifact(runtime_artifact)\n os.makedirs(os.path.dirname(artifact.path), exist_ok=True)\n return artifact\n\n def _get_input_artifact(self, name: str):\n return self._input_artifacts.get(name)\n\n def _get_output_artifact(self, name: str):\n return self._output_artifacts.get(name)\n\n def _get_input_parameter_value(self, parameter_name: str, parameter_type: Any):\n parameter = self._input.get('inputs', {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n if parameter.get('stringValue'):\n if parameter_type == str:\n return parameter['stringValue']\n elif parameter_type == bool:\n # Use `.lower()` so it can also handle 'True' and 'False' (resulted from\n # `str(True)` and `str(False)`, respectively.\n return json.loads(parameter['stringValue'].lower())\n else:\n return json.loads(parameter['stringValue'])\n elif parameter.get('intValue'):\n return int(parameter['intValue'])\n elif parameter.get('doubleValue'):\n return float(parameter['doubleValue'])\n\n def _get_output_parameter_path(self, parameter_name: str):\n parameter_name = self._maybe_strip_path_suffix(parameter_name)\n parameter = self._input.get('outputs',\n {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n import os\n path = parameter.get('outputFile', None)\n if path:\n os.makedirs(os.path.dirname(path), exist_ok=True)\n return path\n\n def _get_output_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n output_artifact = self._output_artifacts.get(artifact_name)\n if not output_artifact:\n raise ValueError(\n 'Failed to get output artifact path for artifact name {}'.format(\n artifact_name))\n return output_artifact.path\n\n def _get_input_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n input_artifact = self._input_artifacts.get(artifact_name)\n if not input_artifact:\n raise ValueError(\n 'Failed to get input artifact path for artifact name {}'.format(\n artifact_name))\n return input_artifact.path\n\n def _write_output_parameter_value(self, name: str,\n value: Union[str, int, float, bool, dict,\n list, Dict, List]):\n if type(value) == str:\n output = {'stringValue': value}\n elif type(value) == int:\n output = {'intValue': value}\n elif type(value) == float:\n output = {'doubleValue': value}\n else:\n # For bool, list, dict, List, Dict, json serialize the value.\n output = {'stringValue': json.dumps(value)}\n\n if not self._executor_output.get('parameters'):\n self._executor_output['parameters'] = {}\n\n self._executor_output['parameters'][name] = output\n\n def _write_output_artifact_payload(self, name: str, value: Any):\n path = self._get_output_artifact_path(name)\n with open(path, 'w') as f:\n f.write(str(value))\n\n # TODO: extract to a util\n @classmethod\n def _get_short_type_name(cls, type_name: str) -> str:\n \"\"\"Extracts the short form type name.\n\n This method is used for looking up serializer for a given type.\n\n For example:\n typing.List -> List\n typing.List[int] -> List\n typing.Dict[str, str] -> Dict\n List -> List\n str -> str\n\n Args:\n type_name: The original type name.\n\n Returns:\n The 
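Two details of `_get_input_parameter_value` above worth seeing in isolation: the bool branch round-trips `str(True)`/`str(False)` through `json.loads` after lowercasing, and the truthiness test on `parameter.get('stringValue')` means an empty string falls through:

    import json

    print(json.loads('True'.lower()))   # True
    print(json.loads('False'.lower()))  # False
    print(json.loads('[1, 2, 3]'))      # [1, 2, 3] -- non-str types are JSON-decoded

    param = {'stringValue': ''}
    print(bool(param.get('stringValue')))  # False, so no branch above fires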
short form type name or the original name if pattern doesn't match.\n \"\"\"\n import re\n match = re.match('(typing\\.)?(?P\\w+)(?:\\[.+\\])?', type_name)\n if match:\n return match.group('type')\n else:\n return type_name\n\n # TODO: merge with type_utils.is_parameter_type\n @classmethod\n def _is_parameter(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return annotation in [str, int, float, bool, dict, list]\n\n # Annotation could be, for instance `typing.Dict[str, str]`, etc.\n return cls._get_short_type_name(str(annotation)) in ['Dict', 'List']\n\n @classmethod\n def _is_artifact(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, Artifact)\n return False\n\n @classmethod\n def _is_named_tuple(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, tuple) and hasattr(\n annotation, '_fields') and hasattr(annotation, '__annotations__')\n return False\n\n def _handle_single_return_value(self, output_name: str, annotation_type: Any,\n return_value: Any):\n if self._is_parameter(annotation_type):\n if type(return_value) != annotation_type:\n raise ValueError(\n 'Function `{}` returned value of type {}; want type {}'.format(\n self._func.__name__, type(return_value), annotation_type))\n self._write_output_parameter_value(output_name, return_value)\n elif self._is_artifact(annotation_type):\n self._write_output_artifact_payload(output_name, return_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. Must be one of `str`, `int`, `float`, or a'\n ' subclass of `Artifact`'.format(annotation_type))\n\n def _write_executor_output(self, func_output: Optional[Any] = None):\n if self._output_artifacts:\n self._executor_output['artifacts'] = {}\n\n for name, artifact in self._output_artifacts.items():\n runtime_artifact = {\n 'name': artifact.name,\n 'uri': artifact.uri,\n 'metadata': artifact.metadata,\n }\n artifacts_list = {'artifacts': [runtime_artifact]}\n\n self._executor_output['artifacts'][name] = artifacts_list\n\n if func_output is not None:\n if self._is_parameter(self._return_annotation) or self._is_artifact(\n self._return_annotation):\n # Note: single output is named `Output` in component.yaml.\n self._handle_single_return_value('Output', self._return_annotation,\n func_output)\n elif self._is_named_tuple(self._return_annotation):\n if len(self._return_annotation._fields) != len(func_output):\n raise RuntimeError(\n 'Expected {} return values from function `{}`, got {}'.format(\n len(self._return_annotation._fields), self._func.__name__,\n len(func_output)))\n for i in range(len(self._return_annotation._fields)):\n field = self._return_annotation._fields[i]\n field_type = self._return_annotation.__annotations__[field]\n if type(func_output) == tuple:\n field_value = func_output[i]\n else:\n field_value = getattr(func_output, field)\n self._handle_single_return_value(field, field_type, field_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. 
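The exact-type comparisons above (`type(x) == annotation`) are stricter than `isinstance`; in particular `bool` never matches `int`, which also routes `True`/`False` into the JSON branch of `_write_output_parameter_value`:

    import json

    print(type(True) == int)      # False -- bool is a subclass, not the same type
    print(type(True) == bool)     # True
    print(isinstance(True, int))  # True -- the looser check the code avoids
    print(json.dumps(True))       # 'true' -- how a bool output parameter is stored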
Must be one of `str`, `int`, `float`, a'\n ' subclass of `Artifact`, or a NamedTuple collection of these types.'\n .format(self._return_annotation))\n\n import os\n os.makedirs(\n os.path.dirname(self._input['outputs']['outputFile']), exist_ok=True)\n with open(self._input['outputs']['outputFile'], 'w') as f:\n f.write(json.dumps(self._executor_output))\n\n def _maybe_strip_path_suffix(self, name) -> str:\n if name.endswith('_path'):\n name = name[0:-len('_path')]\n if name.endswith('_file'):\n name = name[0:-len('_file')]\n return name\n\n def execute(self):\n annotations = inspect.getfullargspec(self._func).annotations\n\n # Function arguments.\n func_kwargs = {}\n\n for k, v in annotations.items():\n if k == 'return':\n continue\n\n if self._is_parameter(v):\n func_kwargs[k] = self._get_input_parameter_value(k, v)\n\n if is_artifact_annotation(v):\n if is_input_artifact(v):\n func_kwargs[k] = self._get_input_artifact(k)\n if is_output_artifact(v):\n func_kwargs[k] = self._get_output_artifact(k)\n\n elif isinstance(v, OutputPath):\n if self._is_parameter(v.type):\n func_kwargs[k] = self._get_output_parameter_path(k)\n else:\n func_kwargs[k] = self._get_output_artifact_path(k)\n elif isinstance(v, InputPath):\n func_kwargs[k] = self._get_input_artifact_path(k)\n\n result = self._func(**func_kwargs)\n self._write_executor_output(result)\n\n\ndef output_named_tuple(\n artifact: Input[Dataset]\n) -> NamedTuple('Outputs', [\n ('scalar', str),\n ('metrics', Metrics),\n ('model', Model),\n]):\n scalar = \"123\"\n\n import json\n metrics = json.dumps({\n 'metrics': [{\n 'name': 'accuracy',\n 'numberValue': 0.9,\n 'format': \"PERCENTAGE\",\n }]\n })\n\n with open(artifact.path, 'r') as f:\n artifact_contents = f.read()\n model = \"Model contents: \" + artifact_contents\n\n from collections import namedtuple\n output = namedtuple('Outputs', ['scalar', 'metrics', 'model'])\n return output(scalar, metrics, model)\n\n\ndef executor_main():\n import argparse\n import json\n\n parser = argparse.ArgumentParser(description='Process some integers.')\n parser.add_argument('--executor_input', type=str)\n parser.add_argument('--function_to_execute', type=str)\n\n args, _ = parser.parse_known_args()\n executor_input = json.loads(args.executor_input)\n function_to_execute = globals()[args.function_to_execute]\n\n executor = Executor(executor_input=executor_input,\n function_to_execute=function_to_execute)\n\n executor.execute()\n\n\nif __name__ == '__main__':\n executor_main()\n" ], "image": "python:3.7" } @@ -337,7 +337,7 @@ } }, "schemaVersion": "2.0.0", - "sdkVersion": "kfp-1.6.4" + "sdkVersion": "kfp-1.6.6" }, "runtimeConfig": { "gcsOutputDirectory": "dummy_root" diff --git a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_after.json b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_after.json index da2d46ccc29..94f265b35b4 100644 --- a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_after.json +++ b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_after.json @@ -152,7 +152,7 @@ } }, "schemaVersion": "2.0.0", - "sdkVersion": "kfp-1.6.4" + "sdkVersion": "kfp-1.6.6" }, "runtimeConfig": { "gcsOutputDirectory": "dummy_root" diff --git a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_concat_placeholder.json b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_concat_placeholder.json index 3fac03538e1..e02bb84efc2 100644 --- a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_concat_placeholder.json +++ 
b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_concat_placeholder.json @@ -57,7 +57,7 @@ } }, "schemaVersion": "2.0.0", - "sdkVersion": "kfp-1.6.4" + "sdkVersion": "kfp-1.6.6" }, "runtimeConfig": { "gcsOutputDirectory": "dummy_root" diff --git a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_condition.json b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_condition.json index 0e3942ea646..75f0ce06524 100644 --- a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_condition.json +++ b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_condition.json @@ -275,7 +275,7 @@ } }, "schemaVersion": "2.0.0", - "sdkVersion": "kfp-1.6.4" + "sdkVersion": "kfp-1.6.6" }, "runtimeConfig": { "gcsOutputDirectory": "dummy_root", diff --git a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_custom_job_spec.json b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_custom_job_spec.json index af8a841caf2..eed44c591ca 100644 --- a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_custom_job_spec.json +++ b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_custom_job_spec.json @@ -44,7 +44,7 @@ "sh", "-ec", "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n", - "\nimport json\nimport inspect\nfrom typing import *\n\n# Copyright 2021 The Kubeflow Authors\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Classes for input/output types in KFP SDK.\n\nThese are only compatible with v2 Pipelines.\n\"\"\"\n\nimport os\nfrom typing import Dict, Generic, List, Optional, Type, TypeVar, Union\n\n\n_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'\n_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'\n_S3_LOCAL_MOUNT_PREFIX = '/s3/'\n\n\nclass Artifact(object):\n \"\"\"Generic Artifact class.\n\n This class is meant to represent the metadata around an input or output\n machine-learning Artifact. 
Artifacts have URIs, which can either be a location\n on disk (or Cloud storage) or some other resource identifier such as\n an API resource name.\n\n Artifacts carry a `metadata` field, which is a dictionary for storing\n metadata related to this artifact.\n \"\"\"\n TYPE_NAME = 'system.Artifact'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n \"\"\"Initializes the Artifact with the given name, URI and metadata.\"\"\"\n self.uri = uri or ''\n self.name = name or ''\n self.metadata = metadata or {}\n\n @property\n def path(self):\n return self._get_path()\n\n @path.setter\n def path(self, path):\n self._set_path(path)\n\n def _get_path(self) -> Optional[str]:\n if self.uri.startswith('gs://'):\n return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]\n elif self.uri.startswith('minio://'):\n return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]\n elif self.uri.startswith('s3://'):\n return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]\n return None\n\n def _set_path(self, path):\n if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):\n path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):\n path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):\n path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]\n self.uri = path\n\n\nclass Model(Artifact):\n \"\"\"An artifact representing an ML Model.\"\"\"\n TYPE_NAME = 'system.Model'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n @property\n def framework(self) -> str:\n return self._get_framework()\n\n def _get_framework(self) -> str:\n return self.metadata.get('framework', '')\n\n @framework.setter\n def framework(self, framework: str):\n self._set_framework(framework)\n\n def _set_framework(self, framework: str):\n self.metadata['framework'] = framework\n\n\nclass Dataset(Artifact):\n \"\"\"An artifact representing an ML Dataset.\"\"\"\n TYPE_NAME = 'system.Dataset'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Metrics(Artifact):\n \"\"\"Represent a simple base Artifact type to store key-value scalar metrics.\"\"\"\n TYPE_NAME = 'system.Metrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_metric(self, metric: str, value: float):\n \"\"\"Sets a custom scalar metric.\n\n Args:\n metric: Metric key\n value: Value of the metric.\n \"\"\"\n self.metadata[metric] = value\n\n\nclass ClassificationMetrics(Artifact):\n \"\"\"Represents Artifact class to store Classification Metrics.\"\"\"\n TYPE_NAME = 'system.ClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):\n \"\"\"Logs a single data point in the ROC Curve.\n\n Args:\n fpr: False positive rate value of the data point.\n tpr: True positive rate value of the data point.\n threshold: Threshold value for the data point.\n \"\"\"\n\n roc_reading = {\n 'confidenceThreshold': threshold,\n 'recall': tpr,\n 
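A one-liner usage sketch for the scalar `Metrics` type above; key and value are invented:

    m = Metrics()
    m.log_metric('accuracy', 0.93)
    print(m.metadata)  # {'accuracy': 0.93}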
'falsePositiveRate': fpr\n }\n if 'confidenceMetrics' not in self.metadata.keys():\n self.metadata['confidenceMetrics'] = []\n\n self.metadata['confidenceMetrics'].append(roc_reading)\n\n def log_roc_curve(self, fpr: List[float], tpr: List[float],\n threshold: List[float]):\n \"\"\"Logs an ROC curve.\n\n The list length of fpr, tpr and threshold must be the same.\n\n Args:\n fpr: List of false positive rate values.\n tpr: List of true positive rate values.\n threshold: List of threshold values.\n \"\"\"\n if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(tpr) != len(\n threshold):\n raise ValueError('Length of fpr, tpr and threshold must be the same. '\n 'Got lengths {}, {} and {} respectively.'.format(\n len(fpr), len(tpr), len(threshold)))\n\n for i in range(len(fpr)):\n self.log_roc_data_point(fpr=fpr[i], tpr=tpr[i], threshold=threshold[i])\n\n def set_confusion_matrix_categories(self, categories: List[str]):\n \"\"\"Stores confusion matrix categories.\n\n Args:\n categories: List of strings specifying the categories.\n \"\"\"\n\n self._categories = []\n annotation_specs = []\n for category in categories:\n annotation_spec = {'displayName': category}\n self._categories.append(category)\n annotation_specs.append(annotation_spec)\n\n self._matrix = []\n for row in range(len(self._categories)):\n self._matrix.append({'row': [0] * len(self._categories)})\n\n self._confusion_matrix = {}\n self._confusion_matrix['annotationSpecs'] = annotation_specs\n self._confusion_matrix['rows'] = self._matrix\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_row(self, row_category: str, row: List[float]):\n \"\"\"Logs a confusion matrix row.\n\n Args:\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n\n Raises:\n ValueError: If row_category is not in the list of categories\n set in set_categories call.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if len(row) != len(self._categories):\n raise ValueError('Invalid row. Expected size: {} got: {}'.\\\n format(len(self._categories), len(row)))\n\n self._matrix[self._categories.index(row_category)] = {'row': row}\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_cell(self, row_category: str, col_category: str,\n value: int):\n \"\"\"Logs a cell in the confusion matrix.\n\n Args:\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n\n Raises:\n ValueError: If row_category or col_category is not in the list of\n categories set in set_categories.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if col_category not in self._categories:\n raise ValueError('Invalid category: {} passed. 
Expected one of: {}'.\\\n format(row_category, self._categories))\n\n self._matrix[self._categories.index(row_category)]['row'][\n self._categories.index(col_category)] = value\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix(self, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Logs a confusion matrix.\n\n Args:\n categories: List of the category names.\n matrix: Complete confusion matrix.\n\n Raises:\n ValueError: Length of categories does not match number of rows or columns.\n \"\"\"\n self.set_confusion_matrix_categories(categories)\n\n if len(matrix) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n for index in range(len(categories)):\n if len(matrix[index]) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n self.log_confusion_matrix_row(categories[index], matrix[index])\n\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n\nclass SlicedClassificationMetrics(Artifact):\n \"\"\"Metrics class representing Sliced Classification Metrics.\n\n Similar to ClassificationMetrics clients using this class are expected to use\n log methods of the class to log metrics with the difference being each log\n method takes a slice to associate the ClassificationMetrics.\n\n \"\"\"\n\n TYPE_NAME = 'system.SlicedClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def _upsert_classification_metrics_for_slice(self, slice: str):\n \"\"\"Upserts the classification metrics instance for a slice.\"\"\"\n if slice not in self._sliced_metrics:\n self._sliced_metrics[slice] = ClassificationMetrics()\n\n def _update_metadata(self, slice: str):\n \"\"\"Updates metadata to adhere to the metrics schema.\"\"\"\n self.metadata = {}\n self.metadata['evaluationSlices'] = []\n for slice in self._sliced_metrics.keys():\n slice_metrics = {\n 'slice': slice,\n 'sliceClassificationMetrics': self._sliced_metrics[slice].metadata\n }\n self.metadata['evaluationSlices'].append(slice_metrics)\n\n def log_roc_reading(self, slice: str, threshold: float, tpr: float,\n fpr: float):\n \"\"\"Logs a single data point in the ROC Curve of a slice.\n\n Args:\n slice: String representing slice label.\n threshold: Thresold value for the data point.\n tpr: True positive rate value of the data point.\n fpr: False positive rate value of the data point.\n \"\"\"\n\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_roc_reading(threshold, tpr, fpr)\n self._update_metadata(slice)\n\n def load_roc_readings(self, slice: str, readings: List[List[float]]):\n \"\"\"Supports bulk loading ROC Curve readings for a slice.\n\n Args:\n slice: String representing slice label.\n readings: A 2-D list providing ROC Curve data points.\n The expected order of the data points is: threshold,\n true_positive_rate, false_positive_rate.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].load_roc_readings(readings)\n self._update_metadata(slice)\n\n def set_confusion_matrix_categories(self, slice: str, categories: List[str]):\n \"\"\"Stores confusion matrix categories for a slice..\n\n Categories are stored in the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n categories: 
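The bulk `log_confusion_matrix` above is equivalent to one `set_confusion_matrix_categories` call plus one `log_confusion_matrix_row` per row; a sketch of the metadata it produces:

    cm = ClassificationMetrics()
    cm.log_confusion_matrix(['cat', 'dog'], [[9, 1], [2, 8]])
    print(cm.metadata['confusionMatrix'])
    # {'annotationSpecs': [{'displayName': 'cat'}, {'displayName': 'dog'}],
    #  'rows': [{'row': [9, 1]}, {'row': [2, 8]}]}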
List of strings specifying the categories.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].set_confusion_matrix_categories(categories)\n self._update_metadata(slice)\n\n def log_confusion_matrix_row(self, slice: str, row_category: str,\n row: List[int]):\n \"\"\"Logs a confusion matrix row for a slice.\n\n Row is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)\n self._update_metadata(slice)\n\n def log_confusion_matrix_cell(self, slice: str, row_category: str,\n col_category: str, value: int):\n \"\"\"Logs a confusion matrix cell for a slice..\n\n Cell is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(\n row_category, col_category, value)\n self._update_metadata(slice)\n\n def load_confusion_matrix(self, slice: str, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Supports bulk loading the whole confusion matrix for a slice.\n\n Args:\n slice: String representing slice label.\n categories: List of the category names.\n matrix: Complete confusion matrix.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(categories, matrix)\n self._update_metadata(slice)\n\n\nT = TypeVar('T')\n\n\nclass InputAnnotation():\n \"\"\"Marker type for input artifacts.\"\"\"\n pass\n\n\n\nclass OutputAnnotation():\n \"\"\"Marker type for output artifacts.\"\"\"\n pass\n\n\n# TODO: Use typing.Annotated instead of this hack.\n# With typing.Annotated (Python 3.9+ or typing_extensions package), the\n# following would look like:\n# Input = typing.Annotated[T, InputAnnotation]\n# Output = typing.Annotated[T, OutputAnnotation]\n\n\n# Input represents an Input artifact of type T.\nInput = Union[T, InputAnnotation]\n\n# Output represents an Output artifact of type T.\nOutput = Union[T, OutputAnnotation]\n\n\ndef is_artifact_annotation(typ) -> bool:\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [InputAnnotation, OutputAnnotation]\n\n if not hasattr(typ, '__origin__'):\n return False\n\n\n if typ.__origin__ != Union and type(typ.__origin__) != type(Union):\n return False\n\n\n if not hasattr(typ, '__args__') or len(typ.__args__) != 2:\n return False\n\n if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:\n return False\n\n return True\n\ndef is_input_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Input[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation\n\n return typ.__args__[1] == InputAnnotation\n\ndef is_output_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Output[T].\"\"\"\n if not is_artifact_annotation(typ):\n 
return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation\n\n return typ.__args__[1] == OutputAnnotation\n\ndef get_io_artifact_class(typ):\n if not is_artifact_annotation(typ):\n return None\n if typ == Input or typ == Output:\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[1]\n\n return typ.__args__[0]\n\ndef get_io_artifact_annotation(typ):\n if not is_artifact_annotation(typ):\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[2]\n\n return typ.__args__[1]\n\n\n\n_SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = {\n x.TYPE_NAME: x\n for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]\n}\n\n\ndef create_runtime_artifact(runtime_artifact: Dict) -> Artifact:\n \"\"\"Creates an Artifact instance from the specified RuntimeArtifact.\n\n Args:\n runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.\n \"\"\"\n schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')\n\n artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)\n if not artifact_type:\n artifact_type = Artifact\n return artifact_type(\n uri=runtime_artifact.get('uri', ''),\n name=runtime_artifact.get('name', ''),\n metadata=runtime_artifact.get('metadata', {}),\n )\n\nclass InputPath:\n '''When creating component from function, :class:`.InputPath` should be used as function parameter annotation to tell the system to pass the *data file path* to the function instead of passing the actual data.'''\n def __init__(self, type=None):\n self.type = type\n\nclass OutputPath:\n '''When creating component from function, :class:`.OutputPath` should be used as function parameter annotation to tell the system that the function wants to output data by writing it into a file with the given path instead of returning the data from the function.'''\n def __init__(self, type=None):\n self.type = type\n\nclass Executor():\n \"\"\"Executor executes v2-based Python function components.\"\"\"\n\n def __init__(self, executor_input: Dict, function_to_execute: Callable):\n self._func = function_to_execute\n self._input = executor_input\n self._input_artifacts: Dict[str, Artifact] = {}\n self._output_artifacts: Dict[str, Artifact] = {}\n\n for name, artifacts in self._input.get('inputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._input_artifacts[name] = self._make_input_artifact(\n artifacts_list[0])\n\n for name, artifacts in self._input.get('outputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._output_artifacts[name] = self._make_output_artifact(\n artifacts_list[0])\n\n self._return_annotation = inspect.signature(self._func).return_annotation\n self._executor_output = {}\n\n @classmethod\n def _make_input_artifact(cls, runtime_artifact: Dict):\n return create_runtime_artifact(runtime_artifact)\n\n @classmethod\n def _make_output_artifact(cls, runtime_artifact: Dict):\n import os\n artifact = create_runtime_artifact(runtime_artifact)\n os.makedirs(os.path.dirname(artifact.path), exist_ok=True)\n return artifact\n\n def _get_input_artifact(self, name: str):\n return self._input_artifacts.get(name)\n\n def _get_output_artifact(self, name: str):\n return 
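A hedged sketch of a function-based component using the path annotations above; the usual `create_component_from_func` wiring is assumed and not part of this file. The executor passes a readable path for `InputPath` and a writable path for `OutputPath`, so the function body only does file I/O:

    def train(data_path: InputPath('Dataset'),
              model_path: OutputPath('Model')):
        with open(data_path) as f:
            data = f.read()
        with open(model_path, 'w') as f:
            f.write('model trained on %d bytes' % len(data))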
self._output_artifacts.get(name)\n\n  def _get_input_parameter_value(self, parameter_name: str, parameter_type: Any):\n    parameter = self._input.get('inputs', {}).get('parameters',\n                                                  {}).get(parameter_name, None)\n    if parameter is None:\n      return None\n\n    if parameter.get('stringValue'):\n      if parameter_type == str:\n        return parameter['stringValue']\n      elif parameter_type == bool:\n        # Use `.lower()` so it can also handle 'True' and 'False' (resulted from\n        # `str(True)` and `str(False)`, respectively.\n        return json.loads(parameter['stringValue'].lower())\n      else:\n        return json.loads(parameter['stringValue'])\n    elif parameter.get('intValue'):\n      return int(parameter['intValue'])\n    elif parameter.get('doubleValue'):\n      return float(parameter['doubleValue'])\n\n  def _get_output_parameter_path(self, parameter_name: str):\n    parameter_name = self._maybe_strip_path_suffix(parameter_name)\n    parameter = self._input.get('outputs',\n                                {}).get('parameters',\n                                        {}).get(parameter_name, None)\n    if parameter is None:\n      return None\n\n    import os\n    path = parameter.get('outputFile', None)\n    if path:\n      os.makedirs(os.path.dirname(path), exist_ok=True)\n    return path\n\n  def _get_output_artifact_path(self, artifact_name: str):\n    artifact_name = self._maybe_strip_path_suffix(artifact_name)\n    output_artifact = self._output_artifacts.get(artifact_name)\n    if not output_artifact:\n      raise ValueError(\n          'Failed to get output artifact path for artifact name {}'.format(\n              artifact_name))\n    return output_artifact.path\n\n  def _get_input_artifact_path(self, artifact_name: str):\n    artifact_name = self._maybe_strip_path_suffix(artifact_name)\n    input_artifact = self._input_artifacts.get(artifact_name)\n    if not input_artifact:\n      raise ValueError(\n          'Failed to get input artifact path for artifact name {}'.format(\n              artifact_name))\n    return input_artifact.path\n\n  def _write_output_parameter_value(self, name: str,\n                                    value: Union[str, int, float, bool, dict,\n                                                 list, Dict, List]):\n    if type(value) == str:\n      output = {'stringValue': value}\n    elif type(value) == int:\n      output = {'intValue': value}\n    elif type(value) == float:\n      output = {'doubleValue': value}\n    else:\n      # For bool, list, dict, List, Dict, json serialize the value.\n      output = {'stringValue': json.dumps(value)}\n\n    if not self._executor_output.get('parameters'):\n      self._executor_output['parameters'] = {}\n\n    self._executor_output['parameters'][name] = output\n\n  def _write_output_artifact_payload(self, name: str, value: Any):\n    path = self._get_output_artifact_path(name)\n    with open(path, 'w') as f:\n      f.write(str(value))\n\n  # TODO: extract to a util\n  @classmethod\n  def _get_short_type_name(cls, type_name: str) -> str:\n    \"\"\"Extracts the short form type name.\n\n    This method is used for looking up serializer for a given type.\n\n    For example:\n      typing.List -> List\n      typing.List[int] -> List\n      typing.Dict[str, str] -> Dict\n      List -> List\n      str -> str\n\n    Args:\n      type_name: The original type name.\n\n    Returns:\n      The short form type name or the original name if pattern doesn't match.\n    \"\"\"\n    import re\n    match = re.match('(typing\\.)?(?P<type>\\w+)(?:\\[.+\\])?', type_name)\n    if match:\n      return match.group('type')\n    else:\n      return type_name\n\n  # TODO: merge with type_utils.is_parameter_type\n  @classmethod\n  def _is_parameter(cls, annotation: Any) -> bool:\n    if type(annotation) == type:\n      return annotation in [str, int, float, bool, dict, list]\n\n    # Annotation could be, for instance `typing.Dict[str, str]`, etc.\n    return cls._get_short_type_name(str(annotation)) in ['Dict', 
'List']\n\n @classmethod\n def _is_artifact(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, Artifact)\n return False\n\n @classmethod\n def _is_named_tuple(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, tuple) and hasattr(\n annotation, '_fields') and hasattr(annotation, '__annotations__')\n return False\n\n def _handle_single_return_value(self, output_name: str, annotation_type: Any,\n return_value: Any):\n if self._is_parameter(annotation_type):\n if type(return_value) != annotation_type:\n raise ValueError(\n 'Function `{}` returned value of type {}; want type {}'.format(\n self._func.__name__, type(return_value), annotation_type))\n self._write_output_parameter_value(output_name, return_value)\n elif self._is_artifact(annotation_type):\n self._write_output_artifact_payload(output_name, return_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. Must be one of `str`, `int`, `float`, or a'\n ' subclass of `Artifact`'.format(annotation_type))\n\n def _write_executor_output(self, func_output: Optional[Any] = None):\n if self._output_artifacts:\n self._executor_output['artifacts'] = {}\n\n for name, artifact in self._output_artifacts.items():\n runtime_artifact = {\n 'name': artifact.name,\n 'uri': artifact.uri,\n 'metadata': artifact.metadata,\n }\n artifacts_list = {'artifacts': [runtime_artifact]}\n\n self._executor_output['artifacts'][name] = artifacts_list\n\n if func_output is not None:\n if self._is_parameter(self._return_annotation) or self._is_artifact(\n self._return_annotation):\n # Note: single output is named `Output` in component.yaml.\n self._handle_single_return_value('Output', self._return_annotation,\n func_output)\n elif self._is_named_tuple(self._return_annotation):\n if len(self._return_annotation._fields) != len(func_output):\n raise RuntimeError(\n 'Expected {} return values from function `{}`, got {}'.format(\n len(self._return_annotation._fields), self._func.__name__,\n len(func_output)))\n for i in range(len(self._return_annotation._fields)):\n field = self._return_annotation._fields[i]\n field_type = self._return_annotation.__annotations__[field]\n if type(func_output) == tuple:\n field_value = func_output[i]\n else:\n field_value = getattr(func_output, field)\n self._handle_single_return_value(field, field_type, field_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. 
Must be one of `str`, `int`, `float`, a'\n ' subclass of `Artifact`, or a NamedTuple collection of these types.'\n .format(self._return_annotation))\n\n import os\n os.makedirs(\n os.path.dirname(self._input['outputs']['outputFile']), exist_ok=True)\n with open(self._input['outputs']['outputFile'], 'w') as f:\n f.write(json.dumps(self._executor_output))\n\n def _maybe_strip_path_suffix(self, name) -> str:\n if name.endswith('_path'):\n name = name[0:-len('_path')]\n if name.endswith('_file'):\n name = name[0:-len('_file')]\n return name\n\n def execute(self):\n annotations = inspect.getfullargspec(self._func).annotations\n\n # Function arguments.\n func_kwargs = {}\n\n for k, v in annotations.items():\n if k == 'return':\n continue\n\n if self._is_parameter(v):\n func_kwargs[k] = self._get_input_parameter_value(k, v)\n\n if is_artifact_annotation(v):\n if is_input_artifact(v):\n func_kwargs[k] = self._get_input_artifact(k)\n if is_output_artifact(v):\n func_kwargs[k] = self._get_output_artifact(k)\n\n elif isinstance(v, OutputPath):\n if self._is_parameter(v.type):\n func_kwargs[k] = self._get_output_parameter_path(k)\n else:\n func_kwargs[k] = self._get_output_artifact_path(k)\n elif isinstance(v, InputPath):\n func_kwargs[k] = self._get_input_artifact_path(k)\n\n result = self._func(**func_kwargs)\n self._write_executor_output(result)\n\n\ndef training_op(input1: str):\n print('dummy training master: {}'.format(input1))\n\n\ndef executor_main():\n import argparse\n import json\n\n parser = argparse.ArgumentParser(description='Process some integers.')\n parser.add_argument('--executor_input', type=str)\n parser.add_argument('--function_to_execute', type=str)\n\n args, _ = parser.parse_known_args()\n executor_input = json.loads(args.executor_input)\n function_to_execute = globals()[args.function_to_execute]\n\n executor = Executor(executor_input=executor_input,\n function_to_execute=function_to_execute)\n\n executor.execute()\n\n\nif __name__ == '__main__':\n executor_main()\n" + "\nimport json\nimport inspect\nfrom typing import *\n\n# Copyright 2021 The Kubeflow Authors\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Classes for input/output types in KFP SDK.\n\nThese are only compatible with v2 Pipelines.\n\"\"\"\n\nimport os\nfrom typing import Dict, Generic, List, Optional, Type, TypeVar, Union\n\n_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'\n_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'\n_S3_LOCAL_MOUNT_PREFIX = '/s3/'\n\n\nclass Artifact(object):\n \"\"\"Generic Artifact class.\n\n This class is meant to represent the metadata around an input or output\n machine-learning Artifact. 
Artifacts have URIs, which can either be a location\n on disk (or Cloud storage) or some other resource identifier such as\n an API resource name.\n\n Artifacts carry a `metadata` field, which is a dictionary for storing\n metadata related to this artifact.\n \"\"\"\n TYPE_NAME = 'system.Artifact'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n \"\"\"Initializes the Artifact with the given name, URI and metadata.\"\"\"\n self.uri = uri or ''\n self.name = name or ''\n self.metadata = metadata or {}\n\n @property\n def path(self):\n return self._get_path()\n\n @path.setter\n def path(self, path):\n self._set_path(path)\n\n def _get_path(self) -> Optional[str]:\n if self.uri.startswith('gs://'):\n return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]\n elif self.uri.startswith('minio://'):\n return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]\n elif self.uri.startswith('s3://'):\n return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]\n return None\n\n def _set_path(self, path):\n if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):\n path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):\n path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):\n path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]\n self.uri = path\n\n\nclass Model(Artifact):\n \"\"\"An artifact representing an ML Model.\"\"\"\n TYPE_NAME = 'system.Model'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n @property\n def framework(self) -> str:\n return self._get_framework()\n\n def _get_framework(self) -> str:\n return self.metadata.get('framework', '')\n\n @framework.setter\n def framework(self, framework: str):\n self._set_framework(framework)\n\n def _set_framework(self, framework: str):\n self.metadata['framework'] = framework\n\n\nclass Dataset(Artifact):\n \"\"\"An artifact representing an ML Dataset.\"\"\"\n TYPE_NAME = 'system.Dataset'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Metrics(Artifact):\n \"\"\"Represent a simple base Artifact type to store key-value scalar metrics.\"\"\"\n TYPE_NAME = 'system.Metrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_metric(self, metric: str, value: float):\n \"\"\"Sets a custom scalar metric.\n\n Args:\n metric: Metric key\n value: Value of the metric.\n \"\"\"\n self.metadata[metric] = value\n\n\nclass ClassificationMetrics(Artifact):\n \"\"\"Represents Artifact class to store Classification Metrics.\"\"\"\n TYPE_NAME = 'system.ClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):\n \"\"\"Logs a single data point in the ROC Curve.\n\n Args:\n fpr: False positive rate value of the data point.\n tpr: True positive rate value of the data point.\n threshold: Threshold value for the data point.\n \"\"\"\n\n roc_reading = {\n 'confidenceThreshold': threshold,\n 'recall': tpr,\n 
'falsePositiveRate': fpr\n }\n if 'confidenceMetrics' not in self.metadata.keys():\n self.metadata['confidenceMetrics'] = []\n\n self.metadata['confidenceMetrics'].append(roc_reading)\n\n def log_roc_curve(self, fpr: List[float], tpr: List[float],\n threshold: List[float]):\n \"\"\"Logs an ROC curve.\n\n The list length of fpr, tpr and threshold must be the same.\n\n Args:\n fpr: List of false positive rate values.\n tpr: List of true positive rate values.\n threshold: List of threshold values.\n \"\"\"\n if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(tpr) != len(\n threshold):\n raise ValueError('Length of fpr, tpr and threshold must be the same. '\n 'Got lengths {}, {} and {} respectively.'.format(\n len(fpr), len(tpr), len(threshold)))\n\n for i in range(len(fpr)):\n self.log_roc_data_point(fpr=fpr[i], tpr=tpr[i], threshold=threshold[i])\n\n def set_confusion_matrix_categories(self, categories: List[str]):\n \"\"\"Stores confusion matrix categories.\n\n Args:\n categories: List of strings specifying the categories.\n \"\"\"\n\n self._categories = []\n annotation_specs = []\n for category in categories:\n annotation_spec = {'displayName': category}\n self._categories.append(category)\n annotation_specs.append(annotation_spec)\n\n self._matrix = []\n for row in range(len(self._categories)):\n self._matrix.append({'row': [0] * len(self._categories)})\n\n self._confusion_matrix = {}\n self._confusion_matrix['annotationSpecs'] = annotation_specs\n self._confusion_matrix['rows'] = self._matrix\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_row(self, row_category: str, row: List[float]):\n \"\"\"Logs a confusion matrix row.\n\n Args:\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n\n Raises:\n ValueError: If row_category is not in the list of categories\n set in set_categories call.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if len(row) != len(self._categories):\n raise ValueError('Invalid row. Expected size: {} got: {}'.\\\n format(len(self._categories), len(row)))\n\n self._matrix[self._categories.index(row_category)] = {'row': row}\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_cell(self, row_category: str, col_category: str,\n value: int):\n \"\"\"Logs a cell in the confusion matrix.\n\n Args:\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n\n Raises:\n ValueError: If row_category or col_category is not in the list of\n categories set in set_categories.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if col_category not in self._categories:\n raise ValueError('Invalid category: {} passed. 
Expected one of: {}'.\\\n                       format(col_category, self._categories))\n\n    self._matrix[self._categories.index(row_category)]['row'][\n        self._categories.index(col_category)] = value\n    self.metadata['confusionMatrix'] = self._confusion_matrix\n\n  def log_confusion_matrix(self, categories: List[str],\n                           matrix: List[List[int]]):\n    \"\"\"Logs a confusion matrix.\n\n    Args:\n      categories: List of the category names.\n      matrix: Complete confusion matrix.\n\n    Raises:\n      ValueError: Length of categories does not match number of rows or columns.\n    \"\"\"\n    self.set_confusion_matrix_categories(categories)\n\n    if len(matrix) != len(categories):\n      raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n        format(matrix, categories))\n\n    for index in range(len(categories)):\n      if len(matrix[index]) != len(categories):\n        raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n          format(matrix, categories))\n\n      self.log_confusion_matrix_row(categories[index], matrix[index])\n\n    self.metadata['confusionMatrix'] = self._confusion_matrix\n\n\nclass SlicedClassificationMetrics(Artifact):\n  \"\"\"Metrics class representing Sliced Classification Metrics.\n\n  Similar to ClassificationMetrics, clients using this class are expected to use\n  log methods of the class to log metrics with the difference being each log\n  method takes a slice to associate the ClassificationMetrics.\n\n  \"\"\"\n\n  TYPE_NAME = 'system.SlicedClassificationMetrics'\n\n  def __init__(self,\n               name: Optional[str] = None,\n               uri: Optional[str] = None,\n               metadata: Optional[Dict] = None):\n    super().__init__(uri=uri, name=name, metadata=metadata)\n\n  def _upsert_classification_metrics_for_slice(self, slice: str):\n    \"\"\"Upserts the classification metrics instance for a slice.\"\"\"\n    if slice not in self._sliced_metrics:\n      self._sliced_metrics[slice] = ClassificationMetrics()\n\n  def _update_metadata(self, slice: str):\n    \"\"\"Updates metadata to adhere to the metrics schema.\"\"\"\n    self.metadata = {}\n    self.metadata['evaluationSlices'] = []\n    for slice in self._sliced_metrics.keys():\n      slice_metrics = {\n          'slice': slice,\n          'sliceClassificationMetrics': self._sliced_metrics[slice].metadata\n      }\n      self.metadata['evaluationSlices'].append(slice_metrics)\n\n  def log_roc_reading(self, slice: str, threshold: float, tpr: float,\n                      fpr: float):\n    \"\"\"Logs a single data point in the ROC Curve of a slice.\n\n    Args:\n      slice: String representing slice label.\n      threshold: Threshold value for the data point.\n      tpr: True positive rate value of the data point.\n      fpr: False positive rate value of the data point.\n    \"\"\"\n\n    self._upsert_classification_metrics_for_slice(slice)\n    self._sliced_metrics[slice].log_roc_reading(threshold, tpr, fpr)\n    self._update_metadata(slice)\n\n  def load_roc_readings(self, slice: str, readings: List[List[float]]):\n    \"\"\"Supports bulk loading ROC Curve readings for a slice.\n\n    Args:\n      slice: String representing slice label.\n      readings: A 2-D list providing ROC Curve data points.\n                The expected order of the data points is: threshold,\n                true_positive_rate, false_positive_rate.\n    \"\"\"\n    self._upsert_classification_metrics_for_slice(slice)\n    self._sliced_metrics[slice].load_roc_readings(readings)\n    self._update_metadata(slice)\n\n  def set_confusion_matrix_categories(self, slice: str, categories: List[str]):\n    \"\"\"Stores confusion matrix categories for a slice.\n\n    Categories are stored in the internal metrics_utils.ConfusionMatrix\n    instance of the slice.\n\n    Args:\n      slice: String representing slice label.\n      categories: 
List of strings specifying the categories.\n    \"\"\"\n    self._upsert_classification_metrics_for_slice(slice)\n    self._sliced_metrics[slice].set_confusion_matrix_categories(categories)\n    self._update_metadata(slice)\n\n  def log_confusion_matrix_row(self, slice: str, row_category: str,\n                               row: List[int]):\n    \"\"\"Logs a confusion matrix row for a slice.\n\n    Row is updated on the internal metrics_utils.ConfusionMatrix\n    instance of the slice.\n\n    Args:\n      slice: String representing slice label.\n      row_category: Category to which the row belongs.\n      row: List of integers specifying the values for the row.\n    \"\"\"\n    self._upsert_classification_metrics_for_slice(slice)\n    self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)\n    self._update_metadata(slice)\n\n  def log_confusion_matrix_cell(self, slice: str, row_category: str,\n                                col_category: str, value: int):\n    \"\"\"Logs a confusion matrix cell for a slice.\n\n    Cell is updated on the internal metrics_utils.ConfusionMatrix\n    instance of the slice.\n\n    Args:\n      slice: String representing slice label.\n      row_category: String representing the name of the row category.\n      col_category: String representing the name of the column category.\n      value: Int value of the cell.\n    \"\"\"\n    self._upsert_classification_metrics_for_slice(slice)\n    self._sliced_metrics[slice].log_confusion_matrix_cell(\n        row_category, col_category, value)\n    self._update_metadata(slice)\n\n  def load_confusion_matrix(self, slice: str, categories: List[str],\n                            matrix: List[List[int]]):\n    \"\"\"Supports bulk loading the whole confusion matrix for a slice.\n\n    Args:\n      slice: String representing slice label.\n      categories: List of the category names.\n      matrix: Complete confusion matrix.\n    \"\"\"\n    self._upsert_classification_metrics_for_slice(slice)\n    self._sliced_metrics[slice].log_confusion_matrix(categories, matrix)\n    self._update_metadata(slice)\n\n\nclass HTML(Artifact):\n  \"\"\"An artifact representing an HTML file.\"\"\"\n  TYPE_NAME = 'system.HTML'\n\n  def __init__(self,\n               name: Optional[str] = None,\n               uri: Optional[str] = None,\n               metadata: Optional[Dict] = None):\n    super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Markdown(Artifact):\n  \"\"\"An artifact representing a Markdown file.\"\"\"\n  TYPE_NAME = 'system.Markdown'\n\n  def __init__(self,\n               name: Optional[str] = None,\n               uri: Optional[str] = None,\n               metadata: Optional[Dict] = None):\n    super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nT = TypeVar('T')\n\n\nclass InputAnnotation():\n  \"\"\"Marker type for input artifacts.\"\"\"\n  pass\n\n\nclass OutputAnnotation():\n  \"\"\"Marker type for output artifacts.\"\"\"\n  pass\n\n\n# TODO: Use typing.Annotated instead of this hack.\n# With typing.Annotated (Python 3.9+ or typing_extensions package), the\n# following would look like:\n# Input = typing.Annotated[T, InputAnnotation]\n# Output = typing.Annotated[T, OutputAnnotation]\n\n# Input represents an Input artifact of type T.\nInput = Union[T, InputAnnotation]\n\n# Output represents an Output artifact of type T.\nOutput = Union[T, OutputAnnotation]\n\n\ndef is_artifact_annotation(typ) -> bool:\n  if hasattr(typ, '_subs_tree'):  # Python 3.6\n    subs_tree = typ._subs_tree()\n    return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [\n        InputAnnotation, OutputAnnotation\n    ]\n\n  if not hasattr(typ, '__origin__'):\n    return False\n\n  if typ.__origin__ != Union and type(typ.__origin__) != type(Union):\n    return False\n\n  if not hasattr(typ, '__args__') or len(typ.__args__) != 2:\n    return 
False\n\n if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:\n return False\n\n return True\n\n\ndef is_input_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Input[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation\n\n return typ.__args__[1] == InputAnnotation\n\n\ndef is_output_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Output[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation\n\n return typ.__args__[1] == OutputAnnotation\n\n\ndef get_io_artifact_class(typ):\n if not is_artifact_annotation(typ):\n return None\n if typ == Input or typ == Output:\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[1]\n\n return typ.__args__[0]\n\n\ndef get_io_artifact_annotation(typ):\n if not is_artifact_annotation(typ):\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[2]\n\n return typ.__args__[1]\n\n\n_SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = {\n x.TYPE_NAME: x\n for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]\n}\n\n\ndef create_runtime_artifact(runtime_artifact: Dict) -> Artifact:\n \"\"\"Creates an Artifact instance from the specified RuntimeArtifact.\n\n Args:\n runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.\n \"\"\"\n schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')\n\n artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)\n if not artifact_type:\n artifact_type = Artifact\n return artifact_type(\n uri=runtime_artifact.get('uri', ''),\n name=runtime_artifact.get('name', ''),\n metadata=runtime_artifact.get('metadata', {}),\n )\n\nclass InputPath:\n '''When creating component from function, :class:`.InputPath` should be used as function parameter annotation to tell the system to pass the *data file path* to the function instead of passing the actual data.'''\n def __init__(self, type=None):\n self.type = type\n\nclass OutputPath:\n '''When creating component from function, :class:`.OutputPath` should be used as function parameter annotation to tell the system that the function wants to output data by writing it into a file with the given path instead of returning the data from the function.'''\n def __init__(self, type=None):\n self.type = type\n\nclass Executor():\n \"\"\"Executor executes v2-based Python function components.\"\"\"\n\n def __init__(self, executor_input: Dict, function_to_execute: Callable):\n self._func = function_to_execute\n self._input = executor_input\n self._input_artifacts: Dict[str, Artifact] = {}\n self._output_artifacts: Dict[str, Artifact] = {}\n\n for name, artifacts in self._input.get('inputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._input_artifacts[name] = self._make_input_artifact(\n artifacts_list[0])\n\n for name, artifacts in self._input.get('outputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._output_artifacts[name] = self._make_output_artifact(\n artifacts_list[0])\n\n self._return_annotation = 
inspect.signature(self._func).return_annotation\n    self._executor_output = {}\n\n  @classmethod\n  def _make_input_artifact(cls, runtime_artifact: Dict):\n    return create_runtime_artifact(runtime_artifact)\n\n  @classmethod\n  def _make_output_artifact(cls, runtime_artifact: Dict):\n    import os\n    artifact = create_runtime_artifact(runtime_artifact)\n    os.makedirs(os.path.dirname(artifact.path), exist_ok=True)\n    return artifact\n\n  def _get_input_artifact(self, name: str):\n    return self._input_artifacts.get(name)\n\n  def _get_output_artifact(self, name: str):\n    return self._output_artifacts.get(name)\n\n  def _get_input_parameter_value(self, parameter_name: str, parameter_type: Any):\n    parameter = self._input.get('inputs', {}).get('parameters',\n                                                  {}).get(parameter_name, None)\n    if parameter is None:\n      return None\n\n    if parameter.get('stringValue'):\n      if parameter_type == str:\n        return parameter['stringValue']\n      elif parameter_type == bool:\n        # Use `.lower()` so it can also handle 'True' and 'False' (resulting from\n        # `str(True)` and `str(False)`, respectively).\n        return json.loads(parameter['stringValue'].lower())\n      else:\n        return json.loads(parameter['stringValue'])\n    elif parameter.get('intValue'):\n      return int(parameter['intValue'])\n    elif parameter.get('doubleValue'):\n      return float(parameter['doubleValue'])\n\n  def _get_output_parameter_path(self, parameter_name: str):\n    parameter_name = self._maybe_strip_path_suffix(parameter_name)\n    parameter = self._input.get('outputs',\n                                {}).get('parameters',\n                                        {}).get(parameter_name, None)\n    if parameter is None:\n      return None\n\n    import os\n    path = parameter.get('outputFile', None)\n    if path:\n      os.makedirs(os.path.dirname(path), exist_ok=True)\n    return path\n\n  def _get_output_artifact_path(self, artifact_name: str):\n    artifact_name = self._maybe_strip_path_suffix(artifact_name)\n    output_artifact = self._output_artifacts.get(artifact_name)\n    if not output_artifact:\n      raise ValueError(\n          'Failed to get output artifact path for artifact name {}'.format(\n              artifact_name))\n    return output_artifact.path\n\n  def _get_input_artifact_path(self, artifact_name: str):\n    artifact_name = self._maybe_strip_path_suffix(artifact_name)\n    input_artifact = self._input_artifacts.get(artifact_name)\n    if not input_artifact:\n      raise ValueError(\n          'Failed to get input artifact path for artifact name {}'.format(\n              artifact_name))\n    return input_artifact.path\n\n  def _write_output_parameter_value(self, name: str,\n                                    value: Union[str, int, float, bool, dict,\n                                                 list, Dict, List]):\n    if type(value) == str:\n      output = {'stringValue': value}\n    elif type(value) == int:\n      output = {'intValue': value}\n    elif type(value) == float:\n      output = {'doubleValue': value}\n    else:\n      # For bool, list, dict, List, Dict, json serialize the value.\n      output = {'stringValue': json.dumps(value)}\n\n    if not self._executor_output.get('parameters'):\n      self._executor_output['parameters'] = {}\n\n    self._executor_output['parameters'][name] = output\n\n  def _write_output_artifact_payload(self, name: str, value: Any):\n    path = self._get_output_artifact_path(name)\n    with open(path, 'w') as f:\n      f.write(str(value))\n\n  # TODO: extract to a util\n  @classmethod\n  def _get_short_type_name(cls, type_name: str) -> str:\n    \"\"\"Extracts the short form type name.\n\n    This method is used for looking up serializer for a given type.\n\n    For example:\n      typing.List -> List\n      typing.List[int] -> List\n      typing.Dict[str, str] -> Dict\n      List -> List\n      str -> str\n\n    Args:\n      type_name: The original type name.\n\n    Returns:\n      The 
short form type name or the original name if pattern doesn't match.\n    \"\"\"\n    import re\n    match = re.match('(typing\\.)?(?P<type>\\w+)(?:\\[.+\\])?', type_name)\n    if match:\n      return match.group('type')\n    else:\n      return type_name\n\n  # TODO: merge with type_utils.is_parameter_type\n  @classmethod\n  def _is_parameter(cls, annotation: Any) -> bool:\n    if type(annotation) == type:\n      return annotation in [str, int, float, bool, dict, list]\n\n    # Annotation could be, for instance `typing.Dict[str, str]`, etc.\n    return cls._get_short_type_name(str(annotation)) in ['Dict', 'List']\n\n  @classmethod\n  def _is_artifact(cls, annotation: Any) -> bool:\n    if type(annotation) == type:\n      return issubclass(annotation, Artifact)\n    return False\n\n  @classmethod\n  def _is_named_tuple(cls, annotation: Any) -> bool:\n    if type(annotation) == type:\n      return issubclass(annotation, tuple) and hasattr(\n          annotation, '_fields') and hasattr(annotation, '__annotations__')\n    return False\n\n  def _handle_single_return_value(self, output_name: str, annotation_type: Any,\n                                  return_value: Any):\n    if self._is_parameter(annotation_type):\n      if type(return_value) != annotation_type:\n        raise ValueError(\n            'Function `{}` returned value of type {}; want type {}'.format(\n                self._func.__name__, type(return_value), annotation_type))\n      self._write_output_parameter_value(output_name, return_value)\n    elif self._is_artifact(annotation_type):\n      self._write_output_artifact_payload(output_name, return_value)\n    else:\n      raise RuntimeError(\n          'Unknown return type: {}. Must be one of `str`, `int`, `float`, or a'\n          ' subclass of `Artifact`'.format(annotation_type))\n\n  def _write_executor_output(self, func_output: Optional[Any] = None):\n    if self._output_artifacts:\n      self._executor_output['artifacts'] = {}\n\n    for name, artifact in self._output_artifacts.items():\n      runtime_artifact = {\n          'name': artifact.name,\n          'uri': artifact.uri,\n          'metadata': artifact.metadata,\n      }\n      artifacts_list = {'artifacts': [runtime_artifact]}\n\n      self._executor_output['artifacts'][name] = artifacts_list\n\n    if func_output is not None:\n      if self._is_parameter(self._return_annotation) or self._is_artifact(\n          self._return_annotation):\n        # Note: single output is named `Output` in component.yaml.\n        self._handle_single_return_value('Output', self._return_annotation,\n                                         func_output)\n      elif self._is_named_tuple(self._return_annotation):\n        if len(self._return_annotation._fields) != len(func_output):\n          raise RuntimeError(\n              'Expected {} return values from function `{}`, got {}'.format(\n                  len(self._return_annotation._fields), self._func.__name__,\n                  len(func_output)))\n        for i in range(len(self._return_annotation._fields)):\n          field = self._return_annotation._fields[i]\n          field_type = self._return_annotation.__annotations__[field]\n          if type(func_output) == tuple:\n            field_value = func_output[i]\n          else:\n            field_value = getattr(func_output, field)\n          self._handle_single_return_value(field, field_type, field_value)\n      else:\n        raise RuntimeError(\n            'Unknown return type: {}. 
Must be one of `str`, `int`, `float`, a'\n ' subclass of `Artifact`, or a NamedTuple collection of these types.'\n .format(self._return_annotation))\n\n import os\n os.makedirs(\n os.path.dirname(self._input['outputs']['outputFile']), exist_ok=True)\n with open(self._input['outputs']['outputFile'], 'w') as f:\n f.write(json.dumps(self._executor_output))\n\n def _maybe_strip_path_suffix(self, name) -> str:\n if name.endswith('_path'):\n name = name[0:-len('_path')]\n if name.endswith('_file'):\n name = name[0:-len('_file')]\n return name\n\n def execute(self):\n annotations = inspect.getfullargspec(self._func).annotations\n\n # Function arguments.\n func_kwargs = {}\n\n for k, v in annotations.items():\n if k == 'return':\n continue\n\n if self._is_parameter(v):\n func_kwargs[k] = self._get_input_parameter_value(k, v)\n\n if is_artifact_annotation(v):\n if is_input_artifact(v):\n func_kwargs[k] = self._get_input_artifact(k)\n if is_output_artifact(v):\n func_kwargs[k] = self._get_output_artifact(k)\n\n elif isinstance(v, OutputPath):\n if self._is_parameter(v.type):\n func_kwargs[k] = self._get_output_parameter_path(k)\n else:\n func_kwargs[k] = self._get_output_artifact_path(k)\n elif isinstance(v, InputPath):\n func_kwargs[k] = self._get_input_artifact_path(k)\n\n result = self._func(**func_kwargs)\n self._write_executor_output(result)\n\n\ndef training_op(input1: str):\n print('dummy training master: {}'.format(input1))\n\n\ndef executor_main():\n import argparse\n import json\n\n parser = argparse.ArgumentParser(description='Process some integers.')\n parser.add_argument('--executor_input', type=str)\n parser.add_argument('--function_to_execute', type=str)\n\n args, _ = parser.parse_known_args()\n executor_input = json.loads(args.executor_input)\n function_to_execute = globals()[args.function_to_execute]\n\n executor = Executor(executor_input=executor_input,\n function_to_execute=function_to_execute)\n\n executor.execute()\n\n\nif __name__ == '__main__':\n executor_main()\n" ], "imageUri": "python:3.7" }, @@ -67,7 +67,7 @@ "sh", "-ec", "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n", - "\nimport json\nimport inspect\nfrom typing import *\n\n# Copyright 2021 The Kubeflow Authors\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Classes for input/output types in KFP SDK.\n\nThese are only compatible with v2 Pipelines.\n\"\"\"\n\nimport os\nfrom typing import Dict, Generic, List, Optional, Type, TypeVar, Union\n\n\n_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'\n_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'\n_S3_LOCAL_MOUNT_PREFIX = '/s3/'\n\n\nclass Artifact(object):\n \"\"\"Generic Artifact class.\n\n This class is meant to represent the metadata around an input or output\n machine-learning Artifact. 
Artifacts have URIs, which can either be a location\n on disk (or Cloud storage) or some other resource identifier such as\n an API resource name.\n\n Artifacts carry a `metadata` field, which is a dictionary for storing\n metadata related to this artifact.\n \"\"\"\n TYPE_NAME = 'system.Artifact'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n \"\"\"Initializes the Artifact with the given name, URI and metadata.\"\"\"\n self.uri = uri or ''\n self.name = name or ''\n self.metadata = metadata or {}\n\n @property\n def path(self):\n return self._get_path()\n\n @path.setter\n def path(self, path):\n self._set_path(path)\n\n def _get_path(self) -> Optional[str]:\n if self.uri.startswith('gs://'):\n return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]\n elif self.uri.startswith('minio://'):\n return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]\n elif self.uri.startswith('s3://'):\n return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]\n return None\n\n def _set_path(self, path):\n if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):\n path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):\n path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):\n path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]\n self.uri = path\n\n\nclass Model(Artifact):\n \"\"\"An artifact representing an ML Model.\"\"\"\n TYPE_NAME = 'system.Model'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n @property\n def framework(self) -> str:\n return self._get_framework()\n\n def _get_framework(self) -> str:\n return self.metadata.get('framework', '')\n\n @framework.setter\n def framework(self, framework: str):\n self._set_framework(framework)\n\n def _set_framework(self, framework: str):\n self.metadata['framework'] = framework\n\n\nclass Dataset(Artifact):\n \"\"\"An artifact representing an ML Dataset.\"\"\"\n TYPE_NAME = 'system.Dataset'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Metrics(Artifact):\n \"\"\"Represent a simple base Artifact type to store key-value scalar metrics.\"\"\"\n TYPE_NAME = 'system.Metrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_metric(self, metric: str, value: float):\n \"\"\"Sets a custom scalar metric.\n\n Args:\n metric: Metric key\n value: Value of the metric.\n \"\"\"\n self.metadata[metric] = value\n\n\nclass ClassificationMetrics(Artifact):\n \"\"\"Represents Artifact class to store Classification Metrics.\"\"\"\n TYPE_NAME = 'system.ClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):\n \"\"\"Logs a single data point in the ROC Curve.\n\n Args:\n fpr: False positive rate value of the data point.\n tpr: True positive rate value of the data point.\n threshold: Threshold value for the data point.\n \"\"\"\n\n roc_reading = {\n 'confidenceThreshold': threshold,\n 'recall': tpr,\n 
'falsePositiveRate': fpr\n }\n if 'confidenceMetrics' not in self.metadata.keys():\n self.metadata['confidenceMetrics'] = []\n\n self.metadata['confidenceMetrics'].append(roc_reading)\n\n def log_roc_curve(self, fpr: List[float], tpr: List[float],\n threshold: List[float]):\n \"\"\"Logs an ROC curve.\n\n The list length of fpr, tpr and threshold must be the same.\n\n Args:\n fpr: List of false positive rate values.\n tpr: List of true positive rate values.\n threshold: List of threshold values.\n \"\"\"\n if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(tpr) != len(\n threshold):\n raise ValueError('Length of fpr, tpr and threshold must be the same. '\n 'Got lengths {}, {} and {} respectively.'.format(\n len(fpr), len(tpr), len(threshold)))\n\n for i in range(len(fpr)):\n self.log_roc_data_point(fpr=fpr[i], tpr=tpr[i], threshold=threshold[i])\n\n def set_confusion_matrix_categories(self, categories: List[str]):\n \"\"\"Stores confusion matrix categories.\n\n Args:\n categories: List of strings specifying the categories.\n \"\"\"\n\n self._categories = []\n annotation_specs = []\n for category in categories:\n annotation_spec = {'displayName': category}\n self._categories.append(category)\n annotation_specs.append(annotation_spec)\n\n self._matrix = []\n for row in range(len(self._categories)):\n self._matrix.append({'row': [0] * len(self._categories)})\n\n self._confusion_matrix = {}\n self._confusion_matrix['annotationSpecs'] = annotation_specs\n self._confusion_matrix['rows'] = self._matrix\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_row(self, row_category: str, row: List[float]):\n \"\"\"Logs a confusion matrix row.\n\n Args:\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n\n Raises:\n ValueError: If row_category is not in the list of categories\n set in set_categories call.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if len(row) != len(self._categories):\n raise ValueError('Invalid row. Expected size: {} got: {}'.\\\n format(len(self._categories), len(row)))\n\n self._matrix[self._categories.index(row_category)] = {'row': row}\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_cell(self, row_category: str, col_category: str,\n value: int):\n \"\"\"Logs a cell in the confusion matrix.\n\n Args:\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n\n Raises:\n ValueError: If row_category or col_category is not in the list of\n categories set in set_categories.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if col_category not in self._categories:\n raise ValueError('Invalid category: {} passed. 
Expected one of: {}'.\\\n format(row_category, self._categories))\n\n self._matrix[self._categories.index(row_category)]['row'][\n self._categories.index(col_category)] = value\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix(self, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Logs a confusion matrix.\n\n Args:\n categories: List of the category names.\n matrix: Complete confusion matrix.\n\n Raises:\n ValueError: Length of categories does not match number of rows or columns.\n \"\"\"\n self.set_confusion_matrix_categories(categories)\n\n if len(matrix) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n for index in range(len(categories)):\n if len(matrix[index]) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n self.log_confusion_matrix_row(categories[index], matrix[index])\n\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n\nclass SlicedClassificationMetrics(Artifact):\n \"\"\"Metrics class representing Sliced Classification Metrics.\n\n Similar to ClassificationMetrics clients using this class are expected to use\n log methods of the class to log metrics with the difference being each log\n method takes a slice to associate the ClassificationMetrics.\n\n \"\"\"\n\n TYPE_NAME = 'system.SlicedClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def _upsert_classification_metrics_for_slice(self, slice: str):\n \"\"\"Upserts the classification metrics instance for a slice.\"\"\"\n if slice not in self._sliced_metrics:\n self._sliced_metrics[slice] = ClassificationMetrics()\n\n def _update_metadata(self, slice: str):\n \"\"\"Updates metadata to adhere to the metrics schema.\"\"\"\n self.metadata = {}\n self.metadata['evaluationSlices'] = []\n for slice in self._sliced_metrics.keys():\n slice_metrics = {\n 'slice': slice,\n 'sliceClassificationMetrics': self._sliced_metrics[slice].metadata\n }\n self.metadata['evaluationSlices'].append(slice_metrics)\n\n def log_roc_reading(self, slice: str, threshold: float, tpr: float,\n fpr: float):\n \"\"\"Logs a single data point in the ROC Curve of a slice.\n\n Args:\n slice: String representing slice label.\n threshold: Thresold value for the data point.\n tpr: True positive rate value of the data point.\n fpr: False positive rate value of the data point.\n \"\"\"\n\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_roc_reading(threshold, tpr, fpr)\n self._update_metadata(slice)\n\n def load_roc_readings(self, slice: str, readings: List[List[float]]):\n \"\"\"Supports bulk loading ROC Curve readings for a slice.\n\n Args:\n slice: String representing slice label.\n readings: A 2-D list providing ROC Curve data points.\n The expected order of the data points is: threshold,\n true_positive_rate, false_positive_rate.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].load_roc_readings(readings)\n self._update_metadata(slice)\n\n def set_confusion_matrix_categories(self, slice: str, categories: List[str]):\n \"\"\"Stores confusion matrix categories for a slice..\n\n Categories are stored in the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n categories: 
List of strings specifying the categories.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].set_confusion_matrix_categories(categories)\n self._update_metadata(slice)\n\n def log_confusion_matrix_row(self, slice: str, row_category: str,\n row: List[int]):\n \"\"\"Logs a confusion matrix row for a slice.\n\n Row is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)\n self._update_metadata(slice)\n\n def log_confusion_matrix_cell(self, slice: str, row_category: str,\n col_category: str, value: int):\n \"\"\"Logs a confusion matrix cell for a slice..\n\n Cell is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(\n row_category, col_category, value)\n self._update_metadata(slice)\n\n def load_confusion_matrix(self, slice: str, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Supports bulk loading the whole confusion matrix for a slice.\n\n Args:\n slice: String representing slice label.\n categories: List of the category names.\n matrix: Complete confusion matrix.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(categories, matrix)\n self._update_metadata(slice)\n\n\nT = TypeVar('T')\n\n\nclass InputAnnotation():\n \"\"\"Marker type for input artifacts.\"\"\"\n pass\n\n\n\nclass OutputAnnotation():\n \"\"\"Marker type for output artifacts.\"\"\"\n pass\n\n\n# TODO: Use typing.Annotated instead of this hack.\n# With typing.Annotated (Python 3.9+ or typing_extensions package), the\n# following would look like:\n# Input = typing.Annotated[T, InputAnnotation]\n# Output = typing.Annotated[T, OutputAnnotation]\n\n\n# Input represents an Input artifact of type T.\nInput = Union[T, InputAnnotation]\n\n# Output represents an Output artifact of type T.\nOutput = Union[T, OutputAnnotation]\n\n\ndef is_artifact_annotation(typ) -> bool:\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [InputAnnotation, OutputAnnotation]\n\n if not hasattr(typ, '__origin__'):\n return False\n\n\n if typ.__origin__ != Union and type(typ.__origin__) != type(Union):\n return False\n\n\n if not hasattr(typ, '__args__') or len(typ.__args__) != 2:\n return False\n\n if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:\n return False\n\n return True\n\ndef is_input_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Input[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation\n\n return typ.__args__[1] == InputAnnotation\n\ndef is_output_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Output[T].\"\"\"\n if not is_artifact_annotation(typ):\n 
return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation\n\n return typ.__args__[1] == OutputAnnotation\n\ndef get_io_artifact_class(typ):\n if not is_artifact_annotation(typ):\n return None\n if typ == Input or typ == Output:\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[1]\n\n return typ.__args__[0]\n\ndef get_io_artifact_annotation(typ):\n if not is_artifact_annotation(typ):\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[2]\n\n return typ.__args__[1]\n\n\n\n_SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = {\n x.TYPE_NAME: x\n for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]\n}\n\n\ndef create_runtime_artifact(runtime_artifact: Dict) -> Artifact:\n \"\"\"Creates an Artifact instance from the specified RuntimeArtifact.\n\n Args:\n runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.\n \"\"\"\n schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')\n\n artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)\n if not artifact_type:\n artifact_type = Artifact\n return artifact_type(\n uri=runtime_artifact.get('uri', ''),\n name=runtime_artifact.get('name', ''),\n metadata=runtime_artifact.get('metadata', {}),\n )\n\nclass InputPath:\n '''When creating component from function, :class:`.InputPath` should be used as function parameter annotation to tell the system to pass the *data file path* to the function instead of passing the actual data.'''\n def __init__(self, type=None):\n self.type = type\n\nclass OutputPath:\n '''When creating component from function, :class:`.OutputPath` should be used as function parameter annotation to tell the system that the function wants to output data by writing it into a file with the given path instead of returning the data from the function.'''\n def __init__(self, type=None):\n self.type = type\n\nclass Executor():\n \"\"\"Executor executes v2-based Python function components.\"\"\"\n\n def __init__(self, executor_input: Dict, function_to_execute: Callable):\n self._func = function_to_execute\n self._input = executor_input\n self._input_artifacts: Dict[str, Artifact] = {}\n self._output_artifacts: Dict[str, Artifact] = {}\n\n for name, artifacts in self._input.get('inputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._input_artifacts[name] = self._make_input_artifact(\n artifacts_list[0])\n\n for name, artifacts in self._input.get('outputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._output_artifacts[name] = self._make_output_artifact(\n artifacts_list[0])\n\n self._return_annotation = inspect.signature(self._func).return_annotation\n self._executor_output = {}\n\n @classmethod\n def _make_input_artifact(cls, runtime_artifact: Dict):\n return create_runtime_artifact(runtime_artifact)\n\n @classmethod\n def _make_output_artifact(cls, runtime_artifact: Dict):\n import os\n artifact = create_runtime_artifact(runtime_artifact)\n os.makedirs(os.path.dirname(artifact.path), exist_ok=True)\n return artifact\n\n def _get_input_artifact(self, name: str):\n return self._input_artifacts.get(name)\n\n def _get_output_artifact(self, name: str):\n return 
self._output_artifacts.get(name)\n\n  def _get_input_parameter_value(self, parameter_name: str, parameter_type: Any):\n    parameter = self._input.get('inputs', {}).get('parameters',\n                                                  {}).get(parameter_name, None)\n    if parameter is None:\n      return None\n\n    if parameter.get('stringValue'):\n      if parameter_type == str:\n        return parameter['stringValue']\n      elif parameter_type == bool:\n        # Use `.lower()` so it can also handle 'True' and 'False' (resulted from\n        # `str(True)` and `str(False)`, respectively.\n        return json.loads(parameter['stringValue'].lower())\n      else:\n        return json.loads(parameter['stringValue'])\n    elif parameter.get('intValue'):\n      return int(parameter['intValue'])\n    elif parameter.get('doubleValue'):\n      return float(parameter['doubleValue'])\n\n  def _get_output_parameter_path(self, parameter_name: str):\n    parameter_name = self._maybe_strip_path_suffix(parameter_name)\n    parameter = self._input.get('outputs',\n                                {}).get('parameters',\n                                        {}).get(parameter_name, None)\n    if parameter is None:\n      return None\n\n    import os\n    path = parameter.get('outputFile', None)\n    if path:\n      os.makedirs(os.path.dirname(path), exist_ok=True)\n    return path\n\n  def _get_output_artifact_path(self, artifact_name: str):\n    artifact_name = self._maybe_strip_path_suffix(artifact_name)\n    output_artifact = self._output_artifacts.get(artifact_name)\n    if not output_artifact:\n      raise ValueError(\n          'Failed to get output artifact path for artifact name {}'.format(\n              artifact_name))\n    return output_artifact.path\n\n  def _get_input_artifact_path(self, artifact_name: str):\n    artifact_name = self._maybe_strip_path_suffix(artifact_name)\n    input_artifact = self._input_artifacts.get(artifact_name)\n    if not input_artifact:\n      raise ValueError(\n          'Failed to get input artifact path for artifact name {}'.format(\n              artifact_name))\n    return input_artifact.path\n\n  def _write_output_parameter_value(self, name: str,\n                                    value: Union[str, int, float, bool, dict,\n                                                 list, Dict, List]):\n    if type(value) == str:\n      output = {'stringValue': value}\n    elif type(value) == int:\n      output = {'intValue': value}\n    elif type(value) == float:\n      output = {'doubleValue': value}\n    else:\n      # For bool, list, dict, List, Dict, json serialize the value.\n      output = {'stringValue': json.dumps(value)}\n\n    if not self._executor_output.get('parameters'):\n      self._executor_output['parameters'] = {}\n\n    self._executor_output['parameters'][name] = output\n\n  def _write_output_artifact_payload(self, name: str, value: Any):\n    path = self._get_output_artifact_path(name)\n    with open(path, 'w') as f:\n      f.write(str(value))\n\n  # TODO: extract to a util\n  @classmethod\n  def _get_short_type_name(cls, type_name: str) -> str:\n    \"\"\"Extracts the short form type name.\n\n    This method is used for looking up serializer for a given type.\n\n    For example:\n      typing.List -> List\n      typing.List[int] -> List\n      typing.Dict[str, str] -> Dict\n      List -> List\n      str -> str\n\n    Args:\n      type_name: The original type name.\n\n    Returns:\n      The short form type name or the original name if pattern doesn't match.\n    \"\"\"\n    import re\n    match = re.match('(typing\\.)?(?P<type>\\w+)(?:\\[.+\\])?', type_name)\n    if match:\n      return match.group('type')\n    else:\n      return type_name\n\n  # TODO: merge with type_utils.is_parameter_type\n  @classmethod\n  def _is_parameter(cls, annotation: Any) -> bool:\n    if type(annotation) == type:\n      return annotation in [str, int, float, bool, dict, list]\n\n    # Annotation could be, for instance `typing.Dict[str, str]`, etc.\n    return cls._get_short_type_name(str(annotation)) in ['Dict', 
'List']\n\n @classmethod\n def _is_artifact(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, Artifact)\n return False\n\n @classmethod\n def _is_named_tuple(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, tuple) and hasattr(\n annotation, '_fields') and hasattr(annotation, '__annotations__')\n return False\n\n def _handle_single_return_value(self, output_name: str, annotation_type: Any,\n return_value: Any):\n if self._is_parameter(annotation_type):\n if type(return_value) != annotation_type:\n raise ValueError(\n 'Function `{}` returned value of type {}; want type {}'.format(\n self._func.__name__, type(return_value), annotation_type))\n self._write_output_parameter_value(output_name, return_value)\n elif self._is_artifact(annotation_type):\n self._write_output_artifact_payload(output_name, return_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. Must be one of `str`, `int`, `float`, or a'\n ' subclass of `Artifact`'.format(annotation_type))\n\n def _write_executor_output(self, func_output: Optional[Any] = None):\n if self._output_artifacts:\n self._executor_output['artifacts'] = {}\n\n for name, artifact in self._output_artifacts.items():\n runtime_artifact = {\n 'name': artifact.name,\n 'uri': artifact.uri,\n 'metadata': artifact.metadata,\n }\n artifacts_list = {'artifacts': [runtime_artifact]}\n\n self._executor_output['artifacts'][name] = artifacts_list\n\n if func_output is not None:\n if self._is_parameter(self._return_annotation) or self._is_artifact(\n self._return_annotation):\n # Note: single output is named `Output` in component.yaml.\n self._handle_single_return_value('Output', self._return_annotation,\n func_output)\n elif self._is_named_tuple(self._return_annotation):\n if len(self._return_annotation._fields) != len(func_output):\n raise RuntimeError(\n 'Expected {} return values from function `{}`, got {}'.format(\n len(self._return_annotation._fields), self._func.__name__,\n len(func_output)))\n for i in range(len(self._return_annotation._fields)):\n field = self._return_annotation._fields[i]\n field_type = self._return_annotation.__annotations__[field]\n if type(func_output) == tuple:\n field_value = func_output[i]\n else:\n field_value = getattr(func_output, field)\n self._handle_single_return_value(field, field_type, field_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. 
Must be one of `str`, `int`, `float`, a'\n ' subclass of `Artifact`, or a NamedTuple collection of these types.'\n .format(self._return_annotation))\n\n import os\n os.makedirs(\n os.path.dirname(self._input['outputs']['outputFile']), exist_ok=True)\n with open(self._input['outputs']['outputFile'], 'w') as f:\n f.write(json.dumps(self._executor_output))\n\n def _maybe_strip_path_suffix(self, name) -> str:\n if name.endswith('_path'):\n name = name[0:-len('_path')]\n if name.endswith('_file'):\n name = name[0:-len('_file')]\n return name\n\n def execute(self):\n annotations = inspect.getfullargspec(self._func).annotations\n\n # Function arguments.\n func_kwargs = {}\n\n for k, v in annotations.items():\n if k == 'return':\n continue\n\n if self._is_parameter(v):\n func_kwargs[k] = self._get_input_parameter_value(k, v)\n\n if is_artifact_annotation(v):\n if is_input_artifact(v):\n func_kwargs[k] = self._get_input_artifact(k)\n if is_output_artifact(v):\n func_kwargs[k] = self._get_output_artifact(k)\n\n elif isinstance(v, OutputPath):\n if self._is_parameter(v.type):\n func_kwargs[k] = self._get_output_parameter_path(k)\n else:\n func_kwargs[k] = self._get_output_artifact_path(k)\n elif isinstance(v, InputPath):\n func_kwargs[k] = self._get_input_artifact_path(k)\n\n result = self._func(**func_kwargs)\n self._write_executor_output(result)\n\n\ndef training_op(input1: str):\n print('dummy training master: {}'.format(input1))\n\n\ndef executor_main():\n import argparse\n import json\n\n parser = argparse.ArgumentParser(description='Process some integers.')\n parser.add_argument('--executor_input', type=str)\n parser.add_argument('--function_to_execute', type=str)\n\n args, _ = parser.parse_known_args()\n executor_input = json.loads(args.executor_input)\n function_to_execute = globals()[args.function_to_execute]\n\n executor = Executor(executor_input=executor_input,\n function_to_execute=function_to_execute)\n\n executor.execute()\n\n\nif __name__ == '__main__':\n executor_main()\n" + "\nimport json\nimport inspect\nfrom typing import *\n\n# Copyright 2021 The Kubeflow Authors\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Classes for input/output types in KFP SDK.\n\nThese are only compatible with v2 Pipelines.\n\"\"\"\n\nimport os\nfrom typing import Dict, Generic, List, Optional, Type, TypeVar, Union\n\n_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'\n_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'\n_S3_LOCAL_MOUNT_PREFIX = '/s3/'\n\n\nclass Artifact(object):\n \"\"\"Generic Artifact class.\n\n This class is meant to represent the metadata around an input or output\n machine-learning Artifact. 
Artifacts have URIs, which can either be a location\n on disk (or Cloud storage) or some other resource identifier such as\n an API resource name.\n\n Artifacts carry a `metadata` field, which is a dictionary for storing\n metadata related to this artifact.\n \"\"\"\n TYPE_NAME = 'system.Artifact'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n \"\"\"Initializes the Artifact with the given name, URI and metadata.\"\"\"\n self.uri = uri or ''\n self.name = name or ''\n self.metadata = metadata or {}\n\n @property\n def path(self):\n return self._get_path()\n\n @path.setter\n def path(self, path):\n self._set_path(path)\n\n def _get_path(self) -> Optional[str]:\n if self.uri.startswith('gs://'):\n return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]\n elif self.uri.startswith('minio://'):\n return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]\n elif self.uri.startswith('s3://'):\n return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]\n return None\n\n def _set_path(self, path):\n if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):\n path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):\n path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):\n path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]\n self.uri = path\n\n\nclass Model(Artifact):\n \"\"\"An artifact representing an ML Model.\"\"\"\n TYPE_NAME = 'system.Model'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n @property\n def framework(self) -> str:\n return self._get_framework()\n\n def _get_framework(self) -> str:\n return self.metadata.get('framework', '')\n\n @framework.setter\n def framework(self, framework: str):\n self._set_framework(framework)\n\n def _set_framework(self, framework: str):\n self.metadata['framework'] = framework\n\n\nclass Dataset(Artifact):\n \"\"\"An artifact representing an ML Dataset.\"\"\"\n TYPE_NAME = 'system.Dataset'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Metrics(Artifact):\n \"\"\"Represent a simple base Artifact type to store key-value scalar metrics.\"\"\"\n TYPE_NAME = 'system.Metrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_metric(self, metric: str, value: float):\n \"\"\"Sets a custom scalar metric.\n\n Args:\n metric: Metric key\n value: Value of the metric.\n \"\"\"\n self.metadata[metric] = value\n\n\nclass ClassificationMetrics(Artifact):\n \"\"\"Represents Artifact class to store Classification Metrics.\"\"\"\n TYPE_NAME = 'system.ClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):\n \"\"\"Logs a single data point in the ROC Curve.\n\n Args:\n fpr: False positive rate value of the data point.\n tpr: True positive rate value of the data point.\n threshold: Threshold value for the data point.\n \"\"\"\n\n roc_reading = {\n 'confidenceThreshold': threshold,\n 'recall': tpr,\n 
'falsePositiveRate': fpr\n }\n if 'confidenceMetrics' not in self.metadata.keys():\n self.metadata['confidenceMetrics'] = []\n\n self.metadata['confidenceMetrics'].append(roc_reading)\n\n def log_roc_curve(self, fpr: List[float], tpr: List[float],\n threshold: List[float]):\n \"\"\"Logs an ROC curve.\n\n The list length of fpr, tpr and threshold must be the same.\n\n Args:\n fpr: List of false positive rate values.\n tpr: List of true positive rate values.\n threshold: List of threshold values.\n \"\"\"\n if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(tpr) != len(\n threshold):\n raise ValueError('Length of fpr, tpr and threshold must be the same. '\n 'Got lengths {}, {} and {} respectively.'.format(\n len(fpr), len(tpr), len(threshold)))\n\n for i in range(len(fpr)):\n self.log_roc_data_point(fpr=fpr[i], tpr=tpr[i], threshold=threshold[i])\n\n def set_confusion_matrix_categories(self, categories: List[str]):\n \"\"\"Stores confusion matrix categories.\n\n Args:\n categories: List of strings specifying the categories.\n \"\"\"\n\n self._categories = []\n annotation_specs = []\n for category in categories:\n annotation_spec = {'displayName': category}\n self._categories.append(category)\n annotation_specs.append(annotation_spec)\n\n self._matrix = []\n for row in range(len(self._categories)):\n self._matrix.append({'row': [0] * len(self._categories)})\n\n self._confusion_matrix = {}\n self._confusion_matrix['annotationSpecs'] = annotation_specs\n self._confusion_matrix['rows'] = self._matrix\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_row(self, row_category: str, row: List[float]):\n \"\"\"Logs a confusion matrix row.\n\n Args:\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n\n Raises:\n ValueError: If row_category is not in the list of categories\n set in set_categories call.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if len(row) != len(self._categories):\n raise ValueError('Invalid row. Expected size: {} got: {}'.\\\n format(len(self._categories), len(row)))\n\n self._matrix[self._categories.index(row_category)] = {'row': row}\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_cell(self, row_category: str, col_category: str,\n value: int):\n \"\"\"Logs a cell in the confusion matrix.\n\n Args:\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n\n Raises:\n ValueError: If row_category or col_category is not in the list of\n categories set in set_categories.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if col_category not in self._categories:\n raise ValueError('Invalid category: {} passed. 
Expected one of: {}'.\\\n format(row_category, self._categories))\n\n    self._matrix[self._categories.index(row_category)]['row'][\n        self._categories.index(col_category)] = value\n    self.metadata['confusionMatrix'] = self._confusion_matrix\n\n  def log_confusion_matrix(self, categories: List[str],\n matrix: List[List[int]]):\n    \"\"\"Logs a confusion matrix.\n\n    Args:\n      categories: List of the category names.\n      matrix: Complete confusion matrix.\n\n    Raises:\n      ValueError: Length of categories does not match number of rows or columns.\n    \"\"\"\n    self.set_confusion_matrix_categories(categories)\n\n    if len(matrix) != len(categories):\n      raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n    for index in range(len(categories)):\n      if len(matrix[index]) != len(categories):\n        raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n      self.log_confusion_matrix_row(categories[index], matrix[index])\n\n    self.metadata['confusionMatrix'] = self._confusion_matrix\n\n\nclass SlicedClassificationMetrics(Artifact):\n  \"\"\"Metrics class representing Sliced Classification Metrics.\n\n  Similar to ClassificationMetrics, clients using this class are expected to use\n  log methods of the class to log metrics with the difference being each log\n  method takes a slice to associate the ClassificationMetrics.\n\n  \"\"\"\n\n  TYPE_NAME = 'system.SlicedClassificationMetrics'\n\n  def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n    super().__init__(uri=uri, name=name, metadata=metadata)\n\n  def _upsert_classification_metrics_for_slice(self, slice: str):\n    \"\"\"Upserts the classification metrics instance for a slice.\"\"\"\n    if slice not in self._sliced_metrics:\n      self._sliced_metrics[slice] = ClassificationMetrics()\n\n  def _update_metadata(self, slice: str):\n    \"\"\"Updates metadata to adhere to the metrics schema.\"\"\"\n    self.metadata = {}\n    self.metadata['evaluationSlices'] = []\n    for slice in self._sliced_metrics.keys():\n      slice_metrics = {\n          'slice': slice,\n          'sliceClassificationMetrics': self._sliced_metrics[slice].metadata\n      }\n      self.metadata['evaluationSlices'].append(slice_metrics)\n\n  def log_roc_reading(self, slice: str, threshold: float, tpr: float,\n fpr: float):\n    \"\"\"Logs a single data point in the ROC Curve of a slice.\n\n    Args:\n      slice: String representing slice label.\n      threshold: Threshold value for the data point.\n      tpr: True positive rate value of the data point.\n      fpr: False positive rate value of the data point.\n    \"\"\"\n\n    self._upsert_classification_metrics_for_slice(slice)\n    self._sliced_metrics[slice].log_roc_reading(threshold, tpr, fpr)\n    self._update_metadata(slice)\n\n  def load_roc_readings(self, slice: str, readings: List[List[float]]):\n    \"\"\"Supports bulk loading ROC Curve readings for a slice.\n\n    Args:\n      slice: String representing slice label.\n      readings: A 2-D list providing ROC Curve data points.\n        The expected order of the data points is: threshold,\n        true_positive_rate, false_positive_rate.\n    \"\"\"\n    self._upsert_classification_metrics_for_slice(slice)\n    self._sliced_metrics[slice].load_roc_readings(readings)\n    self._update_metadata(slice)\n\n  def set_confusion_matrix_categories(self, slice: str, categories: List[str]):\n    \"\"\"Stores confusion matrix categories for a slice.\n\n    Categories are stored in the internal metrics_utils.ConfusionMatrix\n    instance of the slice.\n\n    Args:\n      slice: String representing slice label.\n      categories: 
List of strings specifying the categories.\n    \"\"\"\n    self._upsert_classification_metrics_for_slice(slice)\n    self._sliced_metrics[slice].set_confusion_matrix_categories(categories)\n    self._update_metadata(slice)\n\n  def log_confusion_matrix_row(self, slice: str, row_category: str,\n row: List[int]):\n    \"\"\"Logs a confusion matrix row for a slice.\n\n    Row is updated on the internal metrics_utils.ConfusionMatrix\n    instance of the slice.\n\n    Args:\n      slice: String representing slice label.\n      row_category: Category to which the row belongs.\n      row: List of integers specifying the values for the row.\n    \"\"\"\n    self._upsert_classification_metrics_for_slice(slice)\n    self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)\n    self._update_metadata(slice)\n\n  def log_confusion_matrix_cell(self, slice: str, row_category: str,\n col_category: str, value: int):\n    \"\"\"Logs a confusion matrix cell for a slice.\n\n    Cell is updated on the internal metrics_utils.ConfusionMatrix\n    instance of the slice.\n\n    Args:\n      slice: String representing slice label.\n      row_category: String representing the name of the row category.\n      col_category: String representing the name of the column category.\n      value: Int value of the cell.\n    \"\"\"\n    self._upsert_classification_metrics_for_slice(slice)\n    self._sliced_metrics[slice].log_confusion_matrix_cell(\n        row_category, col_category, value)\n    self._update_metadata(slice)\n\n  def load_confusion_matrix(self, slice: str, categories: List[str],\n matrix: List[List[int]]):\n    \"\"\"Supports bulk loading the whole confusion matrix for a slice.\n\n    Args:\n      slice: String representing slice label.\n      categories: List of the category names.\n      matrix: Complete confusion matrix.\n    \"\"\"\n    self._upsert_classification_metrics_for_slice(slice)\n    self._sliced_metrics[slice].log_confusion_matrix(categories, matrix)\n    self._update_metadata(slice)\n\n\nclass HTML(Artifact):\n  \"\"\"An artifact representing an HTML file.\"\"\"\n  TYPE_NAME = 'system.HTML'\n\n  def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n    super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Markdown(Artifact):\n  \"\"\"An artifact representing a Markdown file.\"\"\"\n  TYPE_NAME = 'system.Markdown'\n\n  def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n    super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nT = TypeVar('T')\n\n\nclass InputAnnotation():\n  \"\"\"Marker type for input artifacts.\"\"\"\n  pass\n\n\nclass OutputAnnotation():\n  \"\"\"Marker type for output artifacts.\"\"\"\n  pass\n\n\n# TODO: Use typing.Annotated instead of this hack.\n# With typing.Annotated (Python 3.9+ or typing_extensions package), the\n# following would look like:\n# Input = typing.Annotated[T, InputAnnotation]\n# Output = typing.Annotated[T, OutputAnnotation]\n\n# Input represents an Input artifact of type T.\nInput = Union[T, InputAnnotation]\n\n# Output represents an Output artifact of type T.\nOutput = Union[T, OutputAnnotation]\n\n\ndef is_artifact_annotation(typ) -> bool:\n  if hasattr(typ, '_subs_tree'):  # Python 3.6\n    subs_tree = typ._subs_tree()\n    return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [\n        InputAnnotation, OutputAnnotation\n    ]\n\n  if not hasattr(typ, '__origin__'):\n    return False\n\n  if typ.__origin__ != Union and type(typ.__origin__) != type(Union):\n    return False\n\n  if not hasattr(typ, '__args__') or len(typ.__args__) != 2:\n    return 
False\n\n if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:\n return False\n\n return True\n\n\ndef is_input_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Input[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation\n\n return typ.__args__[1] == InputAnnotation\n\n\ndef is_output_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Output[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation\n\n return typ.__args__[1] == OutputAnnotation\n\n\ndef get_io_artifact_class(typ):\n if not is_artifact_annotation(typ):\n return None\n if typ == Input or typ == Output:\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[1]\n\n return typ.__args__[0]\n\n\ndef get_io_artifact_annotation(typ):\n if not is_artifact_annotation(typ):\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[2]\n\n return typ.__args__[1]\n\n\n_SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = {\n x.TYPE_NAME: x\n for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]\n}\n\n\ndef create_runtime_artifact(runtime_artifact: Dict) -> Artifact:\n \"\"\"Creates an Artifact instance from the specified RuntimeArtifact.\n\n Args:\n runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.\n \"\"\"\n schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')\n\n artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)\n if not artifact_type:\n artifact_type = Artifact\n return artifact_type(\n uri=runtime_artifact.get('uri', ''),\n name=runtime_artifact.get('name', ''),\n metadata=runtime_artifact.get('metadata', {}),\n )\n\nclass InputPath:\n '''When creating component from function, :class:`.InputPath` should be used as function parameter annotation to tell the system to pass the *data file path* to the function instead of passing the actual data.'''\n def __init__(self, type=None):\n self.type = type\n\nclass OutputPath:\n '''When creating component from function, :class:`.OutputPath` should be used as function parameter annotation to tell the system that the function wants to output data by writing it into a file with the given path instead of returning the data from the function.'''\n def __init__(self, type=None):\n self.type = type\n\nclass Executor():\n \"\"\"Executor executes v2-based Python function components.\"\"\"\n\n def __init__(self, executor_input: Dict, function_to_execute: Callable):\n self._func = function_to_execute\n self._input = executor_input\n self._input_artifacts: Dict[str, Artifact] = {}\n self._output_artifacts: Dict[str, Artifact] = {}\n\n for name, artifacts in self._input.get('inputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._input_artifacts[name] = self._make_input_artifact(\n artifacts_list[0])\n\n for name, artifacts in self._input.get('outputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._output_artifacts[name] = self._make_output_artifact(\n artifacts_list[0])\n\n self._return_annotation = 
inspect.signature(self._func).return_annotation\n    self._executor_output = {}\n\n  @classmethod\n  def _make_input_artifact(cls, runtime_artifact: Dict):\n    return create_runtime_artifact(runtime_artifact)\n\n  @classmethod\n  def _make_output_artifact(cls, runtime_artifact: Dict):\n    import os\n    artifact = create_runtime_artifact(runtime_artifact)\n    os.makedirs(os.path.dirname(artifact.path), exist_ok=True)\n    return artifact\n\n  def _get_input_artifact(self, name: str):\n    return self._input_artifacts.get(name)\n\n  def _get_output_artifact(self, name: str):\n    return self._output_artifacts.get(name)\n\n  def _get_input_parameter_value(self, parameter_name: str, parameter_type: Any):\n    parameter = self._input.get('inputs', {}).get('parameters',\n {}).get(parameter_name, None)\n    if parameter is None:\n      return None\n\n    if parameter.get('stringValue'):\n      if parameter_type == str:\n        return parameter['stringValue']\n      elif parameter_type == bool:\n        # Use `.lower()` so it can also handle 'True' and 'False' (resulted from\n        # `str(True)` and `str(False)`, respectively).\n        return json.loads(parameter['stringValue'].lower())\n      else:\n        return json.loads(parameter['stringValue'])\n    elif parameter.get('intValue'):\n      return int(parameter['intValue'])\n    elif parameter.get('doubleValue'):\n      return float(parameter['doubleValue'])\n\n  def _get_output_parameter_path(self, parameter_name: str):\n    parameter_name = self._maybe_strip_path_suffix(parameter_name)\n    parameter = self._input.get('outputs',\n {}).get('parameters',\n {}).get(parameter_name, None)\n    if parameter is None:\n      return None\n\n    import os\n    path = parameter.get('outputFile', None)\n    if path:\n      os.makedirs(os.path.dirname(path), exist_ok=True)\n    return path\n\n  def _get_output_artifact_path(self, artifact_name: str):\n    artifact_name = self._maybe_strip_path_suffix(artifact_name)\n    output_artifact = self._output_artifacts.get(artifact_name)\n    if not output_artifact:\n      raise ValueError(\n 'Failed to get output artifact path for artifact name {}'.format(\n artifact_name))\n    return output_artifact.path\n\n  def _get_input_artifact_path(self, artifact_name: str):\n    artifact_name = self._maybe_strip_path_suffix(artifact_name)\n    input_artifact = self._input_artifacts.get(artifact_name)\n    if not input_artifact:\n      raise ValueError(\n 'Failed to get input artifact path for artifact name {}'.format(\n artifact_name))\n    return input_artifact.path\n\n  def _write_output_parameter_value(self, name: str,\n value: Union[str, int, float, bool, dict,\n list, Dict, List]):\n    if type(value) == str:\n      output = {'stringValue': value}\n    elif type(value) == int:\n      output = {'intValue': value}\n    elif type(value) == float:\n      output = {'doubleValue': value}\n    else:\n      # For bool, list, dict, List, Dict, json serialize the value.\n      output = {'stringValue': json.dumps(value)}\n\n    if not self._executor_output.get('parameters'):\n      self._executor_output['parameters'] = {}\n\n    self._executor_output['parameters'][name] = output\n\n  def _write_output_artifact_payload(self, name: str, value: Any):\n    path = self._get_output_artifact_path(name)\n    with open(path, 'w') as f:\n      f.write(str(value))\n\n  # TODO: extract to a util\n  @classmethod\n  def _get_short_type_name(cls, type_name: str) -> str:\n    \"\"\"Extracts the short form type name.\n\n    This method is used for looking up serializer for a given type.\n\n    For example:\n      typing.List -> List\n      typing.List[int] -> List\n      typing.Dict[str, str] -> Dict\n      List -> List\n      str -> str\n\n    Args:\n      type_name: The original type name.\n\n    Returns:\n      The 
short form type name or the original name if pattern doesn't match.\n    \"\"\"\n    import re\n    match = re.match('(typing\.)?(?P<type>\w+)(?:\[.+\])?', type_name)\n    if match:\n      return match.group('type')\n    else:\n      return type_name\n\n  # TODO: merge with type_utils.is_parameter_type\n  @classmethod\n  def _is_parameter(cls, annotation: Any) -> bool:\n    if type(annotation) == type:\n      return annotation in [str, int, float, bool, dict, list]\n\n    # Annotation could be, for instance `typing.Dict[str, str]`, etc.\n    return cls._get_short_type_name(str(annotation)) in ['Dict', 'List']\n\n  @classmethod\n  def _is_artifact(cls, annotation: Any) -> bool:\n    if type(annotation) == type:\n      return issubclass(annotation, Artifact)\n    return False\n\n  @classmethod\n  def _is_named_tuple(cls, annotation: Any) -> bool:\n    if type(annotation) == type:\n      return issubclass(annotation, tuple) and hasattr(\n annotation, '_fields') and hasattr(annotation, '__annotations__')\n    return False\n\n  def _handle_single_return_value(self, output_name: str, annotation_type: Any,\n return_value: Any):\n    if self._is_parameter(annotation_type):\n      if type(return_value) != annotation_type:\n        raise ValueError(\n 'Function `{}` returned value of type {}; want type {}'.format(\n self._func.__name__, type(return_value), annotation_type))\n      self._write_output_parameter_value(output_name, return_value)\n    elif self._is_artifact(annotation_type):\n      self._write_output_artifact_payload(output_name, return_value)\n    else:\n      raise RuntimeError(\n 'Unknown return type: {}. Must be one of `str`, `int`, `float`, or a'\n ' subclass of `Artifact`'.format(annotation_type))\n\n  def _write_executor_output(self, func_output: Optional[Any] = None):\n    if self._output_artifacts:\n      self._executor_output['artifacts'] = {}\n\n    for name, artifact in self._output_artifacts.items():\n      runtime_artifact = {\n          'name': artifact.name,\n          'uri': artifact.uri,\n          'metadata': artifact.metadata,\n      }\n      artifacts_list = {'artifacts': [runtime_artifact]}\n\n      self._executor_output['artifacts'][name] = artifacts_list\n\n    if func_output is not None:\n      if self._is_parameter(self._return_annotation) or self._is_artifact(\n self._return_annotation):\n        # Note: single output is named `Output` in component.yaml.\n        self._handle_single_return_value('Output', self._return_annotation,\n func_output)\n      elif self._is_named_tuple(self._return_annotation):\n        if len(self._return_annotation._fields) != len(func_output):\n          raise RuntimeError(\n 'Expected {} return values from function `{}`, got {}'.format(\n len(self._return_annotation._fields), self._func.__name__,\n len(func_output)))\n        for i in range(len(self._return_annotation._fields)):\n          field = self._return_annotation._fields[i]\n          field_type = self._return_annotation.__annotations__[field]\n          if type(func_output) == tuple:\n            field_value = func_output[i]\n          else:\n            field_value = getattr(func_output, field)\n          self._handle_single_return_value(field, field_type, field_value)\n      else:\n        raise RuntimeError(\n 'Unknown return type: {}. 
Must be one of `str`, `int`, `float`, a'\n ' subclass of `Artifact`, or a NamedTuple collection of these types.'\n .format(self._return_annotation))\n\n import os\n os.makedirs(\n os.path.dirname(self._input['outputs']['outputFile']), exist_ok=True)\n with open(self._input['outputs']['outputFile'], 'w') as f:\n f.write(json.dumps(self._executor_output))\n\n def _maybe_strip_path_suffix(self, name) -> str:\n if name.endswith('_path'):\n name = name[0:-len('_path')]\n if name.endswith('_file'):\n name = name[0:-len('_file')]\n return name\n\n def execute(self):\n annotations = inspect.getfullargspec(self._func).annotations\n\n # Function arguments.\n func_kwargs = {}\n\n for k, v in annotations.items():\n if k == 'return':\n continue\n\n if self._is_parameter(v):\n func_kwargs[k] = self._get_input_parameter_value(k, v)\n\n if is_artifact_annotation(v):\n if is_input_artifact(v):\n func_kwargs[k] = self._get_input_artifact(k)\n if is_output_artifact(v):\n func_kwargs[k] = self._get_output_artifact(k)\n\n elif isinstance(v, OutputPath):\n if self._is_parameter(v.type):\n func_kwargs[k] = self._get_output_parameter_path(k)\n else:\n func_kwargs[k] = self._get_output_artifact_path(k)\n elif isinstance(v, InputPath):\n func_kwargs[k] = self._get_input_artifact_path(k)\n\n result = self._func(**func_kwargs)\n self._write_executor_output(result)\n\n\ndef training_op(input1: str):\n print('dummy training master: {}'.format(input1))\n\n\ndef executor_main():\n import argparse\n import json\n\n parser = argparse.ArgumentParser(description='Process some integers.')\n parser.add_argument('--executor_input', type=str)\n parser.add_argument('--function_to_execute', type=str)\n\n args, _ = parser.parse_known_args()\n executor_input = json.loads(args.executor_input)\n function_to_execute = globals()[args.function_to_execute]\n\n executor = Executor(executor_input=executor_input,\n function_to_execute=function_to_execute)\n\n executor.execute()\n\n\nif __name__ == '__main__':\n executor_main()\n" ], "imageUri": "python:3.7" }, @@ -163,7 +163,7 @@ } }, "schemaVersion": "2.0.0", - "sdkVersion": "kfp-1.6.4" + "sdkVersion": "kfp-1.6.6" }, "runtimeConfig": { "gcsOutputDirectory": "dummy_root" diff --git a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_exit_handler.json b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_exit_handler.json index ac417bbdf4d..1b08fdbc3f4 100644 --- a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_exit_handler.json +++ b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_exit_handler.json @@ -101,7 +101,7 @@ "sh", "-ec", "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n", - "\nimport json\nimport inspect\nfrom typing import *\n\n# Copyright 2021 The Kubeflow Authors\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Classes for input/output types in KFP SDK.\n\nThese are only compatible with v2 Pipelines.\n\"\"\"\n\nimport os\nfrom typing import Dict, Generic, List, Optional, 
Type, TypeVar, Union\n\n\n_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'\n_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'\n_S3_LOCAL_MOUNT_PREFIX = '/s3/'\n\n\nclass Artifact(object):\n \"\"\"Generic Artifact class.\n\n This class is meant to represent the metadata around an input or output\n machine-learning Artifact. Artifacts have URIs, which can either be a location\n on disk (or Cloud storage) or some other resource identifier such as\n an API resource name.\n\n Artifacts carry a `metadata` field, which is a dictionary for storing\n metadata related to this artifact.\n \"\"\"\n TYPE_NAME = 'system.Artifact'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n \"\"\"Initializes the Artifact with the given name, URI and metadata.\"\"\"\n self.uri = uri or ''\n self.name = name or ''\n self.metadata = metadata or {}\n\n @property\n def path(self):\n return self._get_path()\n\n @path.setter\n def path(self, path):\n self._set_path(path)\n\n def _get_path(self) -> Optional[str]:\n if self.uri.startswith('gs://'):\n return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]\n elif self.uri.startswith('minio://'):\n return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]\n elif self.uri.startswith('s3://'):\n return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]\n return None\n\n def _set_path(self, path):\n if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):\n path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):\n path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):\n path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]\n self.uri = path\n\n\nclass Model(Artifact):\n \"\"\"An artifact representing an ML Model.\"\"\"\n TYPE_NAME = 'system.Model'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n @property\n def framework(self) -> str:\n return self._get_framework()\n\n def _get_framework(self) -> str:\n return self.metadata.get('framework', '')\n\n @framework.setter\n def framework(self, framework: str):\n self._set_framework(framework)\n\n def _set_framework(self, framework: str):\n self.metadata['framework'] = framework\n\n\nclass Dataset(Artifact):\n \"\"\"An artifact representing an ML Dataset.\"\"\"\n TYPE_NAME = 'system.Dataset'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Metrics(Artifact):\n \"\"\"Represent a simple base Artifact type to store key-value scalar metrics.\"\"\"\n TYPE_NAME = 'system.Metrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_metric(self, metric: str, value: float):\n \"\"\"Sets a custom scalar metric.\n\n Args:\n metric: Metric key\n value: Value of the metric.\n \"\"\"\n self.metadata[metric] = value\n\n\nclass ClassificationMetrics(Artifact):\n \"\"\"Represents Artifact class to store Classification Metrics.\"\"\"\n TYPE_NAME = 'system.ClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):\n 
\"\"\"Logs a single data point in the ROC Curve.\n\n Args:\n fpr: False positive rate value of the data point.\n tpr: True positive rate value of the data point.\n threshold: Threshold value for the data point.\n \"\"\"\n\n roc_reading = {\n 'confidenceThreshold': threshold,\n 'recall': tpr,\n 'falsePositiveRate': fpr\n }\n if 'confidenceMetrics' not in self.metadata.keys():\n self.metadata['confidenceMetrics'] = []\n\n self.metadata['confidenceMetrics'].append(roc_reading)\n\n def log_roc_curve(self, fpr: List[float], tpr: List[float],\n threshold: List[float]):\n \"\"\"Logs an ROC curve.\n\n The list length of fpr, tpr and threshold must be the same.\n\n Args:\n fpr: List of false positive rate values.\n tpr: List of true positive rate values.\n threshold: List of threshold values.\n \"\"\"\n if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(tpr) != len(\n threshold):\n raise ValueError('Length of fpr, tpr and threshold must be the same. '\n 'Got lengths {}, {} and {} respectively.'.format(\n len(fpr), len(tpr), len(threshold)))\n\n for i in range(len(fpr)):\n self.log_roc_data_point(fpr=fpr[i], tpr=tpr[i], threshold=threshold[i])\n\n def set_confusion_matrix_categories(self, categories: List[str]):\n \"\"\"Stores confusion matrix categories.\n\n Args:\n categories: List of strings specifying the categories.\n \"\"\"\n\n self._categories = []\n annotation_specs = []\n for category in categories:\n annotation_spec = {'displayName': category}\n self._categories.append(category)\n annotation_specs.append(annotation_spec)\n\n self._matrix = []\n for row in range(len(self._categories)):\n self._matrix.append({'row': [0] * len(self._categories)})\n\n self._confusion_matrix = {}\n self._confusion_matrix['annotationSpecs'] = annotation_specs\n self._confusion_matrix['rows'] = self._matrix\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_row(self, row_category: str, row: List[float]):\n \"\"\"Logs a confusion matrix row.\n\n Args:\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n\n Raises:\n ValueError: If row_category is not in the list of categories\n set in set_categories call.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if len(row) != len(self._categories):\n raise ValueError('Invalid row. Expected size: {} got: {}'.\\\n format(len(self._categories), len(row)))\n\n self._matrix[self._categories.index(row_category)] = {'row': row}\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_cell(self, row_category: str, col_category: str,\n value: int):\n \"\"\"Logs a cell in the confusion matrix.\n\n Args:\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n\n Raises:\n ValueError: If row_category or col_category is not in the list of\n categories set in set_categories.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if col_category not in self._categories:\n raise ValueError('Invalid category: {} passed. 
Expected one of: {}'.\\\n format(row_category, self._categories))\n\n self._matrix[self._categories.index(row_category)]['row'][\n self._categories.index(col_category)] = value\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix(self, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Logs a confusion matrix.\n\n Args:\n categories: List of the category names.\n matrix: Complete confusion matrix.\n\n Raises:\n ValueError: Length of categories does not match number of rows or columns.\n \"\"\"\n self.set_confusion_matrix_categories(categories)\n\n if len(matrix) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n for index in range(len(categories)):\n if len(matrix[index]) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n self.log_confusion_matrix_row(categories[index], matrix[index])\n\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n\nclass SlicedClassificationMetrics(Artifact):\n \"\"\"Metrics class representing Sliced Classification Metrics.\n\n Similar to ClassificationMetrics clients using this class are expected to use\n log methods of the class to log metrics with the difference being each log\n method takes a slice to associate the ClassificationMetrics.\n\n \"\"\"\n\n TYPE_NAME = 'system.SlicedClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def _upsert_classification_metrics_for_slice(self, slice: str):\n \"\"\"Upserts the classification metrics instance for a slice.\"\"\"\n if slice not in self._sliced_metrics:\n self._sliced_metrics[slice] = ClassificationMetrics()\n\n def _update_metadata(self, slice: str):\n \"\"\"Updates metadata to adhere to the metrics schema.\"\"\"\n self.metadata = {}\n self.metadata['evaluationSlices'] = []\n for slice in self._sliced_metrics.keys():\n slice_metrics = {\n 'slice': slice,\n 'sliceClassificationMetrics': self._sliced_metrics[slice].metadata\n }\n self.metadata['evaluationSlices'].append(slice_metrics)\n\n def log_roc_reading(self, slice: str, threshold: float, tpr: float,\n fpr: float):\n \"\"\"Logs a single data point in the ROC Curve of a slice.\n\n Args:\n slice: String representing slice label.\n threshold: Thresold value for the data point.\n tpr: True positive rate value of the data point.\n fpr: False positive rate value of the data point.\n \"\"\"\n\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_roc_reading(threshold, tpr, fpr)\n self._update_metadata(slice)\n\n def load_roc_readings(self, slice: str, readings: List[List[float]]):\n \"\"\"Supports bulk loading ROC Curve readings for a slice.\n\n Args:\n slice: String representing slice label.\n readings: A 2-D list providing ROC Curve data points.\n The expected order of the data points is: threshold,\n true_positive_rate, false_positive_rate.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].load_roc_readings(readings)\n self._update_metadata(slice)\n\n def set_confusion_matrix_categories(self, slice: str, categories: List[str]):\n \"\"\"Stores confusion matrix categories for a slice..\n\n Categories are stored in the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n categories: 
List of strings specifying the categories.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].set_confusion_matrix_categories(categories)\n self._update_metadata(slice)\n\n def log_confusion_matrix_row(self, slice: str, row_category: str,\n row: List[int]):\n \"\"\"Logs a confusion matrix row for a slice.\n\n Row is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)\n self._update_metadata(slice)\n\n def log_confusion_matrix_cell(self, slice: str, row_category: str,\n col_category: str, value: int):\n \"\"\"Logs a confusion matrix cell for a slice..\n\n Cell is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(\n row_category, col_category, value)\n self._update_metadata(slice)\n\n def load_confusion_matrix(self, slice: str, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Supports bulk loading the whole confusion matrix for a slice.\n\n Args:\n slice: String representing slice label.\n categories: List of the category names.\n matrix: Complete confusion matrix.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(categories, matrix)\n self._update_metadata(slice)\n\n\nT = TypeVar('T')\n\n\nclass InputAnnotation():\n \"\"\"Marker type for input artifacts.\"\"\"\n pass\n\n\n\nclass OutputAnnotation():\n \"\"\"Marker type for output artifacts.\"\"\"\n pass\n\n\n# TODO: Use typing.Annotated instead of this hack.\n# With typing.Annotated (Python 3.9+ or typing_extensions package), the\n# following would look like:\n# Input = typing.Annotated[T, InputAnnotation]\n# Output = typing.Annotated[T, OutputAnnotation]\n\n\n# Input represents an Input artifact of type T.\nInput = Union[T, InputAnnotation]\n\n# Output represents an Output artifact of type T.\nOutput = Union[T, OutputAnnotation]\n\n\ndef is_artifact_annotation(typ) -> bool:\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [InputAnnotation, OutputAnnotation]\n\n if not hasattr(typ, '__origin__'):\n return False\n\n\n if typ.__origin__ != Union and type(typ.__origin__) != type(Union):\n return False\n\n\n if not hasattr(typ, '__args__') or len(typ.__args__) != 2:\n return False\n\n if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:\n return False\n\n return True\n\ndef is_input_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Input[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation\n\n return typ.__args__[1] == InputAnnotation\n\ndef is_output_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Output[T].\"\"\"\n if not is_artifact_annotation(typ):\n 
return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation\n\n return typ.__args__[1] == OutputAnnotation\n\ndef get_io_artifact_class(typ):\n if not is_artifact_annotation(typ):\n return None\n if typ == Input or typ == Output:\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[1]\n\n return typ.__args__[0]\n\ndef get_io_artifact_annotation(typ):\n if not is_artifact_annotation(typ):\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[2]\n\n return typ.__args__[1]\n\n\n\n_SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = {\n x.TYPE_NAME: x\n for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]\n}\n\n\ndef create_runtime_artifact(runtime_artifact: Dict) -> Artifact:\n \"\"\"Creates an Artifact instance from the specified RuntimeArtifact.\n\n Args:\n runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.\n \"\"\"\n schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')\n\n artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)\n if not artifact_type:\n artifact_type = Artifact\n return artifact_type(\n uri=runtime_artifact.get('uri', ''),\n name=runtime_artifact.get('name', ''),\n metadata=runtime_artifact.get('metadata', {}),\n )\n\nclass InputPath:\n '''When creating component from function, :class:`.InputPath` should be used as function parameter annotation to tell the system to pass the *data file path* to the function instead of passing the actual data.'''\n def __init__(self, type=None):\n self.type = type\n\nclass OutputPath:\n '''When creating component from function, :class:`.OutputPath` should be used as function parameter annotation to tell the system that the function wants to output data by writing it into a file with the given path instead of returning the data from the function.'''\n def __init__(self, type=None):\n self.type = type\n\nclass Executor():\n \"\"\"Executor executes v2-based Python function components.\"\"\"\n\n def __init__(self, executor_input: Dict, function_to_execute: Callable):\n self._func = function_to_execute\n self._input = executor_input\n self._input_artifacts: Dict[str, Artifact] = {}\n self._output_artifacts: Dict[str, Artifact] = {}\n\n for name, artifacts in self._input.get('inputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._input_artifacts[name] = self._make_input_artifact(\n artifacts_list[0])\n\n for name, artifacts in self._input.get('outputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._output_artifacts[name] = self._make_output_artifact(\n artifacts_list[0])\n\n self._return_annotation = inspect.signature(self._func).return_annotation\n self._executor_output = {}\n\n @classmethod\n def _make_input_artifact(cls, runtime_artifact: Dict):\n return create_runtime_artifact(runtime_artifact)\n\n @classmethod\n def _make_output_artifact(cls, runtime_artifact: Dict):\n import os\n artifact = create_runtime_artifact(runtime_artifact)\n os.makedirs(os.path.dirname(artifact.path), exist_ok=True)\n return artifact\n\n def _get_input_artifact(self, name: str):\n return self._input_artifacts.get(name)\n\n def _get_output_artifact(self, name: str):\n return 
self._output_artifacts.get(name)\n\n  def _get_input_parameter_value(self, parameter_name: str, parameter_type: Any):\n    parameter = self._input.get('inputs', {}).get('parameters',\n {}).get(parameter_name, None)\n    if parameter is None:\n      return None\n\n    if parameter.get('stringValue'):\n      if parameter_type == str:\n        return parameter['stringValue']\n      elif parameter_type == bool:\n        # Use `.lower()` so it can also handle 'True' and 'False' (resulted from\n        # `str(True)` and `str(False)`, respectively.\n        return json.loads(parameter['stringValue'].lower())\n      else:\n        return json.loads(parameter['stringValue'])\n    elif parameter.get('intValue'):\n      return int(parameter['intValue'])\n    elif parameter.get('doubleValue'):\n      return float(parameter['doubleValue'])\n\n  def _get_output_parameter_path(self, parameter_name: str):\n    parameter_name = self._maybe_strip_path_suffix(parameter_name)\n    parameter = self._input.get('outputs',\n {}).get('parameters',\n {}).get(parameter_name, None)\n    if parameter is None:\n      return None\n\n    import os\n    path = parameter.get('outputFile', None)\n    if path:\n      os.makedirs(os.path.dirname(path), exist_ok=True)\n    return path\n\n  def _get_output_artifact_path(self, artifact_name: str):\n    artifact_name = self._maybe_strip_path_suffix(artifact_name)\n    output_artifact = self._output_artifacts.get(artifact_name)\n    if not output_artifact:\n      raise ValueError(\n 'Failed to get output artifact path for artifact name {}'.format(\n artifact_name))\n    return output_artifact.path\n\n  def _get_input_artifact_path(self, artifact_name: str):\n    artifact_name = self._maybe_strip_path_suffix(artifact_name)\n    input_artifact = self._input_artifacts.get(artifact_name)\n    if not input_artifact:\n      raise ValueError(\n 'Failed to get input artifact path for artifact name {}'.format(\n artifact_name))\n    return input_artifact.path\n\n  def _write_output_parameter_value(self, name: str,\n value: Union[str, int, float, bool, dict,\n list, Dict, List]):\n    if type(value) == str:\n      output = {'stringValue': value}\n    elif type(value) == int:\n      output = {'intValue': value}\n    elif type(value) == float:\n      output = {'doubleValue': value}\n    else:\n      # For bool, list, dict, List, Dict, json serialize the value.\n      output = {'stringValue': json.dumps(value)}\n\n    if not self._executor_output.get('parameters'):\n      self._executor_output['parameters'] = {}\n\n    self._executor_output['parameters'][name] = output\n\n  def _write_output_artifact_payload(self, name: str, value: Any):\n    path = self._get_output_artifact_path(name)\n    with open(path, 'w') as f:\n      f.write(str(value))\n\n  # TODO: extract to a util\n  @classmethod\n  def _get_short_type_name(cls, type_name: str) -> str:\n    \"\"\"Extracts the short form type name.\n\n    This method is used for looking up serializer for a given type.\n\n    For example:\n      typing.List -> List\n      typing.List[int] -> List\n      typing.Dict[str, str] -> Dict\n      List -> List\n      str -> str\n\n    Args:\n      type_name: The original type name.\n\n    Returns:\n      The short form type name or the original name if pattern doesn't match.\n    \"\"\"\n    import re\n    match = re.match('(typing\.)?(?P<type>\w+)(?:\[.+\])?', type_name)\n    if match:\n      return match.group('type')\n    else:\n      return type_name\n\n  # TODO: merge with type_utils.is_parameter_type\n  @classmethod\n  def _is_parameter(cls, annotation: Any) -> bool:\n    if type(annotation) == type:\n      return annotation in [str, int, float, bool, dict, list]\n\n    # Annotation could be, for instance `typing.Dict[str, str]`, etc.\n    return cls._get_short_type_name(str(annotation)) in ['Dict', 
'List']\n\n @classmethod\n def _is_artifact(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, Artifact)\n return False\n\n @classmethod\n def _is_named_tuple(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, tuple) and hasattr(\n annotation, '_fields') and hasattr(annotation, '__annotations__')\n return False\n\n def _handle_single_return_value(self, output_name: str, annotation_type: Any,\n return_value: Any):\n if self._is_parameter(annotation_type):\n if type(return_value) != annotation_type:\n raise ValueError(\n 'Function `{}` returned value of type {}; want type {}'.format(\n self._func.__name__, type(return_value), annotation_type))\n self._write_output_parameter_value(output_name, return_value)\n elif self._is_artifact(annotation_type):\n self._write_output_artifact_payload(output_name, return_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. Must be one of `str`, `int`, `float`, or a'\n ' subclass of `Artifact`'.format(annotation_type))\n\n def _write_executor_output(self, func_output: Optional[Any] = None):\n if self._output_artifacts:\n self._executor_output['artifacts'] = {}\n\n for name, artifact in self._output_artifacts.items():\n runtime_artifact = {\n 'name': artifact.name,\n 'uri': artifact.uri,\n 'metadata': artifact.metadata,\n }\n artifacts_list = {'artifacts': [runtime_artifact]}\n\n self._executor_output['artifacts'][name] = artifacts_list\n\n if func_output is not None:\n if self._is_parameter(self._return_annotation) or self._is_artifact(\n self._return_annotation):\n # Note: single output is named `Output` in component.yaml.\n self._handle_single_return_value('Output', self._return_annotation,\n func_output)\n elif self._is_named_tuple(self._return_annotation):\n if len(self._return_annotation._fields) != len(func_output):\n raise RuntimeError(\n 'Expected {} return values from function `{}`, got {}'.format(\n len(self._return_annotation._fields), self._func.__name__,\n len(func_output)))\n for i in range(len(self._return_annotation._fields)):\n field = self._return_annotation._fields[i]\n field_type = self._return_annotation.__annotations__[field]\n if type(func_output) == tuple:\n field_value = func_output[i]\n else:\n field_value = getattr(func_output, field)\n self._handle_single_return_value(field, field_type, field_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. 
Must be one of `str`, `int`, `float`, a'\n ' subclass of `Artifact`, or a NamedTuple collection of these types.'\n .format(self._return_annotation))\n\n import os\n os.makedirs(\n os.path.dirname(self._input['outputs']['outputFile']), exist_ok=True)\n with open(self._input['outputs']['outputFile'], 'w') as f:\n f.write(json.dumps(self._executor_output))\n\n def _maybe_strip_path_suffix(self, name) -> str:\n if name.endswith('_path'):\n name = name[0:-len('_path')]\n if name.endswith('_file'):\n name = name[0:-len('_file')]\n return name\n\n def execute(self):\n annotations = inspect.getfullargspec(self._func).annotations\n\n # Function arguments.\n func_kwargs = {}\n\n for k, v in annotations.items():\n if k == 'return':\n continue\n\n if self._is_parameter(v):\n func_kwargs[k] = self._get_input_parameter_value(k, v)\n\n if is_artifact_annotation(v):\n if is_input_artifact(v):\n func_kwargs[k] = self._get_input_artifact(k)\n if is_output_artifact(v):\n func_kwargs[k] = self._get_output_artifact(k)\n\n elif isinstance(v, OutputPath):\n if self._is_parameter(v.type):\n func_kwargs[k] = self._get_output_parameter_path(k)\n else:\n func_kwargs[k] = self._get_output_artifact_path(k)\n elif isinstance(v, InputPath):\n func_kwargs[k] = self._get_input_artifact_path(k)\n\n result = self._func(**func_kwargs)\n self._write_executor_output(result)\n\n\ndef fail_op(message: str):\n \"\"\"Fails.\"\"\"\n import sys\n print(message)\n sys.exit(1)\n\n\ndef executor_main():\n import argparse\n import json\n\n parser = argparse.ArgumentParser(description='Process some integers.')\n parser.add_argument('--executor_input', type=str)\n parser.add_argument('--function_to_execute', type=str)\n\n args, _ = parser.parse_known_args()\n executor_input = json.loads(args.executor_input)\n function_to_execute = globals()[args.function_to_execute]\n\n executor = Executor(executor_input=executor_input,\n function_to_execute=function_to_execute)\n\n executor.execute()\n\n\nif __name__ == '__main__':\n executor_main()\n" + "\nimport json\nimport inspect\nfrom typing import *\n\n# Copyright 2021 The Kubeflow Authors\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Classes for input/output types in KFP SDK.\n\nThese are only compatible with v2 Pipelines.\n\"\"\"\n\nimport os\nfrom typing import Dict, Generic, List, Optional, Type, TypeVar, Union\n\n_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'\n_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'\n_S3_LOCAL_MOUNT_PREFIX = '/s3/'\n\n\nclass Artifact(object):\n \"\"\"Generic Artifact class.\n\n This class is meant to represent the metadata around an input or output\n machine-learning Artifact. 
Artifacts have URIs, which can either be a location\n on disk (or Cloud storage) or some other resource identifier such as\n an API resource name.\n\n Artifacts carry a `metadata` field, which is a dictionary for storing\n metadata related to this artifact.\n \"\"\"\n TYPE_NAME = 'system.Artifact'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n \"\"\"Initializes the Artifact with the given name, URI and metadata.\"\"\"\n self.uri = uri or ''\n self.name = name or ''\n self.metadata = metadata or {}\n\n @property\n def path(self):\n return self._get_path()\n\n @path.setter\n def path(self, path):\n self._set_path(path)\n\n def _get_path(self) -> Optional[str]:\n if self.uri.startswith('gs://'):\n return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]\n elif self.uri.startswith('minio://'):\n return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]\n elif self.uri.startswith('s3://'):\n return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]\n return None\n\n def _set_path(self, path):\n if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):\n path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):\n path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):\n path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]\n self.uri = path\n\n\nclass Model(Artifact):\n \"\"\"An artifact representing an ML Model.\"\"\"\n TYPE_NAME = 'system.Model'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n @property\n def framework(self) -> str:\n return self._get_framework()\n\n def _get_framework(self) -> str:\n return self.metadata.get('framework', '')\n\n @framework.setter\n def framework(self, framework: str):\n self._set_framework(framework)\n\n def _set_framework(self, framework: str):\n self.metadata['framework'] = framework\n\n\nclass Dataset(Artifact):\n \"\"\"An artifact representing an ML Dataset.\"\"\"\n TYPE_NAME = 'system.Dataset'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Metrics(Artifact):\n \"\"\"Represent a simple base Artifact type to store key-value scalar metrics.\"\"\"\n TYPE_NAME = 'system.Metrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_metric(self, metric: str, value: float):\n \"\"\"Sets a custom scalar metric.\n\n Args:\n metric: Metric key\n value: Value of the metric.\n \"\"\"\n self.metadata[metric] = value\n\n\nclass ClassificationMetrics(Artifact):\n \"\"\"Represents Artifact class to store Classification Metrics.\"\"\"\n TYPE_NAME = 'system.ClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):\n \"\"\"Logs a single data point in the ROC Curve.\n\n Args:\n fpr: False positive rate value of the data point.\n tpr: True positive rate value of the data point.\n threshold: Threshold value for the data point.\n \"\"\"\n\n roc_reading = {\n 'confidenceThreshold': threshold,\n 'recall': tpr,\n 
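A quick round trip of the uri/path mapping implemented by `_get_path`/`_set_path` above; this assumes the module is importable as `kfp.dsl.io_types`, per the file path in this patch.

    from kfp.dsl.io_types import Artifact

    a = Artifact(name='data', uri='gs://my-bucket/data.csv')
    print(a.path)   # /gcs/my-bucket/data.csv

    a.path = '/minio/mlpipeline/model.bin'
    print(a.uri)    # minio://mlpipeline/model.bin

    # URIs with no recognized scheme have no local mount:
    print(Artifact(uri='https://example.com/x').path)   # None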
'falsePositiveRate': fpr\n }\n if 'confidenceMetrics' not in self.metadata.keys():\n self.metadata['confidenceMetrics'] = []\n\n self.metadata['confidenceMetrics'].append(roc_reading)\n\n def log_roc_curve(self, fpr: List[float], tpr: List[float],\n threshold: List[float]):\n \"\"\"Logs an ROC curve.\n\n The list length of fpr, tpr and threshold must be the same.\n\n Args:\n fpr: List of false positive rate values.\n tpr: List of true positive rate values.\n threshold: List of threshold values.\n \"\"\"\n if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(tpr) != len(\n threshold):\n raise ValueError('Length of fpr, tpr and threshold must be the same. '\n 'Got lengths {}, {} and {} respectively.'.format(\n len(fpr), len(tpr), len(threshold)))\n\n for i in range(len(fpr)):\n self.log_roc_data_point(fpr=fpr[i], tpr=tpr[i], threshold=threshold[i])\n\n def set_confusion_matrix_categories(self, categories: List[str]):\n \"\"\"Stores confusion matrix categories.\n\n Args:\n categories: List of strings specifying the categories.\n \"\"\"\n\n self._categories = []\n annotation_specs = []\n for category in categories:\n annotation_spec = {'displayName': category}\n self._categories.append(category)\n annotation_specs.append(annotation_spec)\n\n self._matrix = []\n for row in range(len(self._categories)):\n self._matrix.append({'row': [0] * len(self._categories)})\n\n self._confusion_matrix = {}\n self._confusion_matrix['annotationSpecs'] = annotation_specs\n self._confusion_matrix['rows'] = self._matrix\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_row(self, row_category: str, row: List[float]):\n \"\"\"Logs a confusion matrix row.\n\n Args:\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n\n Raises:\n ValueError: If row_category is not in the list of categories\n set in set_categories call.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if len(row) != len(self._categories):\n raise ValueError('Invalid row. Expected size: {} got: {}'.\\\n format(len(self._categories), len(row)))\n\n self._matrix[self._categories.index(row_category)] = {'row': row}\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_cell(self, row_category: str, col_category: str,\n value: int):\n \"\"\"Logs a cell in the confusion matrix.\n\n Args:\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n\n Raises:\n ValueError: If row_category or col_category is not in the list of\n categories set in set_categories.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if col_category not in self._categories:\n raise ValueError('Invalid category: {} passed. 
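Example usage of the ROC helpers above: `log_roc_curve` validates that the three lists have equal length and fans out to `log_roc_data_point`, appending one reading per index to `metadata['confidenceMetrics']`.

    from kfp.dsl.io_types import ClassificationMetrics

    m = ClassificationMetrics()
    m.log_roc_curve(
        fpr=[0.0, 0.1, 0.5],
        tpr=[0.0, 0.6, 0.9],
        threshold=[0.9, 0.5, 0.1])
    print(m.metadata['confidenceMetrics'][1])
    # {'confidenceThreshold': 0.5, 'recall': 0.6, 'falsePositiveRate': 0.1}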
Expected one of: {}'.\\\n format(row_category, self._categories))\n\n self._matrix[self._categories.index(row_category)]['row'][\n self._categories.index(col_category)] = value\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix(self, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Logs a confusion matrix.\n\n Args:\n categories: List of the category names.\n matrix: Complete confusion matrix.\n\n Raises:\n ValueError: Length of categories does not match number of rows or columns.\n \"\"\"\n self.set_confusion_matrix_categories(categories)\n\n if len(matrix) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n for index in range(len(categories)):\n if len(matrix[index]) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n self.log_confusion_matrix_row(categories[index], matrix[index])\n\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n\nclass SlicedClassificationMetrics(Artifact):\n \"\"\"Metrics class representing Sliced Classification Metrics.\n\n Similar to ClassificationMetrics clients using this class are expected to use\n log methods of the class to log metrics with the difference being each log\n method takes a slice to associate the ClassificationMetrics.\n\n \"\"\"\n\n TYPE_NAME = 'system.SlicedClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def _upsert_classification_metrics_for_slice(self, slice: str):\n \"\"\"Upserts the classification metrics instance for a slice.\"\"\"\n if slice not in self._sliced_metrics:\n self._sliced_metrics[slice] = ClassificationMetrics()\n\n def _update_metadata(self, slice: str):\n \"\"\"Updates metadata to adhere to the metrics schema.\"\"\"\n self.metadata = {}\n self.metadata['evaluationSlices'] = []\n for slice in self._sliced_metrics.keys():\n slice_metrics = {\n 'slice': slice,\n 'sliceClassificationMetrics': self._sliced_metrics[slice].metadata\n }\n self.metadata['evaluationSlices'].append(slice_metrics)\n\n def log_roc_reading(self, slice: str, threshold: float, tpr: float,\n fpr: float):\n \"\"\"Logs a single data point in the ROC Curve of a slice.\n\n Args:\n slice: String representing slice label.\n threshold: Thresold value for the data point.\n tpr: True positive rate value of the data point.\n fpr: False positive rate value of the data point.\n \"\"\"\n\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_roc_reading(threshold, tpr, fpr)\n self._update_metadata(slice)\n\n def load_roc_readings(self, slice: str, readings: List[List[float]]):\n \"\"\"Supports bulk loading ROC Curve readings for a slice.\n\n Args:\n slice: String representing slice label.\n readings: A 2-D list providing ROC Curve data points.\n The expected order of the data points is: threshold,\n true_positive_rate, false_positive_rate.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].load_roc_readings(readings)\n self._update_metadata(slice)\n\n def set_confusion_matrix_categories(self, slice: str, categories: List[str]):\n \"\"\"Stores confusion matrix categories for a slice..\n\n Categories are stored in the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n categories: 
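Bulk logging with the confusion-matrix helpers above: the categories define both axes, every row must have `len(categories)` entries, and the result lands in `metadata['confusionMatrix']`. One caution: the `SlicedClassificationMetrics` class in this embedded copy reads `self._sliced_metrics` without ever initializing it, so its per-slice log methods raise `AttributeError` on a fresh instance unless that dict is set first.

    from kfp.dsl.io_types import ClassificationMetrics

    cm = ClassificationMetrics()
    cm.log_confusion_matrix(
        categories=['cat', 'dog'],
        matrix=[[10, 2],
                [3, 15]])
    print(cm.metadata['confusionMatrix']['rows'])
    # [{'row': [10, 2]}, {'row': [3, 15]}]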
List of strings specifying the categories.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].set_confusion_matrix_categories(categories)\n self._update_metadata(slice)\n\n def log_confusion_matrix_row(self, slice: str, row_category: str,\n row: List[int]):\n \"\"\"Logs a confusion matrix row for a slice.\n\n Row is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)\n self._update_metadata(slice)\n\n def log_confusion_matrix_cell(self, slice: str, row_category: str,\n col_category: str, value: int):\n \"\"\"Logs a confusion matrix cell for a slice..\n\n Cell is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(\n row_category, col_category, value)\n self._update_metadata(slice)\n\n def load_confusion_matrix(self, slice: str, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Supports bulk loading the whole confusion matrix for a slice.\n\n Args:\n slice: String representing slice label.\n categories: List of the category names.\n matrix: Complete confusion matrix.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(categories, matrix)\n self._update_metadata(slice)\n\n\nclass HTML(Artifact):\n \"\"\"An artifact representing an HTML file.\"\"\"\n TYPE_NAME = 'system.HTML'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Markdown(Artifact):\n \"\"\"An artifact representing an Markdown file.\"\"\"\n TYPE_NAME = 'system.Markdown'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nT = TypeVar('T')\n\n\nclass InputAnnotation():\n \"\"\"Marker type for input artifacts.\"\"\"\n pass\n\n\nclass OutputAnnotation():\n \"\"\"Marker type for output artifacts.\"\"\"\n pass\n\n\n# TODO: Use typing.Annotated instead of this hack.\n# With typing.Annotated (Python 3.9+ or typing_extensions package), the\n# following would look like:\n# Input = typing.Annotated[T, InputAnnotation]\n# Output = typing.Annotated[T, OutputAnnotation]\n\n# Input represents an Input artifact of type T.\nInput = Union[T, InputAnnotation]\n\n# Output represents an Output artifact of type T.\nOutput = Union[T, OutputAnnotation]\n\n\ndef is_artifact_annotation(typ) -> bool:\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [\n InputAnnotation, OutputAnnotation\n ]\n\n if not hasattr(typ, '__origin__'):\n return False\n\n if typ.__origin__ != Union and type(typ.__origin__) != type(Union):\n return False\n\n if not hasattr(typ, '__args__') or len(typ.__args__) != 2:\n return 
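How the new `HTML` type and the `Union`-based `Output` marker fit together, sketched with a hypothetical component `html_visualization` (not part of this patch): the annotation helpers above can recognize and unpack `Output[HTML]`.

    from kfp.dsl.io_types import (HTML, Output, get_io_artifact_class,
                                  is_output_artifact)

    def html_visualization(html_artifact: Output[HTML]):
        # At runtime the executor injects an output artifact whose .path is a
        # writable local mount of the artifact URI.
        with open(html_artifact.path, 'w') as f:
            f.write('<h1>Hello, world!</h1>')

    ann = html_visualization.__annotations__['html_artifact']
    print(is_output_artifact(ann))      # True
    print(get_io_artifact_class(ann))   # <class 'kfp.dsl.io_types.HTML'>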
False\n\n if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:\n return False\n\n return True\n\n\ndef is_input_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Input[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation\n\n return typ.__args__[1] == InputAnnotation\n\n\ndef is_output_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Output[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation\n\n return typ.__args__[1] == OutputAnnotation\n\n\ndef get_io_artifact_class(typ):\n if not is_artifact_annotation(typ):\n return None\n if typ == Input or typ == Output:\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[1]\n\n return typ.__args__[0]\n\n\ndef get_io_artifact_annotation(typ):\n if not is_artifact_annotation(typ):\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[2]\n\n return typ.__args__[1]\n\n\n_SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = {\n x.TYPE_NAME: x\n for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]\n}\n\n\ndef create_runtime_artifact(runtime_artifact: Dict) -> Artifact:\n \"\"\"Creates an Artifact instance from the specified RuntimeArtifact.\n\n Args:\n runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.\n \"\"\"\n schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')\n\n artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)\n if not artifact_type:\n artifact_type = Artifact\n return artifact_type(\n uri=runtime_artifact.get('uri', ''),\n name=runtime_artifact.get('name', ''),\n metadata=runtime_artifact.get('metadata', {}),\n )\n\nclass InputPath:\n '''When creating component from function, :class:`.InputPath` should be used as function parameter annotation to tell the system to pass the *data file path* to the function instead of passing the actual data.'''\n def __init__(self, type=None):\n self.type = type\n\nclass OutputPath:\n '''When creating component from function, :class:`.OutputPath` should be used as function parameter annotation to tell the system that the function wants to output data by writing it into a file with the given path instead of returning the data from the function.'''\n def __init__(self, type=None):\n self.type = type\n\nclass Executor():\n \"\"\"Executor executes v2-based Python function components.\"\"\"\n\n def __init__(self, executor_input: Dict, function_to_execute: Callable):\n self._func = function_to_execute\n self._input = executor_input\n self._input_artifacts: Dict[str, Artifact] = {}\n self._output_artifacts: Dict[str, Artifact] = {}\n\n for name, artifacts in self._input.get('inputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._input_artifacts[name] = self._make_input_artifact(\n artifacts_list[0])\n\n for name, artifacts in self._input.get('outputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._output_artifacts[name] = self._make_output_artifact(\n artifacts_list[0])\n\n self._return_annotation = 
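`create_runtime_artifact` above resolves `schemaTitle` through `_SCHEMA_TITLE_TO_TYPE` and falls back to the generic `Artifact` for anything unlisted. Notably, the map in the executor source embedded in this test data still stops at `ClassificationMetrics`, so the new `system.HTML` and `system.Markdown` titles take the fallback path here.

    from kfp.dsl.io_types import create_runtime_artifact

    art = create_runtime_artifact({
        'name': 'model',
        'uri': 'gs://my-bucket/model',
        'type': {'schemaTitle': 'system.Model'},
        'metadata': {'framework': 'tensorflow'},
    })
    print(type(art).__name__, art.framework)   # Model tensorflow

    html = create_runtime_artifact({'type': {'schemaTitle': 'system.HTML'}})
    print(type(html).__name__)                  # Artifact (fallback)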
inspect.signature(self._func).return_annotation\n self._executor_output = {}\n\n @classmethod\n def _make_input_artifact(cls, runtime_artifact: Dict):\n return create_runtime_artifact(runtime_artifact)\n\n @classmethod\n def _make_output_artifact(cls, runtime_artifact: Dict):\n import os\n artifact = create_runtime_artifact(runtime_artifact)\n os.makedirs(os.path.dirname(artifact.path), exist_ok=True)\n return artifact\n\n def _get_input_artifact(self, name: str):\n return self._input_artifacts.get(name)\n\n def _get_output_artifact(self, name: str):\n return self._output_artifacts.get(name)\n\n def _get_input_parameter_value(self, parameter_name: str, parameter_type: Any):\n parameter = self._input.get('inputs', {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n if parameter.get('stringValue'):\n if parameter_type == str:\n return parameter['stringValue']\n elif parameter_type == bool:\n # Use `.lower()` so it can also handle 'True' and 'False' (resulted from\n # `str(True)` and `str(False)`, respectively.\n return json.loads(parameter['stringValue'].lower())\n else:\n return json.loads(parameter['stringValue'])\n elif parameter.get('intValue'):\n return int(parameter['intValue'])\n elif parameter.get('doubleValue'):\n return float(parameter['doubleValue'])\n\n def _get_output_parameter_path(self, parameter_name: str):\n parameter_name = self._maybe_strip_path_suffix(parameter_name)\n parameter = self._input.get('outputs',\n {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n import os\n path = parameter.get('outputFile', None)\n if path:\n os.makedirs(os.path.dirname(path), exist_ok=True)\n return path\n\n def _get_output_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n output_artifact = self._output_artifacts.get(artifact_name)\n if not output_artifact:\n raise ValueError(\n 'Failed to get output artifact path for artifact name {}'.format(\n artifact_name))\n return output_artifact.path\n\n def _get_input_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n input_artifact = self._input_artifacts.get(artifact_name)\n if not input_artifact:\n raise ValueError(\n 'Failed to get input artifact path for artifact name {}'.format(\n artifact_name))\n return input_artifact.path\n\n def _write_output_parameter_value(self, name: str,\n value: Union[str, int, float, bool, dict,\n list, Dict, List]):\n if type(value) == str:\n output = {'stringValue': value}\n elif type(value) == int:\n output = {'intValue': value}\n elif type(value) == float:\n output = {'doubleValue': value}\n else:\n # For bool, list, dict, List, Dict, json serialize the value.\n output = {'stringValue': json.dumps(value)}\n\n if not self._executor_output.get('parameters'):\n self._executor_output['parameters'] = {}\n\n self._executor_output['parameters'][name] = output\n\n def _write_output_artifact_payload(self, name: str, value: Any):\n path = self._get_output_artifact_path(name)\n with open(path, 'w') as f:\n f.write(str(value))\n\n # TODO: extract to a util\n @classmethod\n def _get_short_type_name(cls, type_name: str) -> str:\n \"\"\"Extracts the short form type name.\n\n This method is used for looking up serializer for a given type.\n\n For example:\n typing.List -> List\n typing.List[int] -> List\n typing.Dict[str, str] -> Dict\n List -> List\n str -> str\n\n Args:\n type_name: The original type name.\n\n Returns:\n The 
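Two quirks of `_get_input_parameter_value` above worth noting: the branches use truthiness (`parameter.get('stringValue')`), so an empty string or a zero `intValue` appears to fall through and return None; and booleans arrive as `str(True)`/`str(False)`, which only parse as JSON after `.lower()`.

    import json

    # 'True' is not valid JSON, but 'true' is:
    param = {'stringValue': str(True)}
    print(json.loads(param['stringValue'].lower()))       # True

    # Numeric values may arrive as strings and are coerced explicitly:
    print(int({'intValue': '42'}['intValue']))            # 42
    print(float({'doubleValue': '0.5'}['doubleValue']))   # 0.5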
short form type name or the original name if pattern doesn't match.\n \"\"\"\n import re\n match = re.match('(typing\\.)?(?P\\w+)(?:\\[.+\\])?', type_name)\n if match:\n return match.group('type')\n else:\n return type_name\n\n # TODO: merge with type_utils.is_parameter_type\n @classmethod\n def _is_parameter(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return annotation in [str, int, float, bool, dict, list]\n\n # Annotation could be, for instance `typing.Dict[str, str]`, etc.\n return cls._get_short_type_name(str(annotation)) in ['Dict', 'List']\n\n @classmethod\n def _is_artifact(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, Artifact)\n return False\n\n @classmethod\n def _is_named_tuple(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, tuple) and hasattr(\n annotation, '_fields') and hasattr(annotation, '__annotations__')\n return False\n\n def _handle_single_return_value(self, output_name: str, annotation_type: Any,\n return_value: Any):\n if self._is_parameter(annotation_type):\n if type(return_value) != annotation_type:\n raise ValueError(\n 'Function `{}` returned value of type {}; want type {}'.format(\n self._func.__name__, type(return_value), annotation_type))\n self._write_output_parameter_value(output_name, return_value)\n elif self._is_artifact(annotation_type):\n self._write_output_artifact_payload(output_name, return_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. Must be one of `str`, `int`, `float`, or a'\n ' subclass of `Artifact`'.format(annotation_type))\n\n def _write_executor_output(self, func_output: Optional[Any] = None):\n if self._output_artifacts:\n self._executor_output['artifacts'] = {}\n\n for name, artifact in self._output_artifacts.items():\n runtime_artifact = {\n 'name': artifact.name,\n 'uri': artifact.uri,\n 'metadata': artifact.metadata,\n }\n artifacts_list = {'artifacts': [runtime_artifact]}\n\n self._executor_output['artifacts'][name] = artifacts_list\n\n if func_output is not None:\n if self._is_parameter(self._return_annotation) or self._is_artifact(\n self._return_annotation):\n # Note: single output is named `Output` in component.yaml.\n self._handle_single_return_value('Output', self._return_annotation,\n func_output)\n elif self._is_named_tuple(self._return_annotation):\n if len(self._return_annotation._fields) != len(func_output):\n raise RuntimeError(\n 'Expected {} return values from function `{}`, got {}'.format(\n len(self._return_annotation._fields), self._func.__name__,\n len(func_output)))\n for i in range(len(self._return_annotation._fields)):\n field = self._return_annotation._fields[i]\n field_type = self._return_annotation.__annotations__[field]\n if type(func_output) == tuple:\n field_value = func_output[i]\n else:\n field_value = getattr(func_output, field)\n self._handle_single_return_value(field, field_type, field_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. 
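The pattern embedded just above appears with its named group stripped, `(?P\w+)`, which is not a valid Python regex, even though `match.group('type')` is used two lines later; the intact pattern is almost certainly the `(?P<type>\w+)` form below, the angle brackets having the look of HTML-escaping damage. A standalone version for reference:

    import re

    def get_short_type_name(type_name: str) -> str:
        match = re.match(r'(typing\.)?(?P<type>\w+)(?:\[.+\])?', type_name)
        return match.group('type') if match else type_name

    print(get_short_type_name('typing.Dict[str, str]'))   # Dict
    print(get_short_type_name('List'))                    # List
    print(get_short_type_name('str'))                     # str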
Must be one of `str`, `int`, `float`, a'\n ' subclass of `Artifact`, or a NamedTuple collection of these types.'\n .format(self._return_annotation))\n\n import os\n os.makedirs(\n os.path.dirname(self._input['outputs']['outputFile']), exist_ok=True)\n with open(self._input['outputs']['outputFile'], 'w') as f:\n f.write(json.dumps(self._executor_output))\n\n def _maybe_strip_path_suffix(self, name) -> str:\n if name.endswith('_path'):\n name = name[0:-len('_path')]\n if name.endswith('_file'):\n name = name[0:-len('_file')]\n return name\n\n def execute(self):\n annotations = inspect.getfullargspec(self._func).annotations\n\n # Function arguments.\n func_kwargs = {}\n\n for k, v in annotations.items():\n if k == 'return':\n continue\n\n if self._is_parameter(v):\n func_kwargs[k] = self._get_input_parameter_value(k, v)\n\n if is_artifact_annotation(v):\n if is_input_artifact(v):\n func_kwargs[k] = self._get_input_artifact(k)\n if is_output_artifact(v):\n func_kwargs[k] = self._get_output_artifact(k)\n\n elif isinstance(v, OutputPath):\n if self._is_parameter(v.type):\n func_kwargs[k] = self._get_output_parameter_path(k)\n else:\n func_kwargs[k] = self._get_output_artifact_path(k)\n elif isinstance(v, InputPath):\n func_kwargs[k] = self._get_input_artifact_path(k)\n\n result = self._func(**func_kwargs)\n self._write_executor_output(result)\n\n\ndef fail_op(message: str):\n \"\"\"Fails.\"\"\"\n import sys\n print(message)\n sys.exit(1)\n\n\ndef executor_main():\n import argparse\n import json\n\n parser = argparse.ArgumentParser(description='Process some integers.')\n parser.add_argument('--executor_input', type=str)\n parser.add_argument('--function_to_execute', type=str)\n\n args, _ = parser.parse_known_args()\n executor_input = json.loads(args.executor_input)\n function_to_execute = globals()[args.function_to_execute]\n\n executor = Executor(executor_input=executor_input,\n function_to_execute=function_to_execute)\n\n executor.execute()\n\n\nif __name__ == '__main__':\n executor_main()\n" ], "image": "python:3.7" } @@ -120,7 +120,7 @@ "sh", "-ec", "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n", - "\nimport json\nimport inspect\nfrom typing import *\n\n# Copyright 2021 The Kubeflow Authors\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Classes for input/output types in KFP SDK.\n\nThese are only compatible with v2 Pipelines.\n\"\"\"\n\nimport os\nfrom typing import Dict, Generic, List, Optional, Type, TypeVar, Union\n\n\n_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'\n_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'\n_S3_LOCAL_MOUNT_PREFIX = '/s3/'\n\n\nclass Artifact(object):\n \"\"\"Generic Artifact class.\n\n This class is meant to represent the metadata around an input or output\n machine-learning Artifact. 
Artifacts have URIs, which can either be a location\n on disk (or Cloud storage) or some other resource identifier such as\n an API resource name.\n\n Artifacts carry a `metadata` field, which is a dictionary for storing\n metadata related to this artifact.\n \"\"\"\n TYPE_NAME = 'system.Artifact'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n \"\"\"Initializes the Artifact with the given name, URI and metadata.\"\"\"\n self.uri = uri or ''\n self.name = name or ''\n self.metadata = metadata or {}\n\n @property\n def path(self):\n return self._get_path()\n\n @path.setter\n def path(self, path):\n self._set_path(path)\n\n def _get_path(self) -> Optional[str]:\n if self.uri.startswith('gs://'):\n return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]\n elif self.uri.startswith('minio://'):\n return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]\n elif self.uri.startswith('s3://'):\n return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]\n return None\n\n def _set_path(self, path):\n if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):\n path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):\n path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):\n path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]\n self.uri = path\n\n\nclass Model(Artifact):\n \"\"\"An artifact representing an ML Model.\"\"\"\n TYPE_NAME = 'system.Model'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n @property\n def framework(self) -> str:\n return self._get_framework()\n\n def _get_framework(self) -> str:\n return self.metadata.get('framework', '')\n\n @framework.setter\n def framework(self, framework: str):\n self._set_framework(framework)\n\n def _set_framework(self, framework: str):\n self.metadata['framework'] = framework\n\n\nclass Dataset(Artifact):\n \"\"\"An artifact representing an ML Dataset.\"\"\"\n TYPE_NAME = 'system.Dataset'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Metrics(Artifact):\n \"\"\"Represent a simple base Artifact type to store key-value scalar metrics.\"\"\"\n TYPE_NAME = 'system.Metrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_metric(self, metric: str, value: float):\n \"\"\"Sets a custom scalar metric.\n\n Args:\n metric: Metric key\n value: Value of the metric.\n \"\"\"\n self.metadata[metric] = value\n\n\nclass ClassificationMetrics(Artifact):\n \"\"\"Represents Artifact class to store Classification Metrics.\"\"\"\n TYPE_NAME = 'system.ClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):\n \"\"\"Logs a single data point in the ROC Curve.\n\n Args:\n fpr: False positive rate value of the data point.\n tpr: True positive rate value of the data point.\n threshold: Threshold value for the data point.\n \"\"\"\n\n roc_reading = {\n 'confidenceThreshold': threshold,\n 'recall': tpr,\n 
'falsePositiveRate': fpr\n }\n if 'confidenceMetrics' not in self.metadata.keys():\n self.metadata['confidenceMetrics'] = []\n\n self.metadata['confidenceMetrics'].append(roc_reading)\n\n def log_roc_curve(self, fpr: List[float], tpr: List[float],\n threshold: List[float]):\n \"\"\"Logs an ROC curve.\n\n The list length of fpr, tpr and threshold must be the same.\n\n Args:\n fpr: List of false positive rate values.\n tpr: List of true positive rate values.\n threshold: List of threshold values.\n \"\"\"\n if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(tpr) != len(\n threshold):\n raise ValueError('Length of fpr, tpr and threshold must be the same. '\n 'Got lengths {}, {} and {} respectively.'.format(\n len(fpr), len(tpr), len(threshold)))\n\n for i in range(len(fpr)):\n self.log_roc_data_point(fpr=fpr[i], tpr=tpr[i], threshold=threshold[i])\n\n def set_confusion_matrix_categories(self, categories: List[str]):\n \"\"\"Stores confusion matrix categories.\n\n Args:\n categories: List of strings specifying the categories.\n \"\"\"\n\n self._categories = []\n annotation_specs = []\n for category in categories:\n annotation_spec = {'displayName': category}\n self._categories.append(category)\n annotation_specs.append(annotation_spec)\n\n self._matrix = []\n for row in range(len(self._categories)):\n self._matrix.append({'row': [0] * len(self._categories)})\n\n self._confusion_matrix = {}\n self._confusion_matrix['annotationSpecs'] = annotation_specs\n self._confusion_matrix['rows'] = self._matrix\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_row(self, row_category: str, row: List[float]):\n \"\"\"Logs a confusion matrix row.\n\n Args:\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n\n Raises:\n ValueError: If row_category is not in the list of categories\n set in set_categories call.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if len(row) != len(self._categories):\n raise ValueError('Invalid row. Expected size: {} got: {}'.\\\n format(len(self._categories), len(row)))\n\n self._matrix[self._categories.index(row_category)] = {'row': row}\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_cell(self, row_category: str, col_category: str,\n value: int):\n \"\"\"Logs a cell in the confusion matrix.\n\n Args:\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n\n Raises:\n ValueError: If row_category or col_category is not in the list of\n categories set in set_categories.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if col_category not in self._categories:\n raise ValueError('Invalid category: {} passed. 
Expected one of: {}'.\\\n format(row_category, self._categories))\n\n self._matrix[self._categories.index(row_category)]['row'][\n self._categories.index(col_category)] = value\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix(self, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Logs a confusion matrix.\n\n Args:\n categories: List of the category names.\n matrix: Complete confusion matrix.\n\n Raises:\n ValueError: Length of categories does not match number of rows or columns.\n \"\"\"\n self.set_confusion_matrix_categories(categories)\n\n if len(matrix) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n for index in range(len(categories)):\n if len(matrix[index]) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n self.log_confusion_matrix_row(categories[index], matrix[index])\n\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n\nclass SlicedClassificationMetrics(Artifact):\n \"\"\"Metrics class representing Sliced Classification Metrics.\n\n Similar to ClassificationMetrics clients using this class are expected to use\n log methods of the class to log metrics with the difference being each log\n method takes a slice to associate the ClassificationMetrics.\n\n \"\"\"\n\n TYPE_NAME = 'system.SlicedClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def _upsert_classification_metrics_for_slice(self, slice: str):\n \"\"\"Upserts the classification metrics instance for a slice.\"\"\"\n if slice not in self._sliced_metrics:\n self._sliced_metrics[slice] = ClassificationMetrics()\n\n def _update_metadata(self, slice: str):\n \"\"\"Updates metadata to adhere to the metrics schema.\"\"\"\n self.metadata = {}\n self.metadata['evaluationSlices'] = []\n for slice in self._sliced_metrics.keys():\n slice_metrics = {\n 'slice': slice,\n 'sliceClassificationMetrics': self._sliced_metrics[slice].metadata\n }\n self.metadata['evaluationSlices'].append(slice_metrics)\n\n def log_roc_reading(self, slice: str, threshold: float, tpr: float,\n fpr: float):\n \"\"\"Logs a single data point in the ROC Curve of a slice.\n\n Args:\n slice: String representing slice label.\n threshold: Thresold value for the data point.\n tpr: True positive rate value of the data point.\n fpr: False positive rate value of the data point.\n \"\"\"\n\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_roc_reading(threshold, tpr, fpr)\n self._update_metadata(slice)\n\n def load_roc_readings(self, slice: str, readings: List[List[float]]):\n \"\"\"Supports bulk loading ROC Curve readings for a slice.\n\n Args:\n slice: String representing slice label.\n readings: A 2-D list providing ROC Curve data points.\n The expected order of the data points is: threshold,\n true_positive_rate, false_positive_rate.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].load_roc_readings(readings)\n self._update_metadata(slice)\n\n def set_confusion_matrix_categories(self, slice: str, categories: List[str]):\n \"\"\"Stores confusion matrix categories for a slice..\n\n Categories are stored in the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n categories: 
List of strings specifying the categories.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].set_confusion_matrix_categories(categories)\n self._update_metadata(slice)\n\n def log_confusion_matrix_row(self, slice: str, row_category: str,\n row: List[int]):\n \"\"\"Logs a confusion matrix row for a slice.\n\n Row is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)\n self._update_metadata(slice)\n\n def log_confusion_matrix_cell(self, slice: str, row_category: str,\n col_category: str, value: int):\n \"\"\"Logs a confusion matrix cell for a slice..\n\n Cell is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(\n row_category, col_category, value)\n self._update_metadata(slice)\n\n def load_confusion_matrix(self, slice: str, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Supports bulk loading the whole confusion matrix for a slice.\n\n Args:\n slice: String representing slice label.\n categories: List of the category names.\n matrix: Complete confusion matrix.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(categories, matrix)\n self._update_metadata(slice)\n\n\nT = TypeVar('T')\n\n\nclass InputAnnotation():\n \"\"\"Marker type for input artifacts.\"\"\"\n pass\n\n\n\nclass OutputAnnotation():\n \"\"\"Marker type for output artifacts.\"\"\"\n pass\n\n\n# TODO: Use typing.Annotated instead of this hack.\n# With typing.Annotated (Python 3.9+ or typing_extensions package), the\n# following would look like:\n# Input = typing.Annotated[T, InputAnnotation]\n# Output = typing.Annotated[T, OutputAnnotation]\n\n\n# Input represents an Input artifact of type T.\nInput = Union[T, InputAnnotation]\n\n# Output represents an Output artifact of type T.\nOutput = Union[T, OutputAnnotation]\n\n\ndef is_artifact_annotation(typ) -> bool:\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [InputAnnotation, OutputAnnotation]\n\n if not hasattr(typ, '__origin__'):\n return False\n\n\n if typ.__origin__ != Union and type(typ.__origin__) != type(Union):\n return False\n\n\n if not hasattr(typ, '__args__') or len(typ.__args__) != 2:\n return False\n\n if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:\n return False\n\n return True\n\ndef is_input_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Input[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation\n\n return typ.__args__[1] == InputAnnotation\n\ndef is_output_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Output[T].\"\"\"\n if not is_artifact_annotation(typ):\n 
return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation\n\n return typ.__args__[1] == OutputAnnotation\n\ndef get_io_artifact_class(typ):\n if not is_artifact_annotation(typ):\n return None\n if typ == Input or typ == Output:\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[1]\n\n return typ.__args__[0]\n\ndef get_io_artifact_annotation(typ):\n if not is_artifact_annotation(typ):\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[2]\n\n return typ.__args__[1]\n\n\n\n_SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = {\n x.TYPE_NAME: x\n for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]\n}\n\n\ndef create_runtime_artifact(runtime_artifact: Dict) -> Artifact:\n \"\"\"Creates an Artifact instance from the specified RuntimeArtifact.\n\n Args:\n runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.\n \"\"\"\n schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')\n\n artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)\n if not artifact_type:\n artifact_type = Artifact\n return artifact_type(\n uri=runtime_artifact.get('uri', ''),\n name=runtime_artifact.get('name', ''),\n metadata=runtime_artifact.get('metadata', {}),\n )\n\nclass InputPath:\n '''When creating component from function, :class:`.InputPath` should be used as function parameter annotation to tell the system to pass the *data file path* to the function instead of passing the actual data.'''\n def __init__(self, type=None):\n self.type = type\n\nclass OutputPath:\n '''When creating component from function, :class:`.OutputPath` should be used as function parameter annotation to tell the system that the function wants to output data by writing it into a file with the given path instead of returning the data from the function.'''\n def __init__(self, type=None):\n self.type = type\n\nclass Executor():\n \"\"\"Executor executes v2-based Python function components.\"\"\"\n\n def __init__(self, executor_input: Dict, function_to_execute: Callable):\n self._func = function_to_execute\n self._input = executor_input\n self._input_artifacts: Dict[str, Artifact] = {}\n self._output_artifacts: Dict[str, Artifact] = {}\n\n for name, artifacts in self._input.get('inputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._input_artifacts[name] = self._make_input_artifact(\n artifacts_list[0])\n\n for name, artifacts in self._input.get('outputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._output_artifacts[name] = self._make_output_artifact(\n artifacts_list[0])\n\n self._return_annotation = inspect.signature(self._func).return_annotation\n self._executor_output = {}\n\n @classmethod\n def _make_input_artifact(cls, runtime_artifact: Dict):\n return create_runtime_artifact(runtime_artifact)\n\n @classmethod\n def _make_output_artifact(cls, runtime_artifact: Dict):\n import os\n artifact = create_runtime_artifact(runtime_artifact)\n os.makedirs(os.path.dirname(artifact.path), exist_ok=True)\n return artifact\n\n def _get_input_artifact(self, name: str):\n return self._input_artifacts.get(name)\n\n def _get_output_artifact(self, name: str):\n return 
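The `InputPath`/`OutputPath` markers above pass file paths instead of values; combined with `_maybe_strip_path_suffix`, a parameter named `metrics_path` resolves to the output named `metrics`. Below, `write_metrics` is a hypothetical component, not part of this patch; `OutputPath` also ships as `kfp.components.OutputPath` in the SDK proper, while the source embedded here inlines its own copy.

    from kfp.components import OutputPath

    def write_metrics(metrics_path: OutputPath(str)):
        # The executor strips the `_path` suffix, looks up the output named
        # `metrics`, creates parent directories, and hands in the file path.
        with open(metrics_path, 'w') as f:
            f.write('accuracy=0.92')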
self._output_artifacts.get(name)\n\n def _get_input_parameter_value(self, parameter_name: str, parameter_type: Any):\n parameter = self._input.get('inputs', {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n if parameter.get('stringValue'):\n if parameter_type == str:\n return parameter['stringValue']\n elif parameter_type == bool:\n # Use `.lower()` so it can also handle 'True' and 'False' (resulted from\n # `str(True)` and `str(False)`, respectively.\n return json.loads(parameter['stringValue'].lower())\n else:\n return json.loads(parameter['stringValue'])\n elif parameter.get('intValue'):\n return int(parameter['intValue'])\n elif parameter.get('doubleValue'):\n return float(parameter['doubleValue'])\n\n def _get_output_parameter_path(self, parameter_name: str):\n parameter_name = self._maybe_strip_path_suffix(parameter_name)\n parameter = self._input.get('outputs',\n {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n import os\n path = parameter.get('outputFile', None)\n if path:\n os.makedirs(os.path.dirname(path), exist_ok=True)\n return path\n\n def _get_output_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n output_artifact = self._output_artifacts.get(artifact_name)\n if not output_artifact:\n raise ValueError(\n 'Failed to get output artifact path for artifact name {}'.format(\n artifact_name))\n return output_artifact.path\n\n def _get_input_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n input_artifact = self._input_artifacts.get(artifact_name)\n if not input_artifact:\n raise ValueError(\n 'Failed to get input artifact path for artifact name {}'.format(\n artifact_name))\n return input_artifact.path\n\n def _write_output_parameter_value(self, name: str,\n value: Union[str, int, float, bool, dict,\n list, Dict, List]):\n if type(value) == str:\n output = {'stringValue': value}\n elif type(value) == int:\n output = {'intValue': value}\n elif type(value) == float:\n output = {'doubleValue': value}\n else:\n # For bool, list, dict, List, Dict, json serialize the value.\n output = {'stringValue': json.dumps(value)}\n\n if not self._executor_output.get('parameters'):\n self._executor_output['parameters'] = {}\n\n self._executor_output['parameters'][name] = output\n\n def _write_output_artifact_payload(self, name: str, value: Any):\n path = self._get_output_artifact_path(name)\n with open(path, 'w') as f:\n f.write(str(value))\n\n # TODO: extract to a util\n @classmethod\n def _get_short_type_name(cls, type_name: str) -> str:\n \"\"\"Extracts the short form type name.\n\n This method is used for looking up serializer for a given type.\n\n For example:\n typing.List -> List\n typing.List[int] -> List\n typing.Dict[str, str] -> Dict\n List -> List\n str -> str\n\n Args:\n type_name: The original type name.\n\n Returns:\n The short form type name or the original name if pattern doesn't match.\n \"\"\"\n import re\n match = re.match('(typing\\.)?(?P\\w+)(?:\\[.+\\])?', type_name)\n if match:\n return match.group('type')\n else:\n return type_name\n\n # TODO: merge with type_utils.is_parameter_type\n @classmethod\n def _is_parameter(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return annotation in [str, int, float, bool, dict, list]\n\n # Annotation could be, for instance `typing.Dict[str, str]`, etc.\n return cls._get_short_type_name(str(annotation)) in ['Dict', 
'List']\n\n @classmethod\n def _is_artifact(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, Artifact)\n return False\n\n @classmethod\n def _is_named_tuple(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, tuple) and hasattr(\n annotation, '_fields') and hasattr(annotation, '__annotations__')\n return False\n\n def _handle_single_return_value(self, output_name: str, annotation_type: Any,\n return_value: Any):\n if self._is_parameter(annotation_type):\n if type(return_value) != annotation_type:\n raise ValueError(\n 'Function `{}` returned value of type {}; want type {}'.format(\n self._func.__name__, type(return_value), annotation_type))\n self._write_output_parameter_value(output_name, return_value)\n elif self._is_artifact(annotation_type):\n self._write_output_artifact_payload(output_name, return_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. Must be one of `str`, `int`, `float`, or a'\n ' subclass of `Artifact`'.format(annotation_type))\n\n def _write_executor_output(self, func_output: Optional[Any] = None):\n if self._output_artifacts:\n self._executor_output['artifacts'] = {}\n\n for name, artifact in self._output_artifacts.items():\n runtime_artifact = {\n 'name': artifact.name,\n 'uri': artifact.uri,\n 'metadata': artifact.metadata,\n }\n artifacts_list = {'artifacts': [runtime_artifact]}\n\n self._executor_output['artifacts'][name] = artifacts_list\n\n if func_output is not None:\n if self._is_parameter(self._return_annotation) or self._is_artifact(\n self._return_annotation):\n # Note: single output is named `Output` in component.yaml.\n self._handle_single_return_value('Output', self._return_annotation,\n func_output)\n elif self._is_named_tuple(self._return_annotation):\n if len(self._return_annotation._fields) != len(func_output):\n raise RuntimeError(\n 'Expected {} return values from function `{}`, got {}'.format(\n len(self._return_annotation._fields), self._func.__name__,\n len(func_output)))\n for i in range(len(self._return_annotation._fields)):\n field = self._return_annotation._fields[i]\n field_type = self._return_annotation.__annotations__[field]\n if type(func_output) == tuple:\n field_value = func_output[i]\n else:\n field_value = getattr(func_output, field)\n self._handle_single_return_value(field, field_type, field_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. 
Must be one of `str`, `int`, `float`, a'\n ' subclass of `Artifact`, or a NamedTuple collection of these types.'\n .format(self._return_annotation))\n\n import os\n os.makedirs(\n os.path.dirname(self._input['outputs']['outputFile']), exist_ok=True)\n with open(self._input['outputs']['outputFile'], 'w') as f:\n f.write(json.dumps(self._executor_output))\n\n def _maybe_strip_path_suffix(self, name) -> str:\n if name.endswith('_path'):\n name = name[0:-len('_path')]\n if name.endswith('_file'):\n name = name[0:-len('_file')]\n return name\n\n def execute(self):\n annotations = inspect.getfullargspec(self._func).annotations\n\n # Function arguments.\n func_kwargs = {}\n\n for k, v in annotations.items():\n if k == 'return':\n continue\n\n if self._is_parameter(v):\n func_kwargs[k] = self._get_input_parameter_value(k, v)\n\n if is_artifact_annotation(v):\n if is_input_artifact(v):\n func_kwargs[k] = self._get_input_artifact(k)\n if is_output_artifact(v):\n func_kwargs[k] = self._get_output_artifact(k)\n\n elif isinstance(v, OutputPath):\n if self._is_parameter(v.type):\n func_kwargs[k] = self._get_output_parameter_path(k)\n else:\n func_kwargs[k] = self._get_output_artifact_path(k)\n elif isinstance(v, InputPath):\n func_kwargs[k] = self._get_input_artifact_path(k)\n\n result = self._func(**func_kwargs)\n self._write_executor_output(result)\n\n\ndef print_op(message: str):\n \"\"\"Prints a message.\"\"\"\n print(message)\n\n\ndef executor_main():\n import argparse\n import json\n\n parser = argparse.ArgumentParser(description='Process some integers.')\n parser.add_argument('--executor_input', type=str)\n parser.add_argument('--function_to_execute', type=str)\n\n args, _ = parser.parse_known_args()\n executor_input = json.loads(args.executor_input)\n function_to_execute = globals()[args.function_to_execute]\n\n executor = Executor(executor_input=executor_input,\n function_to_execute=function_to_execute)\n\n executor.execute()\n\n\nif __name__ == '__main__':\n executor_main()\n" + "\nimport json\nimport inspect\nfrom typing import *\n\n# Copyright 2021 The Kubeflow Authors\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Classes for input/output types in KFP SDK.\n\nThese are only compatible with v2 Pipelines.\n\"\"\"\n\nimport os\nfrom typing import Dict, Generic, List, Optional, Type, TypeVar, Union\n\n_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'\n_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'\n_S3_LOCAL_MOUNT_PREFIX = '/s3/'\n\n\nclass Artifact(object):\n \"\"\"Generic Artifact class.\n\n This class is meant to represent the metadata around an input or output\n machine-learning Artifact. 
Artifacts have URIs, which can either be a location\n on disk (or Cloud storage) or some other resource identifier such as\n an API resource name.\n\n Artifacts carry a `metadata` field, which is a dictionary for storing\n metadata related to this artifact.\n \"\"\"\n TYPE_NAME = 'system.Artifact'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n \"\"\"Initializes the Artifact with the given name, URI and metadata.\"\"\"\n self.uri = uri or ''\n self.name = name or ''\n self.metadata = metadata or {}\n\n @property\n def path(self):\n return self._get_path()\n\n @path.setter\n def path(self, path):\n self._set_path(path)\n\n def _get_path(self) -> Optional[str]:\n if self.uri.startswith('gs://'):\n return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]\n elif self.uri.startswith('minio://'):\n return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]\n elif self.uri.startswith('s3://'):\n return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]\n return None\n\n def _set_path(self, path):\n if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):\n path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):\n path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):\n path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]\n self.uri = path\n\n\nclass Model(Artifact):\n \"\"\"An artifact representing an ML Model.\"\"\"\n TYPE_NAME = 'system.Model'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n @property\n def framework(self) -> str:\n return self._get_framework()\n\n def _get_framework(self) -> str:\n return self.metadata.get('framework', '')\n\n @framework.setter\n def framework(self, framework: str):\n self._set_framework(framework)\n\n def _set_framework(self, framework: str):\n self.metadata['framework'] = framework\n\n\nclass Dataset(Artifact):\n \"\"\"An artifact representing an ML Dataset.\"\"\"\n TYPE_NAME = 'system.Dataset'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Metrics(Artifact):\n \"\"\"Represent a simple base Artifact type to store key-value scalar metrics.\"\"\"\n TYPE_NAME = 'system.Metrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_metric(self, metric: str, value: float):\n \"\"\"Sets a custom scalar metric.\n\n Args:\n metric: Metric key\n value: Value of the metric.\n \"\"\"\n self.metadata[metric] = value\n\n\nclass ClassificationMetrics(Artifact):\n \"\"\"Represents Artifact class to store Classification Metrics.\"\"\"\n TYPE_NAME = 'system.ClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):\n \"\"\"Logs a single data point in the ROC Curve.\n\n Args:\n fpr: False positive rate value of the data point.\n tpr: True positive rate value of the data point.\n threshold: Threshold value for the data point.\n \"\"\"\n\n roc_reading = {\n 'confidenceThreshold': threshold,\n 'recall': tpr,\n 
'falsePositiveRate': fpr\n }\n if 'confidenceMetrics' not in self.metadata.keys():\n self.metadata['confidenceMetrics'] = []\n\n self.metadata['confidenceMetrics'].append(roc_reading)\n\n def log_roc_curve(self, fpr: List[float], tpr: List[float],\n threshold: List[float]):\n \"\"\"Logs an ROC curve.\n\n The list length of fpr, tpr and threshold must be the same.\n\n Args:\n fpr: List of false positive rate values.\n tpr: List of true positive rate values.\n threshold: List of threshold values.\n \"\"\"\n if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(tpr) != len(\n threshold):\n raise ValueError('Length of fpr, tpr and threshold must be the same. '\n 'Got lengths {}, {} and {} respectively.'.format(\n len(fpr), len(tpr), len(threshold)))\n\n for i in range(len(fpr)):\n self.log_roc_data_point(fpr=fpr[i], tpr=tpr[i], threshold=threshold[i])\n\n def set_confusion_matrix_categories(self, categories: List[str]):\n \"\"\"Stores confusion matrix categories.\n\n Args:\n categories: List of strings specifying the categories.\n \"\"\"\n\n self._categories = []\n annotation_specs = []\n for category in categories:\n annotation_spec = {'displayName': category}\n self._categories.append(category)\n annotation_specs.append(annotation_spec)\n\n self._matrix = []\n for row in range(len(self._categories)):\n self._matrix.append({'row': [0] * len(self._categories)})\n\n self._confusion_matrix = {}\n self._confusion_matrix['annotationSpecs'] = annotation_specs\n self._confusion_matrix['rows'] = self._matrix\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_row(self, row_category: str, row: List[float]):\n \"\"\"Logs a confusion matrix row.\n\n Args:\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n\n Raises:\n ValueError: If row_category is not in the list of categories\n set in set_categories call.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if len(row) != len(self._categories):\n raise ValueError('Invalid row. Expected size: {} got: {}'.\\\n format(len(self._categories), len(row)))\n\n self._matrix[self._categories.index(row_category)] = {'row': row}\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_cell(self, row_category: str, col_category: str,\n value: int):\n \"\"\"Logs a cell in the confusion matrix.\n\n Args:\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n\n Raises:\n ValueError: If row_category or col_category is not in the list of\n categories set in set_categories.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if col_category not in self._categories:\n raise ValueError('Invalid category: {} passed. 
Expected one of: {}'.\\\n format(col_category, self._categories))\n\n self._matrix[self._categories.index(row_category)]['row'][\n self._categories.index(col_category)] = value\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix(self, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Logs a confusion matrix.\n\n Args:\n categories: List of the category names.\n matrix: Complete confusion matrix.\n\n Raises:\n ValueError: Length of categories does not match number of rows or columns.\n \"\"\"\n self.set_confusion_matrix_categories(categories)\n\n if len(matrix) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n for index in range(len(categories)):\n if len(matrix[index]) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n self.log_confusion_matrix_row(categories[index], matrix[index])\n\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n\nclass SlicedClassificationMetrics(Artifact):\n \"\"\"Metrics class representing Sliced Classification Metrics.\n\n Similar to ClassificationMetrics, clients using this class are expected to use\n log methods of the class to log metrics with the difference being each log\n method takes a slice to associate the ClassificationMetrics.\n\n \"\"\"\n\n TYPE_NAME = 'system.SlicedClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def _upsert_classification_metrics_for_slice(self, slice: str):\n \"\"\"Upserts the classification metrics instance for a slice.\"\"\"\n if slice not in self._sliced_metrics:\n self._sliced_metrics[slice] = ClassificationMetrics()\n\n def _update_metadata(self, slice: str):\n \"\"\"Updates metadata to adhere to the metrics schema.\"\"\"\n self.metadata = {}\n self.metadata['evaluationSlices'] = []\n for slice in self._sliced_metrics.keys():\n slice_metrics = {\n 'slice': slice,\n 'sliceClassificationMetrics': self._sliced_metrics[slice].metadata\n }\n self.metadata['evaluationSlices'].append(slice_metrics)\n\n def log_roc_reading(self, slice: str, threshold: float, tpr: float,\n fpr: float):\n \"\"\"Logs a single data point in the ROC Curve of a slice.\n\n Args:\n slice: String representing slice label.\n threshold: Threshold value for the data point.\n tpr: True positive rate value of the data point.\n fpr: False positive rate value of the data point.\n \"\"\"\n\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_roc_reading(threshold, tpr, fpr)\n self._update_metadata(slice)\n\n def load_roc_readings(self, slice: str, readings: List[List[float]]):\n \"\"\"Supports bulk loading ROC Curve readings for a slice.\n\n Args:\n slice: String representing slice label.\n readings: A 2-D list providing ROC Curve data points.\n The expected order of the data points is: threshold,\n true_positive_rate, false_positive_rate.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].load_roc_readings(readings)\n self._update_metadata(slice)\n\n def set_confusion_matrix_categories(self, slice: str, categories: List[str]):\n \"\"\"Stores confusion matrix categories for a slice.\n\n Categories are stored in the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n categories: 
List of strings specifying the categories.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].set_confusion_matrix_categories(categories)\n self._update_metadata(slice)\n\n def log_confusion_matrix_row(self, slice: str, row_category: str,\n row: List[int]):\n \"\"\"Logs a confusion matrix row for a slice.\n\n Row is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)\n self._update_metadata(slice)\n\n def log_confusion_matrix_cell(self, slice: str, row_category: str,\n col_category: str, value: int):\n \"\"\"Logs a confusion matrix cell for a slice.\n\n Cell is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(\n row_category, col_category, value)\n self._update_metadata(slice)\n\n def load_confusion_matrix(self, slice: str, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Supports bulk loading the whole confusion matrix for a slice.\n\n Args:\n slice: String representing slice label.\n categories: List of the category names.\n matrix: Complete confusion matrix.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix(categories, matrix)\n self._update_metadata(slice)\n\n\nclass HTML(Artifact):\n \"\"\"An artifact representing an HTML file.\"\"\"\n TYPE_NAME = 'system.HTML'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Markdown(Artifact):\n \"\"\"An artifact representing a Markdown file.\"\"\"\n TYPE_NAME = 'system.Markdown'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nT = TypeVar('T')\n\n\nclass InputAnnotation():\n \"\"\"Marker type for input artifacts.\"\"\"\n pass\n\n\nclass OutputAnnotation():\n \"\"\"Marker type for output artifacts.\"\"\"\n pass\n\n\n# TODO: Use typing.Annotated instead of this hack.\n# With typing.Annotated (Python 3.9+ or typing_extensions package), the\n# following would look like:\n# Input = typing.Annotated[T, InputAnnotation]\n# Output = typing.Annotated[T, OutputAnnotation]\n\n# Input represents an Input artifact of type T.\nInput = Union[T, InputAnnotation]\n\n# Output represents an Output artifact of type T.\nOutput = Union[T, OutputAnnotation]\n\n\ndef is_artifact_annotation(typ) -> bool:\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [\n InputAnnotation, OutputAnnotation\n ]\n\n if not hasattr(typ, '__origin__'):\n return False\n\n if typ.__origin__ != Union and type(typ.__origin__) != type(Union):\n return False\n\n if not hasattr(typ, '__args__') or len(typ.__args__) != 2:\n return 
False\n\n if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:\n return False\n\n return True\n\n\ndef is_input_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Input[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation\n\n return typ.__args__[1] == InputAnnotation\n\n\ndef is_output_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Output[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation\n\n return typ.__args__[1] == OutputAnnotation\n\n\ndef get_io_artifact_class(typ):\n if not is_artifact_annotation(typ):\n return None\n if typ == Input or typ == Output:\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[1]\n\n return typ.__args__[0]\n\n\ndef get_io_artifact_annotation(typ):\n if not is_artifact_annotation(typ):\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[2]\n\n return typ.__args__[1]\n\n\n_SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = {\n x.TYPE_NAME: x\n for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]\n}\n\n\ndef create_runtime_artifact(runtime_artifact: Dict) -> Artifact:\n \"\"\"Creates an Artifact instance from the specified RuntimeArtifact.\n\n Args:\n runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.\n \"\"\"\n schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')\n\n artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)\n if not artifact_type:\n artifact_type = Artifact\n return artifact_type(\n uri=runtime_artifact.get('uri', ''),\n name=runtime_artifact.get('name', ''),\n metadata=runtime_artifact.get('metadata', {}),\n )\n\nclass InputPath:\n '''When creating component from function, :class:`.InputPath` should be used as function parameter annotation to tell the system to pass the *data file path* to the function instead of passing the actual data.'''\n def __init__(self, type=None):\n self.type = type\n\nclass OutputPath:\n '''When creating component from function, :class:`.OutputPath` should be used as function parameter annotation to tell the system that the function wants to output data by writing it into a file with the given path instead of returning the data from the function.'''\n def __init__(self, type=None):\n self.type = type\n\nclass Executor():\n \"\"\"Executor executes v2-based Python function components.\"\"\"\n\n def __init__(self, executor_input: Dict, function_to_execute: Callable):\n self._func = function_to_execute\n self._input = executor_input\n self._input_artifacts: Dict[str, Artifact] = {}\n self._output_artifacts: Dict[str, Artifact] = {}\n\n for name, artifacts in self._input.get('inputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._input_artifacts[name] = self._make_input_artifact(\n artifacts_list[0])\n\n for name, artifacts in self._input.get('outputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._output_artifacts[name] = self._make_output_artifact(\n artifacts_list[0])\n\n self._return_annotation = 
inspect.signature(self._func).return_annotation\n self._executor_output = {}\n\n @classmethod\n def _make_input_artifact(cls, runtime_artifact: Dict):\n return create_runtime_artifact(runtime_artifact)\n\n @classmethod\n def _make_output_artifact(cls, runtime_artifact: Dict):\n import os\n artifact = create_runtime_artifact(runtime_artifact)\n os.makedirs(os.path.dirname(artifact.path), exist_ok=True)\n return artifact\n\n def _get_input_artifact(self, name: str):\n return self._input_artifacts.get(name)\n\n def _get_output_artifact(self, name: str):\n return self._output_artifacts.get(name)\n\n def _get_input_parameter_value(self, parameter_name: str, parameter_type: Any):\n parameter = self._input.get('inputs', {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n if parameter.get('stringValue'):\n if parameter_type == str:\n return parameter['stringValue']\n elif parameter_type == bool:\n # Use `.lower()` so it can also handle 'True' and 'False' (resulted from\n # `str(True)` and `str(False)`, respectively.\n return json.loads(parameter['stringValue'].lower())\n else:\n return json.loads(parameter['stringValue'])\n elif parameter.get('intValue'):\n return int(parameter['intValue'])\n elif parameter.get('doubleValue'):\n return float(parameter['doubleValue'])\n\n def _get_output_parameter_path(self, parameter_name: str):\n parameter_name = self._maybe_strip_path_suffix(parameter_name)\n parameter = self._input.get('outputs',\n {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n import os\n path = parameter.get('outputFile', None)\n if path:\n os.makedirs(os.path.dirname(path), exist_ok=True)\n return path\n\n def _get_output_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n output_artifact = self._output_artifacts.get(artifact_name)\n if not output_artifact:\n raise ValueError(\n 'Failed to get output artifact path for artifact name {}'.format(\n artifact_name))\n return output_artifact.path\n\n def _get_input_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n input_artifact = self._input_artifacts.get(artifact_name)\n if not input_artifact:\n raise ValueError(\n 'Failed to get input artifact path for artifact name {}'.format(\n artifact_name))\n return input_artifact.path\n\n def _write_output_parameter_value(self, name: str,\n value: Union[str, int, float, bool, dict,\n list, Dict, List]):\n if type(value) == str:\n output = {'stringValue': value}\n elif type(value) == int:\n output = {'intValue': value}\n elif type(value) == float:\n output = {'doubleValue': value}\n else:\n # For bool, list, dict, List, Dict, json serialize the value.\n output = {'stringValue': json.dumps(value)}\n\n if not self._executor_output.get('parameters'):\n self._executor_output['parameters'] = {}\n\n self._executor_output['parameters'][name] = output\n\n def _write_output_artifact_payload(self, name: str, value: Any):\n path = self._get_output_artifact_path(name)\n with open(path, 'w') as f:\n f.write(str(value))\n\n # TODO: extract to a util\n @classmethod\n def _get_short_type_name(cls, type_name: str) -> str:\n \"\"\"Extracts the short form type name.\n\n This method is used for looking up serializer for a given type.\n\n For example:\n typing.List -> List\n typing.List[int] -> List\n typing.Dict[str, str] -> Dict\n List -> List\n str -> str\n\n Args:\n type_name: The original type name.\n\n Returns:\n The 
short form type name or the original name if pattern doesn't match.\n \"\"\"\n import re\n match = re.match('(typing\.)?(?P<type>\w+)(?:\[.+\])?', type_name)\n if match:\n return match.group('type')\n else:\n return type_name\n\n # TODO: merge with type_utils.is_parameter_type\n @classmethod\n def _is_parameter(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return annotation in [str, int, float, bool, dict, list]\n\n # Annotation could be, for instance `typing.Dict[str, str]`, etc.\n return cls._get_short_type_name(str(annotation)) in ['Dict', 'List']\n\n @classmethod\n def _is_artifact(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, Artifact)\n return False\n\n @classmethod\n def _is_named_tuple(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, tuple) and hasattr(\n annotation, '_fields') and hasattr(annotation, '__annotations__')\n return False\n\n def _handle_single_return_value(self, output_name: str, annotation_type: Any,\n return_value: Any):\n if self._is_parameter(annotation_type):\n if type(return_value) != annotation_type:\n raise ValueError(\n 'Function `{}` returned value of type {}; want type {}'.format(\n self._func.__name__, type(return_value), annotation_type))\n self._write_output_parameter_value(output_name, return_value)\n elif self._is_artifact(annotation_type):\n self._write_output_artifact_payload(output_name, return_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. Must be one of `str`, `int`, `float`, or a'\n ' subclass of `Artifact`'.format(annotation_type))\n\n def _write_executor_output(self, func_output: Optional[Any] = None):\n if self._output_artifacts:\n self._executor_output['artifacts'] = {}\n\n for name, artifact in self._output_artifacts.items():\n runtime_artifact = {\n 'name': artifact.name,\n 'uri': artifact.uri,\n 'metadata': artifact.metadata,\n }\n artifacts_list = {'artifacts': [runtime_artifact]}\n\n self._executor_output['artifacts'][name] = artifacts_list\n\n if func_output is not None:\n if self._is_parameter(self._return_annotation) or self._is_artifact(\n self._return_annotation):\n # Note: single output is named `Output` in component.yaml.\n self._handle_single_return_value('Output', self._return_annotation,\n func_output)\n elif self._is_named_tuple(self._return_annotation):\n if len(self._return_annotation._fields) != len(func_output):\n raise RuntimeError(\n 'Expected {} return values from function `{}`, got {}'.format(\n len(self._return_annotation._fields), self._func.__name__,\n len(func_output)))\n for i in range(len(self._return_annotation._fields)):\n field = self._return_annotation._fields[i]\n field_type = self._return_annotation.__annotations__[field]\n if type(func_output) == tuple:\n field_value = func_output[i]\n else:\n field_value = getattr(func_output, field)\n self._handle_single_return_value(field, field_type, field_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. 
Must be one of `str`, `int`, `float`, a'\n ' subclass of `Artifact`, or a NamedTuple collection of these types.'\n .format(self._return_annotation))\n\n import os\n os.makedirs(\n os.path.dirname(self._input['outputs']['outputFile']), exist_ok=True)\n with open(self._input['outputs']['outputFile'], 'w') as f:\n f.write(json.dumps(self._executor_output))\n\n def _maybe_strip_path_suffix(self, name) -> str:\n if name.endswith('_path'):\n name = name[0:-len('_path')]\n if name.endswith('_file'):\n name = name[0:-len('_file')]\n return name\n\n def execute(self):\n annotations = inspect.getfullargspec(self._func).annotations\n\n # Function arguments.\n func_kwargs = {}\n\n for k, v in annotations.items():\n if k == 'return':\n continue\n\n if self._is_parameter(v):\n func_kwargs[k] = self._get_input_parameter_value(k, v)\n\n if is_artifact_annotation(v):\n if is_input_artifact(v):\n func_kwargs[k] = self._get_input_artifact(k)\n if is_output_artifact(v):\n func_kwargs[k] = self._get_output_artifact(k)\n\n elif isinstance(v, OutputPath):\n if self._is_parameter(v.type):\n func_kwargs[k] = self._get_output_parameter_path(k)\n else:\n func_kwargs[k] = self._get_output_artifact_path(k)\n elif isinstance(v, InputPath):\n func_kwargs[k] = self._get_input_artifact_path(k)\n\n result = self._func(**func_kwargs)\n self._write_executor_output(result)\n\n\ndef print_op(message: str):\n \"\"\"Prints a message.\"\"\"\n print(message)\n\n\ndef executor_main():\n import argparse\n import json\n\n parser = argparse.ArgumentParser(description='Process some integers.')\n parser.add_argument('--executor_input', type=str)\n parser.add_argument('--function_to_execute', type=str)\n\n args, _ = parser.parse_known_args()\n executor_input = json.loads(args.executor_input)\n function_to_execute = globals()[args.function_to_execute]\n\n executor = Executor(executor_input=executor_input,\n function_to_execute=function_to_execute)\n\n executor.execute()\n\n\nif __name__ == '__main__':\n executor_main()\n" ], "image": "python:3.7" } @@ -139,7 +139,7 @@ "sh", "-ec", "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n", - "\nimport json\nimport inspect\nfrom typing import *\n\n# Copyright 2021 The Kubeflow Authors\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Classes for input/output types in KFP SDK.\n\nThese are only compatible with v2 Pipelines.\n\"\"\"\n\nimport os\nfrom typing import Dict, Generic, List, Optional, Type, TypeVar, Union\n\n\n_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'\n_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'\n_S3_LOCAL_MOUNT_PREFIX = '/s3/'\n\n\nclass Artifact(object):\n \"\"\"Generic Artifact class.\n\n This class is meant to represent the metadata around an input or output\n machine-learning Artifact. 
Artifacts have URIs, which can either be a location\n on disk (or Cloud storage) or some other resource identifier such as\n an API resource name.\n\n Artifacts carry a `metadata` field, which is a dictionary for storing\n metadata related to this artifact.\n \"\"\"\n TYPE_NAME = 'system.Artifact'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n \"\"\"Initializes the Artifact with the given name, URI and metadata.\"\"\"\n self.uri = uri or ''\n self.name = name or ''\n self.metadata = metadata or {}\n\n @property\n def path(self):\n return self._get_path()\n\n @path.setter\n def path(self, path):\n self._set_path(path)\n\n def _get_path(self) -> Optional[str]:\n if self.uri.startswith('gs://'):\n return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]\n elif self.uri.startswith('minio://'):\n return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]\n elif self.uri.startswith('s3://'):\n return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]\n return None\n\n def _set_path(self, path):\n if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):\n path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):\n path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):\n path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]\n self.uri = path\n\n\nclass Model(Artifact):\n \"\"\"An artifact representing an ML Model.\"\"\"\n TYPE_NAME = 'system.Model'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n @property\n def framework(self) -> str:\n return self._get_framework()\n\n def _get_framework(self) -> str:\n return self.metadata.get('framework', '')\n\n @framework.setter\n def framework(self, framework: str):\n self._set_framework(framework)\n\n def _set_framework(self, framework: str):\n self.metadata['framework'] = framework\n\n\nclass Dataset(Artifact):\n \"\"\"An artifact representing an ML Dataset.\"\"\"\n TYPE_NAME = 'system.Dataset'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Metrics(Artifact):\n \"\"\"Represent a simple base Artifact type to store key-value scalar metrics.\"\"\"\n TYPE_NAME = 'system.Metrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_metric(self, metric: str, value: float):\n \"\"\"Sets a custom scalar metric.\n\n Args:\n metric: Metric key\n value: Value of the metric.\n \"\"\"\n self.metadata[metric] = value\n\n\nclass ClassificationMetrics(Artifact):\n \"\"\"Represents Artifact class to store Classification Metrics.\"\"\"\n TYPE_NAME = 'system.ClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):\n \"\"\"Logs a single data point in the ROC Curve.\n\n Args:\n fpr: False positive rate value of the data point.\n tpr: True positive rate value of the data point.\n threshold: Threshold value for the data point.\n \"\"\"\n\n roc_reading = {\n 'confidenceThreshold': threshold,\n 'recall': tpr,\n 
'falsePositiveRate': fpr\n }\n if 'confidenceMetrics' not in self.metadata.keys():\n self.metadata['confidenceMetrics'] = []\n\n self.metadata['confidenceMetrics'].append(roc_reading)\n\n def log_roc_curve(self, fpr: List[float], tpr: List[float],\n threshold: List[float]):\n \"\"\"Logs an ROC curve.\n\n The list length of fpr, tpr and threshold must be the same.\n\n Args:\n fpr: List of false positive rate values.\n tpr: List of true positive rate values.\n threshold: List of threshold values.\n \"\"\"\n if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(tpr) != len(\n threshold):\n raise ValueError('Length of fpr, tpr and threshold must be the same. '\n 'Got lengths {}, {} and {} respectively.'.format(\n len(fpr), len(tpr), len(threshold)))\n\n for i in range(len(fpr)):\n self.log_roc_data_point(fpr=fpr[i], tpr=tpr[i], threshold=threshold[i])\n\n def set_confusion_matrix_categories(self, categories: List[str]):\n \"\"\"Stores confusion matrix categories.\n\n Args:\n categories: List of strings specifying the categories.\n \"\"\"\n\n self._categories = []\n annotation_specs = []\n for category in categories:\n annotation_spec = {'displayName': category}\n self._categories.append(category)\n annotation_specs.append(annotation_spec)\n\n self._matrix = []\n for row in range(len(self._categories)):\n self._matrix.append({'row': [0] * len(self._categories)})\n\n self._confusion_matrix = {}\n self._confusion_matrix['annotationSpecs'] = annotation_specs\n self._confusion_matrix['rows'] = self._matrix\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_row(self, row_category: str, row: List[float]):\n \"\"\"Logs a confusion matrix row.\n\n Args:\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n\n Raises:\n ValueError: If row_category is not in the list of categories\n set in set_categories call.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if len(row) != len(self._categories):\n raise ValueError('Invalid row. Expected size: {} got: {}'.\\\n format(len(self._categories), len(row)))\n\n self._matrix[self._categories.index(row_category)] = {'row': row}\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_cell(self, row_category: str, col_category: str,\n value: int):\n \"\"\"Logs a cell in the confusion matrix.\n\n Args:\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n\n Raises:\n ValueError: If row_category or col_category is not in the list of\n categories set in set_categories.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if col_category not in self._categories:\n raise ValueError('Invalid category: {} passed. 
Expected one of: {}'.\\\n format(row_category, self._categories))\n\n self._matrix[self._categories.index(row_category)]['row'][\n self._categories.index(col_category)] = value\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix(self, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Logs a confusion matrix.\n\n Args:\n categories: List of the category names.\n matrix: Complete confusion matrix.\n\n Raises:\n ValueError: Length of categories does not match number of rows or columns.\n \"\"\"\n self.set_confusion_matrix_categories(categories)\n\n if len(matrix) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n for index in range(len(categories)):\n if len(matrix[index]) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n self.log_confusion_matrix_row(categories[index], matrix[index])\n\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n\nclass SlicedClassificationMetrics(Artifact):\n \"\"\"Metrics class representing Sliced Classification Metrics.\n\n Similar to ClassificationMetrics clients using this class are expected to use\n log methods of the class to log metrics with the difference being each log\n method takes a slice to associate the ClassificationMetrics.\n\n \"\"\"\n\n TYPE_NAME = 'system.SlicedClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def _upsert_classification_metrics_for_slice(self, slice: str):\n \"\"\"Upserts the classification metrics instance for a slice.\"\"\"\n if slice not in self._sliced_metrics:\n self._sliced_metrics[slice] = ClassificationMetrics()\n\n def _update_metadata(self, slice: str):\n \"\"\"Updates metadata to adhere to the metrics schema.\"\"\"\n self.metadata = {}\n self.metadata['evaluationSlices'] = []\n for slice in self._sliced_metrics.keys():\n slice_metrics = {\n 'slice': slice,\n 'sliceClassificationMetrics': self._sliced_metrics[slice].metadata\n }\n self.metadata['evaluationSlices'].append(slice_metrics)\n\n def log_roc_reading(self, slice: str, threshold: float, tpr: float,\n fpr: float):\n \"\"\"Logs a single data point in the ROC Curve of a slice.\n\n Args:\n slice: String representing slice label.\n threshold: Thresold value for the data point.\n tpr: True positive rate value of the data point.\n fpr: False positive rate value of the data point.\n \"\"\"\n\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_roc_reading(threshold, tpr, fpr)\n self._update_metadata(slice)\n\n def load_roc_readings(self, slice: str, readings: List[List[float]]):\n \"\"\"Supports bulk loading ROC Curve readings for a slice.\n\n Args:\n slice: String representing slice label.\n readings: A 2-D list providing ROC Curve data points.\n The expected order of the data points is: threshold,\n true_positive_rate, false_positive_rate.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].load_roc_readings(readings)\n self._update_metadata(slice)\n\n def set_confusion_matrix_categories(self, slice: str, categories: List[str]):\n \"\"\"Stores confusion matrix categories for a slice..\n\n Categories are stored in the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n categories: 
List of strings specifying the categories.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].set_confusion_matrix_categories(categories)\n self._update_metadata(slice)\n\n def log_confusion_matrix_row(self, slice: str, row_category: str,\n row: List[int]):\n \"\"\"Logs a confusion matrix row for a slice.\n\n Row is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)\n self._update_metadata(slice)\n\n def log_confusion_matrix_cell(self, slice: str, row_category: str,\n col_category: str, value: int):\n \"\"\"Logs a confusion matrix cell for a slice..\n\n Cell is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(\n row_category, col_category, value)\n self._update_metadata(slice)\n\n def load_confusion_matrix(self, slice: str, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Supports bulk loading the whole confusion matrix for a slice.\n\n Args:\n slice: String representing slice label.\n categories: List of the category names.\n matrix: Complete confusion matrix.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(categories, matrix)\n self._update_metadata(slice)\n\n\nT = TypeVar('T')\n\n\nclass InputAnnotation():\n \"\"\"Marker type for input artifacts.\"\"\"\n pass\n\n\n\nclass OutputAnnotation():\n \"\"\"Marker type for output artifacts.\"\"\"\n pass\n\n\n# TODO: Use typing.Annotated instead of this hack.\n# With typing.Annotated (Python 3.9+ or typing_extensions package), the\n# following would look like:\n# Input = typing.Annotated[T, InputAnnotation]\n# Output = typing.Annotated[T, OutputAnnotation]\n\n\n# Input represents an Input artifact of type T.\nInput = Union[T, InputAnnotation]\n\n# Output represents an Output artifact of type T.\nOutput = Union[T, OutputAnnotation]\n\n\ndef is_artifact_annotation(typ) -> bool:\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [InputAnnotation, OutputAnnotation]\n\n if not hasattr(typ, '__origin__'):\n return False\n\n\n if typ.__origin__ != Union and type(typ.__origin__) != type(Union):\n return False\n\n\n if not hasattr(typ, '__args__') or len(typ.__args__) != 2:\n return False\n\n if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:\n return False\n\n return True\n\ndef is_input_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Input[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation\n\n return typ.__args__[1] == InputAnnotation\n\ndef is_output_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Output[T].\"\"\"\n if not is_artifact_annotation(typ):\n 
return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation\n\n return typ.__args__[1] == OutputAnnotation\n\ndef get_io_artifact_class(typ):\n if not is_artifact_annotation(typ):\n return None\n if typ == Input or typ == Output:\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[1]\n\n return typ.__args__[0]\n\ndef get_io_artifact_annotation(typ):\n if not is_artifact_annotation(typ):\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[2]\n\n return typ.__args__[1]\n\n\n\n_SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = {\n x.TYPE_NAME: x\n for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]\n}\n\n\ndef create_runtime_artifact(runtime_artifact: Dict) -> Artifact:\n \"\"\"Creates an Artifact instance from the specified RuntimeArtifact.\n\n Args:\n runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.\n \"\"\"\n schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')\n\n artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)\n if not artifact_type:\n artifact_type = Artifact\n return artifact_type(\n uri=runtime_artifact.get('uri', ''),\n name=runtime_artifact.get('name', ''),\n metadata=runtime_artifact.get('metadata', {}),\n )\n\nclass InputPath:\n '''When creating component from function, :class:`.InputPath` should be used as function parameter annotation to tell the system to pass the *data file path* to the function instead of passing the actual data.'''\n def __init__(self, type=None):\n self.type = type\n\nclass OutputPath:\n '''When creating component from function, :class:`.OutputPath` should be used as function parameter annotation to tell the system that the function wants to output data by writing it into a file with the given path instead of returning the data from the function.'''\n def __init__(self, type=None):\n self.type = type\n\nclass Executor():\n \"\"\"Executor executes v2-based Python function components.\"\"\"\n\n def __init__(self, executor_input: Dict, function_to_execute: Callable):\n self._func = function_to_execute\n self._input = executor_input\n self._input_artifacts: Dict[str, Artifact] = {}\n self._output_artifacts: Dict[str, Artifact] = {}\n\n for name, artifacts in self._input.get('inputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._input_artifacts[name] = self._make_input_artifact(\n artifacts_list[0])\n\n for name, artifacts in self._input.get('outputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._output_artifacts[name] = self._make_output_artifact(\n artifacts_list[0])\n\n self._return_annotation = inspect.signature(self._func).return_annotation\n self._executor_output = {}\n\n @classmethod\n def _make_input_artifact(cls, runtime_artifact: Dict):\n return create_runtime_artifact(runtime_artifact)\n\n @classmethod\n def _make_output_artifact(cls, runtime_artifact: Dict):\n import os\n artifact = create_runtime_artifact(runtime_artifact)\n os.makedirs(os.path.dirname(artifact.path), exist_ok=True)\n return artifact\n\n def _get_input_artifact(self, name: str):\n return self._input_artifacts.get(name)\n\n def _get_output_artifact(self, name: str):\n return 
self._output_artifacts.get(name)\n\n def _get_input_parameter_value(self, parameter_name: str, parameter_type: Any):\n parameter = self._input.get('inputs', {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n if parameter.get('stringValue'):\n if parameter_type == str:\n return parameter['stringValue']\n elif parameter_type == bool:\n # Use `.lower()` so it can also handle 'True' and 'False' (resulted from\n # `str(True)` and `str(False)`, respectively.\n return json.loads(parameter['stringValue'].lower())\n else:\n return json.loads(parameter['stringValue'])\n elif parameter.get('intValue'):\n return int(parameter['intValue'])\n elif parameter.get('doubleValue'):\n return float(parameter['doubleValue'])\n\n def _get_output_parameter_path(self, parameter_name: str):\n parameter_name = self._maybe_strip_path_suffix(parameter_name)\n parameter = self._input.get('outputs',\n {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n import os\n path = parameter.get('outputFile', None)\n if path:\n os.makedirs(os.path.dirname(path), exist_ok=True)\n return path\n\n def _get_output_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n output_artifact = self._output_artifacts.get(artifact_name)\n if not output_artifact:\n raise ValueError(\n 'Failed to get output artifact path for artifact name {}'.format(\n artifact_name))\n return output_artifact.path\n\n def _get_input_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n input_artifact = self._input_artifacts.get(artifact_name)\n if not input_artifact:\n raise ValueError(\n 'Failed to get input artifact path for artifact name {}'.format(\n artifact_name))\n return input_artifact.path\n\n def _write_output_parameter_value(self, name: str,\n value: Union[str, int, float, bool, dict,\n list, Dict, List]):\n if type(value) == str:\n output = {'stringValue': value}\n elif type(value) == int:\n output = {'intValue': value}\n elif type(value) == float:\n output = {'doubleValue': value}\n else:\n # For bool, list, dict, List, Dict, json serialize the value.\n output = {'stringValue': json.dumps(value)}\n\n if not self._executor_output.get('parameters'):\n self._executor_output['parameters'] = {}\n\n self._executor_output['parameters'][name] = output\n\n def _write_output_artifact_payload(self, name: str, value: Any):\n path = self._get_output_artifact_path(name)\n with open(path, 'w') as f:\n f.write(str(value))\n\n # TODO: extract to a util\n @classmethod\n def _get_short_type_name(cls, type_name: str) -> str:\n \"\"\"Extracts the short form type name.\n\n This method is used for looking up serializer for a given type.\n\n For example:\n typing.List -> List\n typing.List[int] -> List\n typing.Dict[str, str] -> Dict\n List -> List\n str -> str\n\n Args:\n type_name: The original type name.\n\n Returns:\n The short form type name or the original name if pattern doesn't match.\n \"\"\"\n import re\n match = re.match('(typing\\.)?(?P\\w+)(?:\\[.+\\])?', type_name)\n if match:\n return match.group('type')\n else:\n return type_name\n\n # TODO: merge with type_utils.is_parameter_type\n @classmethod\n def _is_parameter(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return annotation in [str, int, float, bool, dict, list]\n\n # Annotation could be, for instance `typing.Dict[str, str]`, etc.\n return cls._get_short_type_name(str(annotation)) in ['Dict', 
'List']\n\n @classmethod\n def _is_artifact(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, Artifact)\n return False\n\n @classmethod\n def _is_named_tuple(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, tuple) and hasattr(\n annotation, '_fields') and hasattr(annotation, '__annotations__')\n return False\n\n def _handle_single_return_value(self, output_name: str, annotation_type: Any,\n return_value: Any):\n if self._is_parameter(annotation_type):\n if type(return_value) != annotation_type:\n raise ValueError(\n 'Function `{}` returned value of type {}; want type {}'.format(\n self._func.__name__, type(return_value), annotation_type))\n self._write_output_parameter_value(output_name, return_value)\n elif self._is_artifact(annotation_type):\n self._write_output_artifact_payload(output_name, return_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. Must be one of `str`, `int`, `float`, or a'\n ' subclass of `Artifact`'.format(annotation_type))\n\n def _write_executor_output(self, func_output: Optional[Any] = None):\n if self._output_artifacts:\n self._executor_output['artifacts'] = {}\n\n for name, artifact in self._output_artifacts.items():\n runtime_artifact = {\n 'name': artifact.name,\n 'uri': artifact.uri,\n 'metadata': artifact.metadata,\n }\n artifacts_list = {'artifacts': [runtime_artifact]}\n\n self._executor_output['artifacts'][name] = artifacts_list\n\n if func_output is not None:\n if self._is_parameter(self._return_annotation) or self._is_artifact(\n self._return_annotation):\n # Note: single output is named `Output` in component.yaml.\n self._handle_single_return_value('Output', self._return_annotation,\n func_output)\n elif self._is_named_tuple(self._return_annotation):\n if len(self._return_annotation._fields) != len(func_output):\n raise RuntimeError(\n 'Expected {} return values from function `{}`, got {}'.format(\n len(self._return_annotation._fields), self._func.__name__,\n len(func_output)))\n for i in range(len(self._return_annotation._fields)):\n field = self._return_annotation._fields[i]\n field_type = self._return_annotation.__annotations__[field]\n if type(func_output) == tuple:\n field_value = func_output[i]\n else:\n field_value = getattr(func_output, field)\n self._handle_single_return_value(field, field_type, field_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. 
Must be one of `str`, `int`, `float`, a'\n ' subclass of `Artifact`, or a NamedTuple collection of these types.'\n .format(self._return_annotation))\n\n import os\n os.makedirs(\n os.path.dirname(self._input['outputs']['outputFile']), exist_ok=True)\n with open(self._input['outputs']['outputFile'], 'w') as f:\n f.write(json.dumps(self._executor_output))\n\n def _maybe_strip_path_suffix(self, name) -> str:\n if name.endswith('_path'):\n name = name[0:-len('_path')]\n if name.endswith('_file'):\n name = name[0:-len('_file')]\n return name\n\n def execute(self):\n annotations = inspect.getfullargspec(self._func).annotations\n\n # Function arguments.\n func_kwargs = {}\n\n for k, v in annotations.items():\n if k == 'return':\n continue\n\n if self._is_parameter(v):\n func_kwargs[k] = self._get_input_parameter_value(k, v)\n\n if is_artifact_annotation(v):\n if is_input_artifact(v):\n func_kwargs[k] = self._get_input_artifact(k)\n if is_output_artifact(v):\n func_kwargs[k] = self._get_output_artifact(k)\n\n elif isinstance(v, OutputPath):\n if self._is_parameter(v.type):\n func_kwargs[k] = self._get_output_parameter_path(k)\n else:\n func_kwargs[k] = self._get_output_artifact_path(k)\n elif isinstance(v, InputPath):\n func_kwargs[k] = self._get_input_artifact_path(k)\n\n result = self._func(**func_kwargs)\n self._write_executor_output(result)\n\n\ndef print_op(message: str):\n \"\"\"Prints a message.\"\"\"\n print(message)\n\n\ndef executor_main():\n import argparse\n import json\n\n parser = argparse.ArgumentParser(description='Process some integers.')\n parser.add_argument('--executor_input', type=str)\n parser.add_argument('--function_to_execute', type=str)\n\n args, _ = parser.parse_known_args()\n executor_input = json.loads(args.executor_input)\n function_to_execute = globals()[args.function_to_execute]\n\n executor = Executor(executor_input=executor_input,\n function_to_execute=function_to_execute)\n\n executor.execute()\n\n\nif __name__ == '__main__':\n executor_main()\n" + "\nimport json\nimport inspect\nfrom typing import *\n\n# Copyright 2021 The Kubeflow Authors\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Classes for input/output types in KFP SDK.\n\nThese are only compatible with v2 Pipelines.\n\"\"\"\n\nimport os\nfrom typing import Dict, Generic, List, Optional, Type, TypeVar, Union\n\n_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'\n_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'\n_S3_LOCAL_MOUNT_PREFIX = '/s3/'\n\n\nclass Artifact(object):\n \"\"\"Generic Artifact class.\n\n This class is meant to represent the metadata around an input or output\n machine-learning Artifact. 
Artifacts have URIs, which can either be a location\n on disk (or Cloud storage) or some other resource identifier such as\n an API resource name.\n\n Artifacts carry a `metadata` field, which is a dictionary for storing\n metadata related to this artifact.\n \"\"\"\n TYPE_NAME = 'system.Artifact'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n \"\"\"Initializes the Artifact with the given name, URI and metadata.\"\"\"\n self.uri = uri or ''\n self.name = name or ''\n self.metadata = metadata or {}\n\n @property\n def path(self):\n return self._get_path()\n\n @path.setter\n def path(self, path):\n self._set_path(path)\n\n def _get_path(self) -> Optional[str]:\n if self.uri.startswith('gs://'):\n return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]\n elif self.uri.startswith('minio://'):\n return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]\n elif self.uri.startswith('s3://'):\n return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]\n return None\n\n def _set_path(self, path):\n if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):\n path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):\n path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):\n path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]\n self.uri = path\n\n\nclass Model(Artifact):\n \"\"\"An artifact representing an ML Model.\"\"\"\n TYPE_NAME = 'system.Model'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n @property\n def framework(self) -> str:\n return self._get_framework()\n\n def _get_framework(self) -> str:\n return self.metadata.get('framework', '')\n\n @framework.setter\n def framework(self, framework: str):\n self._set_framework(framework)\n\n def _set_framework(self, framework: str):\n self.metadata['framework'] = framework\n\n\nclass Dataset(Artifact):\n \"\"\"An artifact representing an ML Dataset.\"\"\"\n TYPE_NAME = 'system.Dataset'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Metrics(Artifact):\n \"\"\"Represent a simple base Artifact type to store key-value scalar metrics.\"\"\"\n TYPE_NAME = 'system.Metrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_metric(self, metric: str, value: float):\n \"\"\"Sets a custom scalar metric.\n\n Args:\n metric: Metric key\n value: Value of the metric.\n \"\"\"\n self.metadata[metric] = value\n\n\nclass ClassificationMetrics(Artifact):\n \"\"\"Represents Artifact class to store Classification Metrics.\"\"\"\n TYPE_NAME = 'system.ClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):\n \"\"\"Logs a single data point in the ROC Curve.\n\n Args:\n fpr: False positive rate value of the data point.\n tpr: True positive rate value of the data point.\n threshold: Threshold value for the data point.\n \"\"\"\n\n roc_reading = {\n 'confidenceThreshold': threshold,\n 'recall': tpr,\n 
'falsePositiveRate': fpr\n }\n if 'confidenceMetrics' not in self.metadata.keys():\n self.metadata['confidenceMetrics'] = []\n\n self.metadata['confidenceMetrics'].append(roc_reading)\n\n def log_roc_curve(self, fpr: List[float], tpr: List[float],\n threshold: List[float]):\n \"\"\"Logs an ROC curve.\n\n The list length of fpr, tpr and threshold must be the same.\n\n Args:\n fpr: List of false positive rate values.\n tpr: List of true positive rate values.\n threshold: List of threshold values.\n \"\"\"\n if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(tpr) != len(\n threshold):\n raise ValueError('Length of fpr, tpr and threshold must be the same. '\n 'Got lengths {}, {} and {} respectively.'.format(\n len(fpr), len(tpr), len(threshold)))\n\n for i in range(len(fpr)):\n self.log_roc_data_point(fpr=fpr[i], tpr=tpr[i], threshold=threshold[i])\n\n def set_confusion_matrix_categories(self, categories: List[str]):\n \"\"\"Stores confusion matrix categories.\n\n Args:\n categories: List of strings specifying the categories.\n \"\"\"\n\n self._categories = []\n annotation_specs = []\n for category in categories:\n annotation_spec = {'displayName': category}\n self._categories.append(category)\n annotation_specs.append(annotation_spec)\n\n self._matrix = []\n for row in range(len(self._categories)):\n self._matrix.append({'row': [0] * len(self._categories)})\n\n self._confusion_matrix = {}\n self._confusion_matrix['annotationSpecs'] = annotation_specs\n self._confusion_matrix['rows'] = self._matrix\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_row(self, row_category: str, row: List[float]):\n \"\"\"Logs a confusion matrix row.\n\n Args:\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n\n Raises:\n ValueError: If row_category is not in the list of categories\n set in set_categories call.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if len(row) != len(self._categories):\n raise ValueError('Invalid row. Expected size: {} got: {}'.\\\n format(len(self._categories), len(row)))\n\n self._matrix[self._categories.index(row_category)] = {'row': row}\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_cell(self, row_category: str, col_category: str,\n value: int):\n \"\"\"Logs a cell in the confusion matrix.\n\n Args:\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n\n Raises:\n ValueError: If row_category or col_category is not in the list of\n categories set in set_categories.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if col_category not in self._categories:\n raise ValueError('Invalid category: {} passed. 
Expected one of: {}'.\\\n format(col_category, self._categories))\n\n self._matrix[self._categories.index(row_category)]['row'][\n self._categories.index(col_category)] = value\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix(self, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Logs a confusion matrix.\n\n Args:\n categories: List of the category names.\n matrix: Complete confusion matrix.\n\n Raises:\n ValueError: Length of categories does not match number of rows or columns.\n \"\"\"\n self.set_confusion_matrix_categories(categories)\n\n if len(matrix) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n for index in range(len(categories)):\n if len(matrix[index]) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n self.log_confusion_matrix_row(categories[index], matrix[index])\n\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n\nclass SlicedClassificationMetrics(Artifact):\n \"\"\"Metrics class representing Sliced Classification Metrics.\n\n Similar to ClassificationMetrics, clients using this class are expected to use\n log methods of the class to log metrics with the difference being each log\n method takes a slice to associate the ClassificationMetrics.\n\n \"\"\"\n\n TYPE_NAME = 'system.SlicedClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def _upsert_classification_metrics_for_slice(self, slice: str):\n \"\"\"Upserts the classification metrics instance for a slice.\"\"\"\n if slice not in self._sliced_metrics:\n self._sliced_metrics[slice] = ClassificationMetrics()\n\n def _update_metadata(self, slice: str):\n \"\"\"Updates metadata to adhere to the metrics schema.\"\"\"\n self.metadata = {}\n self.metadata['evaluationSlices'] = []\n for slice in self._sliced_metrics.keys():\n slice_metrics = {\n 'slice': slice,\n 'sliceClassificationMetrics': self._sliced_metrics[slice].metadata\n }\n self.metadata['evaluationSlices'].append(slice_metrics)\n\n def log_roc_reading(self, slice: str, threshold: float, tpr: float,\n fpr: float):\n \"\"\"Logs a single data point in the ROC Curve of a slice.\n\n Args:\n slice: String representing slice label.\n threshold: Threshold value for the data point.\n tpr: True positive rate value of the data point.\n fpr: False positive rate value of the data point.\n \"\"\"\n\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_roc_reading(threshold, tpr, fpr)\n self._update_metadata(slice)\n\n def load_roc_readings(self, slice: str, readings: List[List[float]]):\n \"\"\"Supports bulk loading ROC Curve readings for a slice.\n\n Args:\n slice: String representing slice label.\n readings: A 2-D list providing ROC Curve data points.\n The expected order of the data points is: threshold,\n true_positive_rate, false_positive_rate.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].load_roc_readings(readings)\n self._update_metadata(slice)\n\n def set_confusion_matrix_categories(self, slice: str, categories: List[str]):\n \"\"\"Stores confusion matrix categories for a slice.\n\n Categories are stored in the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n categories: 
List of strings specifying the categories.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].set_confusion_matrix_categories(categories)\n self._update_metadata(slice)\n\n def log_confusion_matrix_row(self, slice: str, row_category: str,\n row: List[int]):\n \"\"\"Logs a confusion matrix row for a slice.\n\n Row is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)\n self._update_metadata(slice)\n\n def log_confusion_matrix_cell(self, slice: str, row_category: str,\n col_category: str, value: int):\n \"\"\"Logs a confusion matrix cell for a slice..\n\n Cell is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(\n row_category, col_category, value)\n self._update_metadata(slice)\n\n def load_confusion_matrix(self, slice: str, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Supports bulk loading the whole confusion matrix for a slice.\n\n Args:\n slice: String representing slice label.\n categories: List of the category names.\n matrix: Complete confusion matrix.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(categories, matrix)\n self._update_metadata(slice)\n\n\nclass HTML(Artifact):\n \"\"\"An artifact representing an HTML file.\"\"\"\n TYPE_NAME = 'system.HTML'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Markdown(Artifact):\n \"\"\"An artifact representing an Markdown file.\"\"\"\n TYPE_NAME = 'system.Markdown'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nT = TypeVar('T')\n\n\nclass InputAnnotation():\n \"\"\"Marker type for input artifacts.\"\"\"\n pass\n\n\nclass OutputAnnotation():\n \"\"\"Marker type for output artifacts.\"\"\"\n pass\n\n\n# TODO: Use typing.Annotated instead of this hack.\n# With typing.Annotated (Python 3.9+ or typing_extensions package), the\n# following would look like:\n# Input = typing.Annotated[T, InputAnnotation]\n# Output = typing.Annotated[T, OutputAnnotation]\n\n# Input represents an Input artifact of type T.\nInput = Union[T, InputAnnotation]\n\n# Output represents an Output artifact of type T.\nOutput = Union[T, OutputAnnotation]\n\n\ndef is_artifact_annotation(typ) -> bool:\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [\n InputAnnotation, OutputAnnotation\n ]\n\n if not hasattr(typ, '__origin__'):\n return False\n\n if typ.__origin__ != Union and type(typ.__origin__) != type(Union):\n return False\n\n if not hasattr(typ, '__args__') or len(typ.__args__) != 2:\n return 
False\n\n if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:\n return False\n\n return True\n\n\ndef is_input_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Input[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation\n\n return typ.__args__[1] == InputAnnotation\n\n\ndef is_output_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Output[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation\n\n return typ.__args__[1] == OutputAnnotation\n\n\ndef get_io_artifact_class(typ):\n if not is_artifact_annotation(typ):\n return None\n if typ == Input or typ == Output:\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[1]\n\n return typ.__args__[0]\n\n\ndef get_io_artifact_annotation(typ):\n if not is_artifact_annotation(typ):\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[2]\n\n return typ.__args__[1]\n\n\n_SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = {\n x.TYPE_NAME: x\n for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]\n}\n\n\ndef create_runtime_artifact(runtime_artifact: Dict) -> Artifact:\n \"\"\"Creates an Artifact instance from the specified RuntimeArtifact.\n\n Args:\n runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.\n \"\"\"\n schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')\n\n artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)\n if not artifact_type:\n artifact_type = Artifact\n return artifact_type(\n uri=runtime_artifact.get('uri', ''),\n name=runtime_artifact.get('name', ''),\n metadata=runtime_artifact.get('metadata', {}),\n )\n\nclass InputPath:\n '''When creating component from function, :class:`.InputPath` should be used as function parameter annotation to tell the system to pass the *data file path* to the function instead of passing the actual data.'''\n def __init__(self, type=None):\n self.type = type\n\nclass OutputPath:\n '''When creating component from function, :class:`.OutputPath` should be used as function parameter annotation to tell the system that the function wants to output data by writing it into a file with the given path instead of returning the data from the function.'''\n def __init__(self, type=None):\n self.type = type\n\nclass Executor():\n \"\"\"Executor executes v2-based Python function components.\"\"\"\n\n def __init__(self, executor_input: Dict, function_to_execute: Callable):\n self._func = function_to_execute\n self._input = executor_input\n self._input_artifacts: Dict[str, Artifact] = {}\n self._output_artifacts: Dict[str, Artifact] = {}\n\n for name, artifacts in self._input.get('inputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._input_artifacts[name] = self._make_input_artifact(\n artifacts_list[0])\n\n for name, artifacts in self._input.get('outputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._output_artifacts[name] = self._make_output_artifact(\n artifacts_list[0])\n\n self._return_annotation = 
inspect.signature(self._func).return_annotation\n self._executor_output = {}\n\n @classmethod\n def _make_input_artifact(cls, runtime_artifact: Dict):\n return create_runtime_artifact(runtime_artifact)\n\n @classmethod\n def _make_output_artifact(cls, runtime_artifact: Dict):\n import os\n artifact = create_runtime_artifact(runtime_artifact)\n os.makedirs(os.path.dirname(artifact.path), exist_ok=True)\n return artifact\n\n def _get_input_artifact(self, name: str):\n return self._input_artifacts.get(name)\n\n def _get_output_artifact(self, name: str):\n return self._output_artifacts.get(name)\n\n def _get_input_parameter_value(self, parameter_name: str, parameter_type: Any):\n parameter = self._input.get('inputs', {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n if parameter.get('stringValue'):\n if parameter_type == str:\n return parameter['stringValue']\n elif parameter_type == bool:\n # Use `.lower()` so it can also handle 'True' and 'False' (resulted from\n # `str(True)` and `str(False)`, respectively.\n return json.loads(parameter['stringValue'].lower())\n else:\n return json.loads(parameter['stringValue'])\n elif parameter.get('intValue'):\n return int(parameter['intValue'])\n elif parameter.get('doubleValue'):\n return float(parameter['doubleValue'])\n\n def _get_output_parameter_path(self, parameter_name: str):\n parameter_name = self._maybe_strip_path_suffix(parameter_name)\n parameter = self._input.get('outputs',\n {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n import os\n path = parameter.get('outputFile', None)\n if path:\n os.makedirs(os.path.dirname(path), exist_ok=True)\n return path\n\n def _get_output_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n output_artifact = self._output_artifacts.get(artifact_name)\n if not output_artifact:\n raise ValueError(\n 'Failed to get output artifact path for artifact name {}'.format(\n artifact_name))\n return output_artifact.path\n\n def _get_input_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n input_artifact = self._input_artifacts.get(artifact_name)\n if not input_artifact:\n raise ValueError(\n 'Failed to get input artifact path for artifact name {}'.format(\n artifact_name))\n return input_artifact.path\n\n def _write_output_parameter_value(self, name: str,\n value: Union[str, int, float, bool, dict,\n list, Dict, List]):\n if type(value) == str:\n output = {'stringValue': value}\n elif type(value) == int:\n output = {'intValue': value}\n elif type(value) == float:\n output = {'doubleValue': value}\n else:\n # For bool, list, dict, List, Dict, json serialize the value.\n output = {'stringValue': json.dumps(value)}\n\n if not self._executor_output.get('parameters'):\n self._executor_output['parameters'] = {}\n\n self._executor_output['parameters'][name] = output\n\n def _write_output_artifact_payload(self, name: str, value: Any):\n path = self._get_output_artifact_path(name)\n with open(path, 'w') as f:\n f.write(str(value))\n\n # TODO: extract to a util\n @classmethod\n def _get_short_type_name(cls, type_name: str) -> str:\n \"\"\"Extracts the short form type name.\n\n This method is used for looking up serializer for a given type.\n\n For example:\n typing.List -> List\n typing.List[int] -> List\n typing.Dict[str, str] -> Dict\n List -> List\n str -> str\n\n Args:\n type_name: The original type name.\n\n Returns:\n The 
short form type name or the original name if pattern doesn't match.\n    \"\"\"\n    import re\n    match = re.match('(typing\\.)?(?P<type>\\w+)(?:\\[.+\\])?', type_name)\n    if match:\n      return match.group('type')\n    else:\n      return type_name\n\n  # TODO: merge with type_utils.is_parameter_type\n  @classmethod\n  def _is_parameter(cls, annotation: Any) -> bool:\n    if type(annotation) == type:\n      return annotation in [str, int, float, bool, dict, list]\n\n    # Annotation could be, for instance `typing.Dict[str, str]`, etc.\n    return cls._get_short_type_name(str(annotation)) in ['Dict', 'List']\n\n  @classmethod\n  def _is_artifact(cls, annotation: Any) -> bool:\n    if type(annotation) == type:\n      return issubclass(annotation, Artifact)\n    return False\n\n  @classmethod\n  def _is_named_tuple(cls, annotation: Any) -> bool:\n    if type(annotation) == type:\n      return issubclass(annotation, tuple) and hasattr(\n          annotation, '_fields') and hasattr(annotation, '__annotations__')\n    return False\n\n  def _handle_single_return_value(self, output_name: str, annotation_type: Any,\n                                  return_value: Any):\n    if self._is_parameter(annotation_type):\n      if type(return_value) != annotation_type:\n        raise ValueError(\n            'Function `{}` returned value of type {}; want type {}'.format(\n                self._func.__name__, type(return_value), annotation_type))\n      self._write_output_parameter_value(output_name, return_value)\n    elif self._is_artifact(annotation_type):\n      self._write_output_artifact_payload(output_name, return_value)\n    else:\n      raise RuntimeError(\n          'Unknown return type: {}. Must be one of `str`, `int`, `float`, or a'\n          ' subclass of `Artifact`'.format(annotation_type))\n\n  def _write_executor_output(self, func_output: Optional[Any] = None):\n    if self._output_artifacts:\n      self._executor_output['artifacts'] = {}\n\n    for name, artifact in self._output_artifacts.items():\n      runtime_artifact = {\n          'name': artifact.name,\n          'uri': artifact.uri,\n          'metadata': artifact.metadata,\n      }\n      artifacts_list = {'artifacts': [runtime_artifact]}\n\n      self._executor_output['artifacts'][name] = artifacts_list\n\n    if func_output is not None:\n      if self._is_parameter(self._return_annotation) or self._is_artifact(\n          self._return_annotation):\n        # Note: single output is named `Output` in component.yaml.\n        self._handle_single_return_value('Output', self._return_annotation,\n                                         func_output)\n      elif self._is_named_tuple(self._return_annotation):\n        if len(self._return_annotation._fields) != len(func_output):\n          raise RuntimeError(\n              'Expected {} return values from function `{}`, got {}'.format(\n                  len(self._return_annotation._fields), self._func.__name__,\n                  len(func_output)))\n        for i in range(len(self._return_annotation._fields)):\n          field = self._return_annotation._fields[i]\n          field_type = self._return_annotation.__annotations__[field]\n          if type(func_output) == tuple:\n            field_value = func_output[i]\n          else:\n            field_value = getattr(func_output, field)\n          self._handle_single_return_value(field, field_type, field_value)\n      else:\n        raise RuntimeError(\n            'Unknown return type: {}. 
Must be one of `str`, `int`, `float`, a'\n            ' subclass of `Artifact`, or a NamedTuple collection of these types.'\n            .format(self._return_annotation))\n\n    import os\n    os.makedirs(\n        os.path.dirname(self._input['outputs']['outputFile']), exist_ok=True)\n    with open(self._input['outputs']['outputFile'], 'w') as f:\n      f.write(json.dumps(self._executor_output))\n\n  def _maybe_strip_path_suffix(self, name) -> str:\n    if name.endswith('_path'):\n      name = name[0:-len('_path')]\n    if name.endswith('_file'):\n      name = name[0:-len('_file')]\n    return name\n\n  def execute(self):\n    annotations = inspect.getfullargspec(self._func).annotations\n\n    # Function arguments.\n    func_kwargs = {}\n\n    for k, v in annotations.items():\n      if k == 'return':\n        continue\n\n      if self._is_parameter(v):\n        func_kwargs[k] = self._get_input_parameter_value(k, v)\n\n      if is_artifact_annotation(v):\n        if is_input_artifact(v):\n          func_kwargs[k] = self._get_input_artifact(k)\n        if is_output_artifact(v):\n          func_kwargs[k] = self._get_output_artifact(k)\n\n      elif isinstance(v, OutputPath):\n        if self._is_parameter(v.type):\n          func_kwargs[k] = self._get_output_parameter_path(k)\n        else:\n          func_kwargs[k] = self._get_output_artifact_path(k)\n      elif isinstance(v, InputPath):\n        func_kwargs[k] = self._get_input_artifact_path(k)\n\n    result = self._func(**func_kwargs)\n    self._write_executor_output(result)\n\n\ndef print_op(message: str):\n  \"\"\"Prints a message.\"\"\"\n  print(message)\n\n\ndef executor_main():\n  import argparse\n  import json\n\n  parser = argparse.ArgumentParser(description='Process some integers.')\n  parser.add_argument('--executor_input', type=str)\n  parser.add_argument('--function_to_execute', type=str)\n\n  args, _ = parser.parse_known_args()\n  executor_input = json.loads(args.executor_input)\n  function_to_execute = globals()[args.function_to_execute]\n\n  executor = Executor(executor_input=executor_input,\n                      function_to_execute=function_to_execute)\n\n  executor.execute()\n\n\nif __name__ == '__main__':\n  executor_main()\n"
              ],
              "image": "python:3.7"
            }
@@ -203,7 +203,7 @@
       }
     },
     "schemaVersion": "2.0.0",
-    "sdkVersion": "kfp-1.6.4"
+    "sdkVersion": "kfp-1.6.6"
   },
   "runtimeConfig": {
     "gcsOutputDirectory": "dummy_root",
diff --git a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_if_placeholder.json b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_if_placeholder.json
index a4a735f488e..db47f10674b 100644
--- a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_if_placeholder.json
+++ b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_if_placeholder.json
@@ -76,7 +76,7 @@
       }
     },
     "schemaVersion": "2.0.0",
-    "sdkVersion": "kfp-1.6.4"
+    "sdkVersion": "kfp-1.6.6"
   },
   "runtimeConfig": {
     "gcsOutputDirectory": "dummy_root"
diff --git a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_importer.json b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_importer.json
index 38e945e3424..ddc86636d3b 100644
--- a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_importer.json
+++ b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_importer.json
@@ -192,7 +192,7 @@
                 "sh",
                 "-ec",
                 "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n",
-                "\nimport json\nimport inspect\nfrom typing import *\n\n# Copyright 2021 The Kubeflow Authors\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# 
http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Classes for input/output types in KFP SDK.\n\nThese are only compatible with v2 Pipelines.\n\"\"\"\n\nimport os\nfrom typing import Dict, Generic, List, Optional, Type, TypeVar, Union\n\n\n_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'\n_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'\n_S3_LOCAL_MOUNT_PREFIX = '/s3/'\n\n\nclass Artifact(object):\n \"\"\"Generic Artifact class.\n\n This class is meant to represent the metadata around an input or output\n machine-learning Artifact. Artifacts have URIs, which can either be a location\n on disk (or Cloud storage) or some other resource identifier such as\n an API resource name.\n\n Artifacts carry a `metadata` field, which is a dictionary for storing\n metadata related to this artifact.\n \"\"\"\n TYPE_NAME = 'system.Artifact'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n \"\"\"Initializes the Artifact with the given name, URI and metadata.\"\"\"\n self.uri = uri or ''\n self.name = name or ''\n self.metadata = metadata or {}\n\n @property\n def path(self):\n return self._get_path()\n\n @path.setter\n def path(self, path):\n self._set_path(path)\n\n def _get_path(self) -> Optional[str]:\n if self.uri.startswith('gs://'):\n return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]\n elif self.uri.startswith('minio://'):\n return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]\n elif self.uri.startswith('s3://'):\n return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]\n return None\n\n def _set_path(self, path):\n if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):\n path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):\n path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):\n path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]\n self.uri = path\n\n\nclass Model(Artifact):\n \"\"\"An artifact representing an ML Model.\"\"\"\n TYPE_NAME = 'system.Model'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n @property\n def framework(self) -> str:\n return self._get_framework()\n\n def _get_framework(self) -> str:\n return self.metadata.get('framework', '')\n\n @framework.setter\n def framework(self, framework: str):\n self._set_framework(framework)\n\n def _set_framework(self, framework: str):\n self.metadata['framework'] = framework\n\n\nclass Dataset(Artifact):\n \"\"\"An artifact representing an ML Dataset.\"\"\"\n TYPE_NAME = 'system.Dataset'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Metrics(Artifact):\n \"\"\"Represent a simple base Artifact type to store key-value scalar metrics.\"\"\"\n TYPE_NAME = 'system.Metrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_metric(self, metric: str, value: float):\n \"\"\"Sets a 
custom scalar metric.\n\n Args:\n metric: Metric key\n value: Value of the metric.\n \"\"\"\n self.metadata[metric] = value\n\n\nclass ClassificationMetrics(Artifact):\n \"\"\"Represents Artifact class to store Classification Metrics.\"\"\"\n TYPE_NAME = 'system.ClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):\n \"\"\"Logs a single data point in the ROC Curve.\n\n Args:\n fpr: False positive rate value of the data point.\n tpr: True positive rate value of the data point.\n threshold: Threshold value for the data point.\n \"\"\"\n\n roc_reading = {\n 'confidenceThreshold': threshold,\n 'recall': tpr,\n 'falsePositiveRate': fpr\n }\n if 'confidenceMetrics' not in self.metadata.keys():\n self.metadata['confidenceMetrics'] = []\n\n self.metadata['confidenceMetrics'].append(roc_reading)\n\n def log_roc_curve(self, fpr: List[float], tpr: List[float],\n threshold: List[float]):\n \"\"\"Logs an ROC curve.\n\n The list length of fpr, tpr and threshold must be the same.\n\n Args:\n fpr: List of false positive rate values.\n tpr: List of true positive rate values.\n threshold: List of threshold values.\n \"\"\"\n if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(tpr) != len(\n threshold):\n raise ValueError('Length of fpr, tpr and threshold must be the same. '\n 'Got lengths {}, {} and {} respectively.'.format(\n len(fpr), len(tpr), len(threshold)))\n\n for i in range(len(fpr)):\n self.log_roc_data_point(fpr=fpr[i], tpr=tpr[i], threshold=threshold[i])\n\n def set_confusion_matrix_categories(self, categories: List[str]):\n \"\"\"Stores confusion matrix categories.\n\n Args:\n categories: List of strings specifying the categories.\n \"\"\"\n\n self._categories = []\n annotation_specs = []\n for category in categories:\n annotation_spec = {'displayName': category}\n self._categories.append(category)\n annotation_specs.append(annotation_spec)\n\n self._matrix = []\n for row in range(len(self._categories)):\n self._matrix.append({'row': [0] * len(self._categories)})\n\n self._confusion_matrix = {}\n self._confusion_matrix['annotationSpecs'] = annotation_specs\n self._confusion_matrix['rows'] = self._matrix\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_row(self, row_category: str, row: List[float]):\n \"\"\"Logs a confusion matrix row.\n\n Args:\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n\n Raises:\n ValueError: If row_category is not in the list of categories\n set in set_categories call.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if len(row) != len(self._categories):\n raise ValueError('Invalid row. 
Expected size: {} got: {}'.\\\n format(len(self._categories), len(row)))\n\n self._matrix[self._categories.index(row_category)] = {'row': row}\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_cell(self, row_category: str, col_category: str,\n value: int):\n \"\"\"Logs a cell in the confusion matrix.\n\n Args:\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n\n Raises:\n ValueError: If row_category or col_category is not in the list of\n categories set in set_categories.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if col_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n self._matrix[self._categories.index(row_category)]['row'][\n self._categories.index(col_category)] = value\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix(self, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Logs a confusion matrix.\n\n Args:\n categories: List of the category names.\n matrix: Complete confusion matrix.\n\n Raises:\n ValueError: Length of categories does not match number of rows or columns.\n \"\"\"\n self.set_confusion_matrix_categories(categories)\n\n if len(matrix) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n for index in range(len(categories)):\n if len(matrix[index]) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n self.log_confusion_matrix_row(categories[index], matrix[index])\n\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n\nclass SlicedClassificationMetrics(Artifact):\n \"\"\"Metrics class representing Sliced Classification Metrics.\n\n Similar to ClassificationMetrics clients using this class are expected to use\n log methods of the class to log metrics with the difference being each log\n method takes a slice to associate the ClassificationMetrics.\n\n \"\"\"\n\n TYPE_NAME = 'system.SlicedClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def _upsert_classification_metrics_for_slice(self, slice: str):\n \"\"\"Upserts the classification metrics instance for a slice.\"\"\"\n if slice not in self._sliced_metrics:\n self._sliced_metrics[slice] = ClassificationMetrics()\n\n def _update_metadata(self, slice: str):\n \"\"\"Updates metadata to adhere to the metrics schema.\"\"\"\n self.metadata = {}\n self.metadata['evaluationSlices'] = []\n for slice in self._sliced_metrics.keys():\n slice_metrics = {\n 'slice': slice,\n 'sliceClassificationMetrics': self._sliced_metrics[slice].metadata\n }\n self.metadata['evaluationSlices'].append(slice_metrics)\n\n def log_roc_reading(self, slice: str, threshold: float, tpr: float,\n fpr: float):\n \"\"\"Logs a single data point in the ROC Curve of a slice.\n\n Args:\n slice: String representing slice label.\n threshold: Thresold value for the data point.\n tpr: True positive rate value of the data point.\n fpr: False positive rate value of the data point.\n \"\"\"\n\n 
self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_roc_reading(threshold, tpr, fpr)\n self._update_metadata(slice)\n\n def load_roc_readings(self, slice: str, readings: List[List[float]]):\n \"\"\"Supports bulk loading ROC Curve readings for a slice.\n\n Args:\n slice: String representing slice label.\n readings: A 2-D list providing ROC Curve data points.\n The expected order of the data points is: threshold,\n true_positive_rate, false_positive_rate.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].load_roc_readings(readings)\n self._update_metadata(slice)\n\n def set_confusion_matrix_categories(self, slice: str, categories: List[str]):\n \"\"\"Stores confusion matrix categories for a slice..\n\n Categories are stored in the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n categories: List of strings specifying the categories.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].set_confusion_matrix_categories(categories)\n self._update_metadata(slice)\n\n def log_confusion_matrix_row(self, slice: str, row_category: str,\n row: List[int]):\n \"\"\"Logs a confusion matrix row for a slice.\n\n Row is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)\n self._update_metadata(slice)\n\n def log_confusion_matrix_cell(self, slice: str, row_category: str,\n col_category: str, value: int):\n \"\"\"Logs a confusion matrix cell for a slice..\n\n Cell is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(\n row_category, col_category, value)\n self._update_metadata(slice)\n\n def load_confusion_matrix(self, slice: str, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Supports bulk loading the whole confusion matrix for a slice.\n\n Args:\n slice: String representing slice label.\n categories: List of the category names.\n matrix: Complete confusion matrix.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(categories, matrix)\n self._update_metadata(slice)\n\n\nT = TypeVar('T')\n\n\nclass InputAnnotation():\n \"\"\"Marker type for input artifacts.\"\"\"\n pass\n\n\n\nclass OutputAnnotation():\n \"\"\"Marker type for output artifacts.\"\"\"\n pass\n\n\n# TODO: Use typing.Annotated instead of this hack.\n# With typing.Annotated (Python 3.9+ or typing_extensions package), the\n# following would look like:\n# Input = typing.Annotated[T, InputAnnotation]\n# Output = typing.Annotated[T, OutputAnnotation]\n\n\n# Input represents an Input artifact of type T.\nInput = Union[T, InputAnnotation]\n\n# Output represents an Output artifact of type T.\nOutput = Union[T, OutputAnnotation]\n\n\ndef is_artifact_annotation(typ) -> bool:\n if hasattr(typ, '_subs_tree'): # 
Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [InputAnnotation, OutputAnnotation]\n\n if not hasattr(typ, '__origin__'):\n return False\n\n\n if typ.__origin__ != Union and type(typ.__origin__) != type(Union):\n return False\n\n\n if not hasattr(typ, '__args__') or len(typ.__args__) != 2:\n return False\n\n if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:\n return False\n\n return True\n\ndef is_input_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Input[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation\n\n return typ.__args__[1] == InputAnnotation\n\ndef is_output_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Output[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation\n\n return typ.__args__[1] == OutputAnnotation\n\ndef get_io_artifact_class(typ):\n if not is_artifact_annotation(typ):\n return None\n if typ == Input or typ == Output:\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[1]\n\n return typ.__args__[0]\n\ndef get_io_artifact_annotation(typ):\n if not is_artifact_annotation(typ):\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[2]\n\n return typ.__args__[1]\n\n\n\n_SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = {\n x.TYPE_NAME: x\n for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]\n}\n\n\ndef create_runtime_artifact(runtime_artifact: Dict) -> Artifact:\n \"\"\"Creates an Artifact instance from the specified RuntimeArtifact.\n\n Args:\n runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.\n \"\"\"\n schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')\n\n artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)\n if not artifact_type:\n artifact_type = Artifact\n return artifact_type(\n uri=runtime_artifact.get('uri', ''),\n name=runtime_artifact.get('name', ''),\n metadata=runtime_artifact.get('metadata', {}),\n )\n\nclass InputPath:\n '''When creating component from function, :class:`.InputPath` should be used as function parameter annotation to tell the system to pass the *data file path* to the function instead of passing the actual data.'''\n def __init__(self, type=None):\n self.type = type\n\nclass OutputPath:\n '''When creating component from function, :class:`.OutputPath` should be used as function parameter annotation to tell the system that the function wants to output data by writing it into a file with the given path instead of returning the data from the function.'''\n def __init__(self, type=None):\n self.type = type\n\nclass Executor():\n \"\"\"Executor executes v2-based Python function components.\"\"\"\n\n def __init__(self, executor_input: Dict, function_to_execute: Callable):\n self._func = function_to_execute\n self._input = executor_input\n self._input_artifacts: Dict[str, Artifact] = {}\n self._output_artifacts: Dict[str, Artifact] = {}\n\n for name, artifacts in self._input.get('inputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if 
artifacts_list:\n self._input_artifacts[name] = self._make_input_artifact(\n artifacts_list[0])\n\n for name, artifacts in self._input.get('outputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._output_artifacts[name] = self._make_output_artifact(\n artifacts_list[0])\n\n self._return_annotation = inspect.signature(self._func).return_annotation\n self._executor_output = {}\n\n @classmethod\n def _make_input_artifact(cls, runtime_artifact: Dict):\n return create_runtime_artifact(runtime_artifact)\n\n @classmethod\n def _make_output_artifact(cls, runtime_artifact: Dict):\n import os\n artifact = create_runtime_artifact(runtime_artifact)\n os.makedirs(os.path.dirname(artifact.path), exist_ok=True)\n return artifact\n\n def _get_input_artifact(self, name: str):\n return self._input_artifacts.get(name)\n\n def _get_output_artifact(self, name: str):\n return self._output_artifacts.get(name)\n\n def _get_input_parameter_value(self, parameter_name: str, parameter_type: Any):\n parameter = self._input.get('inputs', {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n if parameter.get('stringValue'):\n if parameter_type == str:\n return parameter['stringValue']\n elif parameter_type == bool:\n # Use `.lower()` so it can also handle 'True' and 'False' (resulted from\n # `str(True)` and `str(False)`, respectively.\n return json.loads(parameter['stringValue'].lower())\n else:\n return json.loads(parameter['stringValue'])\n elif parameter.get('intValue'):\n return int(parameter['intValue'])\n elif parameter.get('doubleValue'):\n return float(parameter['doubleValue'])\n\n def _get_output_parameter_path(self, parameter_name: str):\n parameter_name = self._maybe_strip_path_suffix(parameter_name)\n parameter = self._input.get('outputs',\n {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n import os\n path = parameter.get('outputFile', None)\n if path:\n os.makedirs(os.path.dirname(path), exist_ok=True)\n return path\n\n def _get_output_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n output_artifact = self._output_artifacts.get(artifact_name)\n if not output_artifact:\n raise ValueError(\n 'Failed to get output artifact path for artifact name {}'.format(\n artifact_name))\n return output_artifact.path\n\n def _get_input_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n input_artifact = self._input_artifacts.get(artifact_name)\n if not input_artifact:\n raise ValueError(\n 'Failed to get input artifact path for artifact name {}'.format(\n artifact_name))\n return input_artifact.path\n\n def _write_output_parameter_value(self, name: str,\n value: Union[str, int, float, bool, dict,\n list, Dict, List]):\n if type(value) == str:\n output = {'stringValue': value}\n elif type(value) == int:\n output = {'intValue': value}\n elif type(value) == float:\n output = {'doubleValue': value}\n else:\n # For bool, list, dict, List, Dict, json serialize the value.\n output = {'stringValue': json.dumps(value)}\n\n if not self._executor_output.get('parameters'):\n self._executor_output['parameters'] = {}\n\n self._executor_output['parameters'][name] = output\n\n def _write_output_artifact_payload(self, name: str, value: Any):\n path = self._get_output_artifact_path(name)\n with open(path, 'w') as f:\n f.write(str(value))\n\n # TODO: extract to a util\n 
@classmethod\n  def _get_short_type_name(cls, type_name: str) -> str:\n    \"\"\"Extracts the short form type name.\n\n    This method is used for looking up serializer for a given type.\n\n    For example:\n      typing.List -> List\n      typing.List[int] -> List\n      typing.Dict[str, str] -> Dict\n      List -> List\n      str -> str\n\n    Args:\n      type_name: The original type name.\n\n    Returns:\n      The short form type name or the original name if pattern doesn't match.\n    \"\"\"\n    import re\n    match = re.match('(typing\\.)?(?P<type>\\w+)(?:\\[.+\\])?', type_name)\n    if match:\n      return match.group('type')\n    else:\n      return type_name\n\n  # TODO: merge with type_utils.is_parameter_type\n  @classmethod\n  def _is_parameter(cls, annotation: Any) -> bool:\n    if type(annotation) == type:\n      return annotation in [str, int, float, bool, dict, list]\n\n    # Annotation could be, for instance `typing.Dict[str, str]`, etc.\n    return cls._get_short_type_name(str(annotation)) in ['Dict', 'List']\n\n  @classmethod\n  def _is_artifact(cls, annotation: Any) -> bool:\n    if type(annotation) == type:\n      return issubclass(annotation, Artifact)\n    return False\n\n  @classmethod\n  def _is_named_tuple(cls, annotation: Any) -> bool:\n    if type(annotation) == type:\n      return issubclass(annotation, tuple) and hasattr(\n          annotation, '_fields') and hasattr(annotation, '__annotations__')\n    return False\n\n  def _handle_single_return_value(self, output_name: str, annotation_type: Any,\n                                  return_value: Any):\n    if self._is_parameter(annotation_type):\n      if type(return_value) != annotation_type:\n        raise ValueError(\n            'Function `{}` returned value of type {}; want type {}'.format(\n                self._func.__name__, type(return_value), annotation_type))\n      self._write_output_parameter_value(output_name, return_value)\n    elif self._is_artifact(annotation_type):\n      self._write_output_artifact_payload(output_name, return_value)\n    else:\n      raise RuntimeError(\n          'Unknown return type: {}. Must be one of `str`, `int`, `float`, or a'\n          ' subclass of `Artifact`'.format(annotation_type))\n\n  def _write_executor_output(self, func_output: Optional[Any] = None):\n    if self._output_artifacts:\n      self._executor_output['artifacts'] = {}\n\n    for name, artifact in self._output_artifacts.items():\n      runtime_artifact = {\n          'name': artifact.name,\n          'uri': artifact.uri,\n          'metadata': artifact.metadata,\n      }\n      artifacts_list = {'artifacts': [runtime_artifact]}\n\n      self._executor_output['artifacts'][name] = artifacts_list\n\n    if func_output is not None:\n      if self._is_parameter(self._return_annotation) or self._is_artifact(\n          self._return_annotation):\n        # Note: single output is named `Output` in component.yaml.\n        self._handle_single_return_value('Output', self._return_annotation,\n                                         func_output)\n      elif self._is_named_tuple(self._return_annotation):\n        if len(self._return_annotation._fields) != len(func_output):\n          raise RuntimeError(\n              'Expected {} return values from function `{}`, got {}'.format(\n                  len(self._return_annotation._fields), self._func.__name__,\n                  len(func_output)))\n        for i in range(len(self._return_annotation._fields)):\n          field = self._return_annotation._fields[i]\n          field_type = self._return_annotation.__annotations__[field]\n          if type(func_output) == tuple:\n            field_value = func_output[i]\n          else:\n            field_value = getattr(func_output, field)\n          self._handle_single_return_value(field, field_type, field_value)\n      else:\n        raise RuntimeError(\n            'Unknown return type: {}. 
Must be one of `str`, `int`, `float`, a'\n            ' subclass of `Artifact`, or a NamedTuple collection of these types.'\n            .format(self._return_annotation))\n\n    import os\n    os.makedirs(\n        os.path.dirname(self._input['outputs']['outputFile']), exist_ok=True)\n    with open(self._input['outputs']['outputFile'], 'w') as f:\n      f.write(json.dumps(self._executor_output))\n\n  def _maybe_strip_path_suffix(self, name) -> str:\n    if name.endswith('_path'):\n      name = name[0:-len('_path')]\n    if name.endswith('_file'):\n      name = name[0:-len('_file')]\n    return name\n\n  def execute(self):\n    annotations = inspect.getfullargspec(self._func).annotations\n\n    # Function arguments.\n    func_kwargs = {}\n\n    for k, v in annotations.items():\n      if k == 'return':\n        continue\n\n      if self._is_parameter(v):\n        func_kwargs[k] = self._get_input_parameter_value(k, v)\n\n      if is_artifact_annotation(v):\n        if is_input_artifact(v):\n          func_kwargs[k] = self._get_input_artifact(k)\n        if is_output_artifact(v):\n          func_kwargs[k] = self._get_output_artifact(k)\n\n      elif isinstance(v, OutputPath):\n        if self._is_parameter(v.type):\n          func_kwargs[k] = self._get_output_parameter_path(k)\n        else:\n          func_kwargs[k] = self._get_output_artifact_path(k)\n      elif isinstance(v, InputPath):\n        func_kwargs[k] = self._get_input_artifact_path(k)\n\n    result = self._func(**func_kwargs)\n    self._write_executor_output(result)\n\n\ndef train(\n    dataset: Input[Dataset]\n) -> NamedTuple('Outputs', [\n    ('scalar', str),\n    ('model', Model),\n]):\n  \"\"\"Dummy Training step.\"\"\"\n  with open(dataset.path, 'r') as f:\n    data = f.read()\n  print('Dataset:', data)\n\n  scalar = '123'\n  model = 'My model trained using data: {}'.format(data)\n\n  from collections import namedtuple\n  output = namedtuple('Outputs', ['scalar', 'model'])\n  return output(scalar, model)\n\n\ndef executor_main():\n  import argparse\n  import json\n\n  parser = argparse.ArgumentParser(description='Process some integers.')\n  parser.add_argument('--executor_input', type=str)\n  parser.add_argument('--function_to_execute', type=str)\n\n  args, _ = parser.parse_known_args()\n  executor_input = json.loads(args.executor_input)\n  function_to_execute = globals()[args.function_to_execute]\n\n  executor = Executor(executor_input=executor_input,\n                      function_to_execute=function_to_execute)\n\n  executor.execute()\n\n\nif __name__ == '__main__':\n  executor_main()\n"
+                "\nimport json\nimport inspect\nfrom typing import *\n\n# Copyright 2021 The Kubeflow Authors\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#      http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Classes for input/output types in KFP SDK.\n\nThese are only compatible with v2 Pipelines.\n\"\"\"\n\nimport os\nfrom typing import Dict, Generic, List, Optional, Type, TypeVar, Union\n\n_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'\n_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'\n_S3_LOCAL_MOUNT_PREFIX = '/s3/'\n\n\nclass Artifact(object):\n  \"\"\"Generic Artifact class.\n\n  This class is meant to represent the metadata around an input or output\n  machine-learning Artifact. 
Artifacts have URIs, which can either be a location\n on disk (or Cloud storage) or some other resource identifier such as\n an API resource name.\n\n Artifacts carry a `metadata` field, which is a dictionary for storing\n metadata related to this artifact.\n \"\"\"\n TYPE_NAME = 'system.Artifact'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n \"\"\"Initializes the Artifact with the given name, URI and metadata.\"\"\"\n self.uri = uri or ''\n self.name = name or ''\n self.metadata = metadata or {}\n\n @property\n def path(self):\n return self._get_path()\n\n @path.setter\n def path(self, path):\n self._set_path(path)\n\n def _get_path(self) -> Optional[str]:\n if self.uri.startswith('gs://'):\n return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]\n elif self.uri.startswith('minio://'):\n return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]\n elif self.uri.startswith('s3://'):\n return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]\n return None\n\n def _set_path(self, path):\n if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):\n path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):\n path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):\n path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]\n self.uri = path\n\n\nclass Model(Artifact):\n \"\"\"An artifact representing an ML Model.\"\"\"\n TYPE_NAME = 'system.Model'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n @property\n def framework(self) -> str:\n return self._get_framework()\n\n def _get_framework(self) -> str:\n return self.metadata.get('framework', '')\n\n @framework.setter\n def framework(self, framework: str):\n self._set_framework(framework)\n\n def _set_framework(self, framework: str):\n self.metadata['framework'] = framework\n\n\nclass Dataset(Artifact):\n \"\"\"An artifact representing an ML Dataset.\"\"\"\n TYPE_NAME = 'system.Dataset'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Metrics(Artifact):\n \"\"\"Represent a simple base Artifact type to store key-value scalar metrics.\"\"\"\n TYPE_NAME = 'system.Metrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_metric(self, metric: str, value: float):\n \"\"\"Sets a custom scalar metric.\n\n Args:\n metric: Metric key\n value: Value of the metric.\n \"\"\"\n self.metadata[metric] = value\n\n\nclass ClassificationMetrics(Artifact):\n \"\"\"Represents Artifact class to store Classification Metrics.\"\"\"\n TYPE_NAME = 'system.ClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):\n \"\"\"Logs a single data point in the ROC Curve.\n\n Args:\n fpr: False positive rate value of the data point.\n tpr: True positive rate value of the data point.\n threshold: Threshold value for the data point.\n \"\"\"\n\n roc_reading = {\n 'confidenceThreshold': threshold,\n 'recall': tpr,\n 
'falsePositiveRate': fpr\n }\n if 'confidenceMetrics' not in self.metadata.keys():\n self.metadata['confidenceMetrics'] = []\n\n self.metadata['confidenceMetrics'].append(roc_reading)\n\n def log_roc_curve(self, fpr: List[float], tpr: List[float],\n threshold: List[float]):\n \"\"\"Logs an ROC curve.\n\n The list length of fpr, tpr and threshold must be the same.\n\n Args:\n fpr: List of false positive rate values.\n tpr: List of true positive rate values.\n threshold: List of threshold values.\n \"\"\"\n if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(tpr) != len(\n threshold):\n raise ValueError('Length of fpr, tpr and threshold must be the same. '\n 'Got lengths {}, {} and {} respectively.'.format(\n len(fpr), len(tpr), len(threshold)))\n\n for i in range(len(fpr)):\n self.log_roc_data_point(fpr=fpr[i], tpr=tpr[i], threshold=threshold[i])\n\n def set_confusion_matrix_categories(self, categories: List[str]):\n \"\"\"Stores confusion matrix categories.\n\n Args:\n categories: List of strings specifying the categories.\n \"\"\"\n\n self._categories = []\n annotation_specs = []\n for category in categories:\n annotation_spec = {'displayName': category}\n self._categories.append(category)\n annotation_specs.append(annotation_spec)\n\n self._matrix = []\n for row in range(len(self._categories)):\n self._matrix.append({'row': [0] * len(self._categories)})\n\n self._confusion_matrix = {}\n self._confusion_matrix['annotationSpecs'] = annotation_specs\n self._confusion_matrix['rows'] = self._matrix\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_row(self, row_category: str, row: List[float]):\n \"\"\"Logs a confusion matrix row.\n\n Args:\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n\n Raises:\n ValueError: If row_category is not in the list of categories\n set in set_categories call.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if len(row) != len(self._categories):\n raise ValueError('Invalid row. Expected size: {} got: {}'.\\\n format(len(self._categories), len(row)))\n\n self._matrix[self._categories.index(row_category)] = {'row': row}\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_cell(self, row_category: str, col_category: str,\n value: int):\n \"\"\"Logs a cell in the confusion matrix.\n\n Args:\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n\n Raises:\n ValueError: If row_category or col_category is not in the list of\n categories set in set_categories.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if col_category not in self._categories:\n raise ValueError('Invalid category: {} passed. 
Expected one of: {}'.\\\n format(row_category, self._categories))\n\n self._matrix[self._categories.index(row_category)]['row'][\n self._categories.index(col_category)] = value\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix(self, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Logs a confusion matrix.\n\n Args:\n categories: List of the category names.\n matrix: Complete confusion matrix.\n\n Raises:\n ValueError: Length of categories does not match number of rows or columns.\n \"\"\"\n self.set_confusion_matrix_categories(categories)\n\n if len(matrix) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n for index in range(len(categories)):\n if len(matrix[index]) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n self.log_confusion_matrix_row(categories[index], matrix[index])\n\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n\nclass SlicedClassificationMetrics(Artifact):\n \"\"\"Metrics class representing Sliced Classification Metrics.\n\n Similar to ClassificationMetrics clients using this class are expected to use\n log methods of the class to log metrics with the difference being each log\n method takes a slice to associate the ClassificationMetrics.\n\n \"\"\"\n\n TYPE_NAME = 'system.SlicedClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def _upsert_classification_metrics_for_slice(self, slice: str):\n \"\"\"Upserts the classification metrics instance for a slice.\"\"\"\n if slice not in self._sliced_metrics:\n self._sliced_metrics[slice] = ClassificationMetrics()\n\n def _update_metadata(self, slice: str):\n \"\"\"Updates metadata to adhere to the metrics schema.\"\"\"\n self.metadata = {}\n self.metadata['evaluationSlices'] = []\n for slice in self._sliced_metrics.keys():\n slice_metrics = {\n 'slice': slice,\n 'sliceClassificationMetrics': self._sliced_metrics[slice].metadata\n }\n self.metadata['evaluationSlices'].append(slice_metrics)\n\n def log_roc_reading(self, slice: str, threshold: float, tpr: float,\n fpr: float):\n \"\"\"Logs a single data point in the ROC Curve of a slice.\n\n Args:\n slice: String representing slice label.\n threshold: Thresold value for the data point.\n tpr: True positive rate value of the data point.\n fpr: False positive rate value of the data point.\n \"\"\"\n\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_roc_reading(threshold, tpr, fpr)\n self._update_metadata(slice)\n\n def load_roc_readings(self, slice: str, readings: List[List[float]]):\n \"\"\"Supports bulk loading ROC Curve readings for a slice.\n\n Args:\n slice: String representing slice label.\n readings: A 2-D list providing ROC Curve data points.\n The expected order of the data points is: threshold,\n true_positive_rate, false_positive_rate.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].load_roc_readings(readings)\n self._update_metadata(slice)\n\n def set_confusion_matrix_categories(self, slice: str, categories: List[str]):\n \"\"\"Stores confusion matrix categories for a slice..\n\n Categories are stored in the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n categories: 
List of strings specifying the categories.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].set_confusion_matrix_categories(categories)\n self._update_metadata(slice)\n\n def log_confusion_matrix_row(self, slice: str, row_category: str,\n row: List[int]):\n \"\"\"Logs a confusion matrix row for a slice.\n\n Row is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)\n self._update_metadata(slice)\n\n def log_confusion_matrix_cell(self, slice: str, row_category: str,\n col_category: str, value: int):\n \"\"\"Logs a confusion matrix cell for a slice..\n\n Cell is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(\n row_category, col_category, value)\n self._update_metadata(slice)\n\n def load_confusion_matrix(self, slice: str, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Supports bulk loading the whole confusion matrix for a slice.\n\n Args:\n slice: String representing slice label.\n categories: List of the category names.\n matrix: Complete confusion matrix.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(categories, matrix)\n self._update_metadata(slice)\n\n\nclass HTML(Artifact):\n \"\"\"An artifact representing an HTML file.\"\"\"\n TYPE_NAME = 'system.HTML'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Markdown(Artifact):\n \"\"\"An artifact representing an Markdown file.\"\"\"\n TYPE_NAME = 'system.Markdown'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nT = TypeVar('T')\n\n\nclass InputAnnotation():\n \"\"\"Marker type for input artifacts.\"\"\"\n pass\n\n\nclass OutputAnnotation():\n \"\"\"Marker type for output artifacts.\"\"\"\n pass\n\n\n# TODO: Use typing.Annotated instead of this hack.\n# With typing.Annotated (Python 3.9+ or typing_extensions package), the\n# following would look like:\n# Input = typing.Annotated[T, InputAnnotation]\n# Output = typing.Annotated[T, OutputAnnotation]\n\n# Input represents an Input artifact of type T.\nInput = Union[T, InputAnnotation]\n\n# Output represents an Output artifact of type T.\nOutput = Union[T, OutputAnnotation]\n\n\ndef is_artifact_annotation(typ) -> bool:\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [\n InputAnnotation, OutputAnnotation\n ]\n\n if not hasattr(typ, '__origin__'):\n return False\n\n if typ.__origin__ != Union and type(typ.__origin__) != type(Union):\n return False\n\n if not hasattr(typ, '__args__') or len(typ.__args__) != 2:\n return 
False\n\n if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:\n return False\n\n return True\n\n\ndef is_input_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Input[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation\n\n return typ.__args__[1] == InputAnnotation\n\n\ndef is_output_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Output[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation\n\n return typ.__args__[1] == OutputAnnotation\n\n\ndef get_io_artifact_class(typ):\n if not is_artifact_annotation(typ):\n return None\n if typ == Input or typ == Output:\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[1]\n\n return typ.__args__[0]\n\n\ndef get_io_artifact_annotation(typ):\n if not is_artifact_annotation(typ):\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[2]\n\n return typ.__args__[1]\n\n\n_SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = {\n x.TYPE_NAME: x\n for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]\n}\n\n\ndef create_runtime_artifact(runtime_artifact: Dict) -> Artifact:\n \"\"\"Creates an Artifact instance from the specified RuntimeArtifact.\n\n Args:\n runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.\n \"\"\"\n schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')\n\n artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)\n if not artifact_type:\n artifact_type = Artifact\n return artifact_type(\n uri=runtime_artifact.get('uri', ''),\n name=runtime_artifact.get('name', ''),\n metadata=runtime_artifact.get('metadata', {}),\n )\n\nclass InputPath:\n '''When creating component from function, :class:`.InputPath` should be used as function parameter annotation to tell the system to pass the *data file path* to the function instead of passing the actual data.'''\n def __init__(self, type=None):\n self.type = type\n\nclass OutputPath:\n '''When creating component from function, :class:`.OutputPath` should be used as function parameter annotation to tell the system that the function wants to output data by writing it into a file with the given path instead of returning the data from the function.'''\n def __init__(self, type=None):\n self.type = type\n\nclass Executor():\n \"\"\"Executor executes v2-based Python function components.\"\"\"\n\n def __init__(self, executor_input: Dict, function_to_execute: Callable):\n self._func = function_to_execute\n self._input = executor_input\n self._input_artifacts: Dict[str, Artifact] = {}\n self._output_artifacts: Dict[str, Artifact] = {}\n\n for name, artifacts in self._input.get('inputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._input_artifacts[name] = self._make_input_artifact(\n artifacts_list[0])\n\n for name, artifacts in self._input.get('outputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._output_artifacts[name] = self._make_output_artifact(\n artifacts_list[0])\n\n self._return_annotation = 
inspect.signature(self._func).return_annotation\n self._executor_output = {}\n\n @classmethod\n def _make_input_artifact(cls, runtime_artifact: Dict):\n return create_runtime_artifact(runtime_artifact)\n\n @classmethod\n def _make_output_artifact(cls, runtime_artifact: Dict):\n import os\n artifact = create_runtime_artifact(runtime_artifact)\n os.makedirs(os.path.dirname(artifact.path), exist_ok=True)\n return artifact\n\n def _get_input_artifact(self, name: str):\n return self._input_artifacts.get(name)\n\n def _get_output_artifact(self, name: str):\n return self._output_artifacts.get(name)\n\n def _get_input_parameter_value(self, parameter_name: str, parameter_type: Any):\n parameter = self._input.get('inputs', {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n if parameter.get('stringValue'):\n if parameter_type == str:\n return parameter['stringValue']\n elif parameter_type == bool:\n # Use `.lower()` so it can also handle 'True' and 'False' (resulted from\n # `str(True)` and `str(False)`, respectively.\n return json.loads(parameter['stringValue'].lower())\n else:\n return json.loads(parameter['stringValue'])\n elif parameter.get('intValue'):\n return int(parameter['intValue'])\n elif parameter.get('doubleValue'):\n return float(parameter['doubleValue'])\n\n def _get_output_parameter_path(self, parameter_name: str):\n parameter_name = self._maybe_strip_path_suffix(parameter_name)\n parameter = self._input.get('outputs',\n {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n import os\n path = parameter.get('outputFile', None)\n if path:\n os.makedirs(os.path.dirname(path), exist_ok=True)\n return path\n\n def _get_output_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n output_artifact = self._output_artifacts.get(artifact_name)\n if not output_artifact:\n raise ValueError(\n 'Failed to get output artifact path for artifact name {}'.format(\n artifact_name))\n return output_artifact.path\n\n def _get_input_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n input_artifact = self._input_artifacts.get(artifact_name)\n if not input_artifact:\n raise ValueError(\n 'Failed to get input artifact path for artifact name {}'.format(\n artifact_name))\n return input_artifact.path\n\n def _write_output_parameter_value(self, name: str,\n value: Union[str, int, float, bool, dict,\n list, Dict, List]):\n if type(value) == str:\n output = {'stringValue': value}\n elif type(value) == int:\n output = {'intValue': value}\n elif type(value) == float:\n output = {'doubleValue': value}\n else:\n # For bool, list, dict, List, Dict, json serialize the value.\n output = {'stringValue': json.dumps(value)}\n\n if not self._executor_output.get('parameters'):\n self._executor_output['parameters'] = {}\n\n self._executor_output['parameters'][name] = output\n\n def _write_output_artifact_payload(self, name: str, value: Any):\n path = self._get_output_artifact_path(name)\n with open(path, 'w') as f:\n f.write(str(value))\n\n # TODO: extract to a util\n @classmethod\n def _get_short_type_name(cls, type_name: str) -> str:\n \"\"\"Extracts the short form type name.\n\n This method is used for looking up serializer for a given type.\n\n For example:\n typing.List -> List\n typing.List[int] -> List\n typing.Dict[str, str] -> Dict\n List -> List\n str -> str\n\n Args:\n type_name: The original type name.\n\n Returns:\n The 
short form type name or the original name if pattern doesn't match.\n \"\"\"\n import re\n match = re.match('(typing\\.)?(?P<type>\\w+)(?:\\[.+\\])?', type_name)\n if match:\n return match.group('type')\n else:\n return type_name\n\n # TODO: merge with type_utils.is_parameter_type\n @classmethod\n def _is_parameter(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return annotation in [str, int, float, bool, dict, list]\n\n # Annotation could be, for instance `typing.Dict[str, str]`, etc.\n return cls._get_short_type_name(str(annotation)) in ['Dict', 'List']\n\n @classmethod\n def _is_artifact(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, Artifact)\n return False\n\n @classmethod\n def _is_named_tuple(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, tuple) and hasattr(\n annotation, '_fields') and hasattr(annotation, '__annotations__')\n return False\n\n def _handle_single_return_value(self, output_name: str, annotation_type: Any,\n return_value: Any):\n if self._is_parameter(annotation_type):\n if type(return_value) != annotation_type:\n raise ValueError(\n 'Function `{}` returned value of type {}; want type {}'.format(\n self._func.__name__, type(return_value), annotation_type))\n self._write_output_parameter_value(output_name, return_value)\n elif self._is_artifact(annotation_type):\n self._write_output_artifact_payload(output_name, return_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. Must be one of `str`, `int`, `float`, or a'\n ' subclass of `Artifact`'.format(annotation_type))\n\n def _write_executor_output(self, func_output: Optional[Any] = None):\n if self._output_artifacts:\n self._executor_output['artifacts'] = {}\n\n for name, artifact in self._output_artifacts.items():\n runtime_artifact = {\n 'name': artifact.name,\n 'uri': artifact.uri,\n 'metadata': artifact.metadata,\n }\n artifacts_list = {'artifacts': [runtime_artifact]}\n\n self._executor_output['artifacts'][name] = artifacts_list\n\n if func_output is not None:\n if self._is_parameter(self._return_annotation) or self._is_artifact(\n self._return_annotation):\n # Note: single output is named `Output` in component.yaml.\n self._handle_single_return_value('Output', self._return_annotation,\n func_output)\n elif self._is_named_tuple(self._return_annotation):\n if len(self._return_annotation._fields) != len(func_output):\n raise RuntimeError(\n 'Expected {} return values from function `{}`, got {}'.format(\n len(self._return_annotation._fields), self._func.__name__,\n len(func_output)))\n for i in range(len(self._return_annotation._fields)):\n field = self._return_annotation._fields[i]\n field_type = self._return_annotation.__annotations__[field]\n if type(func_output) == tuple:\n field_value = func_output[i]\n else:\n field_value = getattr(func_output, field)\n self._handle_single_return_value(field, field_type, field_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. 
Must be one of `str`, `int`, `float`, a'\n ' subclass of `Artifact`, or a NamedTuple collection of these types.'\n .format(self._return_annotation))\n\n import os\n os.makedirs(\n os.path.dirname(self._input['outputs']['outputFile']), exist_ok=True)\n with open(self._input['outputs']['outputFile'], 'w') as f:\n f.write(json.dumps(self._executor_output))\n\n def _maybe_strip_path_suffix(self, name) -> str:\n if name.endswith('_path'):\n name = name[0:-len('_path')]\n if name.endswith('_file'):\n name = name[0:-len('_file')]\n return name\n\n def execute(self):\n annotations = inspect.getfullargspec(self._func).annotations\n\n # Function arguments.\n func_kwargs = {}\n\n for k, v in annotations.items():\n if k == 'return':\n continue\n\n if self._is_parameter(v):\n func_kwargs[k] = self._get_input_parameter_value(k, v)\n\n if is_artifact_annotation(v):\n if is_input_artifact(v):\n func_kwargs[k] = self._get_input_artifact(k)\n if is_output_artifact(v):\n func_kwargs[k] = self._get_output_artifact(k)\n\n elif isinstance(v, OutputPath):\n if self._is_parameter(v.type):\n func_kwargs[k] = self._get_output_parameter_path(k)\n else:\n func_kwargs[k] = self._get_output_artifact_path(k)\n elif isinstance(v, InputPath):\n func_kwargs[k] = self._get_input_artifact_path(k)\n\n result = self._func(**func_kwargs)\n self._write_executor_output(result)\n\n\ndef train(\n dataset: Input[Dataset]\n) -> NamedTuple('Outputs', [\n ('scalar', str),\n ('model', Model),\n]):\n \"\"\"Dummy Training step.\"\"\"\n with open(dataset.path, 'r') as f:\n data = f.read()\n print('Dataset:', data)\n\n scalar = '123'\n model = 'My model trained using data: {}'.format(data)\n\n from collections import namedtuple\n output = namedtuple('Outputs', ['scalar', 'model'])\n return output(scalar, model)\n\n\ndef executor_main():\n import argparse\n import json\n\n parser = argparse.ArgumentParser(description='Process some integers.')\n parser.add_argument('--executor_input', type=str)\n parser.add_argument('--function_to_execute', type=str)\n\n args, _ = parser.parse_known_args()\n executor_input = json.loads(args.executor_input)\n function_to_execute = globals()[args.function_to_execute]\n\n executor = Executor(executor_input=executor_input,\n function_to_execute=function_to_execute)\n\n executor.execute()\n\n\nif __name__ == '__main__':\n executor_main()\n" ], "image": "python:3.7" } @@ -215,7 +215,7 @@ "sh", "-ec", "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n", - "\nimport json\nimport inspect\nfrom typing import *\n\n# Copyright 2021 The Kubeflow Authors\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Classes for input/output types in KFP SDK.\n\nThese are only compatible with v2 Pipelines.\n\"\"\"\n\nimport os\nfrom typing import Dict, Generic, List, Optional, Type, TypeVar, Union\n\n\n_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'\n_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'\n_S3_LOCAL_MOUNT_PREFIX = '/s3/'\n\n\nclass Artifact(object):\n \"\"\"Generic Artifact class.\n\n This 
class is meant to represent the metadata around an input or output\n machine-learning Artifact. Artifacts have URIs, which can either be a location\n on disk (or Cloud storage) or some other resource identifier such as\n an API resource name.\n\n Artifacts carry a `metadata` field, which is a dictionary for storing\n metadata related to this artifact.\n \"\"\"\n TYPE_NAME = 'system.Artifact'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n \"\"\"Initializes the Artifact with the given name, URI and metadata.\"\"\"\n self.uri = uri or ''\n self.name = name or ''\n self.metadata = metadata or {}\n\n @property\n def path(self):\n return self._get_path()\n\n @path.setter\n def path(self, path):\n self._set_path(path)\n\n def _get_path(self) -> Optional[str]:\n if self.uri.startswith('gs://'):\n return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]\n elif self.uri.startswith('minio://'):\n return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]\n elif self.uri.startswith('s3://'):\n return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]\n return None\n\n def _set_path(self, path):\n if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):\n path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):\n path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):\n path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]\n self.uri = path\n\n\nclass Model(Artifact):\n \"\"\"An artifact representing an ML Model.\"\"\"\n TYPE_NAME = 'system.Model'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n @property\n def framework(self) -> str:\n return self._get_framework()\n\n def _get_framework(self) -> str:\n return self.metadata.get('framework', '')\n\n @framework.setter\n def framework(self, framework: str):\n self._set_framework(framework)\n\n def _set_framework(self, framework: str):\n self.metadata['framework'] = framework\n\n\nclass Dataset(Artifact):\n \"\"\"An artifact representing an ML Dataset.\"\"\"\n TYPE_NAME = 'system.Dataset'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Metrics(Artifact):\n \"\"\"Represent a simple base Artifact type to store key-value scalar metrics.\"\"\"\n TYPE_NAME = 'system.Metrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_metric(self, metric: str, value: float):\n \"\"\"Sets a custom scalar metric.\n\n Args:\n metric: Metric key\n value: Value of the metric.\n \"\"\"\n self.metadata[metric] = value\n\n\nclass ClassificationMetrics(Artifact):\n \"\"\"Represents Artifact class to store Classification Metrics.\"\"\"\n TYPE_NAME = 'system.ClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):\n \"\"\"Logs a single data point in the ROC Curve.\n\n Args:\n fpr: False positive rate value of the data point.\n tpr: True positive rate value of the data point.\n threshold: Threshold value for the 
data point.\n \"\"\"\n\n roc_reading = {\n 'confidenceThreshold': threshold,\n 'recall': tpr,\n 'falsePositiveRate': fpr\n }\n if 'confidenceMetrics' not in self.metadata.keys():\n self.metadata['confidenceMetrics'] = []\n\n self.metadata['confidenceMetrics'].append(roc_reading)\n\n def log_roc_curve(self, fpr: List[float], tpr: List[float],\n threshold: List[float]):\n \"\"\"Logs an ROC curve.\n\n The list length of fpr, tpr and threshold must be the same.\n\n Args:\n fpr: List of false positive rate values.\n tpr: List of true positive rate values.\n threshold: List of threshold values.\n \"\"\"\n if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(tpr) != len(\n threshold):\n raise ValueError('Length of fpr, tpr and threshold must be the same. '\n 'Got lengths {}, {} and {} respectively.'.format(\n len(fpr), len(tpr), len(threshold)))\n\n for i in range(len(fpr)):\n self.log_roc_data_point(fpr=fpr[i], tpr=tpr[i], threshold=threshold[i])\n\n def set_confusion_matrix_categories(self, categories: List[str]):\n \"\"\"Stores confusion matrix categories.\n\n Args:\n categories: List of strings specifying the categories.\n \"\"\"\n\n self._categories = []\n annotation_specs = []\n for category in categories:\n annotation_spec = {'displayName': category}\n self._categories.append(category)\n annotation_specs.append(annotation_spec)\n\n self._matrix = []\n for row in range(len(self._categories)):\n self._matrix.append({'row': [0] * len(self._categories)})\n\n self._confusion_matrix = {}\n self._confusion_matrix['annotationSpecs'] = annotation_specs\n self._confusion_matrix['rows'] = self._matrix\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_row(self, row_category: str, row: List[float]):\n \"\"\"Logs a confusion matrix row.\n\n Args:\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n\n Raises:\n ValueError: If row_category is not in the list of categories\n set in set_categories call.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if len(row) != len(self._categories):\n raise ValueError('Invalid row. Expected size: {} got: {}'.\\\n format(len(self._categories), len(row)))\n\n self._matrix[self._categories.index(row_category)] = {'row': row}\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_cell(self, row_category: str, col_category: str,\n value: int):\n \"\"\"Logs a cell in the confusion matrix.\n\n Args:\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n\n Raises:\n ValueError: If row_category or col_category is not in the list of\n categories set in set_categories.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if col_category not in self._categories:\n raise ValueError('Invalid category: {} passed. 
Expected one of: {}'.\\\n format(row_category, self._categories))\n\n self._matrix[self._categories.index(row_category)]['row'][\n self._categories.index(col_category)] = value\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix(self, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Logs a confusion matrix.\n\n Args:\n categories: List of the category names.\n matrix: Complete confusion matrix.\n\n Raises:\n ValueError: Length of categories does not match number of rows or columns.\n \"\"\"\n self.set_confusion_matrix_categories(categories)\n\n if len(matrix) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n for index in range(len(categories)):\n if len(matrix[index]) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n self.log_confusion_matrix_row(categories[index], matrix[index])\n\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n\nclass SlicedClassificationMetrics(Artifact):\n \"\"\"Metrics class representing Sliced Classification Metrics.\n\n Similar to ClassificationMetrics clients using this class are expected to use\n log methods of the class to log metrics with the difference being each log\n method takes a slice to associate the ClassificationMetrics.\n\n \"\"\"\n\n TYPE_NAME = 'system.SlicedClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def _upsert_classification_metrics_for_slice(self, slice: str):\n \"\"\"Upserts the classification metrics instance for a slice.\"\"\"\n if slice not in self._sliced_metrics:\n self._sliced_metrics[slice] = ClassificationMetrics()\n\n def _update_metadata(self, slice: str):\n \"\"\"Updates metadata to adhere to the metrics schema.\"\"\"\n self.metadata = {}\n self.metadata['evaluationSlices'] = []\n for slice in self._sliced_metrics.keys():\n slice_metrics = {\n 'slice': slice,\n 'sliceClassificationMetrics': self._sliced_metrics[slice].metadata\n }\n self.metadata['evaluationSlices'].append(slice_metrics)\n\n def log_roc_reading(self, slice: str, threshold: float, tpr: float,\n fpr: float):\n \"\"\"Logs a single data point in the ROC Curve of a slice.\n\n Args:\n slice: String representing slice label.\n threshold: Thresold value for the data point.\n tpr: True positive rate value of the data point.\n fpr: False positive rate value of the data point.\n \"\"\"\n\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_roc_reading(threshold, tpr, fpr)\n self._update_metadata(slice)\n\n def load_roc_readings(self, slice: str, readings: List[List[float]]):\n \"\"\"Supports bulk loading ROC Curve readings for a slice.\n\n Args:\n slice: String representing slice label.\n readings: A 2-D list providing ROC Curve data points.\n The expected order of the data points is: threshold,\n true_positive_rate, false_positive_rate.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].load_roc_readings(readings)\n self._update_metadata(slice)\n\n def set_confusion_matrix_categories(self, slice: str, categories: List[str]):\n \"\"\"Stores confusion matrix categories for a slice..\n\n Categories are stored in the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n categories: 
List of strings specifying the categories.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].set_confusion_matrix_categories(categories)\n self._update_metadata(slice)\n\n def log_confusion_matrix_row(self, slice: str, row_category: str,\n row: List[int]):\n \"\"\"Logs a confusion matrix row for a slice.\n\n Row is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)\n self._update_metadata(slice)\n\n def log_confusion_matrix_cell(self, slice: str, row_category: str,\n col_category: str, value: int):\n \"\"\"Logs a confusion matrix cell for a slice..\n\n Cell is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(\n row_category, col_category, value)\n self._update_metadata(slice)\n\n def load_confusion_matrix(self, slice: str, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Supports bulk loading the whole confusion matrix for a slice.\n\n Args:\n slice: String representing slice label.\n categories: List of the category names.\n matrix: Complete confusion matrix.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(categories, matrix)\n self._update_metadata(slice)\n\n\nT = TypeVar('T')\n\n\nclass InputAnnotation():\n \"\"\"Marker type for input artifacts.\"\"\"\n pass\n\n\n\nclass OutputAnnotation():\n \"\"\"Marker type for output artifacts.\"\"\"\n pass\n\n\n# TODO: Use typing.Annotated instead of this hack.\n# With typing.Annotated (Python 3.9+ or typing_extensions package), the\n# following would look like:\n# Input = typing.Annotated[T, InputAnnotation]\n# Output = typing.Annotated[T, OutputAnnotation]\n\n\n# Input represents an Input artifact of type T.\nInput = Union[T, InputAnnotation]\n\n# Output represents an Output artifact of type T.\nOutput = Union[T, OutputAnnotation]\n\n\ndef is_artifact_annotation(typ) -> bool:\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [InputAnnotation, OutputAnnotation]\n\n if not hasattr(typ, '__origin__'):\n return False\n\n\n if typ.__origin__ != Union and type(typ.__origin__) != type(Union):\n return False\n\n\n if not hasattr(typ, '__args__') or len(typ.__args__) != 2:\n return False\n\n if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:\n return False\n\n return True\n\ndef is_input_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Input[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation\n\n return typ.__args__[1] == InputAnnotation\n\ndef is_output_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Output[T].\"\"\"\n if not is_artifact_annotation(typ):\n 
return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation\n\n return typ.__args__[1] == OutputAnnotation\n\ndef get_io_artifact_class(typ):\n if not is_artifact_annotation(typ):\n return None\n if typ == Input or typ == Output:\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[1]\n\n return typ.__args__[0]\n\ndef get_io_artifact_annotation(typ):\n if not is_artifact_annotation(typ):\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[2]\n\n return typ.__args__[1]\n\n\n\n_SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = {\n x.TYPE_NAME: x\n for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]\n}\n\n\ndef create_runtime_artifact(runtime_artifact: Dict) -> Artifact:\n \"\"\"Creates an Artifact instance from the specified RuntimeArtifact.\n\n Args:\n runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.\n \"\"\"\n schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')\n\n artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)\n if not artifact_type:\n artifact_type = Artifact\n return artifact_type(\n uri=runtime_artifact.get('uri', ''),\n name=runtime_artifact.get('name', ''),\n metadata=runtime_artifact.get('metadata', {}),\n )\n\nclass InputPath:\n '''When creating component from function, :class:`.InputPath` should be used as function parameter annotation to tell the system to pass the *data file path* to the function instead of passing the actual data.'''\n def __init__(self, type=None):\n self.type = type\n\nclass OutputPath:\n '''When creating component from function, :class:`.OutputPath` should be used as function parameter annotation to tell the system that the function wants to output data by writing it into a file with the given path instead of returning the data from the function.'''\n def __init__(self, type=None):\n self.type = type\n\nclass Executor():\n \"\"\"Executor executes v2-based Python function components.\"\"\"\n\n def __init__(self, executor_input: Dict, function_to_execute: Callable):\n self._func = function_to_execute\n self._input = executor_input\n self._input_artifacts: Dict[str, Artifact] = {}\n self._output_artifacts: Dict[str, Artifact] = {}\n\n for name, artifacts in self._input.get('inputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._input_artifacts[name] = self._make_input_artifact(\n artifacts_list[0])\n\n for name, artifacts in self._input.get('outputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._output_artifacts[name] = self._make_output_artifact(\n artifacts_list[0])\n\n self._return_annotation = inspect.signature(self._func).return_annotation\n self._executor_output = {}\n\n @classmethod\n def _make_input_artifact(cls, runtime_artifact: Dict):\n return create_runtime_artifact(runtime_artifact)\n\n @classmethod\n def _make_output_artifact(cls, runtime_artifact: Dict):\n import os\n artifact = create_runtime_artifact(runtime_artifact)\n os.makedirs(os.path.dirname(artifact.path), exist_ok=True)\n return artifact\n\n def _get_input_artifact(self, name: str):\n return self._input_artifacts.get(name)\n\n def _get_output_artifact(self, name: str):\n return 
self._output_artifacts.get(name)\n\n def _get_input_parameter_value(self, parameter_name: str, parameter_type: Any):\n parameter = self._input.get('inputs', {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n if parameter.get('stringValue'):\n if parameter_type == str:\n return parameter['stringValue']\n elif parameter_type == bool:\n # Use `.lower()` so it can also handle 'True' and 'False' (resulted from\n # `str(True)` and `str(False)`, respectively.\n return json.loads(parameter['stringValue'].lower())\n else:\n return json.loads(parameter['stringValue'])\n elif parameter.get('intValue'):\n return int(parameter['intValue'])\n elif parameter.get('doubleValue'):\n return float(parameter['doubleValue'])\n\n def _get_output_parameter_path(self, parameter_name: str):\n parameter_name = self._maybe_strip_path_suffix(parameter_name)\n parameter = self._input.get('outputs',\n {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n import os\n path = parameter.get('outputFile', None)\n if path:\n os.makedirs(os.path.dirname(path), exist_ok=True)\n return path\n\n def _get_output_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n output_artifact = self._output_artifacts.get(artifact_name)\n if not output_artifact:\n raise ValueError(\n 'Failed to get output artifact path for artifact name {}'.format(\n artifact_name))\n return output_artifact.path\n\n def _get_input_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n input_artifact = self._input_artifacts.get(artifact_name)\n if not input_artifact:\n raise ValueError(\n 'Failed to get input artifact path for artifact name {}'.format(\n artifact_name))\n return input_artifact.path\n\n def _write_output_parameter_value(self, name: str,\n value: Union[str, int, float, bool, dict,\n list, Dict, List]):\n if type(value) == str:\n output = {'stringValue': value}\n elif type(value) == int:\n output = {'intValue': value}\n elif type(value) == float:\n output = {'doubleValue': value}\n else:\n # For bool, list, dict, List, Dict, json serialize the value.\n output = {'stringValue': json.dumps(value)}\n\n if not self._executor_output.get('parameters'):\n self._executor_output['parameters'] = {}\n\n self._executor_output['parameters'][name] = output\n\n def _write_output_artifact_payload(self, name: str, value: Any):\n path = self._get_output_artifact_path(name)\n with open(path, 'w') as f:\n f.write(str(value))\n\n # TODO: extract to a util\n @classmethod\n def _get_short_type_name(cls, type_name: str) -> str:\n \"\"\"Extracts the short form type name.\n\n This method is used for looking up serializer for a given type.\n\n For example:\n typing.List -> List\n typing.List[int] -> List\n typing.Dict[str, str] -> Dict\n List -> List\n str -> str\n\n Args:\n type_name: The original type name.\n\n Returns:\n The short form type name or the original name if pattern doesn't match.\n \"\"\"\n import re\n match = re.match('(typing\\.)?(?P<type>\\w+)(?:\\[.+\\])?', type_name)\n if match:\n return match.group('type')\n else:\n return type_name\n\n # TODO: merge with type_utils.is_parameter_type\n @classmethod\n def _is_parameter(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return annotation in [str, int, float, bool, dict, list]\n\n # Annotation could be, for instance `typing.Dict[str, str]`, etc.\n return cls._get_short_type_name(str(annotation)) in ['Dict', 
'List']\n\n @classmethod\n def _is_artifact(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, Artifact)\n return False\n\n @classmethod\n def _is_named_tuple(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, tuple) and hasattr(\n annotation, '_fields') and hasattr(annotation, '__annotations__')\n return False\n\n def _handle_single_return_value(self, output_name: str, annotation_type: Any,\n return_value: Any):\n if self._is_parameter(annotation_type):\n if type(return_value) != annotation_type:\n raise ValueError(\n 'Function `{}` returned value of type {}; want type {}'.format(\n self._func.__name__, type(return_value), annotation_type))\n self._write_output_parameter_value(output_name, return_value)\n elif self._is_artifact(annotation_type):\n self._write_output_artifact_payload(output_name, return_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. Must be one of `str`, `int`, `float`, or a'\n ' subclass of `Artifact`'.format(annotation_type))\n\n def _write_executor_output(self, func_output: Optional[Any] = None):\n if self._output_artifacts:\n self._executor_output['artifacts'] = {}\n\n for name, artifact in self._output_artifacts.items():\n runtime_artifact = {\n 'name': artifact.name,\n 'uri': artifact.uri,\n 'metadata': artifact.metadata,\n }\n artifacts_list = {'artifacts': [runtime_artifact]}\n\n self._executor_output['artifacts'][name] = artifacts_list\n\n if func_output is not None:\n if self._is_parameter(self._return_annotation) or self._is_artifact(\n self._return_annotation):\n # Note: single output is named `Output` in component.yaml.\n self._handle_single_return_value('Output', self._return_annotation,\n func_output)\n elif self._is_named_tuple(self._return_annotation):\n if len(self._return_annotation._fields) != len(func_output):\n raise RuntimeError(\n 'Expected {} return values from function `{}`, got {}'.format(\n len(self._return_annotation._fields), self._func.__name__,\n len(func_output)))\n for i in range(len(self._return_annotation._fields)):\n field = self._return_annotation._fields[i]\n field_type = self._return_annotation.__annotations__[field]\n if type(func_output) == tuple:\n field_value = func_output[i]\n else:\n field_value = getattr(func_output, field)\n self._handle_single_return_value(field, field_type, field_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. 
Must be one of `str`, `int`, `float`, a'\n ' subclass of `Artifact`, or a NamedTuple collection of these types.'\n .format(self._return_annotation))\n\n import os\n os.makedirs(\n os.path.dirname(self._input['outputs']['outputFile']), exist_ok=True)\n with open(self._input['outputs']['outputFile'], 'w') as f:\n f.write(json.dumps(self._executor_output))\n\n def _maybe_strip_path_suffix(self, name) -> str:\n if name.endswith('_path'):\n name = name[0:-len('_path')]\n if name.endswith('_file'):\n name = name[0:-len('_file')]\n return name\n\n def execute(self):\n annotations = inspect.getfullargspec(self._func).annotations\n\n # Function arguments.\n func_kwargs = {}\n\n for k, v in annotations.items():\n if k == 'return':\n continue\n\n if self._is_parameter(v):\n func_kwargs[k] = self._get_input_parameter_value(k, v)\n\n if is_artifact_annotation(v):\n if is_input_artifact(v):\n func_kwargs[k] = self._get_input_artifact(k)\n if is_output_artifact(v):\n func_kwargs[k] = self._get_output_artifact(k)\n\n elif isinstance(v, OutputPath):\n if self._is_parameter(v.type):\n func_kwargs[k] = self._get_output_parameter_path(k)\n else:\n func_kwargs[k] = self._get_output_artifact_path(k)\n elif isinstance(v, InputPath):\n func_kwargs[k] = self._get_input_artifact_path(k)\n\n result = self._func(**func_kwargs)\n self._write_executor_output(result)\n\n\ndef train(\n dataset: Input[Dataset]\n) -> NamedTuple('Outputs', [\n ('scalar', str),\n ('model', Model),\n]):\n \"\"\"Dummy Training step.\"\"\"\n with open(dataset.path, 'r') as f:\n data = f.read()\n print('Dataset:', data)\n\n scalar = '123'\n model = 'My model trained using data: {}'.format(data)\n\n from collections import namedtuple\n output = namedtuple('Outputs', ['scalar', 'model'])\n return output(scalar, model)\n\n\ndef executor_main():\n import argparse\n import json\n\n parser = argparse.ArgumentParser(description='Process some integers.')\n parser.add_argument('--executor_input', type=str)\n parser.add_argument('--function_to_execute', type=str)\n\n args, _ = parser.parse_known_args()\n executor_input = json.loads(args.executor_input)\n function_to_execute = globals()[args.function_to_execute]\n\n executor = Executor(executor_input=executor_input,\n function_to_execute=function_to_execute)\n\n executor.execute()\n\n\nif __name__ == '__main__':\n executor_main()\n" + "\nimport json\nimport inspect\nfrom typing import *\n\n# Copyright 2021 The Kubeflow Authors\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Classes for input/output types in KFP SDK.\n\nThese are only compatible with v2 Pipelines.\n\"\"\"\n\nimport os\nfrom typing import Dict, Generic, List, Optional, Type, TypeVar, Union\n\n_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'\n_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'\n_S3_LOCAL_MOUNT_PREFIX = '/s3/'\n\n\nclass Artifact(object):\n \"\"\"Generic Artifact class.\n\n This class is meant to represent the metadata around an input or output\n machine-learning Artifact. 
Artifacts have URIs, which can either be a location\n on disk (or Cloud storage) or some other resource identifier such as\n an API resource name.\n\n Artifacts carry a `metadata` field, which is a dictionary for storing\n metadata related to this artifact.\n \"\"\"\n TYPE_NAME = 'system.Artifact'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n \"\"\"Initializes the Artifact with the given name, URI and metadata.\"\"\"\n self.uri = uri or ''\n self.name = name or ''\n self.metadata = metadata or {}\n\n @property\n def path(self):\n return self._get_path()\n\n @path.setter\n def path(self, path):\n self._set_path(path)\n\n def _get_path(self) -> Optional[str]:\n if self.uri.startswith('gs://'):\n return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]\n elif self.uri.startswith('minio://'):\n return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]\n elif self.uri.startswith('s3://'):\n return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]\n return None\n\n def _set_path(self, path):\n if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):\n path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):\n path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):\n path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]\n self.uri = path\n\n\nclass Model(Artifact):\n \"\"\"An artifact representing an ML Model.\"\"\"\n TYPE_NAME = 'system.Model'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n @property\n def framework(self) -> str:\n return self._get_framework()\n\n def _get_framework(self) -> str:\n return self.metadata.get('framework', '')\n\n @framework.setter\n def framework(self, framework: str):\n self._set_framework(framework)\n\n def _set_framework(self, framework: str):\n self.metadata['framework'] = framework\n\n\nclass Dataset(Artifact):\n \"\"\"An artifact representing an ML Dataset.\"\"\"\n TYPE_NAME = 'system.Dataset'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Metrics(Artifact):\n \"\"\"Represent a simple base Artifact type to store key-value scalar metrics.\"\"\"\n TYPE_NAME = 'system.Metrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_metric(self, metric: str, value: float):\n \"\"\"Sets a custom scalar metric.\n\n Args:\n metric: Metric key\n value: Value of the metric.\n \"\"\"\n self.metadata[metric] = value\n\n\nclass ClassificationMetrics(Artifact):\n \"\"\"Represents Artifact class to store Classification Metrics.\"\"\"\n TYPE_NAME = 'system.ClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):\n \"\"\"Logs a single data point in the ROC Curve.\n\n Args:\n fpr: False positive rate value of the data point.\n tpr: True positive rate value of the data point.\n threshold: Threshold value for the data point.\n \"\"\"\n\n roc_reading = {\n 'confidenceThreshold': threshold,\n 'recall': tpr,\n 
'falsePositiveRate': fpr\n }\n if 'confidenceMetrics' not in self.metadata.keys():\n self.metadata['confidenceMetrics'] = []\n\n self.metadata['confidenceMetrics'].append(roc_reading)\n\n def log_roc_curve(self, fpr: List[float], tpr: List[float],\n threshold: List[float]):\n \"\"\"Logs an ROC curve.\n\n The list length of fpr, tpr and threshold must be the same.\n\n Args:\n fpr: List of false positive rate values.\n tpr: List of true positive rate values.\n threshold: List of threshold values.\n \"\"\"\n if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(tpr) != len(\n threshold):\n raise ValueError('Length of fpr, tpr and threshold must be the same. '\n 'Got lengths {}, {} and {} respectively.'.format(\n len(fpr), len(tpr), len(threshold)))\n\n for i in range(len(fpr)):\n self.log_roc_data_point(fpr=fpr[i], tpr=tpr[i], threshold=threshold[i])\n\n def set_confusion_matrix_categories(self, categories: List[str]):\n \"\"\"Stores confusion matrix categories.\n\n Args:\n categories: List of strings specifying the categories.\n \"\"\"\n\n self._categories = []\n annotation_specs = []\n for category in categories:\n annotation_spec = {'displayName': category}\n self._categories.append(category)\n annotation_specs.append(annotation_spec)\n\n self._matrix = []\n for row in range(len(self._categories)):\n self._matrix.append({'row': [0] * len(self._categories)})\n\n self._confusion_matrix = {}\n self._confusion_matrix['annotationSpecs'] = annotation_specs\n self._confusion_matrix['rows'] = self._matrix\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_row(self, row_category: str, row: List[float]):\n \"\"\"Logs a confusion matrix row.\n\n Args:\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n\n Raises:\n ValueError: If row_category is not in the list of categories\n set in set_categories call.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if len(row) != len(self._categories):\n raise ValueError('Invalid row. Expected size: {} got: {}'.\\\n format(len(self._categories), len(row)))\n\n self._matrix[self._categories.index(row_category)] = {'row': row}\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_cell(self, row_category: str, col_category: str,\n value: int):\n \"\"\"Logs a cell in the confusion matrix.\n\n Args:\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n\n Raises:\n ValueError: If row_category or col_category is not in the list of\n categories set in set_categories.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if col_category not in self._categories:\n raise ValueError('Invalid category: {} passed. 
Expected one of: {}'.\\\n format(row_category, self._categories))\n\n self._matrix[self._categories.index(row_category)]['row'][\n self._categories.index(col_category)] = value\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix(self, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Logs a confusion matrix.\n\n Args:\n categories: List of the category names.\n matrix: Complete confusion matrix.\n\n Raises:\n ValueError: Length of categories does not match number of rows or columns.\n \"\"\"\n self.set_confusion_matrix_categories(categories)\n\n if len(matrix) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n for index in range(len(categories)):\n if len(matrix[index]) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n self.log_confusion_matrix_row(categories[index], matrix[index])\n\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n\nclass SlicedClassificationMetrics(Artifact):\n \"\"\"Metrics class representing Sliced Classification Metrics.\n\n Similar to ClassificationMetrics, clients using this class are expected to use\n log methods of the class to log metrics, with the difference being each log\n method takes a slice to associate the ClassificationMetrics.\n\n \"\"\"\n\n TYPE_NAME = 'system.SlicedClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def _upsert_classification_metrics_for_slice(self, slice: str):\n \"\"\"Upserts the classification metrics instance for a slice.\"\"\"\n if slice not in self._sliced_metrics:\n self._sliced_metrics[slice] = ClassificationMetrics()\n\n def _update_metadata(self, slice: str):\n \"\"\"Updates metadata to adhere to the metrics schema.\"\"\"\n self.metadata = {}\n self.metadata['evaluationSlices'] = []\n for slice in self._sliced_metrics.keys():\n slice_metrics = {\n 'slice': slice,\n 'sliceClassificationMetrics': self._sliced_metrics[slice].metadata\n }\n self.metadata['evaluationSlices'].append(slice_metrics)\n\n def log_roc_reading(self, slice: str, threshold: float, tpr: float,\n fpr: float):\n \"\"\"Logs a single data point in the ROC Curve of a slice.\n\n Args:\n slice: String representing slice label.\n threshold: Threshold value for the data point.\n tpr: True positive rate value of the data point.\n fpr: False positive rate value of the data point.\n \"\"\"\n\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_roc_reading(threshold, tpr, fpr)\n self._update_metadata(slice)\n\n def load_roc_readings(self, slice: str, readings: List[List[float]]):\n \"\"\"Supports bulk loading ROC Curve readings for a slice.\n\n Args:\n slice: String representing slice label.\n readings: A 2-D list providing ROC Curve data points.\n The expected order of the data points is: threshold,\n true_positive_rate, false_positive_rate.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].load_roc_readings(readings)\n self._update_metadata(slice)\n\n def set_confusion_matrix_categories(self, slice: str, categories: List[str]):\n \"\"\"Stores confusion matrix categories for a slice.\n\n Categories are stored in the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n categories: 
List of strings specifying the categories.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].set_confusion_matrix_categories(categories)\n self._update_metadata(slice)\n\n def log_confusion_matrix_row(self, slice: str, row_category: str,\n row: List[int]):\n \"\"\"Logs a confusion matrix row for a slice.\n\n Row is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)\n self._update_metadata(slice)\n\n def log_confusion_matrix_cell(self, slice: str, row_category: str,\n col_category: str, value: int):\n \"\"\"Logs a confusion matrix cell for a slice.\n\n Cell is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(\n row_category, col_category, value)\n self._update_metadata(slice)\n\n def load_confusion_matrix(self, slice: str, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Supports bulk loading the whole confusion matrix for a slice.\n\n Args:\n slice: String representing slice label.\n categories: List of the category names.\n matrix: Complete confusion matrix.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix(categories, matrix)\n self._update_metadata(slice)\n\n\nclass HTML(Artifact):\n \"\"\"An artifact representing an HTML file.\"\"\"\n TYPE_NAME = 'system.HTML'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Markdown(Artifact):\n \"\"\"An artifact representing a Markdown file.\"\"\"\n TYPE_NAME = 'system.Markdown'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nT = TypeVar('T')\n\n\nclass InputAnnotation():\n \"\"\"Marker type for input artifacts.\"\"\"\n pass\n\n\nclass OutputAnnotation():\n \"\"\"Marker type for output artifacts.\"\"\"\n pass\n\n\n# TODO: Use typing.Annotated instead of this hack.\n# With typing.Annotated (Python 3.9+ or typing_extensions package), the\n# following would look like:\n# Input = typing.Annotated[T, InputAnnotation]\n# Output = typing.Annotated[T, OutputAnnotation]\n\n# Input represents an Input artifact of type T.\nInput = Union[T, InputAnnotation]\n\n# Output represents an Output artifact of type T.\nOutput = Union[T, OutputAnnotation]\n\n\ndef is_artifact_annotation(typ) -> bool:\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [\n InputAnnotation, OutputAnnotation\n ]\n\n if not hasattr(typ, '__origin__'):\n return False\n\n if typ.__origin__ != Union and type(typ.__origin__) != type(Union):\n return False\n\n if not hasattr(typ, '__args__') or len(typ.__args__) != 2:\n return 
False\n\n if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:\n return False\n\n return True\n\n\ndef is_input_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Input[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation\n\n return typ.__args__[1] == InputAnnotation\n\n\ndef is_output_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Output[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation\n\n return typ.__args__[1] == OutputAnnotation\n\n\ndef get_io_artifact_class(typ):\n if not is_artifact_annotation(typ):\n return None\n if typ == Input or typ == Output:\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[1]\n\n return typ.__args__[0]\n\n\ndef get_io_artifact_annotation(typ):\n if not is_artifact_annotation(typ):\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[2]\n\n return typ.__args__[1]\n\n\n_SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = {\n x.TYPE_NAME: x\n for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]\n}\n\n\ndef create_runtime_artifact(runtime_artifact: Dict) -> Artifact:\n \"\"\"Creates an Artifact instance from the specified RuntimeArtifact.\n\n Args:\n runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.\n \"\"\"\n schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')\n\n artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)\n if not artifact_type:\n artifact_type = Artifact\n return artifact_type(\n uri=runtime_artifact.get('uri', ''),\n name=runtime_artifact.get('name', ''),\n metadata=runtime_artifact.get('metadata', {}),\n )\n\nclass InputPath:\n '''When creating component from function, :class:`.InputPath` should be used as function parameter annotation to tell the system to pass the *data file path* to the function instead of passing the actual data.'''\n def __init__(self, type=None):\n self.type = type\n\nclass OutputPath:\n '''When creating component from function, :class:`.OutputPath` should be used as function parameter annotation to tell the system that the function wants to output data by writing it into a file with the given path instead of returning the data from the function.'''\n def __init__(self, type=None):\n self.type = type\n\nclass Executor():\n \"\"\"Executor executes v2-based Python function components.\"\"\"\n\n def __init__(self, executor_input: Dict, function_to_execute: Callable):\n self._func = function_to_execute\n self._input = executor_input\n self._input_artifacts: Dict[str, Artifact] = {}\n self._output_artifacts: Dict[str, Artifact] = {}\n\n for name, artifacts in self._input.get('inputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._input_artifacts[name] = self._make_input_artifact(\n artifacts_list[0])\n\n for name, artifacts in self._input.get('outputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._output_artifacts[name] = self._make_output_artifact(\n artifacts_list[0])\n\n self._return_annotation = 
inspect.signature(self._func).return_annotation\n self._executor_output = {}\n\n @classmethod\n def _make_input_artifact(cls, runtime_artifact: Dict):\n return create_runtime_artifact(runtime_artifact)\n\n @classmethod\n def _make_output_artifact(cls, runtime_artifact: Dict):\n import os\n artifact = create_runtime_artifact(runtime_artifact)\n os.makedirs(os.path.dirname(artifact.path), exist_ok=True)\n return artifact\n\n def _get_input_artifact(self, name: str):\n return self._input_artifacts.get(name)\n\n def _get_output_artifact(self, name: str):\n return self._output_artifacts.get(name)\n\n def _get_input_parameter_value(self, parameter_name: str, parameter_type: Any):\n parameter = self._input.get('inputs', {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n if parameter.get('stringValue'):\n if parameter_type == str:\n return parameter['stringValue']\n elif parameter_type == bool:\n # Use `.lower()` so it can also handle 'True' and 'False' (resulted from\n # `str(True)` and `str(False)`, respectively.\n return json.loads(parameter['stringValue'].lower())\n else:\n return json.loads(parameter['stringValue'])\n elif parameter.get('intValue'):\n return int(parameter['intValue'])\n elif parameter.get('doubleValue'):\n return float(parameter['doubleValue'])\n\n def _get_output_parameter_path(self, parameter_name: str):\n parameter_name = self._maybe_strip_path_suffix(parameter_name)\n parameter = self._input.get('outputs',\n {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n import os\n path = parameter.get('outputFile', None)\n if path:\n os.makedirs(os.path.dirname(path), exist_ok=True)\n return path\n\n def _get_output_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n output_artifact = self._output_artifacts.get(artifact_name)\n if not output_artifact:\n raise ValueError(\n 'Failed to get output artifact path for artifact name {}'.format(\n artifact_name))\n return output_artifact.path\n\n def _get_input_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n input_artifact = self._input_artifacts.get(artifact_name)\n if not input_artifact:\n raise ValueError(\n 'Failed to get input artifact path for artifact name {}'.format(\n artifact_name))\n return input_artifact.path\n\n def _write_output_parameter_value(self, name: str,\n value: Union[str, int, float, bool, dict,\n list, Dict, List]):\n if type(value) == str:\n output = {'stringValue': value}\n elif type(value) == int:\n output = {'intValue': value}\n elif type(value) == float:\n output = {'doubleValue': value}\n else:\n # For bool, list, dict, List, Dict, json serialize the value.\n output = {'stringValue': json.dumps(value)}\n\n if not self._executor_output.get('parameters'):\n self._executor_output['parameters'] = {}\n\n self._executor_output['parameters'][name] = output\n\n def _write_output_artifact_payload(self, name: str, value: Any):\n path = self._get_output_artifact_path(name)\n with open(path, 'w') as f:\n f.write(str(value))\n\n # TODO: extract to a util\n @classmethod\n def _get_short_type_name(cls, type_name: str) -> str:\n \"\"\"Extracts the short form type name.\n\n This method is used for looking up serializer for a given type.\n\n For example:\n typing.List -> List\n typing.List[int] -> List\n typing.Dict[str, str] -> Dict\n List -> List\n str -> str\n\n Args:\n type_name: The original type name.\n\n Returns:\n The 
short form type name or the original name if pattern doesn't match.\n \"\"\"\n import re\n match = re.match('(typing\\.)?(?P\\w+)(?:\\[.+\\])?', type_name)\n if match:\n return match.group('type')\n else:\n return type_name\n\n # TODO: merge with type_utils.is_parameter_type\n @classmethod\n def _is_parameter(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return annotation in [str, int, float, bool, dict, list]\n\n # Annotation could be, for instance `typing.Dict[str, str]`, etc.\n return cls._get_short_type_name(str(annotation)) in ['Dict', 'List']\n\n @classmethod\n def _is_artifact(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, Artifact)\n return False\n\n @classmethod\n def _is_named_tuple(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, tuple) and hasattr(\n annotation, '_fields') and hasattr(annotation, '__annotations__')\n return False\n\n def _handle_single_return_value(self, output_name: str, annotation_type: Any,\n return_value: Any):\n if self._is_parameter(annotation_type):\n if type(return_value) != annotation_type:\n raise ValueError(\n 'Function `{}` returned value of type {}; want type {}'.format(\n self._func.__name__, type(return_value), annotation_type))\n self._write_output_parameter_value(output_name, return_value)\n elif self._is_artifact(annotation_type):\n self._write_output_artifact_payload(output_name, return_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. Must be one of `str`, `int`, `float`, or a'\n ' subclass of `Artifact`'.format(annotation_type))\n\n def _write_executor_output(self, func_output: Optional[Any] = None):\n if self._output_artifacts:\n self._executor_output['artifacts'] = {}\n\n for name, artifact in self._output_artifacts.items():\n runtime_artifact = {\n 'name': artifact.name,\n 'uri': artifact.uri,\n 'metadata': artifact.metadata,\n }\n artifacts_list = {'artifacts': [runtime_artifact]}\n\n self._executor_output['artifacts'][name] = artifacts_list\n\n if func_output is not None:\n if self._is_parameter(self._return_annotation) or self._is_artifact(\n self._return_annotation):\n # Note: single output is named `Output` in component.yaml.\n self._handle_single_return_value('Output', self._return_annotation,\n func_output)\n elif self._is_named_tuple(self._return_annotation):\n if len(self._return_annotation._fields) != len(func_output):\n raise RuntimeError(\n 'Expected {} return values from function `{}`, got {}'.format(\n len(self._return_annotation._fields), self._func.__name__,\n len(func_output)))\n for i in range(len(self._return_annotation._fields)):\n field = self._return_annotation._fields[i]\n field_type = self._return_annotation.__annotations__[field]\n if type(func_output) == tuple:\n field_value = func_output[i]\n else:\n field_value = getattr(func_output, field)\n self._handle_single_return_value(field, field_type, field_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. 
Must be one of `str`, `int`, `float`, a'\n ' subclass of `Artifact`, or a NamedTuple collection of these types.'\n .format(self._return_annotation))\n\n import os\n os.makedirs(\n os.path.dirname(self._input['outputs']['outputFile']), exist_ok=True)\n with open(self._input['outputs']['outputFile'], 'w') as f:\n f.write(json.dumps(self._executor_output))\n\n def _maybe_strip_path_suffix(self, name) -> str:\n if name.endswith('_path'):\n name = name[0:-len('_path')]\n if name.endswith('_file'):\n name = name[0:-len('_file')]\n return name\n\n def execute(self):\n annotations = inspect.getfullargspec(self._func).annotations\n\n # Function arguments.\n func_kwargs = {}\n\n for k, v in annotations.items():\n if k == 'return':\n continue\n\n if self._is_parameter(v):\n func_kwargs[k] = self._get_input_parameter_value(k, v)\n\n if is_artifact_annotation(v):\n if is_input_artifact(v):\n func_kwargs[k] = self._get_input_artifact(k)\n if is_output_artifact(v):\n func_kwargs[k] = self._get_output_artifact(k)\n\n elif isinstance(v, OutputPath):\n if self._is_parameter(v.type):\n func_kwargs[k] = self._get_output_parameter_path(k)\n else:\n func_kwargs[k] = self._get_output_artifact_path(k)\n elif isinstance(v, InputPath):\n func_kwargs[k] = self._get_input_artifact_path(k)\n\n result = self._func(**func_kwargs)\n self._write_executor_output(result)\n\n\ndef train(\n dataset: Input[Dataset]\n) -> NamedTuple('Outputs', [\n ('scalar', str),\n ('model', Model),\n]):\n \"\"\"Dummy Training step.\"\"\"\n with open(dataset.path, 'r') as f:\n data = f.read()\n print('Dataset:', data)\n\n scalar = '123'\n model = 'My model trained using data: {}'.format(data)\n\n from collections import namedtuple\n output = namedtuple('Outputs', ['scalar', 'model'])\n return output(scalar, model)\n\n\ndef executor_main():\n import argparse\n import json\n\n parser = argparse.ArgumentParser(description='Process some integers.')\n parser.add_argument('--executor_input', type=str)\n parser.add_argument('--function_to_execute', type=str)\n\n args, _ = parser.parse_known_args()\n executor_input = json.loads(args.executor_input)\n function_to_execute = globals()[args.function_to_execute]\n\n executor = Executor(executor_input=executor_input,\n function_to_execute=function_to_execute)\n\n executor.execute()\n\n\nif __name__ == '__main__':\n executor_main()\n" ], "image": "python:3.7" } @@ -312,7 +312,7 @@ } }, "schemaVersion": "2.0.0", - "sdkVersion": "kfp-1.6.4" + "sdkVersion": "kfp-1.6.6" }, "runtimeConfig": { "gcsOutputDirectory": "dummy_root", diff --git a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_loop_output.json b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_loop_output.json index f113c601c64..cb7056df7d1 100644 --- a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_loop_output.json +++ b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_loop_output.json @@ -226,7 +226,7 @@ } }, "schemaVersion": "2.0.0", - "sdkVersion": "kfp-1.6.4" + "sdkVersion": "kfp-1.6.6" }, "runtimeConfig": { "gcsOutputDirectory": "dummy_root" diff --git a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_loop_parameter.json b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_loop_parameter.json index cc43a2e3e4a..27e2e4d80d8 100644 --- a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_loop_parameter.json +++ b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_loop_parameter.json @@ -191,7 +191,7 @@ } }, "schemaVersion": "2.0.0", - "sdkVersion": 
"kfp-1.6.4" + "sdkVersion": "kfp-1.6.6" }, "runtimeConfig": { "gcsOutputDirectory": "dummy_root", diff --git a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_loop_static.json b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_loop_static.json index dab99349378..e803dfd2ab4 100644 --- a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_loop_static.json +++ b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_loop_static.json @@ -174,7 +174,7 @@ } }, "schemaVersion": "2.0.0", - "sdkVersion": "kfp-1.6.4" + "sdkVersion": "kfp-1.6.6" }, "runtimeConfig": { "gcsOutputDirectory": "dummy_root" diff --git a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_loops_and_conditions.json b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_loops_and_conditions.json index 8a597cefcab..e5237831a2f 100644 --- a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_loops_and_conditions.json +++ b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_loops_and_conditions.json @@ -678,7 +678,7 @@ } }, "schemaVersion": "2.0.0", - "sdkVersion": "kfp-1.6.4" + "sdkVersion": "kfp-1.6.6" }, "runtimeConfig": { "gcsOutputDirectory": "dummy_root", diff --git a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_metrics_outputs.json b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_metrics_outputs.json index 867a8c07dfb..4e0e21ccea5 100644 --- a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_metrics_outputs.json +++ b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_metrics_outputs.json @@ -87,7 +87,7 @@ "sh", "-ec", "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n", - "\nimport json\nimport inspect\nfrom typing import *\n\n# Copyright 2021 The Kubeflow Authors\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Classes for input/output types in KFP SDK.\n\nThese are only compatible with v2 Pipelines.\n\"\"\"\n\nimport os\nfrom typing import Dict, Generic, List, Optional, Type, TypeVar, Union\n\n\n_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'\n_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'\n_S3_LOCAL_MOUNT_PREFIX = '/s3/'\n\n\nclass Artifact(object):\n \"\"\"Generic Artifact class.\n\n This class is meant to represent the metadata around an input or output\n machine-learning Artifact. 
Artifacts have URIs, which can either be a location\n on disk (or Cloud storage) or some other resource identifier such as\n an API resource name.\n\n Artifacts carry a `metadata` field, which is a dictionary for storing\n metadata related to this artifact.\n \"\"\"\n TYPE_NAME = 'system.Artifact'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n \"\"\"Initializes the Artifact with the given name, URI and metadata.\"\"\"\n self.uri = uri or ''\n self.name = name or ''\n self.metadata = metadata or {}\n\n @property\n def path(self):\n return self._get_path()\n\n @path.setter\n def path(self, path):\n self._set_path(path)\n\n def _get_path(self) -> Optional[str]:\n if self.uri.startswith('gs://'):\n return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]\n elif self.uri.startswith('minio://'):\n return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]\n elif self.uri.startswith('s3://'):\n return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]\n return None\n\n def _set_path(self, path):\n if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):\n path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):\n path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):\n path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]\n self.uri = path\n\n\nclass Model(Artifact):\n \"\"\"An artifact representing an ML Model.\"\"\"\n TYPE_NAME = 'system.Model'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n @property\n def framework(self) -> str:\n return self._get_framework()\n\n def _get_framework(self) -> str:\n return self.metadata.get('framework', '')\n\n @framework.setter\n def framework(self, framework: str):\n self._set_framework(framework)\n\n def _set_framework(self, framework: str):\n self.metadata['framework'] = framework\n\n\nclass Dataset(Artifact):\n \"\"\"An artifact representing an ML Dataset.\"\"\"\n TYPE_NAME = 'system.Dataset'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Metrics(Artifact):\n \"\"\"Represent a simple base Artifact type to store key-value scalar metrics.\"\"\"\n TYPE_NAME = 'system.Metrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_metric(self, metric: str, value: float):\n \"\"\"Sets a custom scalar metric.\n\n Args:\n metric: Metric key\n value: Value of the metric.\n \"\"\"\n self.metadata[metric] = value\n\n\nclass ClassificationMetrics(Artifact):\n \"\"\"Represents Artifact class to store Classification Metrics.\"\"\"\n TYPE_NAME = 'system.ClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):\n \"\"\"Logs a single data point in the ROC Curve.\n\n Args:\n fpr: False positive rate value of the data point.\n tpr: True positive rate value of the data point.\n threshold: Threshold value for the data point.\n \"\"\"\n\n roc_reading = {\n 'confidenceThreshold': threshold,\n 'recall': tpr,\n 
'falsePositiveRate': fpr\n }\n if 'confidenceMetrics' not in self.metadata.keys():\n self.metadata['confidenceMetrics'] = []\n\n self.metadata['confidenceMetrics'].append(roc_reading)\n\n def log_roc_curve(self, fpr: List[float], tpr: List[float],\n threshold: List[float]):\n \"\"\"Logs an ROC curve.\n\n The list length of fpr, tpr and threshold must be the same.\n\n Args:\n fpr: List of false positive rate values.\n tpr: List of true positive rate values.\n threshold: List of threshold values.\n \"\"\"\n if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(tpr) != len(\n threshold):\n raise ValueError('Length of fpr, tpr and threshold must be the same. '\n 'Got lengths {}, {} and {} respectively.'.format(\n len(fpr), len(tpr), len(threshold)))\n\n for i in range(len(fpr)):\n self.log_roc_data_point(fpr=fpr[i], tpr=tpr[i], threshold=threshold[i])\n\n def set_confusion_matrix_categories(self, categories: List[str]):\n \"\"\"Stores confusion matrix categories.\n\n Args:\n categories: List of strings specifying the categories.\n \"\"\"\n\n self._categories = []\n annotation_specs = []\n for category in categories:\n annotation_spec = {'displayName': category}\n self._categories.append(category)\n annotation_specs.append(annotation_spec)\n\n self._matrix = []\n for row in range(len(self._categories)):\n self._matrix.append({'row': [0] * len(self._categories)})\n\n self._confusion_matrix = {}\n self._confusion_matrix['annotationSpecs'] = annotation_specs\n self._confusion_matrix['rows'] = self._matrix\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_row(self, row_category: str, row: List[float]):\n \"\"\"Logs a confusion matrix row.\n\n Args:\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n\n Raises:\n ValueError: If row_category is not in the list of categories\n set in set_categories call.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if len(row) != len(self._categories):\n raise ValueError('Invalid row. Expected size: {} got: {}'.\\\n format(len(self._categories), len(row)))\n\n self._matrix[self._categories.index(row_category)] = {'row': row}\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_cell(self, row_category: str, col_category: str,\n value: int):\n \"\"\"Logs a cell in the confusion matrix.\n\n Args:\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n\n Raises:\n ValueError: If row_category or col_category is not in the list of\n categories set in set_categories.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if col_category not in self._categories:\n raise ValueError('Invalid category: {} passed. 
Expected one of: {}'.\\\n format(row_category, self._categories))\n\n self._matrix[self._categories.index(row_category)]['row'][\n self._categories.index(col_category)] = value\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix(self, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Logs a confusion matrix.\n\n Args:\n categories: List of the category names.\n matrix: Complete confusion matrix.\n\n Raises:\n ValueError: Length of categories does not match number of rows or columns.\n \"\"\"\n self.set_confusion_matrix_categories(categories)\n\n if len(matrix) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n for index in range(len(categories)):\n if len(matrix[index]) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n self.log_confusion_matrix_row(categories[index], matrix[index])\n\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n\nclass SlicedClassificationMetrics(Artifact):\n \"\"\"Metrics class representing Sliced Classification Metrics.\n\n Similar to ClassificationMetrics clients using this class are expected to use\n log methods of the class to log metrics with the difference being each log\n method takes a slice to associate the ClassificationMetrics.\n\n \"\"\"\n\n TYPE_NAME = 'system.SlicedClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def _upsert_classification_metrics_for_slice(self, slice: str):\n \"\"\"Upserts the classification metrics instance for a slice.\"\"\"\n if slice not in self._sliced_metrics:\n self._sliced_metrics[slice] = ClassificationMetrics()\n\n def _update_metadata(self, slice: str):\n \"\"\"Updates metadata to adhere to the metrics schema.\"\"\"\n self.metadata = {}\n self.metadata['evaluationSlices'] = []\n for slice in self._sliced_metrics.keys():\n slice_metrics = {\n 'slice': slice,\n 'sliceClassificationMetrics': self._sliced_metrics[slice].metadata\n }\n self.metadata['evaluationSlices'].append(slice_metrics)\n\n def log_roc_reading(self, slice: str, threshold: float, tpr: float,\n fpr: float):\n \"\"\"Logs a single data point in the ROC Curve of a slice.\n\n Args:\n slice: String representing slice label.\n threshold: Thresold value for the data point.\n tpr: True positive rate value of the data point.\n fpr: False positive rate value of the data point.\n \"\"\"\n\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_roc_reading(threshold, tpr, fpr)\n self._update_metadata(slice)\n\n def load_roc_readings(self, slice: str, readings: List[List[float]]):\n \"\"\"Supports bulk loading ROC Curve readings for a slice.\n\n Args:\n slice: String representing slice label.\n readings: A 2-D list providing ROC Curve data points.\n The expected order of the data points is: threshold,\n true_positive_rate, false_positive_rate.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].load_roc_readings(readings)\n self._update_metadata(slice)\n\n def set_confusion_matrix_categories(self, slice: str, categories: List[str]):\n \"\"\"Stores confusion matrix categories for a slice..\n\n Categories are stored in the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n categories: 
List of strings specifying the categories.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].set_confusion_matrix_categories(categories)\n self._update_metadata(slice)\n\n def log_confusion_matrix_row(self, slice: str, row_category: str,\n row: List[int]):\n \"\"\"Logs a confusion matrix row for a slice.\n\n Row is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)\n self._update_metadata(slice)\n\n def log_confusion_matrix_cell(self, slice: str, row_category: str,\n col_category: str, value: int):\n \"\"\"Logs a confusion matrix cell for a slice..\n\n Cell is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(\n row_category, col_category, value)\n self._update_metadata(slice)\n\n def load_confusion_matrix(self, slice: str, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Supports bulk loading the whole confusion matrix for a slice.\n\n Args:\n slice: String representing slice label.\n categories: List of the category names.\n matrix: Complete confusion matrix.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(categories, matrix)\n self._update_metadata(slice)\n\n\nT = TypeVar('T')\n\n\nclass InputAnnotation():\n \"\"\"Marker type for input artifacts.\"\"\"\n pass\n\n\n\nclass OutputAnnotation():\n \"\"\"Marker type for output artifacts.\"\"\"\n pass\n\n\n# TODO: Use typing.Annotated instead of this hack.\n# With typing.Annotated (Python 3.9+ or typing_extensions package), the\n# following would look like:\n# Input = typing.Annotated[T, InputAnnotation]\n# Output = typing.Annotated[T, OutputAnnotation]\n\n\n# Input represents an Input artifact of type T.\nInput = Union[T, InputAnnotation]\n\n# Output represents an Output artifact of type T.\nOutput = Union[T, OutputAnnotation]\n\n\ndef is_artifact_annotation(typ) -> bool:\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [InputAnnotation, OutputAnnotation]\n\n if not hasattr(typ, '__origin__'):\n return False\n\n\n if typ.__origin__ != Union and type(typ.__origin__) != type(Union):\n return False\n\n\n if not hasattr(typ, '__args__') or len(typ.__args__) != 2:\n return False\n\n if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:\n return False\n\n return True\n\ndef is_input_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Input[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation\n\n return typ.__args__[1] == InputAnnotation\n\ndef is_output_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Output[T].\"\"\"\n if not is_artifact_annotation(typ):\n 
return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation\n\n return typ.__args__[1] == OutputAnnotation\n\ndef get_io_artifact_class(typ):\n if not is_artifact_annotation(typ):\n return None\n if typ == Input or typ == Output:\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[1]\n\n return typ.__args__[0]\n\ndef get_io_artifact_annotation(typ):\n if not is_artifact_annotation(typ):\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[2]\n\n return typ.__args__[1]\n\n\n\n_SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = {\n x.TYPE_NAME: x\n for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]\n}\n\n\ndef create_runtime_artifact(runtime_artifact: Dict) -> Artifact:\n \"\"\"Creates an Artifact instance from the specified RuntimeArtifact.\n\n Args:\n runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.\n \"\"\"\n schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')\n\n artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)\n if not artifact_type:\n artifact_type = Artifact\n return artifact_type(\n uri=runtime_artifact.get('uri', ''),\n name=runtime_artifact.get('name', ''),\n metadata=runtime_artifact.get('metadata', {}),\n )\n\nclass InputPath:\n '''When creating component from function, :class:`.InputPath` should be used as function parameter annotation to tell the system to pass the *data file path* to the function instead of passing the actual data.'''\n def __init__(self, type=None):\n self.type = type\n\nclass OutputPath:\n '''When creating component from function, :class:`.OutputPath` should be used as function parameter annotation to tell the system that the function wants to output data by writing it into a file with the given path instead of returning the data from the function.'''\n def __init__(self, type=None):\n self.type = type\n\nclass Executor():\n \"\"\"Executor executes v2-based Python function components.\"\"\"\n\n def __init__(self, executor_input: Dict, function_to_execute: Callable):\n self._func = function_to_execute\n self._input = executor_input\n self._input_artifacts: Dict[str, Artifact] = {}\n self._output_artifacts: Dict[str, Artifact] = {}\n\n for name, artifacts in self._input.get('inputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._input_artifacts[name] = self._make_input_artifact(\n artifacts_list[0])\n\n for name, artifacts in self._input.get('outputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._output_artifacts[name] = self._make_output_artifact(\n artifacts_list[0])\n\n self._return_annotation = inspect.signature(self._func).return_annotation\n self._executor_output = {}\n\n @classmethod\n def _make_input_artifact(cls, runtime_artifact: Dict):\n return create_runtime_artifact(runtime_artifact)\n\n @classmethod\n def _make_output_artifact(cls, runtime_artifact: Dict):\n import os\n artifact = create_runtime_artifact(runtime_artifact)\n os.makedirs(os.path.dirname(artifact.path), exist_ok=True)\n return artifact\n\n def _get_input_artifact(self, name: str):\n return self._input_artifacts.get(name)\n\n def _get_output_artifact(self, name: str):\n return 
self._output_artifacts.get(name)\n\n def _get_input_parameter_value(self, parameter_name: str, parameter_type: Any):\n parameter = self._input.get('inputs', {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n if parameter.get('stringValue'):\n if parameter_type == str:\n return parameter['stringValue']\n elif parameter_type == bool:\n # Use `.lower()` so it can also handle 'True' and 'False' (resulted from\n # `str(True)` and `str(False)`, respectively.\n return json.loads(parameter['stringValue'].lower())\n else:\n return json.loads(parameter['stringValue'])\n elif parameter.get('intValue'):\n return int(parameter['intValue'])\n elif parameter.get('doubleValue'):\n return float(parameter['doubleValue'])\n\n def _get_output_parameter_path(self, parameter_name: str):\n parameter_name = self._maybe_strip_path_suffix(parameter_name)\n parameter = self._input.get('outputs',\n {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n import os\n path = parameter.get('outputFile', None)\n if path:\n os.makedirs(os.path.dirname(path), exist_ok=True)\n return path\n\n def _get_output_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n output_artifact = self._output_artifacts.get(artifact_name)\n if not output_artifact:\n raise ValueError(\n 'Failed to get output artifact path for artifact name {}'.format(\n artifact_name))\n return output_artifact.path\n\n def _get_input_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n input_artifact = self._input_artifacts.get(artifact_name)\n if not input_artifact:\n raise ValueError(\n 'Failed to get input artifact path for artifact name {}'.format(\n artifact_name))\n return input_artifact.path\n\n def _write_output_parameter_value(self, name: str,\n value: Union[str, int, float, bool, dict,\n list, Dict, List]):\n if type(value) == str:\n output = {'stringValue': value}\n elif type(value) == int:\n output = {'intValue': value}\n elif type(value) == float:\n output = {'doubleValue': value}\n else:\n # For bool, list, dict, List, Dict, json serialize the value.\n output = {'stringValue': json.dumps(value)}\n\n if not self._executor_output.get('parameters'):\n self._executor_output['parameters'] = {}\n\n self._executor_output['parameters'][name] = output\n\n def _write_output_artifact_payload(self, name: str, value: Any):\n path = self._get_output_artifact_path(name)\n with open(path, 'w') as f:\n f.write(str(value))\n\n # TODO: extract to a util\n @classmethod\n def _get_short_type_name(cls, type_name: str) -> str:\n \"\"\"Extracts the short form type name.\n\n This method is used for looking up serializer for a given type.\n\n For example:\n typing.List -> List\n typing.List[int] -> List\n typing.Dict[str, str] -> Dict\n List -> List\n str -> str\n\n Args:\n type_name: The original type name.\n\n Returns:\n The short form type name or the original name if pattern doesn't match.\n \"\"\"\n import re\n match = re.match('(typing\\.)?(?P\\w+)(?:\\[.+\\])?', type_name)\n if match:\n return match.group('type')\n else:\n return type_name\n\n # TODO: merge with type_utils.is_parameter_type\n @classmethod\n def _is_parameter(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return annotation in [str, int, float, bool, dict, list]\n\n # Annotation could be, for instance `typing.Dict[str, str]`, etc.\n return cls._get_short_type_name(str(annotation)) in ['Dict', 
'List']\n\n @classmethod\n def _is_artifact(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, Artifact)\n return False\n\n @classmethod\n def _is_named_tuple(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, tuple) and hasattr(\n annotation, '_fields') and hasattr(annotation, '__annotations__')\n return False\n\n def _handle_single_return_value(self, output_name: str, annotation_type: Any,\n return_value: Any):\n if self._is_parameter(annotation_type):\n if type(return_value) != annotation_type:\n raise ValueError(\n 'Function `{}` returned value of type {}; want type {}'.format(\n self._func.__name__, type(return_value), annotation_type))\n self._write_output_parameter_value(output_name, return_value)\n elif self._is_artifact(annotation_type):\n self._write_output_artifact_payload(output_name, return_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. Must be one of `str`, `int`, `float`, or a'\n ' subclass of `Artifact`'.format(annotation_type))\n\n def _write_executor_output(self, func_output: Optional[Any] = None):\n if self._output_artifacts:\n self._executor_output['artifacts'] = {}\n\n for name, artifact in self._output_artifacts.items():\n runtime_artifact = {\n 'name': artifact.name,\n 'uri': artifact.uri,\n 'metadata': artifact.metadata,\n }\n artifacts_list = {'artifacts': [runtime_artifact]}\n\n self._executor_output['artifacts'][name] = artifacts_list\n\n if func_output is not None:\n if self._is_parameter(self._return_annotation) or self._is_artifact(\n self._return_annotation):\n # Note: single output is named `Output` in component.yaml.\n self._handle_single_return_value('Output', self._return_annotation,\n func_output)\n elif self._is_named_tuple(self._return_annotation):\n if len(self._return_annotation._fields) != len(func_output):\n raise RuntimeError(\n 'Expected {} return values from function `{}`, got {}'.format(\n len(self._return_annotation._fields), self._func.__name__,\n len(func_output)))\n for i in range(len(self._return_annotation._fields)):\n field = self._return_annotation._fields[i]\n field_type = self._return_annotation.__annotations__[field]\n if type(func_output) == tuple:\n field_value = func_output[i]\n else:\n field_value = getattr(func_output, field)\n self._handle_single_return_value(field, field_type, field_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. 
Must be one of `str`, `int`, `float`, a'\n ' subclass of `Artifact`, or a NamedTuple collection of these types.'\n .format(self._return_annotation))\n\n import os\n os.makedirs(\n os.path.dirname(self._input['outputs']['outputFile']), exist_ok=True)\n with open(self._input['outputs']['outputFile'], 'w') as f:\n f.write(json.dumps(self._executor_output))\n\n def _maybe_strip_path_suffix(self, name) -> str:\n if name.endswith('_path'):\n name = name[0:-len('_path')]\n if name.endswith('_file'):\n name = name[0:-len('_file')]\n return name\n\n def execute(self):\n annotations = inspect.getfullargspec(self._func).annotations\n\n # Function arguments.\n func_kwargs = {}\n\n for k, v in annotations.items():\n if k == 'return':\n continue\n\n if self._is_parameter(v):\n func_kwargs[k] = self._get_input_parameter_value(k, v)\n\n if is_artifact_annotation(v):\n if is_input_artifact(v):\n func_kwargs[k] = self._get_input_artifact(k)\n if is_output_artifact(v):\n func_kwargs[k] = self._get_output_artifact(k)\n\n elif isinstance(v, OutputPath):\n if self._is_parameter(v.type):\n func_kwargs[k] = self._get_output_parameter_path(k)\n else:\n func_kwargs[k] = self._get_output_artifact_path(k)\n elif isinstance(v, InputPath):\n func_kwargs[k] = self._get_input_artifact_path(k)\n\n result = self._func(**func_kwargs)\n self._write_executor_output(result)\n\n\ndef output_metrics(metrics: Output[Metrics]):\n \"\"\"Dummy component that outputs metrics with a random accuracy.\"\"\"\n import random\n result = random.randint(0, 100)\n metrics.log_metric('accuracy', result)\n\n\ndef executor_main():\n import argparse\n import json\n\n parser = argparse.ArgumentParser(description='Process some integers.')\n parser.add_argument('--executor_input', type=str)\n parser.add_argument('--function_to_execute', type=str)\n\n args, _ = parser.parse_known_args()\n executor_input = json.loads(args.executor_input)\n function_to_execute = globals()[args.function_to_execute]\n\n executor = Executor(executor_input=executor_input,\n function_to_execute=function_to_execute)\n\n executor.execute()\n\n\nif __name__ == '__main__':\n executor_main()\n" + "\nimport json\nimport inspect\nfrom typing import *\n\n# Copyright 2021 The Kubeflow Authors\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Classes for input/output types in KFP SDK.\n\nThese are only compatible with v2 Pipelines.\n\"\"\"\n\nimport os\nfrom typing import Dict, Generic, List, Optional, Type, TypeVar, Union\n\n_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'\n_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'\n_S3_LOCAL_MOUNT_PREFIX = '/s3/'\n\n\nclass Artifact(object):\n \"\"\"Generic Artifact class.\n\n This class is meant to represent the metadata around an input or output\n machine-learning Artifact. 
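For example, the HTML and Markdown artifact types introduced by this change are consumed like any other output artifact: the component writes its payload to the artifact's local path (an illustrative sketch; render_report and its markup are invented for this example):

    def render_report(report: Output[HTML]):
        # The file written here is uploaded as the component's HTML artifact.
        with open(report.path, 'w') as f:
            f.write('<h1>Training report</h1>')
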
Artifacts have URIs, which can either be a location\n on disk (or Cloud storage) or some other resource identifier such as\n an API resource name.\n\n Artifacts carry a `metadata` field, which is a dictionary for storing\n metadata related to this artifact.\n \"\"\"\n TYPE_NAME = 'system.Artifact'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n \"\"\"Initializes the Artifact with the given name, URI and metadata.\"\"\"\n self.uri = uri or ''\n self.name = name or ''\n self.metadata = metadata or {}\n\n @property\n def path(self):\n return self._get_path()\n\n @path.setter\n def path(self, path):\n self._set_path(path)\n\n def _get_path(self) -> Optional[str]:\n if self.uri.startswith('gs://'):\n return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]\n elif self.uri.startswith('minio://'):\n return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]\n elif self.uri.startswith('s3://'):\n return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]\n return None\n\n def _set_path(self, path):\n if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):\n path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):\n path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):\n path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]\n self.uri = path\n\n\nclass Model(Artifact):\n \"\"\"An artifact representing an ML Model.\"\"\"\n TYPE_NAME = 'system.Model'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n @property\n def framework(self) -> str:\n return self._get_framework()\n\n def _get_framework(self) -> str:\n return self.metadata.get('framework', '')\n\n @framework.setter\n def framework(self, framework: str):\n self._set_framework(framework)\n\n def _set_framework(self, framework: str):\n self.metadata['framework'] = framework\n\n\nclass Dataset(Artifact):\n \"\"\"An artifact representing an ML Dataset.\"\"\"\n TYPE_NAME = 'system.Dataset'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Metrics(Artifact):\n \"\"\"Represent a simple base Artifact type to store key-value scalar metrics.\"\"\"\n TYPE_NAME = 'system.Metrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_metric(self, metric: str, value: float):\n \"\"\"Sets a custom scalar metric.\n\n Args:\n metric: Metric key\n value: Value of the metric.\n \"\"\"\n self.metadata[metric] = value\n\n\nclass ClassificationMetrics(Artifact):\n \"\"\"Represents Artifact class to store Classification Metrics.\"\"\"\n TYPE_NAME = 'system.ClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):\n \"\"\"Logs a single data point in the ROC Curve.\n\n Args:\n fpr: False positive rate value of the data point.\n tpr: True positive rate value of the data point.\n threshold: Threshold value for the data point.\n \"\"\"\n\n roc_reading = {\n 'confidenceThreshold': threshold,\n 'recall': tpr,\n 
'falsePositiveRate': fpr\n }\n if 'confidenceMetrics' not in self.metadata.keys():\n self.metadata['confidenceMetrics'] = []\n\n self.metadata['confidenceMetrics'].append(roc_reading)\n\n def log_roc_curve(self, fpr: List[float], tpr: List[float],\n threshold: List[float]):\n \"\"\"Logs an ROC curve.\n\n The list length of fpr, tpr and threshold must be the same.\n\n Args:\n fpr: List of false positive rate values.\n tpr: List of true positive rate values.\n threshold: List of threshold values.\n \"\"\"\n if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(tpr) != len(\n threshold):\n raise ValueError('Length of fpr, tpr and threshold must be the same. '\n 'Got lengths {}, {} and {} respectively.'.format(\n len(fpr), len(tpr), len(threshold)))\n\n for i in range(len(fpr)):\n self.log_roc_data_point(fpr=fpr[i], tpr=tpr[i], threshold=threshold[i])\n\n def set_confusion_matrix_categories(self, categories: List[str]):\n \"\"\"Stores confusion matrix categories.\n\n Args:\n categories: List of strings specifying the categories.\n \"\"\"\n\n self._categories = []\n annotation_specs = []\n for category in categories:\n annotation_spec = {'displayName': category}\n self._categories.append(category)\n annotation_specs.append(annotation_spec)\n\n self._matrix = []\n for row in range(len(self._categories)):\n self._matrix.append({'row': [0] * len(self._categories)})\n\n self._confusion_matrix = {}\n self._confusion_matrix['annotationSpecs'] = annotation_specs\n self._confusion_matrix['rows'] = self._matrix\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_row(self, row_category: str, row: List[float]):\n \"\"\"Logs a confusion matrix row.\n\n Args:\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n\n Raises:\n ValueError: If row_category is not in the list of categories\n set in set_categories call.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if len(row) != len(self._categories):\n raise ValueError('Invalid row. Expected size: {} got: {}'.\\\n format(len(self._categories), len(row)))\n\n self._matrix[self._categories.index(row_category)] = {'row': row}\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_cell(self, row_category: str, col_category: str,\n value: int):\n \"\"\"Logs a cell in the confusion matrix.\n\n Args:\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n\n Raises:\n ValueError: If row_category or col_category is not in the list of\n categories set in set_categories.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if col_category not in self._categories:\n raise ValueError('Invalid category: {} passed. 
Expected one of: {}'.\\\n format(row_category, self._categories))\n\n self._matrix[self._categories.index(row_category)]['row'][\n self._categories.index(col_category)] = value\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix(self, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Logs a confusion matrix.\n\n Args:\n categories: List of the category names.\n matrix: Complete confusion matrix.\n\n Raises:\n ValueError: Length of categories does not match number of rows or columns.\n \"\"\"\n self.set_confusion_matrix_categories(categories)\n\n if len(matrix) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n for index in range(len(categories)):\n if len(matrix[index]) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n self.log_confusion_matrix_row(categories[index], matrix[index])\n\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n\nclass SlicedClassificationMetrics(Artifact):\n \"\"\"Metrics class representing Sliced Classification Metrics.\n\n Similar to ClassificationMetrics clients using this class are expected to use\n log methods of the class to log metrics with the difference being each log\n method takes a slice to associate the ClassificationMetrics.\n\n \"\"\"\n\n TYPE_NAME = 'system.SlicedClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def _upsert_classification_metrics_for_slice(self, slice: str):\n \"\"\"Upserts the classification metrics instance for a slice.\"\"\"\n if slice not in self._sliced_metrics:\n self._sliced_metrics[slice] = ClassificationMetrics()\n\n def _update_metadata(self, slice: str):\n \"\"\"Updates metadata to adhere to the metrics schema.\"\"\"\n self.metadata = {}\n self.metadata['evaluationSlices'] = []\n for slice in self._sliced_metrics.keys():\n slice_metrics = {\n 'slice': slice,\n 'sliceClassificationMetrics': self._sliced_metrics[slice].metadata\n }\n self.metadata['evaluationSlices'].append(slice_metrics)\n\n def log_roc_reading(self, slice: str, threshold: float, tpr: float,\n fpr: float):\n \"\"\"Logs a single data point in the ROC Curve of a slice.\n\n Args:\n slice: String representing slice label.\n threshold: Thresold value for the data point.\n tpr: True positive rate value of the data point.\n fpr: False positive rate value of the data point.\n \"\"\"\n\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_roc_reading(threshold, tpr, fpr)\n self._update_metadata(slice)\n\n def load_roc_readings(self, slice: str, readings: List[List[float]]):\n \"\"\"Supports bulk loading ROC Curve readings for a slice.\n\n Args:\n slice: String representing slice label.\n readings: A 2-D list providing ROC Curve data points.\n The expected order of the data points is: threshold,\n true_positive_rate, false_positive_rate.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].load_roc_readings(readings)\n self._update_metadata(slice)\n\n def set_confusion_matrix_categories(self, slice: str, categories: List[str]):\n \"\"\"Stores confusion matrix categories for a slice..\n\n Categories are stored in the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n categories: 
List of strings specifying the categories.\n    \"\"\"\n    self._upsert_classification_metrics_for_slice(slice)\n    self._sliced_metrics[slice].set_confusion_matrix_categories(categories)\n    self._update_metadata(slice)\n\n  def log_confusion_matrix_row(self, slice: str, row_category: str,\n                               row: List[int]):\n    \"\"\"Logs a confusion matrix row for a slice.\n\n    Row is updated on the internal metrics_utils.ConfusionMatrix\n    instance of the slice.\n\n    Args:\n      slice: String representing slice label.\n      row_category: Category to which the row belongs.\n      row: List of integers specifying the values for the row.\n    \"\"\"\n    self._upsert_classification_metrics_for_slice(slice)\n    self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)\n    self._update_metadata(slice)\n\n  def log_confusion_matrix_cell(self, slice: str, row_category: str,\n                                col_category: str, value: int):\n    \"\"\"Logs a confusion matrix cell for a slice.\n\n    Cell is updated on the internal metrics_utils.ConfusionMatrix\n    instance of the slice.\n\n    Args:\n      slice: String representing slice label.\n      row_category: String representing the name of the row category.\n      col_category: String representing the name of the column category.\n      value: Int value of the cell.\n    \"\"\"\n    self._upsert_classification_metrics_for_slice(slice)\n    self._sliced_metrics[slice].log_confusion_matrix_cell(\n        row_category, col_category, value)\n    self._update_metadata(slice)\n\n  def load_confusion_matrix(self, slice: str, categories: List[str],\n                            matrix: List[List[int]]):\n    \"\"\"Supports bulk loading the whole confusion matrix for a slice.\n\n    Args:\n      slice: String representing slice label.\n      categories: List of the category names.\n      matrix: Complete confusion matrix.\n    \"\"\"\n    self._upsert_classification_metrics_for_slice(slice)\n    self._sliced_metrics[slice].log_confusion_matrix(categories, matrix)\n    self._update_metadata(slice)\n\n\nclass HTML(Artifact):\n  \"\"\"An artifact representing an HTML file.\"\"\"\n  TYPE_NAME = 'system.HTML'\n\n  def __init__(self,\n               name: Optional[str] = None,\n               uri: Optional[str] = None,\n               metadata: Optional[Dict] = None):\n    super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Markdown(Artifact):\n  \"\"\"An artifact representing a Markdown file.\"\"\"\n  TYPE_NAME = 'system.Markdown'\n\n  def __init__(self,\n               name: Optional[str] = None,\n               uri: Optional[str] = None,\n               metadata: Optional[Dict] = None):\n    super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nT = TypeVar('T')\n\n\nclass InputAnnotation():\n  \"\"\"Marker type for input artifacts.\"\"\"\n  pass\n\n\nclass OutputAnnotation():\n  \"\"\"Marker type for output artifacts.\"\"\"\n  pass\n\n\n# TODO: Use typing.Annotated instead of this hack.\n# With typing.Annotated (Python 3.9+ or typing_extensions package), the\n# following would look like:\n# Input = typing.Annotated[T, InputAnnotation]\n# Output = typing.Annotated[T, OutputAnnotation]\n\n# Input represents an Input artifact of type T.\nInput = Union[T, InputAnnotation]\n\n# Output represents an Output artifact of type T.\nOutput = Union[T, OutputAnnotation]\n\n\ndef is_artifact_annotation(typ) -> bool:\n  if hasattr(typ, '_subs_tree'):  # Python 3.6\n    subs_tree = typ._subs_tree()\n    return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [\n        InputAnnotation, OutputAnnotation\n    ]\n\n  if not hasattr(typ, '__origin__'):\n    return False\n\n  if typ.__origin__ != Union and type(typ.__origin__) != type(Union):\n    return False\n\n  if not hasattr(typ, '__args__') or len(typ.__args__) != 2:\n    return 
False\n\n if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:\n return False\n\n return True\n\n\ndef is_input_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Input[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation\n\n return typ.__args__[1] == InputAnnotation\n\n\ndef is_output_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Output[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation\n\n return typ.__args__[1] == OutputAnnotation\n\n\ndef get_io_artifact_class(typ):\n if not is_artifact_annotation(typ):\n return None\n if typ == Input or typ == Output:\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[1]\n\n return typ.__args__[0]\n\n\ndef get_io_artifact_annotation(typ):\n if not is_artifact_annotation(typ):\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[2]\n\n return typ.__args__[1]\n\n\n_SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = {\n x.TYPE_NAME: x\n for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]\n}\n\n\ndef create_runtime_artifact(runtime_artifact: Dict) -> Artifact:\n \"\"\"Creates an Artifact instance from the specified RuntimeArtifact.\n\n Args:\n runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.\n \"\"\"\n schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')\n\n artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)\n if not artifact_type:\n artifact_type = Artifact\n return artifact_type(\n uri=runtime_artifact.get('uri', ''),\n name=runtime_artifact.get('name', ''),\n metadata=runtime_artifact.get('metadata', {}),\n )\n\nclass InputPath:\n '''When creating component from function, :class:`.InputPath` should be used as function parameter annotation to tell the system to pass the *data file path* to the function instead of passing the actual data.'''\n def __init__(self, type=None):\n self.type = type\n\nclass OutputPath:\n '''When creating component from function, :class:`.OutputPath` should be used as function parameter annotation to tell the system that the function wants to output data by writing it into a file with the given path instead of returning the data from the function.'''\n def __init__(self, type=None):\n self.type = type\n\nclass Executor():\n \"\"\"Executor executes v2-based Python function components.\"\"\"\n\n def __init__(self, executor_input: Dict, function_to_execute: Callable):\n self._func = function_to_execute\n self._input = executor_input\n self._input_artifacts: Dict[str, Artifact] = {}\n self._output_artifacts: Dict[str, Artifact] = {}\n\n for name, artifacts in self._input.get('inputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._input_artifacts[name] = self._make_input_artifact(\n artifacts_list[0])\n\n for name, artifacts in self._input.get('outputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._output_artifacts[name] = self._make_output_artifact(\n artifacts_list[0])\n\n self._return_annotation = 
inspect.signature(self._func).return_annotation\n self._executor_output = {}\n\n @classmethod\n def _make_input_artifact(cls, runtime_artifact: Dict):\n return create_runtime_artifact(runtime_artifact)\n\n @classmethod\n def _make_output_artifact(cls, runtime_artifact: Dict):\n import os\n artifact = create_runtime_artifact(runtime_artifact)\n os.makedirs(os.path.dirname(artifact.path), exist_ok=True)\n return artifact\n\n def _get_input_artifact(self, name: str):\n return self._input_artifacts.get(name)\n\n def _get_output_artifact(self, name: str):\n return self._output_artifacts.get(name)\n\n def _get_input_parameter_value(self, parameter_name: str, parameter_type: Any):\n parameter = self._input.get('inputs', {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n if parameter.get('stringValue'):\n if parameter_type == str:\n return parameter['stringValue']\n elif parameter_type == bool:\n # Use `.lower()` so it can also handle 'True' and 'False' (resulted from\n # `str(True)` and `str(False)`, respectively.\n return json.loads(parameter['stringValue'].lower())\n else:\n return json.loads(parameter['stringValue'])\n elif parameter.get('intValue'):\n return int(parameter['intValue'])\n elif parameter.get('doubleValue'):\n return float(parameter['doubleValue'])\n\n def _get_output_parameter_path(self, parameter_name: str):\n parameter_name = self._maybe_strip_path_suffix(parameter_name)\n parameter = self._input.get('outputs',\n {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n import os\n path = parameter.get('outputFile', None)\n if path:\n os.makedirs(os.path.dirname(path), exist_ok=True)\n return path\n\n def _get_output_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n output_artifact = self._output_artifacts.get(artifact_name)\n if not output_artifact:\n raise ValueError(\n 'Failed to get output artifact path for artifact name {}'.format(\n artifact_name))\n return output_artifact.path\n\n def _get_input_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n input_artifact = self._input_artifacts.get(artifact_name)\n if not input_artifact:\n raise ValueError(\n 'Failed to get input artifact path for artifact name {}'.format(\n artifact_name))\n return input_artifact.path\n\n def _write_output_parameter_value(self, name: str,\n value: Union[str, int, float, bool, dict,\n list, Dict, List]):\n if type(value) == str:\n output = {'stringValue': value}\n elif type(value) == int:\n output = {'intValue': value}\n elif type(value) == float:\n output = {'doubleValue': value}\n else:\n # For bool, list, dict, List, Dict, json serialize the value.\n output = {'stringValue': json.dumps(value)}\n\n if not self._executor_output.get('parameters'):\n self._executor_output['parameters'] = {}\n\n self._executor_output['parameters'][name] = output\n\n def _write_output_artifact_payload(self, name: str, value: Any):\n path = self._get_output_artifact_path(name)\n with open(path, 'w') as f:\n f.write(str(value))\n\n # TODO: extract to a util\n @classmethod\n def _get_short_type_name(cls, type_name: str) -> str:\n \"\"\"Extracts the short form type name.\n\n This method is used for looking up serializer for a given type.\n\n For example:\n typing.List -> List\n typing.List[int] -> List\n typing.Dict[str, str] -> Dict\n List -> List\n str -> str\n\n Args:\n type_name: The original type name.\n\n Returns:\n The 
short form type name or the original name if pattern doesn't match.\n \"\"\"\n import re\n match = re.match('(typing\\.)?(?P<type>\\w+)(?:\\[.+\\])?', type_name)\n if match:\n return match.group('type')\n else:\n return type_name\n\n # TODO: merge with type_utils.is_parameter_type\n @classmethod\n def _is_parameter(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return annotation in [str, int, float, bool, dict, list]\n\n # Annotation could be, for instance `typing.Dict[str, str]`, etc.\n return cls._get_short_type_name(str(annotation)) in ['Dict', 'List']\n\n @classmethod\n def _is_artifact(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, Artifact)\n return False\n\n @classmethod\n def _is_named_tuple(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, tuple) and hasattr(\n annotation, '_fields') and hasattr(annotation, '__annotations__')\n return False\n\n def _handle_single_return_value(self, output_name: str, annotation_type: Any,\n return_value: Any):\n if self._is_parameter(annotation_type):\n if type(return_value) != annotation_type:\n raise ValueError(\n 'Function `{}` returned value of type {}; want type {}'.format(\n self._func.__name__, type(return_value), annotation_type))\n self._write_output_parameter_value(output_name, return_value)\n elif self._is_artifact(annotation_type):\n self._write_output_artifact_payload(output_name, return_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. Must be one of `str`, `int`, `float`, or a'\n ' subclass of `Artifact`'.format(annotation_type))\n\n def _write_executor_output(self, func_output: Optional[Any] = None):\n if self._output_artifacts:\n self._executor_output['artifacts'] = {}\n\n for name, artifact in self._output_artifacts.items():\n runtime_artifact = {\n 'name': artifact.name,\n 'uri': artifact.uri,\n 'metadata': artifact.metadata,\n }\n artifacts_list = {'artifacts': [runtime_artifact]}\n\n self._executor_output['artifacts'][name] = artifacts_list\n\n if func_output is not None:\n if self._is_parameter(self._return_annotation) or self._is_artifact(\n self._return_annotation):\n # Note: single output is named `Output` in component.yaml.\n self._handle_single_return_value('Output', self._return_annotation,\n func_output)\n elif self._is_named_tuple(self._return_annotation):\n if len(self._return_annotation._fields) != len(func_output):\n raise RuntimeError(\n 'Expected {} return values from function `{}`, got {}'.format(\n len(self._return_annotation._fields), self._func.__name__,\n len(func_output)))\n for i in range(len(self._return_annotation._fields)):\n field = self._return_annotation._fields[i]\n field_type = self._return_annotation.__annotations__[field]\n if type(func_output) == tuple:\n field_value = func_output[i]\n else:\n field_value = getattr(func_output, field)\n self._handle_single_return_value(field, field_type, field_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. 
Must be one of `str`, `int`, `float`, a'\n ' subclass of `Artifact`, or a NamedTuple collection of these types.'\n .format(self._return_annotation))\n\n import os\n os.makedirs(\n os.path.dirname(self._input['outputs']['outputFile']), exist_ok=True)\n with open(self._input['outputs']['outputFile'], 'w') as f:\n f.write(json.dumps(self._executor_output))\n\n def _maybe_strip_path_suffix(self, name) -> str:\n if name.endswith('_path'):\n name = name[0:-len('_path')]\n if name.endswith('_file'):\n name = name[0:-len('_file')]\n return name\n\n def execute(self):\n annotations = inspect.getfullargspec(self._func).annotations\n\n # Function arguments.\n func_kwargs = {}\n\n for k, v in annotations.items():\n if k == 'return':\n continue\n\n if self._is_parameter(v):\n func_kwargs[k] = self._get_input_parameter_value(k, v)\n\n if is_artifact_annotation(v):\n if is_input_artifact(v):\n func_kwargs[k] = self._get_input_artifact(k)\n if is_output_artifact(v):\n func_kwargs[k] = self._get_output_artifact(k)\n\n elif isinstance(v, OutputPath):\n if self._is_parameter(v.type):\n func_kwargs[k] = self._get_output_parameter_path(k)\n else:\n func_kwargs[k] = self._get_output_artifact_path(k)\n elif isinstance(v, InputPath):\n func_kwargs[k] = self._get_input_artifact_path(k)\n\n result = self._func(**func_kwargs)\n self._write_executor_output(result)\n\n\ndef output_metrics(metrics: Output[Metrics]):\n \"\"\"Dummy component that outputs metrics with a random accuracy.\"\"\"\n import random\n result = random.randint(0, 100)\n metrics.log_metric('accuracy', result)\n\n\ndef executor_main():\n import argparse\n import json\n\n parser = argparse.ArgumentParser(description='Process some integers.')\n parser.add_argument('--executor_input', type=str)\n parser.add_argument('--function_to_execute', type=str)\n\n args, _ = parser.parse_known_args()\n executor_input = json.loads(args.executor_input)\n function_to_execute = globals()[args.function_to_execute]\n\n executor = Executor(executor_input=executor_input,\n function_to_execute=function_to_execute)\n\n executor.execute()\n\n\nif __name__ == '__main__':\n executor_main()\n" ], "image": "python:3.7" } @@ -106,7 +106,7 @@ "sh", "-ec", "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n", - "\nimport json\nimport inspect\nfrom typing import *\n\n# Copyright 2021 The Kubeflow Authors\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Classes for input/output types in KFP SDK.\n\nThese are only compatible with v2 Pipelines.\n\"\"\"\n\nimport os\nfrom typing import Dict, Generic, List, Optional, Type, TypeVar, Union\n\n\n_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'\n_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'\n_S3_LOCAL_MOUNT_PREFIX = '/s3/'\n\n\nclass Artifact(object):\n \"\"\"Generic Artifact class.\n\n This class is meant to represent the metadata around an input or output\n machine-learning Artifact. 
Artifacts have URIs, which can either be a location\n on disk (or Cloud storage) or some other resource identifier such as\n an API resource name.\n\n Artifacts carry a `metadata` field, which is a dictionary for storing\n metadata related to this artifact.\n \"\"\"\n TYPE_NAME = 'system.Artifact'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n \"\"\"Initializes the Artifact with the given name, URI and metadata.\"\"\"\n self.uri = uri or ''\n self.name = name or ''\n self.metadata = metadata or {}\n\n @property\n def path(self):\n return self._get_path()\n\n @path.setter\n def path(self, path):\n self._set_path(path)\n\n def _get_path(self) -> Optional[str]:\n if self.uri.startswith('gs://'):\n return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]\n elif self.uri.startswith('minio://'):\n return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]\n elif self.uri.startswith('s3://'):\n return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]\n return None\n\n def _set_path(self, path):\n if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):\n path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):\n path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):\n path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]\n self.uri = path\n\n\nclass Model(Artifact):\n \"\"\"An artifact representing an ML Model.\"\"\"\n TYPE_NAME = 'system.Model'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n @property\n def framework(self) -> str:\n return self._get_framework()\n\n def _get_framework(self) -> str:\n return self.metadata.get('framework', '')\n\n @framework.setter\n def framework(self, framework: str):\n self._set_framework(framework)\n\n def _set_framework(self, framework: str):\n self.metadata['framework'] = framework\n\n\nclass Dataset(Artifact):\n \"\"\"An artifact representing an ML Dataset.\"\"\"\n TYPE_NAME = 'system.Dataset'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Metrics(Artifact):\n \"\"\"Represent a simple base Artifact type to store key-value scalar metrics.\"\"\"\n TYPE_NAME = 'system.Metrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_metric(self, metric: str, value: float):\n \"\"\"Sets a custom scalar metric.\n\n Args:\n metric: Metric key\n value: Value of the metric.\n \"\"\"\n self.metadata[metric] = value\n\n\nclass ClassificationMetrics(Artifact):\n \"\"\"Represents Artifact class to store Classification Metrics.\"\"\"\n TYPE_NAME = 'system.ClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):\n \"\"\"Logs a single data point in the ROC Curve.\n\n Args:\n fpr: False positive rate value of the data point.\n tpr: True positive rate value of the data point.\n threshold: Threshold value for the data point.\n \"\"\"\n\n roc_reading = {\n 'confidenceThreshold': threshold,\n 'recall': tpr,\n 
'falsePositiveRate': fpr\n }\n if 'confidenceMetrics' not in self.metadata.keys():\n self.metadata['confidenceMetrics'] = []\n\n self.metadata['confidenceMetrics'].append(roc_reading)\n\n def log_roc_curve(self, fpr: List[float], tpr: List[float],\n threshold: List[float]):\n \"\"\"Logs an ROC curve.\n\n The list length of fpr, tpr and threshold must be the same.\n\n Args:\n fpr: List of false positive rate values.\n tpr: List of true positive rate values.\n threshold: List of threshold values.\n \"\"\"\n if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(tpr) != len(\n threshold):\n raise ValueError('Length of fpr, tpr and threshold must be the same. '\n 'Got lengths {}, {} and {} respectively.'.format(\n len(fpr), len(tpr), len(threshold)))\n\n for i in range(len(fpr)):\n self.log_roc_data_point(fpr=fpr[i], tpr=tpr[i], threshold=threshold[i])\n\n def set_confusion_matrix_categories(self, categories: List[str]):\n \"\"\"Stores confusion matrix categories.\n\n Args:\n categories: List of strings specifying the categories.\n \"\"\"\n\n self._categories = []\n annotation_specs = []\n for category in categories:\n annotation_spec = {'displayName': category}\n self._categories.append(category)\n annotation_specs.append(annotation_spec)\n\n self._matrix = []\n for row in range(len(self._categories)):\n self._matrix.append({'row': [0] * len(self._categories)})\n\n self._confusion_matrix = {}\n self._confusion_matrix['annotationSpecs'] = annotation_specs\n self._confusion_matrix['rows'] = self._matrix\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_row(self, row_category: str, row: List[float]):\n \"\"\"Logs a confusion matrix row.\n\n Args:\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n\n Raises:\n ValueError: If row_category is not in the list of categories\n set in set_categories call.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if len(row) != len(self._categories):\n raise ValueError('Invalid row. Expected size: {} got: {}'.\\\n format(len(self._categories), len(row)))\n\n self._matrix[self._categories.index(row_category)] = {'row': row}\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_cell(self, row_category: str, col_category: str,\n value: int):\n \"\"\"Logs a cell in the confusion matrix.\n\n Args:\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n\n Raises:\n ValueError: If row_category or col_category is not in the list of\n categories set in set_categories.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if col_category not in self._categories:\n raise ValueError('Invalid category: {} passed. 
Expected one of: {}'.\\\n format(row_category, self._categories))\n\n self._matrix[self._categories.index(row_category)]['row'][\n self._categories.index(col_category)] = value\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix(self, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Logs a confusion matrix.\n\n Args:\n categories: List of the category names.\n matrix: Complete confusion matrix.\n\n Raises:\n ValueError: Length of categories does not match number of rows or columns.\n \"\"\"\n self.set_confusion_matrix_categories(categories)\n\n if len(matrix) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n for index in range(len(categories)):\n if len(matrix[index]) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n self.log_confusion_matrix_row(categories[index], matrix[index])\n\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n\nclass SlicedClassificationMetrics(Artifact):\n \"\"\"Metrics class representing Sliced Classification Metrics.\n\n Similar to ClassificationMetrics clients using this class are expected to use\n log methods of the class to log metrics with the difference being each log\n method takes a slice to associate the ClassificationMetrics.\n\n \"\"\"\n\n TYPE_NAME = 'system.SlicedClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def _upsert_classification_metrics_for_slice(self, slice: str):\n \"\"\"Upserts the classification metrics instance for a slice.\"\"\"\n if slice not in self._sliced_metrics:\n self._sliced_metrics[slice] = ClassificationMetrics()\n\n def _update_metadata(self, slice: str):\n \"\"\"Updates metadata to adhere to the metrics schema.\"\"\"\n self.metadata = {}\n self.metadata['evaluationSlices'] = []\n for slice in self._sliced_metrics.keys():\n slice_metrics = {\n 'slice': slice,\n 'sliceClassificationMetrics': self._sliced_metrics[slice].metadata\n }\n self.metadata['evaluationSlices'].append(slice_metrics)\n\n def log_roc_reading(self, slice: str, threshold: float, tpr: float,\n fpr: float):\n \"\"\"Logs a single data point in the ROC Curve of a slice.\n\n Args:\n slice: String representing slice label.\n threshold: Thresold value for the data point.\n tpr: True positive rate value of the data point.\n fpr: False positive rate value of the data point.\n \"\"\"\n\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_roc_reading(threshold, tpr, fpr)\n self._update_metadata(slice)\n\n def load_roc_readings(self, slice: str, readings: List[List[float]]):\n \"\"\"Supports bulk loading ROC Curve readings for a slice.\n\n Args:\n slice: String representing slice label.\n readings: A 2-D list providing ROC Curve data points.\n The expected order of the data points is: threshold,\n true_positive_rate, false_positive_rate.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].load_roc_readings(readings)\n self._update_metadata(slice)\n\n def set_confusion_matrix_categories(self, slice: str, categories: List[str]):\n \"\"\"Stores confusion matrix categories for a slice..\n\n Categories are stored in the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n categories: 
List of strings specifying the categories.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].set_confusion_matrix_categories(categories)\n self._update_metadata(slice)\n\n def log_confusion_matrix_row(self, slice: str, row_category: str,\n row: List[int]):\n \"\"\"Logs a confusion matrix row for a slice.\n\n Row is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)\n self._update_metadata(slice)\n\n def log_confusion_matrix_cell(self, slice: str, row_category: str,\n col_category: str, value: int):\n \"\"\"Logs a confusion matrix cell for a slice..\n\n Cell is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(\n row_category, col_category, value)\n self._update_metadata(slice)\n\n def load_confusion_matrix(self, slice: str, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Supports bulk loading the whole confusion matrix for a slice.\n\n Args:\n slice: String representing slice label.\n categories: List of the category names.\n matrix: Complete confusion matrix.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(categories, matrix)\n self._update_metadata(slice)\n\n\nT = TypeVar('T')\n\n\nclass InputAnnotation():\n \"\"\"Marker type for input artifacts.\"\"\"\n pass\n\n\n\nclass OutputAnnotation():\n \"\"\"Marker type for output artifacts.\"\"\"\n pass\n\n\n# TODO: Use typing.Annotated instead of this hack.\n# With typing.Annotated (Python 3.9+ or typing_extensions package), the\n# following would look like:\n# Input = typing.Annotated[T, InputAnnotation]\n# Output = typing.Annotated[T, OutputAnnotation]\n\n\n# Input represents an Input artifact of type T.\nInput = Union[T, InputAnnotation]\n\n# Output represents an Output artifact of type T.\nOutput = Union[T, OutputAnnotation]\n\n\ndef is_artifact_annotation(typ) -> bool:\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [InputAnnotation, OutputAnnotation]\n\n if not hasattr(typ, '__origin__'):\n return False\n\n\n if typ.__origin__ != Union and type(typ.__origin__) != type(Union):\n return False\n\n\n if not hasattr(typ, '__args__') or len(typ.__args__) != 2:\n return False\n\n if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:\n return False\n\n return True\n\ndef is_input_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Input[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation\n\n return typ.__args__[1] == InputAnnotation\n\ndef is_output_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Output[T].\"\"\"\n if not is_artifact_annotation(typ):\n 
return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation\n\n return typ.__args__[1] == OutputAnnotation\n\ndef get_io_artifact_class(typ):\n if not is_artifact_annotation(typ):\n return None\n if typ == Input or typ == Output:\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[1]\n\n return typ.__args__[0]\n\ndef get_io_artifact_annotation(typ):\n if not is_artifact_annotation(typ):\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[2]\n\n return typ.__args__[1]\n\n\n\n_SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = {\n x.TYPE_NAME: x\n for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]\n}\n\n\ndef create_runtime_artifact(runtime_artifact: Dict) -> Artifact:\n \"\"\"Creates an Artifact instance from the specified RuntimeArtifact.\n\n Args:\n runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.\n \"\"\"\n schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')\n\n artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)\n if not artifact_type:\n artifact_type = Artifact\n return artifact_type(\n uri=runtime_artifact.get('uri', ''),\n name=runtime_artifact.get('name', ''),\n metadata=runtime_artifact.get('metadata', {}),\n )\n\nclass InputPath:\n '''When creating component from function, :class:`.InputPath` should be used as function parameter annotation to tell the system to pass the *data file path* to the function instead of passing the actual data.'''\n def __init__(self, type=None):\n self.type = type\n\nclass OutputPath:\n '''When creating component from function, :class:`.OutputPath` should be used as function parameter annotation to tell the system that the function wants to output data by writing it into a file with the given path instead of returning the data from the function.'''\n def __init__(self, type=None):\n self.type = type\n\nclass Executor():\n \"\"\"Executor executes v2-based Python function components.\"\"\"\n\n def __init__(self, executor_input: Dict, function_to_execute: Callable):\n self._func = function_to_execute\n self._input = executor_input\n self._input_artifacts: Dict[str, Artifact] = {}\n self._output_artifacts: Dict[str, Artifact] = {}\n\n for name, artifacts in self._input.get('inputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._input_artifacts[name] = self._make_input_artifact(\n artifacts_list[0])\n\n for name, artifacts in self._input.get('outputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._output_artifacts[name] = self._make_output_artifact(\n artifacts_list[0])\n\n self._return_annotation = inspect.signature(self._func).return_annotation\n self._executor_output = {}\n\n @classmethod\n def _make_input_artifact(cls, runtime_artifact: Dict):\n return create_runtime_artifact(runtime_artifact)\n\n @classmethod\n def _make_output_artifact(cls, runtime_artifact: Dict):\n import os\n artifact = create_runtime_artifact(runtime_artifact)\n os.makedirs(os.path.dirname(artifact.path), exist_ok=True)\n return artifact\n\n def _get_input_artifact(self, name: str):\n return self._input_artifacts.get(name)\n\n def _get_output_artifact(self, name: str):\n return 
self._output_artifacts.get(name)\n\n def _get_input_parameter_value(self, parameter_name: str, parameter_type: Any):\n parameter = self._input.get('inputs', {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n if parameter.get('stringValue'):\n if parameter_type == str:\n return parameter['stringValue']\n elif parameter_type == bool:\n # Use `.lower()` so it can also handle 'True' and 'False' (resulted from\n # `str(True)` and `str(False)`, respectively.\n return json.loads(parameter['stringValue'].lower())\n else:\n return json.loads(parameter['stringValue'])\n elif parameter.get('intValue'):\n return int(parameter['intValue'])\n elif parameter.get('doubleValue'):\n return float(parameter['doubleValue'])\n\n def _get_output_parameter_path(self, parameter_name: str):\n parameter_name = self._maybe_strip_path_suffix(parameter_name)\n parameter = self._input.get('outputs',\n {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n import os\n path = parameter.get('outputFile', None)\n if path:\n os.makedirs(os.path.dirname(path), exist_ok=True)\n return path\n\n def _get_output_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n output_artifact = self._output_artifacts.get(artifact_name)\n if not output_artifact:\n raise ValueError(\n 'Failed to get output artifact path for artifact name {}'.format(\n artifact_name))\n return output_artifact.path\n\n def _get_input_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n input_artifact = self._input_artifacts.get(artifact_name)\n if not input_artifact:\n raise ValueError(\n 'Failed to get input artifact path for artifact name {}'.format(\n artifact_name))\n return input_artifact.path\n\n def _write_output_parameter_value(self, name: str,\n value: Union[str, int, float, bool, dict,\n list, Dict, List]):\n if type(value) == str:\n output = {'stringValue': value}\n elif type(value) == int:\n output = {'intValue': value}\n elif type(value) == float:\n output = {'doubleValue': value}\n else:\n # For bool, list, dict, List, Dict, json serialize the value.\n output = {'stringValue': json.dumps(value)}\n\n if not self._executor_output.get('parameters'):\n self._executor_output['parameters'] = {}\n\n self._executor_output['parameters'][name] = output\n\n def _write_output_artifact_payload(self, name: str, value: Any):\n path = self._get_output_artifact_path(name)\n with open(path, 'w') as f:\n f.write(str(value))\n\n # TODO: extract to a util\n @classmethod\n def _get_short_type_name(cls, type_name: str) -> str:\n \"\"\"Extracts the short form type name.\n\n This method is used for looking up serializer for a given type.\n\n For example:\n typing.List -> List\n typing.List[int] -> List\n typing.Dict[str, str] -> Dict\n List -> List\n str -> str\n\n Args:\n type_name: The original type name.\n\n Returns:\n The short form type name or the original name if pattern doesn't match.\n \"\"\"\n import re\n match = re.match('(typing\\.)?(?P<type>\\w+)(?:\\[.+\\])?', type_name)\n if match:\n return match.group('type')\n else:\n return type_name\n\n # TODO: merge with type_utils.is_parameter_type\n @classmethod\n def _is_parameter(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return annotation in [str, int, float, bool, dict, list]\n\n # Annotation could be, for instance `typing.Dict[str, str]`, etc.\n return cls._get_short_type_name(str(annotation)) in ['Dict', 
'List']\n\n @classmethod\n def _is_artifact(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, Artifact)\n return False\n\n @classmethod\n def _is_named_tuple(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, tuple) and hasattr(\n annotation, '_fields') and hasattr(annotation, '__annotations__')\n return False\n\n def _handle_single_return_value(self, output_name: str, annotation_type: Any,\n return_value: Any):\n if self._is_parameter(annotation_type):\n if type(return_value) != annotation_type:\n raise ValueError(\n 'Function `{}` returned value of type {}; want type {}'.format(\n self._func.__name__, type(return_value), annotation_type))\n self._write_output_parameter_value(output_name, return_value)\n elif self._is_artifact(annotation_type):\n self._write_output_artifact_payload(output_name, return_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. Must be one of `str`, `int`, `float`, or a'\n ' subclass of `Artifact`'.format(annotation_type))\n\n def _write_executor_output(self, func_output: Optional[Any] = None):\n if self._output_artifacts:\n self._executor_output['artifacts'] = {}\n\n for name, artifact in self._output_artifacts.items():\n runtime_artifact = {\n 'name': artifact.name,\n 'uri': artifact.uri,\n 'metadata': artifact.metadata,\n }\n artifacts_list = {'artifacts': [runtime_artifact]}\n\n self._executor_output['artifacts'][name] = artifacts_list\n\n if func_output is not None:\n if self._is_parameter(self._return_annotation) or self._is_artifact(\n self._return_annotation):\n # Note: single output is named `Output` in component.yaml.\n self._handle_single_return_value('Output', self._return_annotation,\n func_output)\n elif self._is_named_tuple(self._return_annotation):\n if len(self._return_annotation._fields) != len(func_output):\n raise RuntimeError(\n 'Expected {} return values from function `{}`, got {}'.format(\n len(self._return_annotation._fields), self._func.__name__,\n len(func_output)))\n for i in range(len(self._return_annotation._fields)):\n field = self._return_annotation._fields[i]\n field_type = self._return_annotation.__annotations__[field]\n if type(func_output) == tuple:\n field_value = func_output[i]\n else:\n field_value = getattr(func_output, field)\n self._handle_single_return_value(field, field_type, field_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. 
Must be one of `str`, `int`, `float`, a'\n ' subclass of `Artifact`, or a NamedTuple collection of these types.'\n .format(self._return_annotation))\n\n import os\n os.makedirs(\n os.path.dirname(self._input['outputs']['outputFile']), exist_ok=True)\n with open(self._input['outputs']['outputFile'], 'w') as f:\n f.write(json.dumps(self._executor_output))\n\n def _maybe_strip_path_suffix(self, name) -> str:\n if name.endswith('_path'):\n name = name[0:-len('_path')]\n if name.endswith('_file'):\n name = name[0:-len('_file')]\n return name\n\n def execute(self):\n annotations = inspect.getfullargspec(self._func).annotations\n\n # Function arguments.\n func_kwargs = {}\n\n for k, v in annotations.items():\n if k == 'return':\n continue\n\n if self._is_parameter(v):\n func_kwargs[k] = self._get_input_parameter_value(k, v)\n\n if is_artifact_annotation(v):\n if is_input_artifact(v):\n func_kwargs[k] = self._get_input_artifact(k)\n if is_output_artifact(v):\n func_kwargs[k] = self._get_output_artifact(k)\n\n elif isinstance(v, OutputPath):\n if self._is_parameter(v.type):\n func_kwargs[k] = self._get_output_parameter_path(k)\n else:\n func_kwargs[k] = self._get_output_artifact_path(k)\n elif isinstance(v, InputPath):\n func_kwargs[k] = self._get_input_artifact_path(k)\n\n result = self._func(**func_kwargs)\n self._write_executor_output(result)\n\n\ndef output_metrics(metrics: Output[Metrics]):\n \"\"\"Dummy component that outputs metrics with a random accuracy.\"\"\"\n import random\n result = random.randint(0, 100)\n metrics.log_metric('accuracy', result)\n\n\ndef executor_main():\n import argparse\n import json\n\n parser = argparse.ArgumentParser(description='Process some integers.')\n parser.add_argument('--executor_input', type=str)\n parser.add_argument('--function_to_execute', type=str)\n\n args, _ = parser.parse_known_args()\n executor_input = json.loads(args.executor_input)\n function_to_execute = globals()[args.function_to_execute]\n\n executor = Executor(executor_input=executor_input,\n function_to_execute=function_to_execute)\n\n executor.execute()\n\n\nif __name__ == '__main__':\n executor_main()\n" + "\nimport json\nimport inspect\nfrom typing import *\n\n# Copyright 2021 The Kubeflow Authors\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Classes for input/output types in KFP SDK.\n\nThese are only compatible with v2 Pipelines.\n\"\"\"\n\nimport os\nfrom typing import Dict, Generic, List, Optional, Type, TypeVar, Union\n\n_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'\n_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'\n_S3_LOCAL_MOUNT_PREFIX = '/s3/'\n\n\nclass Artifact(object):\n \"\"\"Generic Artifact class.\n\n This class is meant to represent the metadata around an input or output\n machine-learning Artifact. 
Artifacts have URIs, which can either be a location\n on disk (or Cloud storage) or some other resource identifier such as\n an API resource name.\n\n Artifacts carry a `metadata` field, which is a dictionary for storing\n metadata related to this artifact.\n \"\"\"\n TYPE_NAME = 'system.Artifact'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n \"\"\"Initializes the Artifact with the given name, URI and metadata.\"\"\"\n self.uri = uri or ''\n self.name = name or ''\n self.metadata = metadata or {}\n\n @property\n def path(self):\n return self._get_path()\n\n @path.setter\n def path(self, path):\n self._set_path(path)\n\n def _get_path(self) -> Optional[str]:\n if self.uri.startswith('gs://'):\n return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]\n elif self.uri.startswith('minio://'):\n return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]\n elif self.uri.startswith('s3://'):\n return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]\n return None\n\n def _set_path(self, path):\n if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):\n path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):\n path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):\n path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]\n self.uri = path\n\n\nclass Model(Artifact):\n \"\"\"An artifact representing an ML Model.\"\"\"\n TYPE_NAME = 'system.Model'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n @property\n def framework(self) -> str:\n return self._get_framework()\n\n def _get_framework(self) -> str:\n return self.metadata.get('framework', '')\n\n @framework.setter\n def framework(self, framework: str):\n self._set_framework(framework)\n\n def _set_framework(self, framework: str):\n self.metadata['framework'] = framework\n\n\nclass Dataset(Artifact):\n \"\"\"An artifact representing an ML Dataset.\"\"\"\n TYPE_NAME = 'system.Dataset'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Metrics(Artifact):\n \"\"\"Represent a simple base Artifact type to store key-value scalar metrics.\"\"\"\n TYPE_NAME = 'system.Metrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_metric(self, metric: str, value: float):\n \"\"\"Sets a custom scalar metric.\n\n Args:\n metric: Metric key\n value: Value of the metric.\n \"\"\"\n self.metadata[metric] = value\n\n\nclass ClassificationMetrics(Artifact):\n \"\"\"Represents Artifact class to store Classification Metrics.\"\"\"\n TYPE_NAME = 'system.ClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):\n \"\"\"Logs a single data point in the ROC Curve.\n\n Args:\n fpr: False positive rate value of the data point.\n tpr: True positive rate value of the data point.\n threshold: Threshold value for the data point.\n \"\"\"\n\n roc_reading = {\n 'confidenceThreshold': threshold,\n 'recall': tpr,\n 
'falsePositiveRate': fpr\n }\n if 'confidenceMetrics' not in self.metadata.keys():\n self.metadata['confidenceMetrics'] = []\n\n self.metadata['confidenceMetrics'].append(roc_reading)\n\n def log_roc_curve(self, fpr: List[float], tpr: List[float],\n threshold: List[float]):\n \"\"\"Logs an ROC curve.\n\n The list length of fpr, tpr and threshold must be the same.\n\n Args:\n fpr: List of false positive rate values.\n tpr: List of true positive rate values.\n threshold: List of threshold values.\n \"\"\"\n if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(tpr) != len(\n threshold):\n raise ValueError('Length of fpr, tpr and threshold must be the same. '\n 'Got lengths {}, {} and {} respectively.'.format(\n len(fpr), len(tpr), len(threshold)))\n\n for i in range(len(fpr)):\n self.log_roc_data_point(fpr=fpr[i], tpr=tpr[i], threshold=threshold[i])\n\n def set_confusion_matrix_categories(self, categories: List[str]):\n \"\"\"Stores confusion matrix categories.\n\n Args:\n categories: List of strings specifying the categories.\n \"\"\"\n\n self._categories = []\n annotation_specs = []\n for category in categories:\n annotation_spec = {'displayName': category}\n self._categories.append(category)\n annotation_specs.append(annotation_spec)\n\n self._matrix = []\n for row in range(len(self._categories)):\n self._matrix.append({'row': [0] * len(self._categories)})\n\n self._confusion_matrix = {}\n self._confusion_matrix['annotationSpecs'] = annotation_specs\n self._confusion_matrix['rows'] = self._matrix\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_row(self, row_category: str, row: List[float]):\n \"\"\"Logs a confusion matrix row.\n\n Args:\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n\n Raises:\n ValueError: If row_category is not in the list of categories\n set in set_categories call.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if len(row) != len(self._categories):\n raise ValueError('Invalid row. Expected size: {} got: {}'.\\\n format(len(self._categories), len(row)))\n\n self._matrix[self._categories.index(row_category)] = {'row': row}\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_cell(self, row_category: str, col_category: str,\n value: int):\n \"\"\"Logs a cell in the confusion matrix.\n\n Args:\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n\n Raises:\n ValueError: If row_category or col_category is not in the list of\n categories set in set_categories.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if col_category not in self._categories:\n raise ValueError('Invalid category: {} passed. 
Expected one of: {}'.\\\n format(row_category, self._categories))\n\n self._matrix[self._categories.index(row_category)]['row'][\n self._categories.index(col_category)] = value\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix(self, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Logs a confusion matrix.\n\n Args:\n categories: List of the category names.\n matrix: Complete confusion matrix.\n\n Raises:\n ValueError: Length of categories does not match number of rows or columns.\n \"\"\"\n self.set_confusion_matrix_categories(categories)\n\n if len(matrix) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n for index in range(len(categories)):\n if len(matrix[index]) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n self.log_confusion_matrix_row(categories[index], matrix[index])\n\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n\nclass SlicedClassificationMetrics(Artifact):\n \"\"\"Metrics class representing Sliced Classification Metrics.\n\n Similar to ClassificationMetrics clients using this class are expected to use\n log methods of the class to log metrics with the difference being each log\n method takes a slice to associate the ClassificationMetrics.\n\n \"\"\"\n\n TYPE_NAME = 'system.SlicedClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def _upsert_classification_metrics_for_slice(self, slice: str):\n \"\"\"Upserts the classification metrics instance for a slice.\"\"\"\n if slice not in self._sliced_metrics:\n self._sliced_metrics[slice] = ClassificationMetrics()\n\n def _update_metadata(self, slice: str):\n \"\"\"Updates metadata to adhere to the metrics schema.\"\"\"\n self.metadata = {}\n self.metadata['evaluationSlices'] = []\n for slice in self._sliced_metrics.keys():\n slice_metrics = {\n 'slice': slice,\n 'sliceClassificationMetrics': self._sliced_metrics[slice].metadata\n }\n self.metadata['evaluationSlices'].append(slice_metrics)\n\n def log_roc_reading(self, slice: str, threshold: float, tpr: float,\n fpr: float):\n \"\"\"Logs a single data point in the ROC Curve of a slice.\n\n Args:\n slice: String representing slice label.\n threshold: Thresold value for the data point.\n tpr: True positive rate value of the data point.\n fpr: False positive rate value of the data point.\n \"\"\"\n\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_roc_reading(threshold, tpr, fpr)\n self._update_metadata(slice)\n\n def load_roc_readings(self, slice: str, readings: List[List[float]]):\n \"\"\"Supports bulk loading ROC Curve readings for a slice.\n\n Args:\n slice: String representing slice label.\n readings: A 2-D list providing ROC Curve data points.\n The expected order of the data points is: threshold,\n true_positive_rate, false_positive_rate.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].load_roc_readings(readings)\n self._update_metadata(slice)\n\n def set_confusion_matrix_categories(self, slice: str, categories: List[str]):\n \"\"\"Stores confusion matrix categories for a slice..\n\n Categories are stored in the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n categories: 
List of strings specifying the categories.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].set_confusion_matrix_categories(categories)\n self._update_metadata(slice)\n\n def log_confusion_matrix_row(self, slice: str, row_category: str,\n row: List[int]):\n \"\"\"Logs a confusion matrix row for a slice.\n\n Row is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)\n self._update_metadata(slice)\n\n def log_confusion_matrix_cell(self, slice: str, row_category: str,\n col_category: str, value: int):\n \"\"\"Logs a confusion matrix cell for a slice..\n\n Cell is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(\n row_category, col_category, value)\n self._update_metadata(slice)\n\n def load_confusion_matrix(self, slice: str, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Supports bulk loading the whole confusion matrix for a slice.\n\n Args:\n slice: String representing slice label.\n categories: List of the category names.\n matrix: Complete confusion matrix.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(categories, matrix)\n self._update_metadata(slice)\n\n\nclass HTML(Artifact):\n \"\"\"An artifact representing an HTML file.\"\"\"\n TYPE_NAME = 'system.HTML'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Markdown(Artifact):\n \"\"\"An artifact representing an Markdown file.\"\"\"\n TYPE_NAME = 'system.Markdown'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nT = TypeVar('T')\n\n\nclass InputAnnotation():\n \"\"\"Marker type for input artifacts.\"\"\"\n pass\n\n\nclass OutputAnnotation():\n \"\"\"Marker type for output artifacts.\"\"\"\n pass\n\n\n# TODO: Use typing.Annotated instead of this hack.\n# With typing.Annotated (Python 3.9+ or typing_extensions package), the\n# following would look like:\n# Input = typing.Annotated[T, InputAnnotation]\n# Output = typing.Annotated[T, OutputAnnotation]\n\n# Input represents an Input artifact of type T.\nInput = Union[T, InputAnnotation]\n\n# Output represents an Output artifact of type T.\nOutput = Union[T, OutputAnnotation]\n\n\ndef is_artifact_annotation(typ) -> bool:\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [\n InputAnnotation, OutputAnnotation\n ]\n\n if not hasattr(typ, '__origin__'):\n return False\n\n if typ.__origin__ != Union and type(typ.__origin__) != type(Union):\n return False\n\n if not hasattr(typ, '__args__') or len(typ.__args__) != 2:\n return 
False\n\n if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:\n return False\n\n return True\n\n\ndef is_input_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Input[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation\n\n return typ.__args__[1] == InputAnnotation\n\n\ndef is_output_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Output[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation\n\n return typ.__args__[1] == OutputAnnotation\n\n\ndef get_io_artifact_class(typ):\n if not is_artifact_annotation(typ):\n return None\n if typ == Input or typ == Output:\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[1]\n\n return typ.__args__[0]\n\n\ndef get_io_artifact_annotation(typ):\n if not is_artifact_annotation(typ):\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[2]\n\n return typ.__args__[1]\n\n\n_SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = {\n x.TYPE_NAME: x\n for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]\n}\n\n\ndef create_runtime_artifact(runtime_artifact: Dict) -> Artifact:\n \"\"\"Creates an Artifact instance from the specified RuntimeArtifact.\n\n Args:\n runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.\n \"\"\"\n schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')\n\n artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)\n if not artifact_type:\n artifact_type = Artifact\n return artifact_type(\n uri=runtime_artifact.get('uri', ''),\n name=runtime_artifact.get('name', ''),\n metadata=runtime_artifact.get('metadata', {}),\n )\n\nclass InputPath:\n '''When creating component from function, :class:`.InputPath` should be used as function parameter annotation to tell the system to pass the *data file path* to the function instead of passing the actual data.'''\n def __init__(self, type=None):\n self.type = type\n\nclass OutputPath:\n '''When creating component from function, :class:`.OutputPath` should be used as function parameter annotation to tell the system that the function wants to output data by writing it into a file with the given path instead of returning the data from the function.'''\n def __init__(self, type=None):\n self.type = type\n\nclass Executor():\n \"\"\"Executor executes v2-based Python function components.\"\"\"\n\n def __init__(self, executor_input: Dict, function_to_execute: Callable):\n self._func = function_to_execute\n self._input = executor_input\n self._input_artifacts: Dict[str, Artifact] = {}\n self._output_artifacts: Dict[str, Artifact] = {}\n\n for name, artifacts in self._input.get('inputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._input_artifacts[name] = self._make_input_artifact(\n artifacts_list[0])\n\n for name, artifacts in self._input.get('outputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._output_artifacts[name] = self._make_output_artifact(\n artifacts_list[0])\n\n self._return_annotation = 
inspect.signature(self._func).return_annotation\n self._executor_output = {}\n\n @classmethod\n def _make_input_artifact(cls, runtime_artifact: Dict):\n return create_runtime_artifact(runtime_artifact)\n\n @classmethod\n def _make_output_artifact(cls, runtime_artifact: Dict):\n import os\n artifact = create_runtime_artifact(runtime_artifact)\n os.makedirs(os.path.dirname(artifact.path), exist_ok=True)\n return artifact\n\n def _get_input_artifact(self, name: str):\n return self._input_artifacts.get(name)\n\n def _get_output_artifact(self, name: str):\n return self._output_artifacts.get(name)\n\n def _get_input_parameter_value(self, parameter_name: str, parameter_type: Any):\n parameter = self._input.get('inputs', {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n if parameter.get('stringValue'):\n if parameter_type == str:\n return parameter['stringValue']\n elif parameter_type == bool:\n # Use `.lower()` so it can also handle 'True' and 'False' (resulted from\n # `str(True)` and `str(False)`, respectively.\n return json.loads(parameter['stringValue'].lower())\n else:\n return json.loads(parameter['stringValue'])\n elif parameter.get('intValue'):\n return int(parameter['intValue'])\n elif parameter.get('doubleValue'):\n return float(parameter['doubleValue'])\n\n def _get_output_parameter_path(self, parameter_name: str):\n parameter_name = self._maybe_strip_path_suffix(parameter_name)\n parameter = self._input.get('outputs',\n {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n import os\n path = parameter.get('outputFile', None)\n if path:\n os.makedirs(os.path.dirname(path), exist_ok=True)\n return path\n\n def _get_output_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n output_artifact = self._output_artifacts.get(artifact_name)\n if not output_artifact:\n raise ValueError(\n 'Failed to get output artifact path for artifact name {}'.format(\n artifact_name))\n return output_artifact.path\n\n def _get_input_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n input_artifact = self._input_artifacts.get(artifact_name)\n if not input_artifact:\n raise ValueError(\n 'Failed to get input artifact path for artifact name {}'.format(\n artifact_name))\n return input_artifact.path\n\n def _write_output_parameter_value(self, name: str,\n value: Union[str, int, float, bool, dict,\n list, Dict, List]):\n if type(value) == str:\n output = {'stringValue': value}\n elif type(value) == int:\n output = {'intValue': value}\n elif type(value) == float:\n output = {'doubleValue': value}\n else:\n # For bool, list, dict, List, Dict, json serialize the value.\n output = {'stringValue': json.dumps(value)}\n\n if not self._executor_output.get('parameters'):\n self._executor_output['parameters'] = {}\n\n self._executor_output['parameters'][name] = output\n\n def _write_output_artifact_payload(self, name: str, value: Any):\n path = self._get_output_artifact_path(name)\n with open(path, 'w') as f:\n f.write(str(value))\n\n # TODO: extract to a util\n @classmethod\n def _get_short_type_name(cls, type_name: str) -> str:\n \"\"\"Extracts the short form type name.\n\n This method is used for looking up serializer for a given type.\n\n For example:\n typing.List -> List\n typing.List[int] -> List\n typing.Dict[str, str] -> Dict\n List -> List\n str -> str\n\n Args:\n type_name: The original type name.\n\n Returns:\n The 
short form type name or the original name if pattern doesn't match.\n \"\"\"\n import re\n match = re.match('(typing\\.)?(?P<type>\\w+)(?:\\[.+\\])?', type_name)\n if match:\n return match.group('type')\n else:\n return type_name\n\n # TODO: merge with type_utils.is_parameter_type\n @classmethod\n def _is_parameter(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return annotation in [str, int, float, bool, dict, list]\n\n # Annotation could be, for instance `typing.Dict[str, str]`, etc.\n return cls._get_short_type_name(str(annotation)) in ['Dict', 'List']\n\n @classmethod\n def _is_artifact(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, Artifact)\n return False\n\n @classmethod\n def _is_named_tuple(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, tuple) and hasattr(\n annotation, '_fields') and hasattr(annotation, '__annotations__')\n return False\n\n def _handle_single_return_value(self, output_name: str, annotation_type: Any,\n return_value: Any):\n if self._is_parameter(annotation_type):\n if type(return_value) != annotation_type:\n raise ValueError(\n 'Function `{}` returned value of type {}; want type {}'.format(\n self._func.__name__, type(return_value), annotation_type))\n self._write_output_parameter_value(output_name, return_value)\n elif self._is_artifact(annotation_type):\n self._write_output_artifact_payload(output_name, return_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. Must be one of `str`, `int`, `float`, or a'\n ' subclass of `Artifact`'.format(annotation_type))\n\n def _write_executor_output(self, func_output: Optional[Any] = None):\n if self._output_artifacts:\n self._executor_output['artifacts'] = {}\n\n for name, artifact in self._output_artifacts.items():\n runtime_artifact = {\n 'name': artifact.name,\n 'uri': artifact.uri,\n 'metadata': artifact.metadata,\n }\n artifacts_list = {'artifacts': [runtime_artifact]}\n\n self._executor_output['artifacts'][name] = artifacts_list\n\n if func_output is not None:\n if self._is_parameter(self._return_annotation) or self._is_artifact(\n self._return_annotation):\n # Note: single output is named `Output` in component.yaml.\n self._handle_single_return_value('Output', self._return_annotation,\n func_output)\n elif self._is_named_tuple(self._return_annotation):\n if len(self._return_annotation._fields) != len(func_output):\n raise RuntimeError(\n 'Expected {} return values from function `{}`, got {}'.format(\n len(self._return_annotation._fields), self._func.__name__,\n len(func_output)))\n for i in range(len(self._return_annotation._fields)):\n field = self._return_annotation._fields[i]\n field_type = self._return_annotation.__annotations__[field]\n if type(func_output) == tuple:\n field_value = func_output[i]\n else:\n field_value = getattr(func_output, field)\n self._handle_single_return_value(field, field_type, field_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. 
Must be one of `str`, `int`, `float`, a'\n ' subclass of `Artifact`, or a NamedTuple collection of these types.'\n .format(self._return_annotation))\n\n import os\n os.makedirs(\n os.path.dirname(self._input['outputs']['outputFile']), exist_ok=True)\n with open(self._input['outputs']['outputFile'], 'w') as f:\n f.write(json.dumps(self._executor_output))\n\n def _maybe_strip_path_suffix(self, name) -> str:\n if name.endswith('_path'):\n name = name[0:-len('_path')]\n if name.endswith('_file'):\n name = name[0:-len('_file')]\n return name\n\n def execute(self):\n annotations = inspect.getfullargspec(self._func).annotations\n\n # Function arguments.\n func_kwargs = {}\n\n for k, v in annotations.items():\n if k == 'return':\n continue\n\n if self._is_parameter(v):\n func_kwargs[k] = self._get_input_parameter_value(k, v)\n\n if is_artifact_annotation(v):\n if is_input_artifact(v):\n func_kwargs[k] = self._get_input_artifact(k)\n if is_output_artifact(v):\n func_kwargs[k] = self._get_output_artifact(k)\n\n elif isinstance(v, OutputPath):\n if self._is_parameter(v.type):\n func_kwargs[k] = self._get_output_parameter_path(k)\n else:\n func_kwargs[k] = self._get_output_artifact_path(k)\n elif isinstance(v, InputPath):\n func_kwargs[k] = self._get_input_artifact_path(k)\n\n result = self._func(**func_kwargs)\n self._write_executor_output(result)\n\n\ndef output_metrics(metrics: Output[Metrics]):\n \"\"\"Dummy component that outputs metrics with a random accuracy.\"\"\"\n import random\n result = random.randint(0, 100)\n metrics.log_metric('accuracy', result)\n\n\ndef executor_main():\n import argparse\n import json\n\n parser = argparse.ArgumentParser(description='Process some integers.')\n parser.add_argument('--executor_input', type=str)\n parser.add_argument('--function_to_execute', type=str)\n\n args, _ = parser.parse_known_args()\n executor_input = json.loads(args.executor_input)\n function_to_execute = globals()[args.function_to_execute]\n\n executor = Executor(executor_input=executor_input,\n function_to_execute=function_to_execute)\n\n executor.execute()\n\n\nif __name__ == '__main__':\n executor_main()\n" ], "image": "python:3.7" } @@ -182,7 +182,7 @@ } }, "schemaVersion": "2.0.0", - "sdkVersion": "kfp-1.6.4" + "sdkVersion": "kfp-1.6.6" }, "runtimeConfig": { "gcsOutputDirectory": "dummy_root" diff --git a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_nested_conditions.json b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_nested_conditions.json index 054fd67207e..ca7d509f746 100644 --- a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_nested_conditions.json +++ b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_nested_conditions.json @@ -452,7 +452,7 @@ } }, "schemaVersion": "2.0.0", - "sdkVersion": "kfp-1.6.4" + "sdkVersion": "kfp-1.6.6" }, "runtimeConfig": { "gcsOutputDirectory": "dummy_root" diff --git a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_nested_conditions_yaml.json b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_nested_conditions_yaml.json index 5d23c2f94a7..d6854cbd441 100644 --- a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_nested_conditions_yaml.json +++ b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_nested_conditions_yaml.json @@ -544,7 +544,7 @@ } }, "schemaVersion": "2.0.0", - "sdkVersion": "kfp-1.6.4" + "sdkVersion": "kfp-1.6.6" }, "runtimeConfig": { "gcsOutputDirectory": "dummy_root" diff --git 
a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_nested_loops.json b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_nested_loops.json index 747eb9ce437..fc6e8074f23 100644 --- a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_nested_loops.json +++ b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_nested_loops.json @@ -142,7 +142,7 @@ } }, "schemaVersion": "2.0.0", - "sdkVersion": "kfp-1.6.4" + "sdkVersion": "kfp-1.6.6" }, "runtimeConfig": { "gcsOutputDirectory": "dummy_root", diff --git a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_ontology.json b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_ontology.json index d770ce0163a..962bd0f8201 100644 --- a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_ontology.json +++ b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_ontology.json @@ -153,7 +153,7 @@ } }, "schemaVersion": "2.0.0", - "sdkVersion": "kfp-1.6.4" + "sdkVersion": "kfp-1.6.6" }, "runtimeConfig": { "gcsOutputDirectory": "dummy_root", diff --git a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_params_containing_format.json b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_params_containing_format.json index 1319543235f..ebcfa635387 100644 --- a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_params_containing_format.json +++ b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_params_containing_format.json @@ -251,7 +251,7 @@ } }, "schemaVersion": "2.0.0", - "sdkVersion": "kfp-1.6.4" + "sdkVersion": "kfp-1.6.6" }, "runtimeConfig": { "gcsOutputDirectory": "dummy_root", diff --git a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_resource_spec.json b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_resource_spec.json index 254bb92dc9a..56f780a606f 100644 --- a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_resource_spec.json +++ b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_resource_spec.json @@ -161,7 +161,7 @@ } }, "schemaVersion": "2.0.0", - "sdkVersion": "kfp-1.6.4" + "sdkVersion": "kfp-1.6.6" }, "runtimeConfig": { "gcsOutputDirectory": "dummy_root", diff --git a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_reused_component.json b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_reused_component.json index 9fc8b035dae..eb7673ef028 100644 --- a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_reused_component.json +++ b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_reused_component.json @@ -208,7 +208,7 @@ } }, "schemaVersion": "2.0.0", - "sdkVersion": "kfp-1.6.4" + "sdkVersion": "kfp-1.6.6" }, "runtimeConfig": { "gcsOutputDirectory": "dummy_root", diff --git a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_various_io_types.json b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_various_io_types.json index 894fe25b4a5..a5f64993700 100644 --- a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_various_io_types.json +++ b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_various_io_types.json @@ -34,6 +34,11 @@ "artifactType": { "schemaTitle": "system.Artifact" } + }, + "input_h": { + "artifactType": { + "schemaTitle": "system.HTML" + } } }, "parameters": { @@ -92,6 +97,11 @@ "artifactType": { "schemaTitle": "system.Artifact" } + }, + "output_8": { + "artifactType": { + "schemaTitle": "system.HTML" + } } }, "parameters": { @@ -113,7 +123,8 @@ 
"{{$.inputs.artifacts['input_d'].uri}}", "{{$.inputs.artifacts['input_e'].uri}}", "{{$.inputs.artifacts['input_f'].path}}", - "{{$.inputs.artifacts['input_g'].path}}" + "{{$.inputs.artifacts['input_g'].path}}", + "{{$.inputs.artifacts['input_h'].path}}" ], "image": "gcr.io/image" } @@ -131,7 +142,8 @@ "{{$.outputs.artifacts['output_4'].uri}}", "{{$.outputs.artifacts['output_5'].uri}}", "{{$.outputs.artifacts['output_6'].path}}", - "{{$.outputs.artifacts['output_7'].path}}" + "{{$.outputs.artifacts['output_7'].path}}", + "{{$.outputs.artifacts['output_8'].path}}" ], "image": "gcr.io/image" } @@ -191,6 +203,12 @@ "outputArtifactKey": "output_7", "producerTask": "upstream" } + }, + "input_h": { + "taskOutputArtifact": { + "outputArtifactKey": "output_8", + "producerTask": "upstream" + } } }, "parameters": { @@ -254,7 +272,7 @@ } }, "schemaVersion": "2.0.0", - "sdkVersion": "kfp-1.6.4" + "sdkVersion": "kfp-1.6.6" }, "runtimeConfig": { "gcsOutputDirectory": "dummy_root", diff --git a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_various_io_types.py b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_various_io_types.py index 79f3ba25cb1..cb94a843231 100644 --- a/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_various_io_types.py +++ b/sdk/python/kfp/v2/compiler_cli_tests/test_data/pipeline_with_various_io_types.py @@ -33,6 +33,7 @@ - {name: output_5, type: Datasets} - {name: output_6, type: Some arbitrary type} - {name: output_7, type: {GcsPath: {data_type: TSV}}} +- {name: output_8, type: HTML} implementation: container: image: gcr.io/image @@ -48,6 +49,7 @@ - {outputUri: output_5} - {outputPath: output_6} - {outputPath: output_7} + - {outputPath: output_8} """) component_op_2 = components.load_component_from_text(""" @@ -60,6 +62,7 @@ - {name: input_e, type: Datasets} - {name: input_f, type: Some arbitrary type} - {name: input_g, type: {GcsPath: {data_type: TSV}}} +- {name: input_h, type: HTML} implementation: container: image: gcr.io/image @@ -71,6 +74,7 @@ - {inputUri: input_e} - {inputPath: input_f} - {inputPath: input_g} + - {inputPath: input_h} """) @@ -92,6 +96,7 @@ def my_pipeline(input1: str, input_e=component_1.outputs['output_5'], input_f=component_1.outputs['output_6'], input_g=component_1.outputs['output_7'], + input_h=component_1.outputs['output_8'], ) diff --git a/sdk/python/kfp/v2/compiler_cli_tests/test_data/two_step_pipeline.json b/sdk/python/kfp/v2/compiler_cli_tests/test_data/two_step_pipeline.json index 346c7030a7f..e86a6136416 100644 --- a/sdk/python/kfp/v2/compiler_cli_tests/test_data/two_step_pipeline.json +++ b/sdk/python/kfp/v2/compiler_cli_tests/test_data/two_step_pipeline.json @@ -119,7 +119,7 @@ } }, "schemaVersion": "2.0.0", - "sdkVersion": "kfp-1.6.4" + "sdkVersion": "kfp-1.6.6" }, "runtimeConfig": { "gcsOutputDirectory": "dummy_root", diff --git a/sdk/python/kfp/v2/compiler_cli_tests/test_data/xgboost_sample_pipeline.json b/sdk/python/kfp/v2/compiler_cli_tests/test_data/xgboost_sample_pipeline.json index a34723319e6..075df09d12d 100644 --- a/sdk/python/kfp/v2/compiler_cli_tests/test_data/xgboost_sample_pipeline.json +++ b/sdk/python/kfp/v2/compiler_cli_tests/test_data/xgboost_sample_pipeline.json @@ -867,7 +867,7 @@ } }, "schemaVersion": "2.0.0", - "sdkVersion": "kfp-1.6.4" + "sdkVersion": "kfp-1.6.6" }, "runtimeConfig": { "gcsOutputDirectory": "dummy_root"