From 98aa248f12e36826d7fbb3de3e0b1117626c9bf0 Mon Sep 17 00:00:00 2001 From: William Parsley Date: Fri, 23 Jun 2023 14:17:52 -0500 Subject: [PATCH 01/32] change package name, create branch --- setup.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index aee2ed0b3e..5ab3d3903c 100644 --- a/setup.py +++ b/setup.py @@ -35,10 +35,10 @@ from distutils.command.build_py import build_py from distutils.core import setup -NAME = "feast" -DESCRIPTION = "Python SDK for Feast" -URL = "https://github.com/feast-dev/feast" -AUTHOR = "Feast" +NAME = "eg-feast" +DESCRIPTION = "EG-specific Python SDK for Feast" +URL = "https://github.com/ExpediaGroup/feast" +AUTHOR = "Feast and EG" REQUIRES_PYTHON = ">=3.8.0" REQUIRED = [ From 8b5b2dd20730326d4dad53a136ed3f29d913b445 Mon Sep 17 00:00:00 2001 From: William Parsley Date: Fri, 23 Jun 2023 15:38:39 -0500 Subject: [PATCH 02/32] remove publish and release actions --- .github/workflows/publish.yml | 182 ---------------------------------- .github/workflows/release.yml | 157 ----------------------------- 2 files changed, 339 deletions(-) delete mode 100644 .github/workflows/publish.yml delete mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml deleted file mode 100644 index 11f08bf2e5..0000000000 --- a/.github/workflows/publish.yml +++ /dev/null @@ -1,182 +0,0 @@ -name: publish - -on: - push: - tags: - - 'v*.*.*' - -jobs: - get-version: - if: github.repository == 'feast-dev/feast' - runs-on: ubuntu-latest - outputs: - release_version: ${{ steps.get_release_version.outputs.release_version }} - version_without_prefix: ${{ steps.get_release_version_without_prefix.outputs.version_without_prefix }} - highest_semver_tag: ${{ steps.get_highest_semver.outputs.highest_semver_tag }} - steps: - - uses: actions/checkout@v2 - - name: Get release version - id: get_release_version - run: echo ::set-output name=release_version::${GITHUB_REF#refs/*/} - - name: Get release version without prefix - id: get_release_version_without_prefix - env: - RELEASE_VERSION: ${{ steps.get_release_version.outputs.release_version }} - run: | - echo ::set-output name=version_without_prefix::${RELEASE_VERSION:1} - - name: Get highest semver - id: get_highest_semver - env: - RELEASE_VERSION: ${{ steps.get_release_version.outputs.release_version }} - run: | - source infra/scripts/setup-common-functions.sh - SEMVER_REGEX='^v[0-9]+\.[0-9]+\.[0-9]+(-([0-9A-Za-z-]+(\.[0-9A-Za-z-]+)*))?$' - if echo "${RELEASE_VERSION}" | grep -P "$SEMVER_REGEX" &>/dev/null ; then - echo ::set-output name=highest_semver_tag::$(get_tag_release -m) - fi - - name: Check output - env: - RELEASE_VERSION: ${{ steps.get_release_version.outputs.release_version }} - VERSION_WITHOUT_PREFIX: ${{ steps.get_release_version_without_prefix.outputs.version_without_prefix }} - HIGHEST_SEMVER_TAG: ${{ steps.get_highest_semver.outputs.highest_semver_tag }} - run: | - echo $RELEASE_VERSION - echo $VERSION_WITHOUT_PREFIX - echo $HIGHEST_SEMVER_TAG - - build-publish-docker-images: - runs-on: ubuntu-latest - needs: [get-version, publish-python-sdk] - strategy: - matrix: - component: [feature-server, feature-server-python-aws, feature-server-java, feature-transformation-server] - env: - MAVEN_CACHE: gs://feast-templocation-kf-feast/.m2.2020-08-19.tar - REGISTRY: feastdev - steps: - - uses: actions/checkout@v2 - - name: Set up QEMU - uses: docker/setup-qemu-action@v1 - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 - - 
name: Login to DockerHub - uses: docker/login-action@v1 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Authenticate to Google Cloud - uses: 'google-github-actions/auth@v1' - with: - credentials_json: '${{ secrets.GCP_SA_KEY }}' - - name: Set up gcloud SDK - uses: google-github-actions/setup-gcloud@v1 - with: - project_id: ${{ secrets.GCP_PROJECT_ID }} - - name: Use gcloud CLI - run: gcloud info - - run: gcloud auth configure-docker --quiet - - name: Build image - run: | - make build-${{ matrix.component }}-docker REGISTRY=${REGISTRY} VERSION=${VERSION_WITHOUT_PREFIX} - env: - RELEASE_VERSION: ${{ needs.get-version.outputs.release_version }} - VERSION_WITHOUT_PREFIX: ${{ needs.get-version.outputs.version_without_prefix }} - HIGHEST_SEMVER_TAG: ${{ needs.get-version.outputs.highest_semver_tag }} - - name: Push versioned images - env: - RELEASE_VERSION: ${{ needs.get-version.outputs.release_version }} - VERSION_WITHOUT_PREFIX: ${{ needs.get-version.outputs.version_without_prefix }} - HIGHEST_SEMVER_TAG: ${{ needs.get-version.outputs.highest_semver_tag }} - run: | - make push-${{ matrix.component }}-docker REGISTRY=${REGISTRY} VERSION=${VERSION_WITHOUT_PREFIX} - - echo "Only push to latest tag if tag is the highest semver version $HIGHEST_SEMVER_TAG" - if [ "${VERSION_WITHOUT_PREFIX}" = "${HIGHEST_SEMVER_TAG:1}" ] - then - docker tag feastdev/${{ matrix.component }}:${VERSION_WITHOUT_PREFIX} feastdev/${{ matrix.component }}:latest - docker push feastdev/${{ matrix.component }}:latest - fi - - publish-helm-charts: - if: github.repository == 'feast-dev/feast' - runs-on: ubuntu-latest - needs: get-version - env: - HELM_VERSION: v3.8.0 - VERSION_WITHOUT_PREFIX: ${{ needs.get-version.outputs.version_without_prefix }} - steps: - - uses: actions/checkout@v2 - - name: Authenticate to Google Cloud - uses: 'google-github-actions/auth@v1' - with: - credentials_json: '${{ secrets.GCP_SA_KEY }}' - - name: Set up gcloud SDK - uses: google-github-actions/setup-gcloud@v1 - with: - project_id: ${{ secrets.GCP_PROJECT_ID }} - - run: gcloud auth configure-docker --quiet - - name: Remove previous Helm - run: sudo rm -rf $(which helm) - - name: Install Helm - run: ./infra/scripts/helm/install-helm.sh - - name: Validate Helm chart prior to publishing - run: ./infra/scripts/helm/validate-helm-chart-publish.sh - - name: Validate all version consistency - run: ./infra/scripts/helm/validate-helm-chart-versions.sh $VERSION_WITHOUT_PREFIX - - name: Publish Helm charts - run: ./infra/scripts/helm/push-helm-charts.sh $VERSION_WITHOUT_PREFIX - - build_wheels: - uses: ./.github/workflows/build_wheels.yml - - publish-python-sdk: - if: github.repository == 'feast-dev/feast' - runs-on: ubuntu-latest - needs: [build_wheels] - steps: - - uses: actions/download-artifact@v2 - with: - name: wheels - path: dist - - uses: pypa/gh-action-pypi-publish@v1.4.2 - with: - user: __token__ - password: ${{ secrets.PYPI_PASSWORD }} - - publish-java-sdk: - if: github.repository == 'feast-dev/feast' - container: maven:3.6-jdk-11 - runs-on: ubuntu-latest - needs: get-version - steps: - - uses: actions/checkout@v2 - with: - submodules: 'true' - - name: Set up JDK 11 - uses: actions/setup-java@v1 - with: - java-version: '11' - java-package: jdk - architecture: x64 - - uses: actions/setup-python@v2 - with: - python-version: '3.7' - architecture: 'x64' - - uses: actions/cache@v2 - with: - path: ~/.m2/repository - key: ${{ runner.os }}-it-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: | 
- ${{ runner.os }}-it-maven- - - name: Publish java sdk - env: - VERSION_WITHOUT_PREFIX: ${{ needs.get-version.outputs.version_without_prefix }} - GPG_PUBLIC_KEY: ${{ secrets.GPG_PUBLIC_KEY }} - GPG_PRIVATE_KEY: ${{ secrets.GPG_PRIVATE_KEY }} - MAVEN_SETTINGS: ${{ secrets.MAVEN_SETTINGS }} - run: | - echo -n "$GPG_PUBLIC_KEY" > /root/public-key - echo -n "$GPG_PRIVATE_KEY" > /root/private-key - mkdir -p /root/.m2/ - echo -n "$MAVEN_SETTINGS" > /root/.m2/settings.xml - infra/scripts/publish-java-sdk.sh --revision ${VERSION_WITHOUT_PREFIX} --gpg-key-import-dir /root diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml deleted file mode 100644 index da16c5f8f1..0000000000 --- a/.github/workflows/release.yml +++ /dev/null @@ -1,157 +0,0 @@ -name: release - -on: - workflow_dispatch: - inputs: - dry_run: - description: 'Dry Run' - required: true - default: true - type: boolean - token: - description: 'Personal Access Token' - required: true - default: "" - type: string - publish_ui: - description: 'Publish to NPM?' - required: true - default: true - type: boolean - -jobs: - - get_dry_release_versions: - runs-on: ubuntu-latest - env: - GITHUB_TOKEN: ${{ github.event.inputs.token }} - outputs: - current_version: ${{ steps.get_versions.outputs.current_version }} - next_version: ${{ steps.get_versions.outputs.next_version }} - steps: - - name: Checkout - uses: actions/checkout@v2 - with: - persist-credentials: false - - name: Setup Node.js - uses: actions/setup-node@v2 - with: - node-version: '18.x' - registry-url: 'https://registry.npmjs.org' - - name: Release (Dry Run) - id: get_versions - run: | - CURRENT_VERSION=$(npx -p @semantic-release/changelog -p @semantic-release/git -p @semantic-release/exec -p semantic-release semantic-release --dry-run | grep "associated with version " | sed -E 's/.* version//' | sed -E 's/ on.*//') - NEXT_VERSION=$(npx -p @semantic-release/changelog -p @semantic-release/git -p @semantic-release/exec -p semantic-release semantic-release --dry-run | grep 'The next release version is' | sed -E 's/.* ([[:digit:].]+)$/\1/') - echo ::set-output name=current_version::$CURRENT_VERSION - echo ::set-output name=next_version::$NEXT_VERSION - echo "Current version is ${CURRENT_VERSION}" - echo "Next version is ${NEXT_VERSION}" - - validate_version_bumps: - if: github.repository == 'feast-dev/feast' - needs: get_dry_release_versions - runs-on: ubuntu-latest - env: - # This publish is working using an NPM automation token to bypass 2FA - NPM_TOKEN: ${{ secrets.NPM_TOKEN }} - HELM_VERSION: v3.8.0 - CURRENT_VERSION: ${{ needs.get_dry_release_versions.outputs.current_version }} - NEXT_VERSION: ${{ needs.get_dry_release_versions.outputs.next_version }} - steps: - - uses: actions/checkout@v2 - - uses: actions/setup-node@v2 - with: - node-version: '18.x' - registry-url: 'https://registry.npmjs.org' - - name: Bump file versions - run: python ./infra/scripts/release/bump_file_versions.py ${CURRENT_VERSION} ${NEXT_VERSION} - - name: Install yarn dependencies - working-directory: ./ui - run: yarn install - - name: Build yarn rollup - working-directory: ./ui - run: yarn build:lib - - name: Bundle UI in SDK - run: make build-ui - - name: Remove previous Helm - run: sudo rm -rf $(which helm) - - name: Set up Homebrew - uses: Homebrew/actions/setup-homebrew@master - - name: Setup Helm-docs - run: | - brew install norwoodj/tap/helm-docs - - name: Generate helm chart READMEs - run: make build-helm-docs - - name: Install Helm - run: ./infra/scripts/helm/install-helm.sh - - 
name: Validate Helm chart prior to publishing - run: ./infra/scripts/helm/validate-helm-chart-publish.sh - - name: Validate all version consistency - run: ./infra/scripts/helm/validate-helm-chart-versions.sh $NEXT_VERSION - - - publish-web-ui-npm: - needs: [validate_version_bumps, get_dry_release_versions] - runs-on: ubuntu-latest - env: - # This publish is working using an NPM automation token to bypass 2FA - NPM_TOKEN: ${{ secrets.NPM_TOKEN }} - CURRENT_VERSION: ${{ needs.get_dry_release_versions.outputs.current_version }} - NEXT_VERSION: ${{ needs.get_dry_release_versions.outputs.next_version }} - steps: - - uses: actions/checkout@v2 - - uses: actions/setup-node@v2 - with: - node-version: '18.x' - registry-url: 'https://registry.npmjs.org' - - name: Bump file versions (temporarily for Web UI publish) - run: python ./infra/scripts/release/bump_file_versions.py ${CURRENT_VERSION} ${NEXT_VERSION} - - name: Install yarn dependencies - working-directory: ./ui - run: yarn install - - name: Build yarn rollup - working-directory: ./ui - run: yarn build:lib - - name: Publish UI package - working-directory: ./ui - if: github.event.inputs.dry_run == 'false' && github.event.inputs.publish_ui == 'true' - run: npm publish - env: - # This publish is working using an NPM automation token to bypass 2FA - NPM_TOKEN: ${{ secrets.NPM_TOKEN }} - - release: - name: release - runs-on: ubuntu-latest - needs: publish-web-ui-npm - env: - GITHUB_TOKEN: ${{ github.event.inputs.token }} - GIT_AUTHOR_NAME: feast-ci-bot - GIT_AUTHOR_EMAIL: feast-ci-bot@willem.co - GIT_COMMITTER_NAME: feast-ci-bot - GIT_COMMITTER_EMAIL: feast-ci-bot@willem.co - steps: - - name: Checkout - uses: actions/checkout@v2 - with: - persist-credentials: false - - name: Setup Node.js - uses: actions/setup-node@v2 - with: - node-version: '18.x' - registry-url: 'https://registry.npmjs.org' - - name: Set up Homebrew - id: set-up-homebrew - uses: Homebrew/actions/setup-homebrew@master - - name: Setup Helm-docs - run: | - brew install norwoodj/tap/helm-docs - - name: Release (Dry Run) - if: github.event.inputs.dry_run == 'true' - run: | - npx -p @semantic-release/changelog -p @semantic-release/git -p @semantic-release/exec -p semantic-release semantic-release --dry-run - - name: Release - if: github.event.inputs.dry_run == 'false' - run: | - npx -p @semantic-release/changelog -p @semantic-release/git -p @semantic-release/exec -p semantic-release semantic-release From b18eee1c1b2d3efc990ebca6377bc95af01680ac Mon Sep 17 00:00:00 2001 From: Matt Carlin Date: Thu, 29 Jun 2023 14:24:09 -0500 Subject: [PATCH 03/32] Subclass Entity to Pydantic BaseModel so it can be sent and received by FastAPI --- sdk/python/feast/entity.py | 95 ++++++++----------- sdk/python/feast/feature_store.py | 2 +- .../requirements/py3.8-ci-requirements.txt | 2 +- sdk/python/tests/unit/test_entity.py | 2 +- 4 files changed, 45 insertions(+), 56 deletions(-) diff --git a/sdk/python/feast/entity.py b/sdk/python/feast/entity.py index 30f04e9c06..7a734bccf1 100644 --- a/sdk/python/feast/entity.py +++ b/sdk/python/feast/entity.py @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
from datetime import datetime +from json import dumps from typing import Dict, List, Optional from google.protobuf.json_format import MessageToJson +from pydantic import BaseModel, root_validator from typeguard import typechecked from feast.protos.feast.core.Entity_pb2 import Entity as EntityProto @@ -25,7 +27,7 @@ @typechecked -class Entity: +class Entity(BaseModel): """ An entity defines a collection of entities for which features can be defined. An entity can also contain associated metadata. @@ -44,62 +46,49 @@ class Entity: """ name: str - value_type: ValueType + value_type: Optional[ValueType] = None join_key: str - description: str - tags: Dict[str, str] - owner: str + description: str = "" + tags: Optional[Dict[str, str]] = None + owner: str = "" created_timestamp: Optional[datetime] last_updated_timestamp: Optional[datetime] - @log_exceptions - def __init__( - self, - *, - name: str, - join_keys: Optional[List[str]] = None, - value_type: Optional[ValueType] = None, - description: str = "", - tags: Optional[Dict[str, str]] = None, - owner: str = "", - ): - """ - Creates an Entity object. - - Args: - name: The unique name of the entity. - join_keys (optional): A list of properties that uniquely identifies different entities - within the collection. This currently only supports a list of size one, but is - intended to eventually support multiple join keys. - value_type (optional): The type of the entity, such as string or float. If not specified, - it will be inferred from the schema of the underlying data source. - description (optional): A human-readable description. - tags (optional): A dictionary of key-value pairs to store arbitrary metadata. - owner (optional): The owner of the entity, typically the email of the primary maintainer. - - Raises: - ValueError: Parameters are specified incorrectly. - """ - self.name = name - self.value_type = value_type or ValueType.UNKNOWN - - if join_keys and len(join_keys) > 1: - # TODO(felixwang9817): When multiple join keys are supported, add a `join_keys` attribute - # and deprecate the `join_key` attribute. - raise ValueError( - "An entity may only have a single join key. " - "Multiple join keys will be supported in the future." - ) - elif join_keys and len(join_keys) == 1: - self.join_key = join_keys[0] - else: - self.join_key = self.name - - self.description = description - self.tags = tags if tags is not None else {} - self.owner = owner - self.created_timestamp = None - self.last_updated_timestamp = None + class Config: + arbitrary_types_allowed = True + extra = "allow" + json_encoders = { + ValueType: lambda v: int(dumps(v.value, default=str)) + } + + @root_validator(pre=True) + def validate_and_adjust_fields(cls, values): + try: + values["value_type"] = values.get("value_type", ValueType.UNKNOWN) + values["tags"] = values.get("tags", {}) + values["created_timestamp"] = None + values["last_updated_timestamp"] = None + + # Replace join_keys with a single join_key + join_keys = values.get("join_keys", None) + if join_keys and len(join_keys) > 1: + # TODO(felixwang9817): When multiple join keys are supported, add a `join_keys` attribute + # and deprecate the `join_key` attribute. + raise ValueError( + "An entity may only have a single join key. " + "Multiple join keys will be supported in the future." 
+ ) + elif join_keys and len(join_keys) == 1: + values["join_key"] = join_keys[0] + else: + values["join_key"] = values["name"] + + if "join_keys" in values: + del values["join_keys"] + + return values + except KeyError as exception: + raise TypeError("Entity missing required values.") from exception def __hash__(self) -> int: return hash((self.name, self.join_key)) diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index 70f7d3dcb7..d2f2e10c78 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -819,7 +819,7 @@ def apply( >>> fs.apply([driver_hourly_stats_view, driver]) # register entity and feature view """ # TODO: Add locking - if not isinstance(objects, Iterable): + if not isinstance(objects, list): objects = [objects] assert isinstance(objects, list) diff --git a/sdk/python/requirements/py3.8-ci-requirements.txt b/sdk/python/requirements/py3.8-ci-requirements.txt index 85e089b03a..87cf2ce1dd 100644 --- a/sdk/python/requirements/py3.8-ci-requirements.txt +++ b/sdk/python/requirements/py3.8-ci-requirements.txt @@ -78,7 +78,7 @@ babel==2.12.1 # via sphinx backcall==0.2.0 # via ipython -backports-zoneinfo==0.2.1 +backports-zoneinfo==0.2.1;python_version<"3.9" # via # trino # tzlocal diff --git a/sdk/python/tests/unit/test_entity.py b/sdk/python/tests/unit/test_entity.py index 78f7123104..b481a735f7 100644 --- a/sdk/python/tests/unit/test_entity.py +++ b/sdk/python/tests/unit/test_entity.py @@ -44,7 +44,7 @@ def test_entity_without_description(): def test_entity_without_name(): - with pytest.raises(TypeError): + with pytest.raises(ValueError): _ = Entity() From 53a6fe7854ff1dafb1b4dc6530533b6401430385 Mon Sep 17 00:00:00 2001 From: Matt Carlin Date: Thu, 29 Jun 2023 22:48:00 -0500 Subject: [PATCH 04/32] Add Pydantic Model conversion to Entity instead of subclassing it --- sdk/python/feast/base_feature_view.py | 2 + sdk/python/feast/entity.py | 145 +++++++++++++++++++------- sdk/python/tests/unit/test_entity.py | 2 +- setup.py | 2 +- 4 files changed, 110 insertions(+), 41 deletions(-) diff --git a/sdk/python/feast/base_feature_view.py b/sdk/python/feast/base_feature_view.py index 975537a394..4425fb3e4f 100644 --- a/sdk/python/feast/base_feature_view.py +++ b/sdk/python/feast/base_feature_view.py @@ -13,10 +13,12 @@ # limitations under the License. from abc import ABC, abstractmethod from datetime import datetime +from json import dumps from typing import Dict, List, Optional, Type from google.protobuf.json_format import MessageToJson from proto import Message +from pydantic import BaseModel, root_validator from feast.feature_view_projection import FeatureViewProjection from feast.field import Field diff --git a/sdk/python/feast/entity.py b/sdk/python/feast/entity.py index 7a734bccf1..ad5ee051ef 100644 --- a/sdk/python/feast/entity.py +++ b/sdk/python/feast/entity.py @@ -26,8 +26,30 @@ from feast.value_type import ValueType +class EntityModel(BaseModel): + """ + Pydantic Model of a Feast entity. + """ + + name: str + join_key: str + value_type: Optional[ValueType] = None + description: str = "" + tags: Optional[Dict[str, str]] = None + owner: str = "" + created_timestamp: Optional[datetime] + last_updated_timestamp: Optional[datetime] + + class Config: + arbitrary_types_allowed = True + extra = "allow" + json_encoders = { + ValueType: lambda v: int(dumps(v.value, default=str)) + } + + @typechecked -class Entity(BaseModel): +class Entity: """ An entity defines a collection of entities for which features can be defined. 
An entity can also contain associated metadata. @@ -46,49 +68,62 @@ class Entity(BaseModel): """ name: str - value_type: Optional[ValueType] = None + value_type: ValueType join_key: str - description: str = "" - tags: Optional[Dict[str, str]] = None - owner: str = "" + description: str + tags: Dict[str, str] + owner: str created_timestamp: Optional[datetime] last_updated_timestamp: Optional[datetime] - class Config: - arbitrary_types_allowed = True - extra = "allow" - json_encoders = { - ValueType: lambda v: int(dumps(v.value, default=str)) - } + @log_exceptions + def __init__( + self, + *, + name: str, + join_keys: Optional[List[str]] = None, + value_type: Optional[ValueType] = None, + description: str = "", + tags: Optional[Dict[str, str]] = None, + owner: str = "", + ): + """ + Creates an Entity object. - @root_validator(pre=True) - def validate_and_adjust_fields(cls, values): - try: - values["value_type"] = values.get("value_type", ValueType.UNKNOWN) - values["tags"] = values.get("tags", {}) - values["created_timestamp"] = None - values["last_updated_timestamp"] = None - - # Replace join_keys with a single join_key - join_keys = values.get("join_keys", None) - if join_keys and len(join_keys) > 1: - # TODO(felixwang9817): When multiple join keys are supported, add a `join_keys` attribute - # and deprecate the `join_key` attribute. - raise ValueError( - "An entity may only have a single join key. " - "Multiple join keys will be supported in the future." - ) - elif join_keys and len(join_keys) == 1: - values["join_key"] = join_keys[0] - else: - values["join_key"] = values["name"] - - if "join_keys" in values: - del values["join_keys"] - - return values - except KeyError as exception: - raise TypeError("Entity missing required values.") from exception + Args: + name: The unique name of the entity. + join_keys (optional): A list of properties that uniquely identifies different entities + within the collection. This currently only supports a list of size one, but is + intended to eventually support multiple join keys. + value_type (optional): The type of the entity, such as string or float. If not specified, + it will be inferred from the schema of the underlying data source. + description (optional): A human-readable description. + tags (optional): A dictionary of key-value pairs to store arbitrary metadata. + owner (optional): The owner of the entity, typically the email of the primary maintainer. + + Raises: + ValueError: Parameters are specified incorrectly. + """ + self.name = name + self.value_type = value_type or ValueType.UNKNOWN + + if join_keys and len(join_keys) > 1: + # TODO(felixwang9817): When multiple join keys are supported, add a `join_keys` attribute + # and deprecate the `join_key` attribute. + raise ValueError( + "An entity may only have a single join key. " + "Multiple join keys will be supported in the future." + ) + elif join_keys and len(join_keys) == 1: + self.join_key = join_keys[0] + else: + self.join_key = self.name + + self.description = description + self.tags = tags if tags is not None else {} + self.owner = owner + self.created_timestamp = None + self.last_updated_timestamp = None def __hash__(self) -> int: return hash((self.name, self.join_key)) @@ -181,3 +216,35 @@ def to_proto(self) -> EntityProto: ) return EntityProto(spec=spec, meta=meta) + + def to_pydantic_model(self) -> EntityModel: + """ + Converts an entity object to its pydantic model representation. + + Returns: + An EntityModel. 
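+
+        A minimal sketch of the intended use, relying only on names defined
+        in this diff (illustrative, not a doctest):
+
+            entity = Entity(name="driver", join_keys=["driver_id"])
+            model = entity.to_pydantic_model()
+            assert model.join_key == "driver_id"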
+ """ + return EntityModel( + name=self.name, + join_key=self.join_key, + value_type=self.value_type, + description=self.description, + tags=self.tags if self.tags else None, + owner=self.owner) + + +def entity_from_pydantic_model(pydantic_entity): + """ + Given a Pydantic EntityModel, create and return an Entity. + + Returns: + An Entity. + """ + return Entity( + name=pydantic_entity.name, + join_key=pydantic_entity.join_key, + value_type=pydantic_entity.value_type, + description=pydantic_entity.description, + tags=pydantic_entity.tags if pydantic_entity.tags else None, + owner=pydantic_entity.owner) + diff --git a/sdk/python/tests/unit/test_entity.py b/sdk/python/tests/unit/test_entity.py index b481a735f7..78f7123104 100644 --- a/sdk/python/tests/unit/test_entity.py +++ b/sdk/python/tests/unit/test_entity.py @@ -44,7 +44,7 @@ def test_entity_without_description(): def test_entity_without_name(): - with pytest.raises(ValueError): + with pytest.raises(TypeError): _ = Entity() diff --git a/setup.py b/setup.py index 5ab3d3903c..0887705734 100644 --- a/setup.py +++ b/setup.py @@ -39,7 +39,7 @@ DESCRIPTION = "EG-specific Python SDK for Feast" URL = "https://github.com/ExpediaGroup/feast" AUTHOR = "Feast and EG" -REQUIRES_PYTHON = ">=3.8.0" +REQUIRES_PYTHON = "<=3.8.0" REQUIRED = [ "click>=7.0.0,<9.0.0", From 9a8259d277284a2cfc755c46f7093df2ff1bfdbc Mon Sep 17 00:00:00 2001 From: Matt Carlin Date: Thu, 29 Jun 2023 23:30:22 -0500 Subject: [PATCH 05/32] Fix accidental python requirement change --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 0887705734..5ab3d3903c 100644 --- a/setup.py +++ b/setup.py @@ -39,7 +39,7 @@ DESCRIPTION = "EG-specific Python SDK for Feast" URL = "https://github.com/ExpediaGroup/feast" AUTHOR = "Feast and EG" -REQUIRES_PYTHON = "<=3.8.0" +REQUIRES_PYTHON = ">=3.8.0" REQUIRED = [ "click>=7.0.0,<9.0.0", From de11bcf32dbc1a5076871684b71b42cd92b86ae9 Mon Sep 17 00:00:00 2001 From: Matt Carlin Date: Fri, 30 Jun 2023 11:32:29 -0500 Subject: [PATCH 06/32] Add Pydantic Model conversion to two DataSources, RequestSource and SparkSource --- sdk/python/feast/data_source.py | 91 +++++++++++++++++++ sdk/python/feast/entity.py | 28 +++--- sdk/python/feast/field.py | 43 ++++----- .../spark_offline_store/spark_source.py | 69 +++++++++++++- sdk/python/tests/unit/test_feature_views.py | 4 +- 5 files changed, 194 insertions(+), 41 deletions(-) diff --git a/sdk/python/feast/data_source.py b/sdk/python/feast/data_source.py index b7ce19aad9..2ce2dcf783 100644 --- a/sdk/python/feast/data_source.py +++ b/sdk/python/feast/data_source.py @@ -16,10 +16,13 @@ import warnings from abc import ABC, abstractmethod from datetime import timedelta +from json import dumps from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple from google.protobuf.duration_pb2 import Duration from google.protobuf.json_format import MessageToJson +from pydantic import BaseModel, root_validator +from pydantic import Field as PydanticField from typeguard import typechecked from feast import type_map @@ -164,6 +167,25 @@ def to_proto(self) -> DataSourceProto.KinesisOptions: } +class DataSourceModel(BaseModel): + """ + Pydantic Model of a Feast DataSource. 
+ """ + + name: str + timestamp_field: Optional[str] = "" + created_timestamp_column: Optional[str] = "" + field_mapping: Optional[Dict[str, str]] = None + description: Optional[str] = "" + tags: Optional[Dict[str, str]] = None + owner: Optional[str] = "" + date_partition_column: Optional[str] = "" + + class Config: + arbitrary_types_allowed = True + extra = "allow" + + @typechecked class DataSource(ABC): """ @@ -341,6 +363,25 @@ def get_table_query_string(self) -> str: """ raise NotImplementedError + def to_pydantic_model(self) -> DataSourceModel: + """ + Converts a DataSource object to its pydantic model representation. + + Returns: + A DataSourceModel. + """ + raise NotImplementedError + + @staticmethod + def datasource_from_pydantic_model(pydantic_datasource): + """ + Given a Pydantic DataSourceModel, create and return a DataSource. + + Returns: + A DataSource. + """ + raise NotImplementedError + @typechecked class KafkaSource(DataSource): @@ -500,6 +541,27 @@ def get_table_query_string(self) -> str: raise NotImplementedError + +class RequestSourceModel(DataSourceModel): + """ + Pydantic Model of a Feast RequestSource. + """ + + name: str + schema_: List[Field] = PydanticField(None, alias='schema') + description: Optional[Dict[str, str]] = None + tags: Dict[str, str] + owner: Optional[str] = "" + + class Config: + arbitrary_types_allowed = True + extra = "allow" + json_encoders = { + Field: lambda v: int(dumps(v.value, default=str)) + } + + + @typechecked class RequestSource(DataSource): """ @@ -606,6 +668,35 @@ def get_table_query_string(self) -> str: def source_datatype_to_feast_value_type() -> Callable[[str], ValueType]: raise NotImplementedError + def to_pydantic_model(self) -> RequestSourceModel: + """ + Converts a RequestSource object to its pydantic model representation. + + Returns: + A RequestSourceModel. + """ + return RequestSourceModel( + name=self.name, + schema=self.schema, + description=self.description, + tags=self.tags if self.tags else None, + owner=self.owner) + + @staticmethod + def datasource_from_pydantic_model(pydantic_datasource): + """ + Given a Pydantic RequestSourceModel, create and return a RequestSource. + + Returns: + A RequestSource. + """ + return RequestSource( + name=pydantic_datasource.name, + schema=pydantic_datasource.schema, + description=pydantic_datasource.description, + tags=pydantic_datasource.tags if pydantic_datasource.tags else None, + owner=pydantic_datasource.owner) + @typechecked class KinesisSource(DataSource): diff --git a/sdk/python/feast/entity.py b/sdk/python/feast/entity.py index ad5ee051ef..9b084f2e59 100644 --- a/sdk/python/feast/entity.py +++ b/sdk/python/feast/entity.py @@ -232,19 +232,19 @@ def to_pydantic_model(self) -> EntityModel: tags=self.tags if self.tags else None, owner=self.owner) + @staticmethod + def entity_from_pydantic_model(pydantic_entity): + """ + Given a Pydantic EntityModel, create and return an Entity. -def entity_from_pydantic_model(pydantic_entity): - """ - Given a Pydantic EntityModel, create and return an Entity. - - Returns: - An Entity. - """ - return Entity( - name=pydantic_entity.name, - join_key=pydantic_entity.join_key, - value_type=pydantic_entity.value_type, - description=pydantic_entity.description, - tags=pydantic_entity.tags if pydantic_entity.tags else None, - owner=pydantic_entity.owner) + Returns: + An Entity. 
+ """ + return Entity( + name=pydantic_entity.name, + join_keys=[pydantic_entity.join_key], + value_type=pydantic_entity.value_type, + description=pydantic_entity.description, + tags=pydantic_entity.tags if pydantic_entity.tags else None, + owner=pydantic_entity.owner) diff --git a/sdk/python/feast/field.py b/sdk/python/feast/field.py index b07bddfeac..63d869a0ff 100644 --- a/sdk/python/feast/field.py +++ b/sdk/python/feast/field.py @@ -12,8 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +from json import dumps from typing import Dict, Optional +from pydantic import BaseModel, validator from typeguard import typechecked from feast.feature import Feature @@ -23,7 +25,7 @@ @typechecked -class Field: +class Field(BaseModel): """ A Field represents a set of values with the same structure. @@ -36,30 +38,21 @@ class Field: name: str dtype: FeastType - description: str - tags: Dict[str, str] - - def __init__( - self, - *, - name: str, - dtype: FeastType, - description: str = "", - tags: Optional[Dict[str, str]] = None, - ): - """ - Creates a Field object. - - Args: - name: The name of the field. - dtype: The type of the field, such as string or float. - description (optional): A human-readable description. - tags (optional): User-defined metadata in dictionary form. - """ - self.name = name - self.dtype = dtype - self.description = description - self.tags = tags or {} + description: Optional[str] = "" + tags: Optional[Dict[str, str]] = {} + + class Config: + arbitrary_types_allowed = True + extra = "allow" + json_encoders = { + FeastType: lambda v: int(dumps(v.to_value_type().value, default=str)) + } + + @validator('dtype', pre=True, always=True) + def dtype_is_feasttype(cls, v): + if not isinstance(v, FeastType): + raise TypeError("dtype must be of type FeastType") + return v def __eq__(self, other): if type(self) != type(other): diff --git a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py index a27065fb5e..95644bff3f 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py @@ -2,12 +2,14 @@ import traceback import warnings from enum import Enum +from json import dumps from typing import Any, Callable, Dict, Iterable, Optional, Tuple +from pydantic import BaseModel, root_validator from pyspark.sql import SparkSession from feast import flags_helper -from feast.data_source import DataSource +from feast.data_source import DataSource, DataSourceModel from feast.errors import DataSourceNoNameException from feast.infra.offline_stores.offline_utils import get_temp_entity_table_name from feast.protos.feast.core.DataSource_pb2 import DataSource as DataSourceProto @@ -30,6 +32,28 @@ class SparkSourceFormat(Enum): avro = "avro" +class SparkSourceModel(DataSourceModel): + """ + Pydantic Model of a Feast SparkSource. 
+ """ + name: str + table: Optional[str] = None + query: Optional[str] = None + path: Optional[str] = None + file_format: Optional[str] = None + event_timestamp_column: Optional[str] = None + created_timestamp_column: Optional[str] = None + field_mapping: Optional[Dict[str, str]] = None + description: Optional[str] = "" + tags: Optional[Dict[str, str]] = None + owner: Optional[str] = "" + timestamp_field: Optional[str] = None + + class Config: + arbitrary_types_allowed = True + extra = "allow" + + class SparkSource(DataSource): def __init__( self, @@ -185,6 +209,49 @@ def get_table_query_string(self) -> str: return f"`{tmp_table_name}`" + def to_pydantic_model(self) -> SparkSourceModel: + """ + Converts a SparkSource object to its pydantic model representation. + + Returns: + A SparkSourceModel. + """ + return SparkSourceModel( + name=self.name, + table=self.table if self.table else "", + query=self.query if self.query else "", + path=self.path if self.path else "", + file_format=self.file_format if self.file_format else "", + event_timestamp_column=self.event_timestamp_column if self.event_timestamp_column else "", + created_timestamp_column=self.created_timestamp_column if self.created_timestamp_column else "", + field_mapping=self.field_mapping if self.field else {}, + description=self.description if self.description else "", + tags=self.tags if self.tags else {}, + owner=self.owner if self.owner else "", + timestamp_field=self.timestamp_field if self.timestamp_field else "") + + @staticmethod + def datasource_from_pydantic_model(pydantic_datasource): + """ + Given a Pydantic SparkSourceModel, create and return a SparkSource. + + Returns: + A SparkSource. + """ + return SparkSource( + name=pydantic_datasource.name, + table=pydantic_datasource.table, + query=pydantic_datasource.query, + path=pydantic_datasource.path, + file_format=pydantic_datasource.file_format, + event_timestamp_column=pydantic_datasource.event_timestamp_column, + created_timestamp_column=pydantic_datasource.created_timestamp_column, + field_mapping=pydantic_datasource.field_mapping if pydantic_datasource.field_mapping else None, + description=pydantic_datasource.description, + tags=pydantic_datasource.tags if pydantic_datasource.tags else None, + owner=pydantic_datasource.owner, + timestamp_field=pydantic_datasource.timestamp_field) + class SparkOptions: allowed_formats = [format.value for format in SparkSourceFormat] diff --git a/sdk/python/tests/unit/test_feature_views.py b/sdk/python/tests/unit/test_feature_views.py index 379396e5c6..64e5059026 100644 --- a/sdk/python/tests/unit/test_feature_views.py +++ b/sdk/python/tests/unit/test_feature_views.py @@ -2,6 +2,8 @@ import pytest +from pydantic import ValidationError + from feast.aggregation import Aggregation from feast.batch_feature_view import BatchFeatureView from feast.data_format import AvroFormat @@ -275,5 +277,5 @@ def test_hash(): def test_field_types(): - with pytest.raises(TypeError): + with pytest.raises(ValidationError): Field(name="name", dtype=ValueType.INT32) From 051631f6eb6cd68cb8063e0c9b0751c78cb9f3c6 Mon Sep 17 00:00:00 2001 From: Matt Carlin Date: Fri, 30 Jun 2023 14:20:03 -0500 Subject: [PATCH 07/32] Add Pydantic Model conversion to FeatureView --- sdk/python/feast/data_source.py | 4 +- sdk/python/feast/entity.py | 2 +- sdk/python/feast/feature_view.py | 69 ++++++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+), 3 deletions(-) diff --git a/sdk/python/feast/data_source.py b/sdk/python/feast/data_source.py index 2ce2dcf783..5be1bd853f 
100644 --- a/sdk/python/feast/data_source.py +++ b/sdk/python/feast/data_source.py @@ -549,8 +549,8 @@ class RequestSourceModel(DataSourceModel): name: str schema_: List[Field] = PydanticField(None, alias='schema') - description: Optional[Dict[str, str]] = None - tags: Dict[str, str] + description: Optional[str] = "" + tags: Optional[Dict[str, str]] = None owner: Optional[str] = "" class Config: diff --git a/sdk/python/feast/entity.py b/sdk/python/feast/entity.py index 9b084f2e59..afec3f2c52 100644 --- a/sdk/python/feast/entity.py +++ b/sdk/python/feast/entity.py @@ -28,7 +28,7 @@ class EntityModel(BaseModel): """ - Pydantic Model of a Feast entity. + Pydantic Model of a Feast Entity. """ name: str diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py index e26759ba92..0b62f08499 100644 --- a/sdk/python/feast/feature_view.py +++ b/sdk/python/feast/feature_view.py @@ -14,9 +14,11 @@ import copy import warnings from datetime import datetime, timedelta +from json import dumps from typing import Dict, List, Optional, Tuple, Type from google.protobuf.duration_pb2 import Duration +from pydantic import BaseModel, root_validator from typeguard import typechecked from feast import utils @@ -51,6 +53,32 @@ ) +class FeatureViewModel(BaseModel): + """ + Pydantic Model of a Feast FeatureView. + """ + + name: str + entities: List[str] + ttl: Optional[timedelta] + batch_source: DataSource + stream_source: Optional[DataSource] + entity_columns: List[Field] + features: List[Field] + online: bool = True + description: str = "" + tags: Optional[Dict[str, str]] = None + owner: str = "" + materialization_intervals: List[Tuple[datetime, datetime]] + + class Config: + arbitrary_types_allowed = True + extra = "allow" + json_encoders = { + Field: lambda v: int(dumps(v.value, default=str)) + } + + @typechecked class FeatureView(BaseFeatureView): """ @@ -452,3 +480,44 @@ def most_recent_end_time(self) -> Optional[datetime]: if len(self.materialization_intervals) == 0: return None return max([interval[1] for interval in self.materialization_intervals]) + + def to_pydantic_model(self) -> FeatureViewModel: + """ + Converts a FeatureView object to its pydantic model representation. + + Returns: + A FeatureViewModel. + """ + return FeatureViewModel( + name=self.name, + entities=self.entities if self.entities else None, + ttl=self.ttl, + batch_source=self.batch_source, + stream_source=self.stream_source, + entity_columns=self.entity_columns if self.entity_columns else None, + features=self.features if self.features else None, + online=self.online, + description=self.description, + tags=self.tags if self.tags else None, + owner=self.owner, + materialization_intervals=self.materialization_intervals if self.materialization_intervals else None) + + @staticmethod + def featureview_from_pydantic_model(pydantic_featureview): + """ + Given a Pydantic FeatureViewModel, create and return a FeatureView. + + Returns: + A FeatureView. 
+ """ + return FeatureView( + name=pydantic_featureview.name, + source=pydantic_featureview.source, + schema=pydantic_featureview.schema, + entities=pydantic_featureview.entities, + ttl=pydantic_featureview.ttl, + online=pydantic_featureview.online, + description=pydantic_featureview.description, + tags=pydantic_featureview.tags if pydantic_featureview.tags else None, + owner=pydantic_featureview.owner) + From caf9c59138f4b82b4909367eece6c4b083a97b2a Mon Sep 17 00:00:00 2001 From: Matt Carlin Date: Fri, 30 Jun 2023 14:45:23 -0500 Subject: [PATCH 08/32] Fix some optional strings for Pydantic models --- .../spark_offline_store/spark_source.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py index 95644bff3f..5bf6910b09 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py @@ -240,17 +240,17 @@ def datasource_from_pydantic_model(pydantic_datasource): """ return SparkSource( name=pydantic_datasource.name, - table=pydantic_datasource.table, - query=pydantic_datasource.query, - path=pydantic_datasource.path, - file_format=pydantic_datasource.file_format, - event_timestamp_column=pydantic_datasource.event_timestamp_column, - created_timestamp_column=pydantic_datasource.created_timestamp_column, + table=pydantic_datasource.table or "", + query=pydantic_datasource.query or "", + path=pydantic_datasource.path or "", + file_format=pydantic_datasource.file_format or "", + event_timestamp_column=pydantic_datasource.event_timestamp_column or "", + created_timestamp_column=pydantic_datasource.created_timestamp_column or "", field_mapping=pydantic_datasource.field_mapping if pydantic_datasource.field_mapping else None, - description=pydantic_datasource.description, + description=pydantic_datasource.description or "", tags=pydantic_datasource.tags if pydantic_datasource.tags else None, - owner=pydantic_datasource.owner, - timestamp_field=pydantic_datasource.timestamp_field) + owner=pydantic_datasource.owner or "", + timestamp_field=pydantic_datasource.timestamp_field or "") class SparkOptions: From 623fab7ab54abeb4591547923740627e5d824760 Mon Sep 17 00:00:00 2001 From: Matt Carlin Date: Fri, 30 Jun 2023 16:16:35 -0500 Subject: [PATCH 09/32] Fix small errors in DataSource and FeatureView pydantic models, do partial work for FeatureService pydantic model, and add unit tests for pydantic models. 
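
The new tests all exercise one round-trip property: build an object, convert
it to its Pydantic model, convert back, and assert equality. A condensed
sketch of that pattern, using a stand-in Widget class rather than a real
Feast type (every name below is illustrative):

    from typing import Dict, Optional

    from pydantic import BaseModel


    class WidgetModel(BaseModel):
        """Pydantic twin of the hypothetical Widget class below."""

        name: str
        tags: Optional[Dict[str, str]] = None


    class Widget:
        def __init__(self, name: str, tags: Optional[Dict[str, str]] = None):
            self.name = name
            self.tags = tags or {}

        def __eq__(self, other):
            return (self.name, self.tags) == (other.name, other.tags)

        def to_pydantic_model(self) -> WidgetModel:
            return WidgetModel(name=self.name, tags=self.tags or None)

        @staticmethod
        def widget_from_pydantic_model(model: WidgetModel) -> "Widget":
            return Widget(name=model.name, tags=model.tags)


    # The round-trip property asserted for Entity, RequestSource, and SparkSource.
    widget = Widget(name="w", tags={"k": "v"})
    assert widget == Widget.widget_from_pydantic_model(widget.to_pydantic_model())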
--- sdk/python/feast/data_source.py | 5 +- sdk/python/feast/entity.py | 2 +- sdk/python/feast/feature_service.py | 27 +++++++ sdk/python/feast/feature_view.py | 2 +- sdk/python/feast/field.py | 6 +- .../spark_offline_store/spark_source.py | 7 +- sdk/python/tests/unit/test_pydantic_models.py | 72 +++++++++++++++++++ 7 files changed, 109 insertions(+), 12 deletions(-) create mode 100644 sdk/python/tests/unit/test_pydantic_models.py diff --git a/sdk/python/feast/data_source.py b/sdk/python/feast/data_source.py index 5be1bd853f..692da484b3 100644 --- a/sdk/python/feast/data_source.py +++ b/sdk/python/feast/data_source.py @@ -21,7 +21,7 @@ from google.protobuf.duration_pb2 import Duration from google.protobuf.json_format import MessageToJson -from pydantic import BaseModel, root_validator +from pydantic import BaseModel from pydantic import Field as PydanticField from typeguard import typechecked @@ -557,7 +557,6 @@ class Config: arbitrary_types_allowed = True extra = "allow" json_encoders = { - Field: lambda v: int(dumps(v.value, default=str)) } @@ -692,7 +691,7 @@ def datasource_from_pydantic_model(pydantic_datasource): """ return RequestSource( name=pydantic_datasource.name, - schema=pydantic_datasource.schema, + schema=pydantic_datasource.schema_, description=pydantic_datasource.description, tags=pydantic_datasource.tags if pydantic_datasource.tags else None, owner=pydantic_datasource.owner) diff --git a/sdk/python/feast/entity.py b/sdk/python/feast/entity.py index afec3f2c52..31d63fdf5a 100644 --- a/sdk/python/feast/entity.py +++ b/sdk/python/feast/entity.py @@ -16,7 +16,7 @@ from typing import Dict, List, Optional from google.protobuf.json_format import MessageToJson -from pydantic import BaseModel, root_validator +from pydantic import BaseModel from typeguard import typechecked from feast.protos.feast.core.Entity_pb2 import Entity as EntityProto diff --git a/sdk/python/feast/feature_service.py b/sdk/python/feast/feature_service.py index c3037a55da..bfaddbb331 100644 --- a/sdk/python/feast/feature_service.py +++ b/sdk/python/feast/feature_service.py @@ -1,7 +1,9 @@ from datetime import datetime +from json import dumps from typing import Dict, List, Optional, Union from google.protobuf.json_format import MessageToJson +from pydantic import BaseModel, root_validator from typeguard import typechecked from feast.base_feature_view import BaseFeatureView @@ -22,6 +24,31 @@ from feast.usage import log_exceptions +class FeatureServiceModel(BaseModel): + """ + Pydantic Model of a Feast FeatureService. 
+ """ + + name: str + _features: List[Union[FeatureView, OnDemandFeatureView]] + feature_view_projections: List[FeatureViewProjection] + description: str = "" + tags: Optional[Dict[str, str]] = None + owner: str = "" + created_timestamp: Optional[datetime] = None + last_updated_timestamp: Optional[datetime] = None + logging_config: Optional[LoggingConfig] = None + + class Config: + arbitrary_types_allowed = True + extra = "allow" + json_encoders = { + # OnDemandFeatureView + # FeatureViewProjection + # LoggingConfig + } + + @typechecked class FeatureService: """ diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py index 0b62f08499..d56f1aea22 100644 --- a/sdk/python/feast/feature_view.py +++ b/sdk/python/feast/feature_view.py @@ -18,7 +18,7 @@ from typing import Dict, List, Optional, Tuple, Type from google.protobuf.duration_pb2 import Duration -from pydantic import BaseModel, root_validator +from pydantic import BaseModel from typeguard import typechecked from feast import utils diff --git a/sdk/python/feast/field.py b/sdk/python/feast/field.py index 63d869a0ff..9a247429f3 100644 --- a/sdk/python/feast/field.py +++ b/sdk/python/feast/field.py @@ -20,7 +20,7 @@ from feast.feature import Feature from feast.protos.feast.core.Feature_pb2 import FeatureSpecV2 as FieldProto -from feast.types import FeastType, from_value_type +from feast.types import FeastType, from_value_type, ComplexFeastType, PrimitiveFeastType from feast.value_type import ValueType @@ -45,7 +45,9 @@ class Config: arbitrary_types_allowed = True extra = "allow" json_encoders = { - FeastType: lambda v: int(dumps(v.to_value_type().value, default=str)) + FeastType: lambda v: int(dumps(v.to_value_type().value, default=str)), + ComplexFeastType: lambda v: int(dumps(v.to_value_type().value, default=str)), + PrimitiveFeastType: lambda v: int(dumps(v.to_value_type().value, default=str)) } @validator('dtype', pre=True, always=True) diff --git a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py index 5bf6910b09..ab780c5e40 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py @@ -5,7 +5,7 @@ from json import dumps from typing import Any, Callable, Dict, Iterable, Optional, Tuple -from pydantic import BaseModel, root_validator +from pydantic import BaseModel from pyspark.sql import SparkSession from feast import flags_helper @@ -41,7 +41,6 @@ class SparkSourceModel(DataSourceModel): query: Optional[str] = None path: Optional[str] = None file_format: Optional[str] = None - event_timestamp_column: Optional[str] = None created_timestamp_column: Optional[str] = None field_mapping: Optional[Dict[str, str]] = None description: Optional[str] = "" @@ -222,9 +221,8 @@ def to_pydantic_model(self) -> SparkSourceModel: query=self.query if self.query else "", path=self.path if self.path else "", file_format=self.file_format if self.file_format else "", - event_timestamp_column=self.event_timestamp_column if self.event_timestamp_column else "", created_timestamp_column=self.created_timestamp_column if self.created_timestamp_column else "", - field_mapping=self.field_mapping if self.field else {}, + field_mapping=self.field_mapping if self.field_mapping else {}, description=self.description if self.description else "", tags=self.tags if self.tags else {}, owner=self.owner if 
self.owner else "", @@ -244,7 +242,6 @@ def datasource_from_pydantic_model(pydantic_datasource): query=pydantic_datasource.query or "", path=pydantic_datasource.path or "", file_format=pydantic_datasource.file_format or "", - event_timestamp_column=pydantic_datasource.event_timestamp_column or "", created_timestamp_column=pydantic_datasource.created_timestamp_column or "", field_mapping=pydantic_datasource.field_mapping if pydantic_datasource.field_mapping else None, description=pydantic_datasource.description or "", diff --git a/sdk/python/tests/unit/test_pydantic_models.py b/sdk/python/tests/unit/test_pydantic_models.py new file mode 100644 index 0000000000..01e5fbcf8a --- /dev/null +++ b/sdk/python/tests/unit/test_pydantic_models.py @@ -0,0 +1,72 @@ +# Copyright 2020 The Feast Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import assertpy +import pytest + +from feast.entity import Entity, EntityModel +from feast.field import Field +from feast.data_source import DataSource, DataSourceModel, RequestSource, RequestSourceModel +from feast.feature_view import FeatureView, FeatureViewModel +from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import \ + SparkSource, SparkSourceModel +from feast.types import Bool, Float32, Int64 + + +def test_idempotent_entity_conversion(): + entity = Entity( + name="my-entity", + description="My entity", + tags={"key1": "val1", "key2": "val2"}, + ) + entity_model = entity.to_pydantic_model() + entity_2 = Entity.entity_from_pydantic_model(entity_model) + assert entity == entity_2 + + +def test_idempotent_requestsource_conversion(): + schema = [ + Field(name="f1", dtype=Float32), + Field(name="f2", dtype=Bool), + ] + request_source = RequestSource( + name="source", + schema=schema, + description="desc", + tags={}, + owner="feast", + ) + request_source_model = request_source.to_pydantic_model() + request_source_2 = RequestSource.datasource_from_pydantic_model(request_source_model) + assert request_source == request_source_2 + + +def test_idempotent_sparksource_conversion(): + spark_source = SparkSource( + name="source", + table="thingy", + description="desc", + tags={}, + owner="feast", + ) + spark_source_model = spark_source.to_pydantic_model() + spark_source_2 = SparkSource.datasource_from_pydantic_model(spark_source_model) + assert spark_source == spark_source_2 + + +def test_type_safety_when_converting_multiple_datasources(): + pass + + +def test_idempotent_featureview_conversion(): + pass From 83fdbc98f0cb2ac9c338bb0e7747800c1e141739 Mon Sep 17 00:00:00 2001 From: Matt Carlin Date: Sun, 2 Jul 2023 11:47:40 -0500 Subject: [PATCH 10/32] Add FeatureView Pydantic Model test, fix FeatureView model conversion, fix FeatureView to not desroy some of its arguments --- sdk/python/feast/feature_view.py | 33 ++++++++++++++----- sdk/python/tests/unit/test_pydantic_models.py | 26 ++++++++++++++- 2 files changed, 49 insertions(+), 10 deletions(-) diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py index 
d56f1aea22..f2beb269ab 100644 --- a/sdk/python/feast/feature_view.py +++ b/sdk/python/feast/feature_view.py @@ -19,6 +19,7 @@ from google.protobuf.duration_pb2 import Duration from pydantic import BaseModel +from pydantic import Field as PydanticField from typeguard import typechecked from feast import utils @@ -59,7 +60,8 @@ class FeatureViewModel(BaseModel): """ name: str - entities: List[str] + original_entities: List[Entity] + original_schema: Optional[List[Field]] = None ttl: Optional[timedelta] batch_source: DataSource stream_source: Optional[DataSource] @@ -75,7 +77,8 @@ class Config: arbitrary_types_allowed = True extra = "allow" json_encoders = { - Field: lambda v: int(dumps(v.value, default=str)) + Field: lambda v: int(dumps(v.value, default=str)), + DataSource: lambda v: v.to_pydantic_model() } @@ -162,6 +165,13 @@ def __init__( self.name = name self.entities = [e.name for e in entities] if entities else [DUMMY_ENTITY_NAME] self.ttl = ttl + + # FeatureView is destructive of some of its original arguments, + # making it impossible to convert idempotently to another format. + # store these arguments to recover them in conversions. + self.original_schema = schema + self.original_entities = entities + schema = schema or [] # Initialize data sources. @@ -490,17 +500,18 @@ def to_pydantic_model(self) -> FeatureViewModel: """ return FeatureViewModel( name=self.name, - entities=self.entities if self.entities else None, + original_entities=self.original_entities, ttl=self.ttl, + original_schema=self.original_schema, batch_source=self.batch_source, stream_source=self.stream_source, - entity_columns=self.entity_columns if self.entity_columns else None, + entity_columns=self.entity_columns, features=self.features if self.features else None, online=self.online, description=self.description, tags=self.tags if self.tags else None, owner=self.owner, - materialization_intervals=self.materialization_intervals if self.materialization_intervals else None) + materialization_intervals=self.materialization_intervals) @staticmethod def featureview_from_pydantic_model(pydantic_featureview): @@ -510,14 +521,18 @@ def featureview_from_pydantic_model(pydantic_featureview): Returns: A FeatureView. """ - return FeatureView( + feature_view = FeatureView( name=pydantic_featureview.name, - source=pydantic_featureview.source, - schema=pydantic_featureview.schema, - entities=pydantic_featureview.entities, + source=pydantic_featureview.batch_source, + schema=pydantic_featureview.original_schema, + entities=pydantic_featureview.original_entities, ttl=pydantic_featureview.ttl, online=pydantic_featureview.online, description=pydantic_featureview.description, tags=pydantic_featureview.tags if pydantic_featureview.tags else None, owner=pydantic_featureview.owner) + # Correct the FeatureView to store both sources. 
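+        # FeatureView.__init__ only accepts a single `source` argument, so the
+        # stream source (if any) must be reattached here or it would be lost.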
+ feature_view.batch_source=pydantic_featureview.batch_source + feature_view.stream_source=pydantic_featureview.stream_source + return feature_view diff --git a/sdk/python/tests/unit/test_pydantic_models.py b/sdk/python/tests/unit/test_pydantic_models.py index 01e5fbcf8a..68e47bd5fb 100644 --- a/sdk/python/tests/unit/test_pydantic_models.py +++ b/sdk/python/tests/unit/test_pydantic_models.py @@ -69,4 +69,28 @@ def test_type_safety_when_converting_multiple_datasources(): def test_idempotent_featureview_conversion(): - pass + schema = [ + Field(name="f1", dtype=Float32), + Field(name="f2", dtype=Bool), + ] + request_source = RequestSource( + name="source", + schema=schema, + description="desc", + tags={}, + owner="feast", + ) + feature_view = FeatureView( + name="my-feature-view", + entities=[], + schema=[ + Field(name="feature1", dtype=Float32), + Field(name="feature2", dtype=Float32), + ], + source=request_source, + ) + feature_view_model = feature_view.to_pydantic_model() + feature_view_2 = FeatureView.featureview_from_pydantic_model(feature_view_model) + print(feature_view.original_schema) + print(feature_view_2.original_schema) + assert feature_view == feature_view_2 From eae1e94e3fd51749a142b959a847416f7f1f0b07 Mon Sep 17 00:00:00 2001 From: Matt Carlin Date: Sun, 2 Jul 2023 12:09:32 -0500 Subject: [PATCH 11/32] Fix Entity json encoding in FeatureView pydantic model --- sdk/python/feast/feature_view.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py index f2beb269ab..798c011d53 100644 --- a/sdk/python/feast/feature_view.py +++ b/sdk/python/feast/feature_view.py @@ -78,7 +78,8 @@ class Config: extra = "allow" json_encoders = { Field: lambda v: int(dumps(v.value, default=str)), - DataSource: lambda v: v.to_pydantic_model() + DataSource: lambda v: v.to_pydantic_model(), + Entity: lambda v: v.to_pydantic_model() } From d62e2d7d79f65ba86f3322e80f21b6d4973be4fc Mon Sep 17 00:00:00 2001 From: Matt Carlin Date: Sun, 2 Jul 2023 12:55:40 -0500 Subject: [PATCH 12/32] Small bug fix to make Entity list optional in FeatureViews pydantic model --- sdk/python/feast/feature_view.py | 2 +- sdk/python/tests/unit/test_pydantic_models.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py index 798c011d53..22bb4ea5e7 100644 --- a/sdk/python/feast/feature_view.py +++ b/sdk/python/feast/feature_view.py @@ -60,7 +60,7 @@ class FeatureViewModel(BaseModel): """ name: str - original_entities: List[Entity] + original_entities: List[Entity] = None original_schema: Optional[List[Field]] = None ttl: Optional[timedelta] batch_source: DataSource diff --git a/sdk/python/tests/unit/test_pydantic_models.py b/sdk/python/tests/unit/test_pydantic_models.py index 68e47bd5fb..5f2fb0ac7f 100644 --- a/sdk/python/tests/unit/test_pydantic_models.py +++ b/sdk/python/tests/unit/test_pydantic_models.py @@ -73,6 +73,7 @@ def test_idempotent_featureview_conversion(): Field(name="f1", dtype=Float32), Field(name="f2", dtype=Bool), ] + user_entity = Entity(name="user1", join_keys=["user_id"]) request_source = RequestSource( name="source", schema=schema, @@ -82,7 +83,7 @@ def test_idempotent_featureview_conversion(): ) feature_view = FeatureView( name="my-feature-view", - entities=[], + entities=[user_entity], schema=[ Field(name="feature1", dtype=Float32), Field(name="feature2", dtype=Float32), From 66fb9232416e6f33dc31c9a4ce1c590ee3dab286 Mon Sep 17 00:00:00 2001 
From: Matt Carlin
Date: Sun, 2 Jul 2023 14:20:49 -0500
Subject: [PATCH 13/32] Fix Pydantic model conversions with dependencies.
 FeatureView depends on DataSource and Entity, so use those converters in the
 FeatureView converter. Also fix a small SparkSource error: importing
 FeatureView into the SparkSource DataSource definition broke the hierarchy of
 FeatureService to FeatureView to DataSource and Entity, causing circular
 dependencies.

---
 sdk/python/feast/data_source.py               |  2 +
 sdk/python/feast/feature_view.py              | 45 ++++++++++++++-----
 .../spark_offline_store/spark_source.py       |  5 ++-
 sdk/python/tests/unit/test_pydantic_models.py | 23 +++++++++-
 4 files changed, 59 insertions(+), 16 deletions(-)

diff --git a/sdk/python/feast/data_source.py b/sdk/python/feast/data_source.py
index 692da484b3..b4b45902c5 100644
--- a/sdk/python/feast/data_source.py
+++ b/sdk/python/feast/data_source.py
@@ -173,6 +173,7 @@ class DataSourceModel(BaseModel):
     """

     name: str
+    model_type: str = None
     timestamp_field: Optional[str] = ""
     created_timestamp_column: Optional[str] = ""
     field_mapping: Optional[Dict[str, str]] = None
@@ -548,6 +549,7 @@ class RequestSourceModel(DataSourceModel):
     """

     name: str
+    model_type: str = "RequestSource"
     schema_: List[Field] = PydanticField(None, alias='schema')
     description: Optional[str] = ""
     tags: Optional[Dict[str, str]] = None
diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py
index 22bb4ea5e7..6b06abaa3e 100644
--- a/sdk/python/feast/feature_view.py
+++ b/sdk/python/feast/feature_view.py
@@ -16,6 +16,7 @@
 from datetime import datetime, timedelta
 from json import dumps
 from typing import Dict, List, Optional, Tuple, Type
+import sys

 from google.protobuf.duration_pb2 import Duration
 from pydantic import BaseModel
@@ -24,10 +25,12 @@

 from feast import utils
 from feast.base_feature_view import BaseFeatureView
-from feast.data_source import DataSource, KafkaSource, KinesisSource, PushSource
-from feast.entity import Entity
+from feast.data_source import DataSource, RequestSource, DataSourceModel, KafkaSource, KinesisSource, PushSource
+from feast.entity import Entity, EntityModel
 from feast.feature_view_projection import FeatureViewProjection
 from feast.field import Field
+from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import \
+    SparkSource
 from feast.protos.feast.core.FeatureView_pb2 import FeatureView as FeatureViewProto
 from feast.protos.feast.core.FeatureView_pb2 import (
     FeatureViewMeta as FeatureViewMetaProto,
@@ -60,11 +63,11 @@ class FeatureViewModel(BaseModel):
     """

     name: str
-    original_entities: List[Entity] = None
+    original_entities: List[EntityModel] = None
     original_schema: Optional[List[Field]] = None
     ttl: Optional[timedelta]
-    batch_source: DataSource
-    stream_source: Optional[DataSource]
+    batch_source: DataSourceModel
+    stream_source: Optional[DataSourceModel]
     entity_columns: List[Field]
     features: List[Field]
     online: bool = True
@@ -289,6 +292,11 @@ def __eq__(self, other):
         ):
             return False

+        if isinstance(self.original_entities, List) and isinstance(other.original_entities, List):
+            for entity1, entity2 in zip(self.original_entities, other.original_entities):
+                if entity1 != entity2:
+                    return False
+
         return True

     @property
@@ -501,11 +509,11 @@ def to_pydantic_model(self) -> FeatureViewModel:
         """
         return FeatureViewModel(
             name=self.name,
-            original_entities=self.original_entities,
+            original_entities=[entity.to_pydantic_model() for entity in self.original_entities],
            ttl=self.ttl,
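            # Exporting the constructor-time entities and schema (rather than
            # the processed self.entities / self.schema) is what lets the
            # model round-trip back into an identical FeatureView.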
original_schema=self.original_schema, - batch_source=self.batch_source, - stream_source=self.stream_source, + batch_source=self.batch_source.to_pydantic_model() if self.batch_source else self.batch_source, + stream_source=self.stream_source.to_pydantic_model() if self.stream_source else self.stream_source, entity_columns=self.entity_columns, features=self.features if self.features else None, online=self.online, @@ -522,18 +530,31 @@ def featureview_from_pydantic_model(pydantic_featureview): Returns: A FeatureView. """ + # Convert each of the sources if they exist + batch_source = None + if pydantic_featureview.batch_source: + class_ = getattr(sys.modules[__name__], pydantic_featureview.batch_source.model_type) + batch_source = class_.datasource_from_pydantic_model(pydantic_featureview.batch_source) + stream_source = None + if pydantic_featureview.stream_source: + class_ = getattr(sys.modules[__name__], pydantic_featureview.stream_source.model_type) + stream_source = class_.datasource_from_pydantic_model(pydantic_featureview.stream_source) + + # Create the FeatureView feature_view = FeatureView( name=pydantic_featureview.name, - source=pydantic_featureview.batch_source, + source=batch_source, schema=pydantic_featureview.original_schema, - entities=pydantic_featureview.original_entities, + entities=[Entity.entity_from_pydantic_model(entity) for entity in pydantic_featureview.original_entities], ttl=pydantic_featureview.ttl, online=pydantic_featureview.online, description=pydantic_featureview.description, tags=pydantic_featureview.tags if pydantic_featureview.tags else None, owner=pydantic_featureview.owner) + # Correct the FeatureView to store both sources. - feature_view.batch_source=pydantic_featureview.batch_source - feature_view.stream_source=pydantic_featureview.stream_source + feature_view.batch_source=batch_source + feature_view.stream_source=stream_source + return feature_view diff --git a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py index ab780c5e40..e7a62f79cc 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py @@ -1,5 +1,6 @@ import logging import traceback +import uuid import warnings from enum import Enum from json import dumps @@ -11,7 +12,6 @@ from feast import flags_helper from feast.data_source import DataSource, DataSourceModel from feast.errors import DataSourceNoNameException -from feast.infra.offline_stores.offline_utils import get_temp_entity_table_name from feast.protos.feast.core.DataSource_pb2 import DataSource as DataSourceProto from feast.protos.feast.core.SavedDataset_pb2 import ( SavedDatasetStorage as SavedDatasetStorageProto, @@ -37,6 +37,7 @@ class SparkSourceModel(DataSourceModel): Pydantic Model of a Feast SparkSource. 
""" name: str + model_type: str = "SparkSource" table: Optional[str] = None query: Optional[str] = None path: Optional[str] = None @@ -203,7 +204,7 @@ def get_table_query_string(self) -> str: logger.exception( "Spark read of file source failed.\n" + traceback.format_exc() ) - tmp_table_name = get_temp_entity_table_name() + tmp_table_name = "feast_entity_df_" + uuid.uuid4().hex df.createOrReplaceTempView(tmp_table_name) return f"`{tmp_table_name}`" diff --git a/sdk/python/tests/unit/test_pydantic_models.py b/sdk/python/tests/unit/test_pydantic_models.py index 5f2fb0ac7f..99899315da 100644 --- a/sdk/python/tests/unit/test_pydantic_models.py +++ b/sdk/python/tests/unit/test_pydantic_models.py @@ -92,6 +92,25 @@ def test_idempotent_featureview_conversion(): ) feature_view_model = feature_view.to_pydantic_model() feature_view_2 = FeatureView.featureview_from_pydantic_model(feature_view_model) - print(feature_view.original_schema) - print(feature_view_2.original_schema) assert feature_view == feature_view_2 + + + spark_source = SparkSource( + name="sparky_sparky_boom_man", + path=f"/data/driver_hourly_stats", + file_format="parquet", + timestamp_field="event_timestamp", + created_timestamp_column="created", + ) + feature_view_3 = FeatureView( + name="my-feature-view", + entities=[user_entity], + schema=[ + Field(name="feature1", dtype=Float32), + Field(name="feature2", dtype=Float32), + ], + source=spark_source, + ) + feature_view_model_3 = feature_view_3.to_pydantic_model() + feature_view_4 = FeatureView.featureview_from_pydantic_model(feature_view_model_3) + assert feature_view_3 == feature_view_4 From f0c46b667a4c9d03f92471cda4f44ca50d8d0e98 Mon Sep 17 00:00:00 2001 From: Matt Carlin Date: Sun, 2 Jul 2023 15:21:35 -0500 Subject: [PATCH 14/32] WIP. Try adding more types to json encoder to get type checking to work. --- sdk/python/feast/feature_view.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py index 6b06abaa3e..7d56db644b 100644 --- a/sdk/python/feast/feature_view.py +++ b/sdk/python/feast/feature_view.py @@ -82,7 +82,10 @@ class Config: json_encoders = { Field: lambda v: int(dumps(v.value, default=str)), DataSource: lambda v: v.to_pydantic_model(), - Entity: lambda v: v.to_pydantic_model() + Entity: lambda v: v.to_pydantic_model(), + FeastType: lambda v: int(dumps(v.to_value_type().value, default=str)), + ComplexFeastType: lambda v: int(dumps(v.to_value_type().value, default=str)), + PrimitiveFeastType: lambda v: int(dumps(v.to_value_type().value, default=str)) } From db65d8025a24d1679d981d80c1b6963de98047a2 Mon Sep 17 00:00:00 2001 From: Matt Carlin Date: Sun, 2 Jul 2023 15:41:23 -0500 Subject: [PATCH 15/32] WIP. 
Fixing json encoding of types --- sdk/python/feast/feature_view.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py index 7d56db644b..2ecee27f7a 100644 --- a/sdk/python/feast/feature_view.py +++ b/sdk/python/feast/feature_view.py @@ -41,7 +41,7 @@ from feast.protos.feast.core.FeatureView_pb2 import ( MaterializationInterval as MaterializationIntervalProto, ) -from feast.types import from_value_type +from feast.types import FeastType, from_value_type, ComplexFeastType, PrimitiveFeastType from feast.usage import log_exceptions from feast.value_type import ValueType From db3197e540df1cf2a82952720f1c4e5925e6e146 Mon Sep 17 00:00:00 2001 From: Matt Carlin Date: Sun, 2 Jul 2023 18:23:08 -0500 Subject: [PATCH 16/32] Make FeastTypes Pydantic compatible --- sdk/python/feast/embedded_go/type_map.py | 16 ++++++------ sdk/python/feast/types.py | 26 ++++++++++++++----- .../feature_repos/universal/feature_views.py | 12 ++++----- 3 files changed, 33 insertions(+), 21 deletions(-) diff --git a/sdk/python/feast/embedded_go/type_map.py b/sdk/python/feast/embedded_go/type_map.py index e70dc3be86..9b1ece7732 100644 --- a/sdk/python/feast/embedded_go/type_map.py +++ b/sdk/python/feast/embedded_go/type_map.py @@ -50,14 +50,14 @@ PrimitiveFeastType.BYTES: pa.binary(), PrimitiveFeastType.BOOL: pa.bool_(), PrimitiveFeastType.UNIX_TIMESTAMP: pa.timestamp("s"), - Array(PrimitiveFeastType.INT32): pa.list_(pa.int32()), - Array(PrimitiveFeastType.INT64): pa.list_(pa.int64()), - Array(PrimitiveFeastType.FLOAT32): pa.list_(pa.float32()), - Array(PrimitiveFeastType.FLOAT64): pa.list_(pa.float64()), - Array(PrimitiveFeastType.STRING): pa.list_(pa.string()), - Array(PrimitiveFeastType.BYTES): pa.list_(pa.binary()), - Array(PrimitiveFeastType.BOOL): pa.list_(pa.bool_()), - Array(PrimitiveFeastType.UNIX_TIMESTAMP): pa.list_(pa.timestamp("s")), + Array(base_type=PrimitiveFeastType.INT32): pa.list_(pa.int32()), + Array(base_type=PrimitiveFeastType.INT64): pa.list_(pa.int64()), + Array(base_type=PrimitiveFeastType.FLOAT32): pa.list_(pa.float32()), + Array(base_type=PrimitiveFeastType.FLOAT64): pa.list_(pa.float64()), + Array(base_type=PrimitiveFeastType.STRING): pa.list_(pa.string()), + Array(base_type=PrimitiveFeastType.BYTES): pa.list_(pa.binary()), + Array(base_type=PrimitiveFeastType.BOOL): pa.list_(pa.bool_()), + Array(base_type=PrimitiveFeastType.UNIX_TIMESTAMP): pa.list_(pa.timestamp("s")), } diff --git a/sdk/python/feast/types.py b/sdk/python/feast/types.py index 0ba1725f17..b65b8cb1d7 100644 --- a/sdk/python/feast/types.py +++ b/sdk/python/feast/types.py @@ -15,6 +15,8 @@ from enum import Enum from typing import Dict, Union +from pydantic import BaseModel, validator + from feast.value_type import ValueType PRIMITIVE_FEAST_TYPES_TO_VALUE_TYPES = { @@ -30,14 +32,14 @@ } -class ComplexFeastType(ABC): +class ComplexFeastType(ABC, BaseModel): """ A ComplexFeastType represents a structured type that is recognized by Feast. 
""" - def __init__(self): - """Creates a ComplexFeastType object.""" - pass + # def __init__(self): + # """Creates a ComplexFeastType object.""" + # pass @abstractmethod def to_value_type(self) -> ValueType: @@ -136,13 +138,24 @@ class Array(ComplexFeastType): base_type: Union[PrimitiveFeastType, ComplexFeastType] - def __init__(self, base_type: Union[PrimitiveFeastType, ComplexFeastType]): + @validator('base_type', pre=True, always=True) + def base_type_is_supported(cls, base_type): if base_type not in SUPPORTED_BASE_TYPES: raise ValueError( f"Type {type(base_type)} is currently not supported as a base type for Array." ) + return base_type + + def __init__(self, base_type: Union[PrimitiveFeastType, ComplexFeastType]): + super(Array, self).__init__(base_type=base_type) + + # def __init__(self, base_type: Union[PrimitiveFeastType, ComplexFeastType]): + # if base_type not in SUPPORTED_BASE_TYPES: + # raise ValueError( + # f"Type {type(base_type)} is currently not supported as a base type for Array." + # ) - self.base_type = base_type + # self.base_type = base_type def to_value_type(self) -> ValueType: assert isinstance(self.base_type, PrimitiveFeastType) @@ -156,7 +169,6 @@ def __str__(self): FeastType = Union[ComplexFeastType, PrimitiveFeastType] - VALUE_TYPES_TO_FEAST_TYPES: Dict["ValueType", FeastType] = { ValueType.UNKNOWN: Invalid, ValueType.BYTES: Bytes, diff --git a/sdk/python/tests/integration/feature_repos/universal/feature_views.py b/sdk/python/tests/integration/feature_repos/universal/feature_views.py index 5938a0c936..d2a4bab608 100644 --- a/sdk/python/tests/integration/feature_repos/universal/feature_views.py +++ b/sdk/python/tests/integration/feature_repos/universal/feature_views.py @@ -123,8 +123,8 @@ def create_similarity_request_source(): return RequestSource( name="similarity_input", schema=[ - Field(name="vector_doube", dtype=Array(Float64)), - Field(name="vector_float", dtype=Array(Float32)), + Field(name="vector_doube", dtype=Array(base_type=Float64)), + Field(name="vector_float", dtype=Array(base_type=Float32)), ], ) @@ -136,8 +136,8 @@ def create_item_embeddings_feature_view(source, infer_features: bool = False): schema=None if infer_features else [ - Field(name="embedding_double", dtype=Array(Float64)), - Field(name="embedding_float", dtype=Array(Float32)), + Field(name="embedding_double", dtype=Array(base_type=Float64)), + Field(name="embedding_float", dtype=Array(base_type=Float32)), ], source=source, ttl=timedelta(hours=2), @@ -154,8 +154,8 @@ def create_item_embeddings_batch_feature_view( schema=None if infer_features else [ - Field(name="embedding_double", dtype=Array(Float64)), - Field(name="embedding_float", dtype=Array(Float32)), + Field(name="embedding_double", dtype=Array(base_type=Float64)), + Field(name="embedding_float", dtype=Array(base_type=Float32)), ], source=source, ttl=timedelta(hours=2), From ba7bb27d70872fdb6bb71d1ae52358bf9abaaf3c Mon Sep 17 00:00:00 2001 From: Matt Carlin Date: Sun, 2 Jul 2023 19:43:33 -0500 Subject: [PATCH 17/32] Fix DatatSourceModel to SparkSource pydantic conversion and add tests --- .../spark_offline_store/spark_source.py | 12 ++-- sdk/python/tests/unit/test_pydantic_models.py | 57 +++++++++++++++++++ 2 files changed, 63 insertions(+), 6 deletions(-) diff --git a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py index e7a62f79cc..18bd959bf5 100644 --- 
a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py @@ -239,16 +239,16 @@ def datasource_from_pydantic_model(pydantic_datasource): """ return SparkSource( name=pydantic_datasource.name, - table=pydantic_datasource.table or "", - query=pydantic_datasource.query or "", - path=pydantic_datasource.path or "", - file_format=pydantic_datasource.file_format or "", - created_timestamp_column=pydantic_datasource.created_timestamp_column or "", + table=pydantic_datasource.table if hasattr(pydantic_datasource,"table") else "", + query=pydantic_datasource.query if hasattr(pydantic_datasource,"query") else "", + path=pydantic_datasource.path if hasattr(pydantic_datasource,"path") else "", + file_format=pydantic_datasource.file_format if hasattr(pydantic_datasource,"file_format") else "", + created_timestamp_column=pydantic_datasource.created_timestamp_column if hasattr(pydantic_datasource,"created_timestamp_column") else "", field_mapping=pydantic_datasource.field_mapping if pydantic_datasource.field_mapping else None, description=pydantic_datasource.description or "", tags=pydantic_datasource.tags if pydantic_datasource.tags else None, owner=pydantic_datasource.owner or "", - timestamp_field=pydantic_datasource.timestamp_field or "") + timestamp_field=pydantic_datasource.timestamp_field if hasattr(pydantic_datasource,"timestamp_field") else "") class SparkOptions: diff --git a/sdk/python/tests/unit/test_pydantic_models.py b/sdk/python/tests/unit/test_pydantic_models.py index 99899315da..700f25af89 100644 --- a/sdk/python/tests/unit/test_pydantic_models.py +++ b/sdk/python/tests/unit/test_pydantic_models.py @@ -23,6 +23,63 @@ from feast.types import Bool, Float32, Int64 +def test_datasourcemodel_to_sparksource(): + spark_source_model = DataSourceModel( + name= "string", + model_type= "string", + table= "table1", + timestamp_field= "", + created_timestamp_column= "", + description= "", + owner= "", + date_partition_column= "" + ) + spark_source = SparkSource.datasource_from_pydantic_model(spark_source_model) + + with pytest.raises(ValueError): + # No file_format specified + spark_source_model_2 = DataSourceModel( + name= "string", + model_type= "string", + path= "path1", + timestamp_field= "", + created_timestamp_column= "", + description= "", + owner= "", + date_partition_column= "" + ) + spark_source_2 = SparkSource.datasource_from_pydantic_model(spark_source_model_2) + + spark_source_model_2 = DataSourceModel( + name= "string", + model_type= "string", + path= "path1", + file_format="json", + timestamp_field= "", + created_timestamp_column= "", + description= "", + owner= "", + date_partition_column= "" + ) + spark_source_2 = SparkSource.datasource_from_pydantic_model(spark_source_model_2) + + + +def test_datasourcemodel_to_requestsource(): + schema = [ + Field(name="f1", dtype=Float32), + Field(name="f2", dtype=Bool), + ] + request_source_model = RequestSourceModel( + name="source", + schema=schema, + description="desc", + tags={}, + owner="feast", + ) + request_source = RequestSource.datasource_from_pydantic_model(request_source_model) + + def test_idempotent_entity_conversion(): entity = Entity( name="my-entity", From 47685320e00fba5fa1f23ada7413536aca2fb334 Mon Sep 17 00:00:00 2001 From: Matt Carlin Date: Mon, 3 Jul 2023 02:31:00 -0500 Subject: [PATCH 18/32] Allow dtypes to be defined by strings, add more tests --- sdk/python/feast/data_source.py | 2 +- sdk/python/feast/field.py 
| 17 ++++- .../spark_offline_store/spark_source.py | 15 +++-- sdk/python/feast/types.py | 40 +++++++++++ sdk/python/tests/unit/test_pydantic_models.py | 67 ++++++++++++------- 5 files changed, 106 insertions(+), 35 deletions(-) diff --git a/sdk/python/feast/data_source.py b/sdk/python/feast/data_source.py index b4b45902c5..c0360eeab0 100644 --- a/sdk/python/feast/data_source.py +++ b/sdk/python/feast/data_source.py @@ -549,7 +549,7 @@ class RequestSourceModel(DataSourceModel): """ name: str - model_type: str = "RequestSource" + model_type: str = PydanticField("RequestSource", const=True) schema_: List[Field] = PydanticField(None, alias='schema') description: Optional[str] = "" tags: Optional[Dict[str, str]] = None diff --git a/sdk/python/feast/field.py b/sdk/python/feast/field.py index 9a247429f3..2936a48014 100644 --- a/sdk/python/feast/field.py +++ b/sdk/python/feast/field.py @@ -20,7 +20,7 @@ from feast.feature import Feature from feast.protos.feast.core.Feature_pb2 import FeatureSpecV2 as FieldProto -from feast.types import FeastType, from_value_type, ComplexFeastType, PrimitiveFeastType +from feast.types import FeastType, from_value_type, from_string, ComplexFeastType, PrimitiveFeastType from feast.value_type import ValueType @@ -51,9 +51,20 @@ class Config: } @validator('dtype', pre=True, always=True) - def dtype_is_feasttype(cls, v): + def dtype_is_feasttype_or_string_feasttype(cls, v): + """ + dtype must be a FeastType, but to allow wire transmission, + it is necessary to allow string representations of FeastTypes. + We therefore allow dtypes to be specified as strings which are + converted to FeastTypes at time of definition. + TO-DO: Investigate whether FeastType can be refactored to a json compatible + format. + """ if not isinstance(v, FeastType): - raise TypeError("dtype must be of type FeastType") + if isinstance(v, str): + return from_string(v) + else: + raise TypeError("dtype must be of type FeastType") return v def __eq__(self, other): diff --git a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py index 18bd959bf5..ee82c5f7ed 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py @@ -7,6 +7,7 @@ from typing import Any, Callable, Dict, Iterable, Optional, Tuple from pydantic import BaseModel +from pydantic import Field as PydanticField from pyspark.sql import SparkSession from feast import flags_helper @@ -37,7 +38,7 @@ class SparkSourceModel(DataSourceModel): Pydantic Model of a Feast SparkSource. 
""" name: str - model_type: str = "SparkSource" + model_type: str = PydanticField("SparkSource", const=True) table: Optional[str] = None query: Optional[str] = None path: Optional[str] = None @@ -218,14 +219,14 @@ def to_pydantic_model(self) -> SparkSourceModel: """ return SparkSourceModel( name=self.name, - table=self.table if self.table else "", - query=self.query if self.query else "", - path=self.path if self.path else "", - file_format=self.file_format if self.file_format else "", + table=self.table, + query=self.query, + path=self.path, + file_format=self.file_format, created_timestamp_column=self.created_timestamp_column if self.created_timestamp_column else "", - field_mapping=self.field_mapping if self.field_mapping else {}, + field_mapping=self.field_mapping if self.field_mapping else None, description=self.description if self.description else "", - tags=self.tags if self.tags else {}, + tags=self.tags if self.tags else None, owner=self.owner if self.owner else "", timestamp_field=self.timestamp_field if self.timestamp_field else "") diff --git a/sdk/python/feast/types.py b/sdk/python/feast/types.py index b65b8cb1d7..f07a68a81a 100644 --- a/sdk/python/feast/types.py +++ b/sdk/python/feast/types.py @@ -206,3 +206,43 @@ def from_value_type( return VALUE_TYPES_TO_FEAST_TYPES[value_type] raise ValueError(f"Could not convert value type {value_type} to FeastType.") + + + +TYPE_STRINGS_TO_FEAST_TYPES = { + "Unknown": Invalid, + "Bytes": Bytes, + "String": String, + "Int32": Int32, + "Int64": Int64, + "Float64": Float64, + "Float32": Float32, + "Bool": Bool, + "UnixTimestamp": UnixTimestamp, + "Array(Bytes)": Array(Bytes), + "Array(String)": Array(String), + "Array(Int32)": Array(Int32), + "Array(Int64)": Array(Int64), + "Array(Float64)": Array(Float64), + "Array(Float32)": Array(Float32), + "Array(Bool)": Array(Bool), + "Array(UnixTimestamp)": Array(UnixTimestamp) +} + + +def from_string( + value_type: str +) -> FeastType: + """ + Converts a string to a Feast type. + + Args: + value_type: String value type to be converted. + + Raises: + ValueError: The conversion could not be performed. 
+ """ + if value_type in TYPE_STRINGS_TO_FEAST_TYPES: + return TYPE_STRINGS_TO_FEAST_TYPES[value_type] + + raise TypeError(f"Could not convert value type {value_type} to FeastType.") diff --git a/sdk/python/tests/unit/test_pydantic_models.py b/sdk/python/tests/unit/test_pydantic_models.py index 700f25af89..6ff6eaf017 100644 --- a/sdk/python/tests/unit/test_pydantic_models.py +++ b/sdk/python/tests/unit/test_pydantic_models.py @@ -14,20 +14,25 @@ import assertpy import pytest +from pydantic.error_wrappers import ValidationError + from feast.entity import Entity, EntityModel from feast.field import Field from feast.data_source import DataSource, DataSourceModel, RequestSource, RequestSourceModel from feast.feature_view import FeatureView, FeatureViewModel from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import \ SparkSource, SparkSourceModel -from feast.types import Bool, Float32, Int64 +from feast.types import Array, Bool, Float32, Int64 def test_datasourcemodel_to_sparksource(): spark_source_model = DataSourceModel( name= "string", - model_type= "string", + model_type="SparkSource", table= "table1", + query="", + path="", + file_format="", timestamp_field= "", created_timestamp_column= "", description= "", @@ -35,12 +40,14 @@ def test_datasourcemodel_to_sparksource(): date_partition_column= "" ) spark_source = SparkSource.datasource_from_pydantic_model(spark_source_model) + spark_source_model_b = spark_source.to_pydantic_model() + assert spark_source_model == spark_source_model_b with pytest.raises(ValueError): # No file_format specified - spark_source_model_2 = DataSourceModel( + spark_source_model = DataSourceModel( name= "string", - model_type= "string", + model_type="SparkSource", path= "path1", timestamp_field= "", created_timestamp_column= "", @@ -48,36 +55,48 @@ def test_datasourcemodel_to_sparksource(): owner= "", date_partition_column= "" ) - spark_source_2 = SparkSource.datasource_from_pydantic_model(spark_source_model_2) + spark_source = SparkSource.datasource_from_pydantic_model(spark_source_model) - spark_source_model_2 = DataSourceModel( + spark_source_model = DataSourceModel( name= "string", - model_type= "string", + model_type="SparkSource", path= "path1", file_format="json", + table= "", + query="", timestamp_field= "", created_timestamp_column= "", description= "", owner= "", date_partition_column= "" ) - spark_source_2 = SparkSource.datasource_from_pydantic_model(spark_source_model_2) - + spark_source = SparkSource.datasource_from_pydantic_model(spark_source_model) + spark_source_model_b = spark_source.to_pydantic_model() + assert spark_source_model == spark_source_model_b def test_datasourcemodel_to_requestsource(): + with pytest.raises(ValidationError): + bad_schema = [ + Field(name="f1", dtype="Array(Float323)"), + Field(name="f2", dtype="Bool"), + ] + schema = [ - Field(name="f1", dtype=Float32), - Field(name="f2", dtype=Bool), + Field(name="f1", dtype="Array(Float32)"), + Field(name="f2", dtype="Bool"), ] request_source_model = RequestSourceModel( name="source", + model_type="RequestSource", schema=schema, description="desc", - tags={}, + tags=None, owner="feast", ) request_source = RequestSource.datasource_from_pydantic_model(request_source_model) + request_source_model_b = request_source.to_pydantic_model() + assert request_source_model == request_source_model_b def test_idempotent_entity_conversion(): @@ -87,8 +106,8 @@ def test_idempotent_entity_conversion(): tags={"key1": "val1", "key2": "val2"}, ) entity_model = 
entity.to_pydantic_model() - entity_2 = Entity.entity_from_pydantic_model(entity_model) - assert entity == entity_2 + entity_b = Entity.entity_from_pydantic_model(entity_model) + assert entity == entity_b def test_idempotent_requestsource_conversion(): @@ -104,8 +123,8 @@ def test_idempotent_requestsource_conversion(): owner="feast", ) request_source_model = request_source.to_pydantic_model() - request_source_2 = RequestSource.datasource_from_pydantic_model(request_source_model) - assert request_source == request_source_2 + request_source_b = RequestSource.datasource_from_pydantic_model(request_source_model) + assert request_source == request_source_b def test_idempotent_sparksource_conversion(): @@ -117,8 +136,8 @@ def test_idempotent_sparksource_conversion(): owner="feast", ) spark_source_model = spark_source.to_pydantic_model() - spark_source_2 = SparkSource.datasource_from_pydantic_model(spark_source_model) - assert spark_source == spark_source_2 + spark_source_b = SparkSource.datasource_from_pydantic_model(spark_source_model) + assert spark_source == spark_source_b def test_type_safety_when_converting_multiple_datasources(): @@ -148,8 +167,8 @@ def test_idempotent_featureview_conversion(): source=request_source, ) feature_view_model = feature_view.to_pydantic_model() - feature_view_2 = FeatureView.featureview_from_pydantic_model(feature_view_model) - assert feature_view == feature_view_2 + feature_view_b = FeatureView.featureview_from_pydantic_model(feature_view_model) + assert feature_view == feature_view_b spark_source = SparkSource( @@ -159,7 +178,7 @@ def test_idempotent_featureview_conversion(): timestamp_field="event_timestamp", created_timestamp_column="created", ) - feature_view_3 = FeatureView( + feature_view = FeatureView( name="my-feature-view", entities=[user_entity], schema=[ @@ -168,6 +187,6 @@ def test_idempotent_featureview_conversion(): ], source=spark_source, ) - feature_view_model_3 = feature_view_3.to_pydantic_model() - feature_view_4 = FeatureView.featureview_from_pydantic_model(feature_view_model_3) - assert feature_view_3 == feature_view_4 + feature_view_model = feature_view.to_pydantic_model() + feature_view_b = FeatureView.featureview_from_pydantic_model(feature_view_model) + assert feature_view == feature_view_b From 65f86989b3006a6e09b15e7fc4d5646c6f553b62 Mon Sep 17 00:00:00 2001 From: Matt Carlin Date: Mon, 3 Jul 2023 02:59:48 -0500 Subject: [PATCH 19/32] Adjust schema conversion in RequestSource datasource_from_pydantic_model --- sdk/python/feast/data_source.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/sdk/python/feast/data_source.py b/sdk/python/feast/data_source.py index c0360eeab0..c1957ee718 100644 --- a/sdk/python/feast/data_source.py +++ b/sdk/python/feast/data_source.py @@ -691,12 +691,14 @@ def datasource_from_pydantic_model(pydantic_datasource): Returns: A RequestSource. 
""" - return RequestSource( - name=pydantic_datasource.name, - schema=pydantic_datasource.schema_, - description=pydantic_datasource.description, - tags=pydantic_datasource.tags if pydantic_datasource.tags else None, - owner=pydantic_datasource.owner) + params = { + "name":pydantic_datasource.name, + "description":pydantic_datasource.description, + "tags":pydantic_datasource.tags if pydantic_datasource.tags else None, + "owner":pydantic_datasource.owner + } + params["schema"] = [Field(name=sch.name,dtype=sch.dtype,description=sch.description,tags=sch.tags) for sch in pydantic_datasource.schema_] + return RequestSource(**params) @typechecked From 34109e5195f6e7af6b28edf99455aa74f63dc11b Mon Sep 17 00:00:00 2001 From: Matt Carlin Date: Mon, 3 Jul 2023 10:05:06 -0500 Subject: [PATCH 20/32] Remove unnecessary properties from FeatureView pydantic model --- sdk/python/feast/feature_view.py | 14 ++++---------- sdk/python/tests/unit/test_pydantic_models.py | 4 ---- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py index 2ecee27f7a..35c371e30f 100644 --- a/sdk/python/feast/feature_view.py +++ b/sdk/python/feast/feature_view.py @@ -68,13 +68,13 @@ class FeatureViewModel(BaseModel): ttl: Optional[timedelta] batch_source: DataSourceModel stream_source: Optional[DataSourceModel] - entity_columns: List[Field] - features: List[Field] + __entity_columns: List[Field] + __features: List[Field] online: bool = True description: str = "" tags: Optional[Dict[str, str]] = None owner: str = "" - materialization_intervals: List[Tuple[datetime, datetime]] + __materialization_intervals: List[Tuple[datetime, datetime]] class Config: arbitrary_types_allowed = True @@ -123,13 +123,10 @@ class FeatureView(BaseFeatureView): ttl: Optional[timedelta] batch_source: DataSource stream_source: Optional[DataSource] - entity_columns: List[Field] - features: List[Field] online: bool description: str tags: Dict[str, str] owner: str - materialization_intervals: List[Tuple[datetime, datetime]] @log_exceptions def __init__( @@ -517,13 +514,10 @@ def to_pydantic_model(self) -> FeatureViewModel: original_schema=self.original_schema, batch_source=self.batch_source.to_pydantic_model() if self.batch_source else self.batch_source, stream_source=self.stream_source.to_pydantic_model() if self.stream_source else self.stream_source, - entity_columns=self.entity_columns, - features=self.features if self.features else None, online=self.online, description=self.description, tags=self.tags if self.tags else None, - owner=self.owner, - materialization_intervals=self.materialization_intervals) + owner=self.owner) @staticmethod def featureview_from_pydantic_model(pydantic_featureview): diff --git a/sdk/python/tests/unit/test_pydantic_models.py b/sdk/python/tests/unit/test_pydantic_models.py index 6ff6eaf017..79c27c6c63 100644 --- a/sdk/python/tests/unit/test_pydantic_models.py +++ b/sdk/python/tests/unit/test_pydantic_models.py @@ -140,10 +140,6 @@ def test_idempotent_sparksource_conversion(): assert spark_source == spark_source_b -def test_type_safety_when_converting_multiple_datasources(): - pass - - def test_idempotent_featureview_conversion(): schema = [ Field(name="f1", dtype=Float32), From 47e98b9e39cc492bf4360409083ba62e392da3b8 Mon Sep 17 00:00:00 2001 From: Matt Carlin Date: Mon, 3 Jul 2023 10:34:15 -0500 Subject: [PATCH 21/32] Remove parameters that weren't removed last time from FeatureView pydantic model, and do some linting --- 
sdk/python/feast/data_source.py | 27 +++--- sdk/python/feast/entity.py | 11 +-- sdk/python/feast/feature_service.py | 5 -- sdk/python/feast/feature_view.py | 85 +++++++++++++------ sdk/python/feast/field.py | 24 ++++-- .../spark_offline_store/spark_source.py | 37 ++++++-- sdk/python/feast/types.py | 11 +-- sdk/python/tests/unit/test_feature_views.py | 1 - sdk/python/tests/unit/test_pydantic_models.py | 65 +++++++------- 9 files changed, 165 insertions(+), 101 deletions(-) diff --git a/sdk/python/feast/data_source.py b/sdk/python/feast/data_source.py index c1957ee718..77a7b97844 100644 --- a/sdk/python/feast/data_source.py +++ b/sdk/python/feast/data_source.py @@ -542,7 +542,6 @@ def get_table_query_string(self) -> str: raise NotImplementedError - class RequestSourceModel(DataSourceModel): """ Pydantic Model of a Feast RequestSource. @@ -550,7 +549,7 @@ class RequestSourceModel(DataSourceModel): name: str model_type: str = PydanticField("RequestSource", const=True) - schema_: List[Field] = PydanticField(None, alias='schema') + schema_: List[Field] = PydanticField(None, alias="schema") description: Optional[str] = "" tags: Optional[Dict[str, str]] = None owner: Optional[str] = "" @@ -558,9 +557,6 @@ class RequestSourceModel(DataSourceModel): class Config: arbitrary_types_allowed = True extra = "allow" - json_encoders = { - } - @typechecked @@ -681,7 +677,8 @@ def to_pydantic_model(self) -> RequestSourceModel: schema=self.schema, description=self.description, tags=self.tags if self.tags else None, - owner=self.owner) + owner=self.owner, + ) @staticmethod def datasource_from_pydantic_model(pydantic_datasource): @@ -692,12 +689,20 @@ def datasource_from_pydantic_model(pydantic_datasource): A RequestSource. """ params = { - "name":pydantic_datasource.name, - "description":pydantic_datasource.description, - "tags":pydantic_datasource.tags if pydantic_datasource.tags else None, - "owner":pydantic_datasource.owner + "name": pydantic_datasource.name, + "description": pydantic_datasource.description, + "tags": pydantic_datasource.tags if pydantic_datasource.tags else None, + "owner": pydantic_datasource.owner, } - params["schema"] = [Field(name=sch.name,dtype=sch.dtype,description=sch.description,tags=sch.tags) for sch in pydantic_datasource.schema_] + params["schema"] = [ + Field( + name=sch.name, + dtype=sch.dtype, + description=sch.description, + tags=sch.tags, + ) + for sch in pydantic_datasource.schema_ + ] return RequestSource(**params) diff --git a/sdk/python/feast/entity.py b/sdk/python/feast/entity.py index 31d63fdf5a..5ff3fc131f 100644 --- a/sdk/python/feast/entity.py +++ b/sdk/python/feast/entity.py @@ -13,7 +13,7 @@ # limitations under the License. 
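An aside on the `schema_` field in RequestSourceModel above: in pydantic v1 a
field literally named `schema` would shadow `BaseModel.schema()`, hence the
trailing underscore plus an alias. A minimal self-contained sketch of the
pattern — `SourceModel` is an illustrative stand-in, not Feast code:

    from typing import List, Optional

    from pydantic import BaseModel
    from pydantic import Field as PydanticField


    class SourceModel(BaseModel):
        name: str
        # Stored as `schema_` to avoid clashing with BaseModel.schema().
        schema_: Optional[List[str]] = PydanticField(None, alias="schema")


    # The field is populated and serialized under its alias.
    m = SourceModel(name="s", **{"schema": ["f1", "f2"]})
    assert m.schema_ == ["f1", "f2"]
    assert m.dict(by_alias=True)["schema"] == ["f1", "f2"]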
from datetime import datetime from json import dumps -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Callable from google.protobuf.json_format import MessageToJson from pydantic import BaseModel @@ -43,7 +43,7 @@ class EntityModel(BaseModel): class Config: arbitrary_types_allowed = True extra = "allow" - json_encoders = { + json_encoders: Dict[object, Callable] = { ValueType: lambda v: int(dumps(v.value, default=str)) } @@ -230,7 +230,8 @@ def to_pydantic_model(self) -> EntityModel: value_type=self.value_type, description=self.description, tags=self.tags if self.tags else None, - owner=self.owner) + owner=self.owner, + ) @staticmethod def entity_from_pydantic_model(pydantic_entity): @@ -246,5 +247,5 @@ def entity_from_pydantic_model(pydantic_entity): value_type=pydantic_entity.value_type, description=pydantic_entity.description, tags=pydantic_entity.tags if pydantic_entity.tags else None, - owner=pydantic_entity.owner) - + owner=pydantic_entity.owner, + ) diff --git a/sdk/python/feast/feature_service.py b/sdk/python/feast/feature_service.py index bfaddbb331..abe61a2e74 100644 --- a/sdk/python/feast/feature_service.py +++ b/sdk/python/feast/feature_service.py @@ -42,11 +42,6 @@ class FeatureServiceModel(BaseModel): class Config: arbitrary_types_allowed = True extra = "allow" - json_encoders = { - # OnDemandFeatureView - # FeatureViewProjection - # LoggingConfig - } @typechecked diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py index 35c371e30f..98bb3fc7f9 100644 --- a/sdk/python/feast/feature_view.py +++ b/sdk/python/feast/feature_view.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. import copy +import sys import warnings from datetime import datetime, timedelta from json import dumps -from typing import Dict, List, Optional, Tuple, Type -import sys +from typing import Dict, List, Optional, Tuple, Type, Callable from google.protobuf.duration_pb2 import Duration from pydantic import BaseModel @@ -25,12 +25,20 @@ from feast import utils from feast.base_feature_view import BaseFeatureView -from feast.data_source import DataSource, RequestSource, DataSourceModel, KafkaSource, KinesisSource, PushSource +from feast.data_source import ( + DataSource, + DataSourceModel, + KafkaSource, + KinesisSource, + PushSource, + RequestSource, +) from feast.entity import Entity, EntityModel from feast.feature_view_projection import FeatureViewProjection from feast.field import Field -from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import \ - SparkSource +from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import ( + SparkSource, +) from feast.protos.feast.core.FeatureView_pb2 import FeatureView as FeatureViewProto from feast.protos.feast.core.FeatureView_pb2 import ( FeatureViewMeta as FeatureViewMetaProto, @@ -41,7 +49,7 @@ from feast.protos.feast.core.FeatureView_pb2 import ( MaterializationInterval as MaterializationIntervalProto, ) -from feast.types import FeastType, from_value_type, ComplexFeastType, PrimitiveFeastType +from feast.types import ComplexFeastType, FeastType, PrimitiveFeastType, from_value_type from feast.usage import log_exceptions from feast.value_type import ValueType @@ -68,24 +76,25 @@ class FeatureViewModel(BaseModel): ttl: Optional[timedelta] batch_source: DataSourceModel stream_source: Optional[DataSourceModel] - __entity_columns: List[Field] - __features: List[Field] online: bool = True 
description: str = "" tags: Optional[Dict[str, str]] = None owner: str = "" - __materialization_intervals: List[Tuple[datetime, datetime]] class Config: arbitrary_types_allowed = True extra = "allow" - json_encoders = { + json_encoders: Dict[object, Callable] = { Field: lambda v: int(dumps(v.value, default=str)), DataSource: lambda v: v.to_pydantic_model(), Entity: lambda v: v.to_pydantic_model(), FeastType: lambda v: int(dumps(v.to_value_type().value, default=str)), - ComplexFeastType: lambda v: int(dumps(v.to_value_type().value, default=str)), - PrimitiveFeastType: lambda v: int(dumps(v.to_value_type().value, default=str)) + ComplexFeastType: lambda v: int( + dumps(v.to_value_type().value, default=str) + ), + PrimitiveFeastType: lambda v: int( + dumps(v.to_value_type().value, default=str) + ), } @@ -292,8 +301,12 @@ def __eq__(self, other): ): return False - if isinstance(self.original_entities, List) and isinstance(other.original_entities, List): - for entity1, entity2 in zip(self.original_entities, other.original_entities): + if isinstance(self.original_entities, List) and isinstance( + other.original_entities, List + ): + for entity1, entity2 in zip( + self.original_entities, other.original_entities + ): if entity1 != entity2: return False @@ -509,15 +522,22 @@ def to_pydantic_model(self) -> FeatureViewModel: """ return FeatureViewModel( name=self.name, - original_entities=[entity.to_pydantic_model() for entity in self.original_entities], + original_entities=[ + entity.to_pydantic_model() for entity in self.original_entities + ], ttl=self.ttl, original_schema=self.original_schema, - batch_source=self.batch_source.to_pydantic_model() if self.batch_source else self.batch_source, - stream_source=self.stream_source.to_pydantic_model() if self.stream_source else self.stream_source, + batch_source=self.batch_source.to_pydantic_model() + if self.batch_source + else self.batch_source, + stream_source=self.stream_source.to_pydantic_model() + if self.stream_source + else self.stream_source, online=self.online, description=self.description, tags=self.tags if self.tags else None, - owner=self.owner) + owner=self.owner, + ) @staticmethod def featureview_from_pydantic_model(pydantic_featureview): @@ -530,28 +550,39 @@ def featureview_from_pydantic_model(pydantic_featureview): # Convert each of the sources if they exist batch_source = None if pydantic_featureview.batch_source: - class_ = getattr(sys.modules[__name__], pydantic_featureview.batch_source.model_type) - batch_source = class_.datasource_from_pydantic_model(pydantic_featureview.batch_source) + class_ = getattr( + sys.modules[__name__], pydantic_featureview.batch_source.model_type + ) + batch_source = class_.datasource_from_pydantic_model( + pydantic_featureview.batch_source + ) stream_source = None if pydantic_featureview.stream_source: - class_ = getattr(sys.modules[__name__], pydantic_featureview.stream_source.model_type) - stream_source = class_.datasource_from_pydantic_model(pydantic_featureview.stream_source) + class_ = getattr( + sys.modules[__name__], pydantic_featureview.stream_source.model_type + ) + stream_source = class_.datasource_from_pydantic_model( + pydantic_featureview.stream_source + ) # Create the FeatureView feature_view = FeatureView( name=pydantic_featureview.name, source=batch_source, schema=pydantic_featureview.original_schema, - entities=[Entity.entity_from_pydantic_model(entity) for entity in pydantic_featureview.original_entities], + entities=[ + Entity.entity_from_pydantic_model(entity) + for entity in 
pydantic_featureview.original_entities + ], ttl=pydantic_featureview.ttl, online=pydantic_featureview.online, description=pydantic_featureview.description, tags=pydantic_featureview.tags if pydantic_featureview.tags else None, - owner=pydantic_featureview.owner) + owner=pydantic_featureview.owner, + ) # Correct the FeatureView to store both sources. - feature_view.batch_source=batch_source - feature_view.stream_source=stream_source + feature_view.batch_source = batch_source + feature_view.stream_source = stream_source return feature_view - diff --git a/sdk/python/feast/field.py b/sdk/python/feast/field.py index 2936a48014..cdf8828f24 100644 --- a/sdk/python/feast/field.py +++ b/sdk/python/feast/field.py @@ -13,14 +13,20 @@ # limitations under the License. from json import dumps -from typing import Dict, Optional +from typing import Dict, Optional, Callable from pydantic import BaseModel, validator from typeguard import typechecked from feast.feature import Feature from feast.protos.feast.core.Feature_pb2 import FeatureSpecV2 as FieldProto -from feast.types import FeastType, from_value_type, from_string, ComplexFeastType, PrimitiveFeastType +from feast.types import ( + ComplexFeastType, + FeastType, + PrimitiveFeastType, + from_string, + from_value_type, +) from feast.value_type import ValueType @@ -44,13 +50,17 @@ class Field(BaseModel): class Config: arbitrary_types_allowed = True extra = "allow" - json_encoders = { + json_encoders: Dict[object, Callable] = { FeastType: lambda v: int(dumps(v.to_value_type().value, default=str)), - ComplexFeastType: lambda v: int(dumps(v.to_value_type().value, default=str)), - PrimitiveFeastType: lambda v: int(dumps(v.to_value_type().value, default=str)) + ComplexFeastType: lambda v: int( + dumps(v.to_value_type().value, default=str) + ), + PrimitiveFeastType: lambda v: int( + dumps(v.to_value_type().value, default=str) + ), } - @validator('dtype', pre=True, always=True) + @validator("dtype", pre=True, always=True) def dtype_is_feasttype_or_string_feasttype(cls, v): """ dtype must be a FeastType, but to allow wire transmission, @@ -60,7 +70,7 @@ def dtype_is_feasttype_or_string_feasttype(cls, v): TO-DO: Investigate whether FeastType can be refactored to a json compatible format. """ - if not isinstance(v, FeastType): + if not isinstance(v, FeastType): # noinspection if isinstance(v, str): return from_string(v) else: diff --git a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py index ee82c5f7ed..667a7bdf87 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py @@ -37,6 +37,7 @@ class SparkSourceModel(DataSourceModel): """ Pydantic Model of a Feast SparkSource. 
""" + name: str model_type: str = PydanticField("SparkSource", const=True) table: Optional[str] = None @@ -223,12 +224,15 @@ def to_pydantic_model(self) -> SparkSourceModel: query=self.query, path=self.path, file_format=self.file_format, - created_timestamp_column=self.created_timestamp_column if self.created_timestamp_column else "", + created_timestamp_column=self.created_timestamp_column + if self.created_timestamp_column + else "", field_mapping=self.field_mapping if self.field_mapping else None, description=self.description if self.description else "", tags=self.tags if self.tags else None, owner=self.owner if self.owner else "", - timestamp_field=self.timestamp_field if self.timestamp_field else "") + timestamp_field=self.timestamp_field if self.timestamp_field else "", + ) @staticmethod def datasource_from_pydantic_model(pydantic_datasource): @@ -240,16 +244,31 @@ def datasource_from_pydantic_model(pydantic_datasource): """ return SparkSource( name=pydantic_datasource.name, - table=pydantic_datasource.table if hasattr(pydantic_datasource,"table") else "", - query=pydantic_datasource.query if hasattr(pydantic_datasource,"query") else "", - path=pydantic_datasource.path if hasattr(pydantic_datasource,"path") else "", - file_format=pydantic_datasource.file_format if hasattr(pydantic_datasource,"file_format") else "", - created_timestamp_column=pydantic_datasource.created_timestamp_column if hasattr(pydantic_datasource,"created_timestamp_column") else "", - field_mapping=pydantic_datasource.field_mapping if pydantic_datasource.field_mapping else None, + table=pydantic_datasource.table + if hasattr(pydantic_datasource, "table") + else "", + query=pydantic_datasource.query + if hasattr(pydantic_datasource, "query") + else "", + path=pydantic_datasource.path + if hasattr(pydantic_datasource, "path") + else "", + file_format=pydantic_datasource.file_format + if hasattr(pydantic_datasource, "file_format") + else "", + created_timestamp_column=pydantic_datasource.created_timestamp_column + if hasattr(pydantic_datasource, "created_timestamp_column") + else "", + field_mapping=pydantic_datasource.field_mapping + if pydantic_datasource.field_mapping + else None, description=pydantic_datasource.description or "", tags=pydantic_datasource.tags if pydantic_datasource.tags else None, owner=pydantic_datasource.owner or "", - timestamp_field=pydantic_datasource.timestamp_field if hasattr(pydantic_datasource,"timestamp_field") else "") + timestamp_field=pydantic_datasource.timestamp_field + if hasattr(pydantic_datasource, "timestamp_field") + else "", + ) class SparkOptions: diff --git a/sdk/python/feast/types.py b/sdk/python/feast/types.py index f07a68a81a..3b4196c05b 100644 --- a/sdk/python/feast/types.py +++ b/sdk/python/feast/types.py @@ -138,7 +138,7 @@ class Array(ComplexFeastType): base_type: Union[PrimitiveFeastType, ComplexFeastType] - @validator('base_type', pre=True, always=True) + @validator("base_type", pre=True, always=True) def base_type_is_supported(cls, base_type): if base_type not in SUPPORTED_BASE_TYPES: raise ValueError( @@ -208,8 +208,7 @@ def from_value_type( raise ValueError(f"Could not convert value type {value_type} to FeastType.") - -TYPE_STRINGS_TO_FEAST_TYPES = { +TYPE_STRINGS_TO_FEAST_TYPES: Dict[str, FeastType] = { "Unknown": Invalid, "Bytes": Bytes, "String": String, @@ -226,13 +225,11 @@ def from_value_type( "Array(Float64)": Array(Float64), "Array(Float32)": Array(Float32), "Array(Bool)": Array(Bool), - "Array(UnixTimestamp)": Array(UnixTimestamp) + 
"Array(UnixTimestamp)": Array(UnixTimestamp), } -def from_string( - value_type: str -) -> FeastType: +def from_string(value_type: str) -> FeastType: """ Converts a string to a Feast type. diff --git a/sdk/python/tests/unit/test_feature_views.py b/sdk/python/tests/unit/test_feature_views.py index 64e5059026..c954b6e53b 100644 --- a/sdk/python/tests/unit/test_feature_views.py +++ b/sdk/python/tests/unit/test_feature_views.py @@ -1,7 +1,6 @@ from datetime import timedelta import pytest - from pydantic import ValidationError from feast.aggregation import Aggregation diff --git a/sdk/python/tests/unit/test_pydantic_models.py b/sdk/python/tests/unit/test_pydantic_models.py index 79c27c6c63..5f290e86af 100644 --- a/sdk/python/tests/unit/test_pydantic_models.py +++ b/sdk/python/tests/unit/test_pydantic_models.py @@ -13,31 +13,37 @@ # limitations under the License. import assertpy import pytest - from pydantic.error_wrappers import ValidationError +from feast.data_source import ( + DataSource, + DataSourceModel, + RequestSource, + RequestSourceModel, +) from feast.entity import Entity, EntityModel -from feast.field import Field -from feast.data_source import DataSource, DataSourceModel, RequestSource, RequestSourceModel from feast.feature_view import FeatureView, FeatureViewModel -from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import \ - SparkSource, SparkSourceModel +from feast.field import Field +from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import ( + SparkSource, + SparkSourceModel, +) from feast.types import Array, Bool, Float32, Int64 def test_datasourcemodel_to_sparksource(): spark_source_model = DataSourceModel( - name= "string", + name="string", model_type="SparkSource", - table= "table1", + table="table1", query="", path="", file_format="", - timestamp_field= "", - created_timestamp_column= "", - description= "", - owner= "", - date_partition_column= "" + timestamp_field="", + created_timestamp_column="", + description="", + owner="", + date_partition_column="", ) spark_source = SparkSource.datasource_from_pydantic_model(spark_source_model) spark_source_model_b = spark_source.to_pydantic_model() @@ -46,29 +52,29 @@ def test_datasourcemodel_to_sparksource(): with pytest.raises(ValueError): # No file_format specified spark_source_model = DataSourceModel( - name= "string", + name="string", model_type="SparkSource", - path= "path1", - timestamp_field= "", - created_timestamp_column= "", - description= "", - owner= "", - date_partition_column= "" + path="path1", + timestamp_field="", + created_timestamp_column="", + description="", + owner="", + date_partition_column="", ) spark_source = SparkSource.datasource_from_pydantic_model(spark_source_model) spark_source_model = DataSourceModel( - name= "string", + name="string", model_type="SparkSource", - path= "path1", + path="path1", file_format="json", - table= "", + table="", query="", - timestamp_field= "", - created_timestamp_column= "", - description= "", - owner= "", - date_partition_column= "" + timestamp_field="", + created_timestamp_column="", + description="", + owner="", + date_partition_column="", ) spark_source = SparkSource.datasource_from_pydantic_model(spark_source_model) spark_source_model_b = spark_source.to_pydantic_model() @@ -123,7 +129,9 @@ def test_idempotent_requestsource_conversion(): owner="feast", ) request_source_model = request_source.to_pydantic_model() - request_source_b = RequestSource.datasource_from_pydantic_model(request_source_model) + request_source_b = 
RequestSource.datasource_from_pydantic_model( + request_source_model + ) assert request_source == request_source_b @@ -166,7 +174,6 @@ def test_idempotent_featureview_conversion(): feature_view_b = FeatureView.featureview_from_pydantic_model(feature_view_model) assert feature_view == feature_view_b - spark_source = SparkSource( name="sparky_sparky_boom_man", path=f"/data/driver_hourly_stats", From a6f0253639cd8592769394bf98ce751cb5551318 Mon Sep 17 00:00:00 2001 From: Matt Carlin Date: Mon, 3 Jul 2023 10:46:54 -0500 Subject: [PATCH 22/32] Finish linting --- sdk/python/feast/data_source.py | 2 +- sdk/python/feast/feature_view.py | 7 +++++-- sdk/python/feast/field.py | 4 ++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/sdk/python/feast/data_source.py b/sdk/python/feast/data_source.py index 77a7b97844..dbbcada53c 100644 --- a/sdk/python/feast/data_source.py +++ b/sdk/python/feast/data_source.py @@ -173,7 +173,7 @@ class DataSourceModel(BaseModel): """ name: str - model_type: str = None + model_type: str = "" timestamp_field: Optional[str] = "" created_timestamp_column: Optional[str] = "" field_mapping: Optional[Dict[str, str]] = None diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py index 98bb3fc7f9..58f3281c02 100644 --- a/sdk/python/feast/feature_view.py +++ b/sdk/python/feast/feature_view.py @@ -71,7 +71,7 @@ class FeatureViewModel(BaseModel): """ name: str - original_entities: List[EntityModel] = None + original_entities: List[EntityModel] = [] original_schema: Optional[List[Field]] = None ttl: Optional[timedelta] batch_source: DataSourceModel @@ -132,10 +132,13 @@ class FeatureView(BaseFeatureView): ttl: Optional[timedelta] batch_source: DataSource stream_source: Optional[DataSource] + entity_columns: List[Field] + features: List[Field] online: bool description: str tags: Dict[str, str] owner: str + materialization_intervals: List[Tuple[datetime, datetime]] @log_exceptions def __init__( @@ -183,7 +186,7 @@ def __init__( # making it impossible to convert idempotently to another format. # store these arguments to recover them in conversions. self.original_schema = schema - self.original_entities = entities + self.original_entities = entities or [] schema = schema or [] diff --git a/sdk/python/feast/field.py b/sdk/python/feast/field.py index cdf8828f24..07e3a36ff5 100644 --- a/sdk/python/feast/field.py +++ b/sdk/python/feast/field.py @@ -44,7 +44,7 @@ class Field(BaseModel): name: str dtype: FeastType - description: Optional[str] = "" + description: str = "" tags: Optional[Dict[str, str]] = {} class Config: @@ -70,7 +70,7 @@ def dtype_is_feasttype_or_string_feasttype(cls, v): TO-DO: Investigate whether FeastType can be refactored to a json compatible format. 
""" - if not isinstance(v, FeastType): # noinspection + if not isinstance(v, FeastType): # type: ignore if isinstance(v, str): return from_string(v) else: From 82cdec754e13903b87f0823e3357809affc8bd20 Mon Sep 17 00:00:00 2001 From: Matt Carlin Date: Mon, 3 Jul 2023 15:18:12 -0500 Subject: [PATCH 23/32] Fix source choice in FeatureView pydantic model conversion to match FeatureView logic --- sdk/python/feast/entity.py | 2 ++ sdk/python/feast/feature_view.py | 13 +++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/sdk/python/feast/entity.py b/sdk/python/feast/entity.py index 5ff3fc131f..35dce18e1a 100644 --- a/sdk/python/feast/entity.py +++ b/sdk/python/feast/entity.py @@ -249,3 +249,5 @@ def entity_from_pydantic_model(pydantic_entity): tags=pydantic_entity.tags if pydantic_entity.tags else None, owner=pydantic_entity.owner, ) + # TO-DO: add the timestamps to the entity after it is has been created + diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py index 58f3281c02..691947fc91 100644 --- a/sdk/python/feast/feature_view.py +++ b/sdk/python/feast/feature_view.py @@ -568,10 +568,18 @@ def featureview_from_pydantic_model(pydantic_featureview): pydantic_featureview.stream_source ) + # Mirror the stream/batch source conditions in the FeatureView + # constructor; one source is passed, either a stream source + # which contains a batch source inside it, or a batch source + # on its own. + source = stream_source if stream_source else batch_source + if stream_source: + source.batch_source = batch_source + # Create the FeatureView feature_view = FeatureView( name=pydantic_featureview.name, - source=batch_source, + source=source, schema=pydantic_featureview.original_schema, entities=[ Entity.entity_from_pydantic_model(entity) @@ -584,8 +592,5 @@ def featureview_from_pydantic_model(pydantic_featureview): owner=pydantic_featureview.owner, ) - # Correct the FeatureView to store both sources. - feature_view.batch_source = batch_source - feature_view.stream_source = stream_source return feature_view From 89eb753cbb07ba1d1e48403f90528ded8f5fd44d Mon Sep 17 00:00:00 2001 From: Matt Carlin Date: Mon, 3 Jul 2023 15:41:17 -0500 Subject: [PATCH 24/32] Refine FeastType json outputs --- sdk/python/feast/feature_view.py | 9 ++------- sdk/python/feast/field.py | 10 ++-------- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py index 691947fc91..fd4a81aab9 100644 --- a/sdk/python/feast/feature_view.py +++ b/sdk/python/feast/feature_view.py @@ -88,13 +88,8 @@ class Config: Field: lambda v: int(dumps(v.value, default=str)), DataSource: lambda v: v.to_pydantic_model(), Entity: lambda v: v.to_pydantic_model(), - FeastType: lambda v: int(dumps(v.to_value_type().value, default=str)), - ComplexFeastType: lambda v: int( - dumps(v.to_value_type().value, default=str) - ), - PrimitiveFeastType: lambda v: int( - dumps(v.to_value_type().value, default=str) - ), + ComplexFeastType: lambda v: str(v), + PrimitiveFeastType: lambda v: str(v) } diff --git a/sdk/python/feast/field.py b/sdk/python/feast/field.py index 07e3a36ff5..e5f984dfd1 100644 --- a/sdk/python/feast/field.py +++ b/sdk/python/feast/field.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from json import dumps from typing import Dict, Optional, Callable from pydantic import BaseModel, validator @@ -51,13 +50,8 @@ class Config: arbitrary_types_allowed = True extra = "allow" json_encoders: Dict[object, Callable] = { - FeastType: lambda v: int(dumps(v.to_value_type().value, default=str)), - ComplexFeastType: lambda v: int( - dumps(v.to_value_type().value, default=str) - ), - PrimitiveFeastType: lambda v: int( - dumps(v.to_value_type().value, default=str) - ), + ComplexFeastType: lambda v: str(v), + PrimitiveFeastType: lambda v: str(v) } @validator("dtype", pre=True, always=True) From f4a856eb1f57615e7de467b13ed3ce81982a3333 Mon Sep 17 00:00:00 2001 From: Matt Carlin Date: Wed, 5 Jul 2023 08:46:57 -0500 Subject: [PATCH 25/32] First round of PR fixes, the small stuff --- sdk/python/feast/base_feature_view.py | 1 - sdk/python/feast/embedded_go/type_map.py | 16 ++++++++-------- sdk/python/feast/entity.py | 14 +++++++++----- sdk/python/feast/feature_service.py | 22 ---------------------- sdk/python/feast/feature_store.py | 2 +- 5 files changed, 18 insertions(+), 37 deletions(-) diff --git a/sdk/python/feast/base_feature_view.py b/sdk/python/feast/base_feature_view.py index 4425fb3e4f..f64b495adf 100644 --- a/sdk/python/feast/base_feature_view.py +++ b/sdk/python/feast/base_feature_view.py @@ -18,7 +18,6 @@ from google.protobuf.json_format import MessageToJson from proto import Message -from pydantic import BaseModel, root_validator from feast.feature_view_projection import FeatureViewProjection from feast.field import Field diff --git a/sdk/python/feast/embedded_go/type_map.py b/sdk/python/feast/embedded_go/type_map.py index 9b1ece7732..e70dc3be86 100644 --- a/sdk/python/feast/embedded_go/type_map.py +++ b/sdk/python/feast/embedded_go/type_map.py @@ -50,14 +50,14 @@ PrimitiveFeastType.BYTES: pa.binary(), PrimitiveFeastType.BOOL: pa.bool_(), PrimitiveFeastType.UNIX_TIMESTAMP: pa.timestamp("s"), - Array(base_type=PrimitiveFeastType.INT32): pa.list_(pa.int32()), - Array(base_type=PrimitiveFeastType.INT64): pa.list_(pa.int64()), - Array(base_type=PrimitiveFeastType.FLOAT32): pa.list_(pa.float32()), - Array(base_type=PrimitiveFeastType.FLOAT64): pa.list_(pa.float64()), - Array(base_type=PrimitiveFeastType.STRING): pa.list_(pa.string()), - Array(base_type=PrimitiveFeastType.BYTES): pa.list_(pa.binary()), - Array(base_type=PrimitiveFeastType.BOOL): pa.list_(pa.bool_()), - Array(base_type=PrimitiveFeastType.UNIX_TIMESTAMP): pa.list_(pa.timestamp("s")), + Array(PrimitiveFeastType.INT32): pa.list_(pa.int32()), + Array(PrimitiveFeastType.INT64): pa.list_(pa.int64()), + Array(PrimitiveFeastType.FLOAT32): pa.list_(pa.float32()), + Array(PrimitiveFeastType.FLOAT64): pa.list_(pa.float64()), + Array(PrimitiveFeastType.STRING): pa.list_(pa.string()), + Array(PrimitiveFeastType.BYTES): pa.list_(pa.binary()), + Array(PrimitiveFeastType.BOOL): pa.list_(pa.bool_()), + Array(PrimitiveFeastType.UNIX_TIMESTAMP): pa.list_(pa.timestamp("s")), } diff --git a/sdk/python/feast/entity.py b/sdk/python/feast/entity.py index 35dce18e1a..482c0be60a 100644 --- a/sdk/python/feast/entity.py +++ b/sdk/python/feast/entity.py @@ -37,8 +37,8 @@ class EntityModel(BaseModel): description: str = "" tags: Optional[Dict[str, str]] = None owner: str = "" - created_timestamp: Optional[datetime] - last_updated_timestamp: Optional[datetime] + created_timestamp: Optional[datetime] = None + last_updated_timestamp: Optional[datetime] = None class Config: arbitrary_types_allowed = True @@ -231,6 +231,8 @@ def to_pydantic_model(self) 
-> EntityModel: description=self.description, tags=self.tags if self.tags else None, owner=self.owner, + created_timestamp=self.created_timestamp, + last_updated_timestamp=self.last_updated_timestamp ) @staticmethod @@ -241,7 +243,7 @@ def entity_from_pydantic_model(pydantic_entity): Returns: An Entity. """ - return Entity( + entity = Entity( name=pydantic_entity.name, join_keys=[pydantic_entity.join_key], value_type=pydantic_entity.value_type, @@ -249,5 +251,7 @@ def entity_from_pydantic_model(pydantic_entity): tags=pydantic_entity.tags if pydantic_entity.tags else None, owner=pydantic_entity.owner, ) - # TO-DO: add the timestamps to the entity after it is has been created - + entity.created_timestamp = pydantic_entity.created_timestamp, + entity.last_updated_timestamp = pydantic_entity.last_updated_timestamp + return entity + diff --git a/sdk/python/feast/feature_service.py b/sdk/python/feast/feature_service.py index abe61a2e74..c3037a55da 100644 --- a/sdk/python/feast/feature_service.py +++ b/sdk/python/feast/feature_service.py @@ -1,9 +1,7 @@ from datetime import datetime -from json import dumps from typing import Dict, List, Optional, Union from google.protobuf.json_format import MessageToJson -from pydantic import BaseModel, root_validator from typeguard import typechecked from feast.base_feature_view import BaseFeatureView @@ -24,26 +22,6 @@ from feast.usage import log_exceptions -class FeatureServiceModel(BaseModel): - """ - Pydantic Model of a Feast FeatureService. - """ - - name: str - _features: List[Union[FeatureView, OnDemandFeatureView]] - feature_view_projections: List[FeatureViewProjection] - description: str = "" - tags: Optional[Dict[str, str]] = None - owner: str = "" - created_timestamp: Optional[datetime] = None - last_updated_timestamp: Optional[datetime] = None - logging_config: Optional[LoggingConfig] = None - - class Config: - arbitrary_types_allowed = True - extra = "allow" - - @typechecked class FeatureService: """ diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index d2f2e10c78..70f7d3dcb7 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -819,7 +819,7 @@ def apply( >>> fs.apply([driver_hourly_stats_view, driver]) # register entity and feature view """ # TODO: Add locking - if not isinstance(objects, list): + if not isinstance(objects, Iterable): objects = [objects] assert isinstance(objects, list) From 493dbd472187acca1fcd8cc7a1d993848a769e4b Mon Sep 17 00:00:00 2001 From: Matt Carlin Date: Wed, 5 Jul 2023 16:37:26 -0500 Subject: [PATCH 26/32] Second round of PR fixes. Move Pydantic model conversions out of main Feast code. 
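
For illustration, a minimal sketch of the API this refactor introduces (a
hedged example, not part of the diff below; it assumes the new
feast.expediagroup.pydantic_models modules are importable exactly as laid
out in this patch). Conversions now live on the pydantic models rather
than on the Feast objects:

    from feast.entity import Entity
    from feast.expediagroup.pydantic_models.entity_model import EntityModel

    driver = Entity(name="driver", join_keys=["driver_id"])
    model = EntityModel.from_entity(driver)    # Entity -> pydantic model
    payload = model.json()                     # JSON string, via json_encoders
    restored = EntityModel.parse_raw(payload).to_entity()
    assert restored == driver                  # round trip should be idempotent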
--- sdk/python/feast/base_feature_view.py | 1 - sdk/python/feast/data_source.py | 99 ---------- sdk/python/feast/entity.py | 65 +------ .../pydantic_models/data_source_model.py | 174 ++++++++++++++++++ .../pydantic_models/entity_model.py | 75 ++++++++ .../pydantic_models/feature_view_model.py | 126 +++++++++++++ sdk/python/feast/feature_view.py | 129 +------------ sdk/python/feast/field.py | 6 +- .../spark_offline_store/spark_source.py | 87 +-------- sdk/python/tests/unit/test_pydantic_models.py | 170 ++++++++--------- 10 files changed, 458 insertions(+), 474 deletions(-) create mode 100644 sdk/python/feast/expediagroup/pydantic_models/data_source_model.py create mode 100644 sdk/python/feast/expediagroup/pydantic_models/entity_model.py create mode 100644 sdk/python/feast/expediagroup/pydantic_models/feature_view_model.py diff --git a/sdk/python/feast/base_feature_view.py b/sdk/python/feast/base_feature_view.py index f64b495adf..975537a394 100644 --- a/sdk/python/feast/base_feature_view.py +++ b/sdk/python/feast/base_feature_view.py @@ -13,7 +13,6 @@ # limitations under the License. from abc import ABC, abstractmethod from datetime import datetime -from json import dumps from typing import Dict, List, Optional, Type from google.protobuf.json_format import MessageToJson diff --git a/sdk/python/feast/data_source.py b/sdk/python/feast/data_source.py index dbbcada53c..b7ce19aad9 100644 --- a/sdk/python/feast/data_source.py +++ b/sdk/python/feast/data_source.py @@ -16,13 +16,10 @@ import warnings from abc import ABC, abstractmethod from datetime import timedelta -from json import dumps from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple from google.protobuf.duration_pb2 import Duration from google.protobuf.json_format import MessageToJson -from pydantic import BaseModel -from pydantic import Field as PydanticField from typeguard import typechecked from feast import type_map @@ -167,26 +164,6 @@ def to_proto(self) -> DataSourceProto.KinesisOptions: } -class DataSourceModel(BaseModel): - """ - Pydantic Model of a Feast DataSource. - """ - - name: str - model_type: str = "" - timestamp_field: Optional[str] = "" - created_timestamp_column: Optional[str] = "" - field_mapping: Optional[Dict[str, str]] = None - description: Optional[str] = "" - tags: Optional[Dict[str, str]] = None - owner: Optional[str] = "" - date_partition_column: Optional[str] = "" - - class Config: - arbitrary_types_allowed = True - extra = "allow" - - @typechecked class DataSource(ABC): """ @@ -364,25 +341,6 @@ def get_table_query_string(self) -> str: """ raise NotImplementedError - def to_pydantic_model(self) -> DataSourceModel: - """ - Converts a DataSource object to its pydantic model representation. - - Returns: - A DataSourceModel. - """ - raise NotImplementedError - - @staticmethod - def datasource_from_pydantic_model(pydantic_datasource): - """ - Given a Pydantic DataSourceModel, create and return a DataSource. - - Returns: - A DataSource. - """ - raise NotImplementedError - @typechecked class KafkaSource(DataSource): @@ -542,23 +500,6 @@ def get_table_query_string(self) -> str: raise NotImplementedError -class RequestSourceModel(DataSourceModel): - """ - Pydantic Model of a Feast RequestSource. 
- """ - - name: str - model_type: str = PydanticField("RequestSource", const=True) - schema_: List[Field] = PydanticField(None, alias="schema") - description: Optional[str] = "" - tags: Optional[Dict[str, str]] = None - owner: Optional[str] = "" - - class Config: - arbitrary_types_allowed = True - extra = "allow" - - @typechecked class RequestSource(DataSource): """ @@ -665,46 +606,6 @@ def get_table_query_string(self) -> str: def source_datatype_to_feast_value_type() -> Callable[[str], ValueType]: raise NotImplementedError - def to_pydantic_model(self) -> RequestSourceModel: - """ - Converts a RequestSource object to its pydantic model representation. - - Returns: - A RequestSourceModel. - """ - return RequestSourceModel( - name=self.name, - schema=self.schema, - description=self.description, - tags=self.tags if self.tags else None, - owner=self.owner, - ) - - @staticmethod - def datasource_from_pydantic_model(pydantic_datasource): - """ - Given a Pydantic RequestSourceModel, create and return a RequestSource. - - Returns: - A RequestSource. - """ - params = { - "name": pydantic_datasource.name, - "description": pydantic_datasource.description, - "tags": pydantic_datasource.tags if pydantic_datasource.tags else None, - "owner": pydantic_datasource.owner, - } - params["schema"] = [ - Field( - name=sch.name, - dtype=sch.dtype, - description=sch.description, - tags=sch.tags, - ) - for sch in pydantic_datasource.schema_ - ] - return RequestSource(**params) - @typechecked class KinesisSource(DataSource): diff --git a/sdk/python/feast/entity.py b/sdk/python/feast/entity.py index 482c0be60a..30f04e9c06 100644 --- a/sdk/python/feast/entity.py +++ b/sdk/python/feast/entity.py @@ -12,11 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. from datetime import datetime -from json import dumps -from typing import Dict, List, Optional, Callable +from typing import Dict, List, Optional from google.protobuf.json_format import MessageToJson -from pydantic import BaseModel from typeguard import typechecked from feast.protos.feast.core.Entity_pb2 import Entity as EntityProto @@ -26,28 +24,6 @@ from feast.value_type import ValueType -class EntityModel(BaseModel): - """ - Pydantic Model of a Feast Entity. - """ - - name: str - join_key: str - value_type: Optional[ValueType] = None - description: str = "" - tags: Optional[Dict[str, str]] = None - owner: str = "" - created_timestamp: Optional[datetime] = None - last_updated_timestamp: Optional[datetime] = None - - class Config: - arbitrary_types_allowed = True - extra = "allow" - json_encoders: Dict[object, Callable] = { - ValueType: lambda v: int(dumps(v.value, default=str)) - } - - @typechecked class Entity: """ @@ -216,42 +192,3 @@ def to_proto(self) -> EntityProto: ) return EntityProto(spec=spec, meta=meta) - - def to_pydantic_model(self) -> EntityModel: - """ - Converts an entity object to its pydantic model representation. - - Returns: - An EntityModel. - """ - return EntityModel( - name=self.name, - join_key=self.join_key, - value_type=self.value_type, - description=self.description, - tags=self.tags if self.tags else None, - owner=self.owner, - created_timestamp=self.created_timestamp, - last_updated_timestamp=self.last_updated_timestamp - ) - - @staticmethod - def entity_from_pydantic_model(pydantic_entity): - """ - Given a Pydantic EntityModel, create and return an Entity. - - Returns: - An Entity. 
- """ - entity = Entity( - name=pydantic_entity.name, - join_keys=[pydantic_entity.join_key], - value_type=pydantic_entity.value_type, - description=pydantic_entity.description, - tags=pydantic_entity.tags if pydantic_entity.tags else None, - owner=pydantic_entity.owner, - ) - entity.created_timestamp = pydantic_entity.created_timestamp, - entity.last_updated_timestamp = pydantic_entity.last_updated_timestamp - return entity - diff --git a/sdk/python/feast/expediagroup/pydantic_models/data_source_model.py b/sdk/python/feast/expediagroup/pydantic_models/data_source_model.py new file mode 100644 index 0000000000..ff536a4714 --- /dev/null +++ b/sdk/python/feast/expediagroup/pydantic_models/data_source_model.py @@ -0,0 +1,174 @@ +""" +Pydantic Model for Data Source + +Copyright 2023 Expedia Group +Author: matcarlin@expediagroup.com +""" +from typing import Dict, List, Literal, Optional + +from pydantic import BaseModel +from pydantic import Field as PydanticField + +from feast.data_source import RequestSource +from feast.field import Field +from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import ( + SparkSource, +) + + +class DataSourceModel(BaseModel): + """ + Pydantic Model of a Feast DataSource. + """ + + def to_data_source(self): + """ + Given a Pydantic DataSourceModel, create and return a DataSource. + + Returns: + A DataSource. + """ + raise NotImplementedError + + @classmethod + def from_data_source(cls, data_source): + """ + Converts a DataSource object to its pydantic model representation. + + Returns: + A DataSourceModel. + """ + raise NotImplementedError + + +class RequestSourceModel(DataSourceModel): + """ + Pydantic Model of a Feast RequestSource. + """ + + name: str + model_type: Literal["RequestSourceModel"] = "RequestSourceModel" + schema_: List[Field] = PydanticField(None, alias="schema") + description: Optional[str] = "" + tags: Optional[Dict[str, str]] = None + owner: Optional[str] = "" + + class Config: + arbitrary_types_allowed = True + extra = "allow" + + def to_data_source(self): + """ + Given a Pydantic RequestSourceModel, create and return a RequestSource. + + Returns: + A RequestSource. + """ + params = { + "name": self.name, + "description": self.description, + "tags": self.tags if self.tags else None, + "owner": self.owner, + } + params["schema"] = [ + Field( + name=sch.name, + dtype=sch.dtype, + description=sch.description, + tags=sch.tags, + ) + for sch in self.schema_ + ] + return RequestSource(**params) + + @classmethod + def from_data_source(cls, data_source): + """ + Converts a RequestSource object to its pydantic model representation. + + Returns: + A RequestSourceModel. + """ + return cls( + name=data_source.name, + schema=data_source.schema, + description=data_source.description, + tags=data_source.tags if data_source.tags else None, + owner=data_source.owner, + ) + + +class SparkSourceModel(DataSourceModel): + """ + Pydantic Model of a Feast SparkSource. 
+ """ + + name: str + model_type: Literal["SparkSourceModel"] = "SparkSourceModel" + table: Optional[str] = None + query: Optional[str] = None + path: Optional[str] = None + file_format: Optional[str] = None + created_timestamp_column: Optional[str] = None + field_mapping: Optional[Dict[str, str]] = None + description: Optional[str] = "" + tags: Optional[Dict[str, str]] = None + owner: Optional[str] = "" + timestamp_field: Optional[str] = None + + class Config: + arbitrary_types_allowed = True + extra = "allow" + + def to_data_source(self): + """ + Given a Pydantic SparkSourceModel, create and return a SparkSource. + + Returns: + A SparkSource. + """ + return SparkSource( + name=self.name, + table=self.table if hasattr(self, "table") else "", + query=self.query if hasattr(self, "query") else "", + path=self.path if hasattr(self, "path") else "", + file_format=self.file_format if hasattr(self, "file_format") else "", + created_timestamp_column=self.created_timestamp_column + if hasattr(self, "created_timestamp_column") + else "", + field_mapping=self.field_mapping if self.field_mapping else None, + description=self.description or "", + tags=self.tags if self.tags else None, + owner=self.owner or "", + timestamp_field=self.timestamp_field + if hasattr(self, "timestamp_field") + else "", + ) + + @classmethod + def from_data_source(cls, data_source): + """ + Converts a SparkSource object to its pydantic model representation. + + Returns: + A SparkSourceModel. + """ + return cls( + name=data_source.name, + table=data_source.table, + query=data_source.query, + path=data_source.path, + file_format=data_source.file_format, + created_timestamp_column=data_source.created_timestamp_column + if data_source.created_timestamp_column + else "", + field_mapping=data_source.field_mapping + if data_source.field_mapping + else None, + description=data_source.description if data_source.description else "", + tags=data_source.tags if data_source.tags else None, + owner=data_source.owner if data_source.owner else "", + timestamp_field=data_source.timestamp_field + if data_source.timestamp_field + else "", + ) diff --git a/sdk/python/feast/expediagroup/pydantic_models/entity_model.py b/sdk/python/feast/expediagroup/pydantic_models/entity_model.py new file mode 100644 index 0000000000..e46e65924d --- /dev/null +++ b/sdk/python/feast/expediagroup/pydantic_models/entity_model.py @@ -0,0 +1,75 @@ +""" +Pydantic Model for Entity + +Copyright 2023 Expedia Group +Author: matcarlin@expediagroup.com +""" + +from datetime import datetime +from json import dumps +from typing import Callable, Dict, Optional + +from pydantic import BaseModel + +from feast.entity import Entity +from feast.value_type import ValueType + + +class EntityModel(BaseModel): + """ + Pydantic Model of a Feast Entity. + """ + + name: str + join_key: str + value_type: Optional[ValueType] = None + description: str = "" + tags: Optional[Dict[str, str]] = None + owner: str = "" + created_timestamp: Optional[datetime] = None + last_updated_timestamp: Optional[datetime] = None + + class Config: + arbitrary_types_allowed = True + extra = "allow" + json_encoders: Dict[object, Callable] = { + ValueType: lambda v: int(dumps(v.value, default=str)) + } + + def to_entity(self): + """ + Given a Pydantic EntityModel, create and return an Entity. + + Returns: + An Entity. 
+        """
+        entity = Entity(
+            name=self.name,
+            join_keys=[self.join_key],
+            value_type=self.value_type,
+            description=self.description,
+            tags=self.tags if self.tags else None,
+            owner=self.owner,
+        )
+        entity.created_timestamp = self.created_timestamp
+        entity.last_updated_timestamp = self.last_updated_timestamp
+        return entity
+
+    @classmethod
+    def from_entity(cls, entity):
+        """
+        Converts an entity object to its pydantic model representation.
+
+        Returns:
+            An EntityModel.
+        """
+        return cls(
+            name=entity.name,
+            join_key=entity.join_key,
+            value_type=entity.value_type,
+            description=entity.description,
+            tags=entity.tags if entity.tags else None,
+            owner=entity.owner,
+            created_timestamp=entity.created_timestamp,
+            last_updated_timestamp=entity.last_updated_timestamp,
+        )
diff --git a/sdk/python/feast/expediagroup/pydantic_models/feature_view_model.py b/sdk/python/feast/expediagroup/pydantic_models/feature_view_model.py
new file mode 100644
index 0000000000..c141b18ce0
--- /dev/null
+++ b/sdk/python/feast/expediagroup/pydantic_models/feature_view_model.py
@@ -0,0 +1,126 @@
+"""
+Pydantic Model for Feature View
+
+Copyright 2023 Expedia Group
+Author: matcarlin@expediagroup.com
+"""
+import sys
+from datetime import timedelta
+from json import dumps
+from typing import Callable, Dict, List, Optional
+
+from pydantic import BaseModel
+
+from feast.data_source import DataSource
+from feast.entity import Entity
+from feast.expediagroup.pydantic_models.data_source_model import (
+    DataSourceModel,
+    RequestSourceModel,
+    SparkSourceModel,
+)
+from feast.expediagroup.pydantic_models.entity_model import EntityModel
+from feast.feature_view import FeatureView
+from feast.field import Field
+from feast.types import ComplexFeastType, PrimitiveFeastType
+
+
+class FeatureViewModel(BaseModel):
+    """
+    Pydantic Model of a Feast FeatureView.
+    """
+
+    name: str
+    original_entities: List[EntityModel] = []
+    original_schema: Optional[List[Field]] = None
+    ttl: Optional[timedelta]
+    batch_source: DataSourceModel
+    stream_source: Optional[DataSourceModel]
+    online: bool = True
+    description: str = ""
+    tags: Optional[Dict[str, str]] = None
+    owner: str = ""
+
+    class Config:
+        arbitrary_types_allowed = True
+        extra = "allow"
+        json_encoders: Dict[object, Callable] = {
+            Field: lambda v: int(dumps(v.value, default=str)),
+            DataSource: lambda v: v.to_pydantic_model(),
+            Entity: lambda v: v.to_pydantic_model(),
+            ComplexFeastType: lambda v: str(v),
+            PrimitiveFeastType: lambda v: str(v),
+        }
+
+    def to_feature_view(self):
+        """
+        Given a Pydantic FeatureViewModel, create and return a FeatureView.

+        Returns:
+            A FeatureView.
+        """
+        # Convert each of the sources if they exist
+        batch_source = self.batch_source.to_data_source() if self.batch_source else None
+        stream_source = (
+            self.stream_source.to_data_source() if self.stream_source else None
+        )
+
+        # Mirror the stream/batch source conditions in the FeatureView
+        # constructor; one source is passed, either a stream source
+        # which contains a batch source inside it, or a batch source
+        # on its own.
+ source = stream_source if stream_source else batch_source + if stream_source: + source.batch_source = batch_source + + # Create the FeatureView + feature_view = FeatureView( + name=self.name, + source=source, + schema=self.original_schema, + entities=[entity.to_entity() for entity in self.original_entities], + ttl=self.ttl, + online=self.online, + description=self.description, + tags=self.tags if self.tags else None, + owner=self.owner, + ) + + return feature_view + + @classmethod + def from_feature_view(cls, feature_view): + """ + Converts a FeatureView object to its pydantic model representation. + + Returns: + A FeatureViewModel. + """ + batch_source = None + if feature_view.batch_source: + class_ = getattr( + sys.modules[__name__], + type(feature_view.batch_source).__name__ + "Model", + ) + batch_source = class_.from_data_source(feature_view.batch_source) + stream_source = None + if feature_view.stream_source: + class_ = getattr( + sys.modules[__name__], + type(feature_view.stream_source).__name__ + "Model", + ) + stream_source = class_.from_data_source(feature_view.stream_source) + return cls( + name=feature_view.name, + original_entities=[ + EntityModel.from_entity(entity) + for entity in feature_view.original_entities + ], + ttl=feature_view.ttl, + original_schema=feature_view.original_schema, + batch_source=batch_source, + stream_source=stream_source, + online=feature_view.online, + description=feature_view.description, + tags=feature_view.tags if feature_view.tags else None, + owner=feature_view.owner, + ) diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py index fd4a81aab9..13b15a3b5e 100644 --- a/sdk/python/feast/feature_view.py +++ b/sdk/python/feast/feature_view.py @@ -12,33 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. import copy -import sys import warnings from datetime import datetime, timedelta -from json import dumps -from typing import Dict, List, Optional, Tuple, Type, Callable +from typing import Dict, List, Optional, Tuple, Type from google.protobuf.duration_pb2 import Duration -from pydantic import BaseModel -from pydantic import Field as PydanticField from typeguard import typechecked from feast import utils from feast.base_feature_view import BaseFeatureView -from feast.data_source import ( - DataSource, - DataSourceModel, - KafkaSource, - KinesisSource, - PushSource, - RequestSource, -) -from feast.entity import Entity, EntityModel +from feast.data_source import DataSource, KafkaSource, KinesisSource, PushSource +from feast.entity import Entity from feast.feature_view_projection import FeatureViewProjection from feast.field import Field -from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import ( - SparkSource, -) from feast.protos.feast.core.FeatureView_pb2 import FeatureView as FeatureViewProto from feast.protos.feast.core.FeatureView_pb2 import ( FeatureViewMeta as FeatureViewMetaProto, @@ -49,7 +35,7 @@ from feast.protos.feast.core.FeatureView_pb2 import ( MaterializationInterval as MaterializationIntervalProto, ) -from feast.types import ComplexFeastType, FeastType, PrimitiveFeastType, from_value_type +from feast.types import from_value_type from feast.usage import log_exceptions from feast.value_type import ValueType @@ -65,34 +51,6 @@ ) -class FeatureViewModel(BaseModel): - """ - Pydantic Model of a Feast FeatureView. 
- """ - - name: str - original_entities: List[EntityModel] = [] - original_schema: Optional[List[Field]] = None - ttl: Optional[timedelta] - batch_source: DataSourceModel - stream_source: Optional[DataSourceModel] - online: bool = True - description: str = "" - tags: Optional[Dict[str, str]] = None - owner: str = "" - - class Config: - arbitrary_types_allowed = True - extra = "allow" - json_encoders: Dict[object, Callable] = { - Field: lambda v: int(dumps(v.value, default=str)), - DataSource: lambda v: v.to_pydantic_model(), - Entity: lambda v: v.to_pydantic_model(), - ComplexFeastType: lambda v: str(v), - PrimitiveFeastType: lambda v: str(v) - } - - @typechecked class FeatureView(BaseFeatureView): """ @@ -510,82 +468,3 @@ def most_recent_end_time(self) -> Optional[datetime]: if len(self.materialization_intervals) == 0: return None return max([interval[1] for interval in self.materialization_intervals]) - - def to_pydantic_model(self) -> FeatureViewModel: - """ - Converts a FeatureView object to its pydantic model representation. - - Returns: - A FeatureViewModel. - """ - return FeatureViewModel( - name=self.name, - original_entities=[ - entity.to_pydantic_model() for entity in self.original_entities - ], - ttl=self.ttl, - original_schema=self.original_schema, - batch_source=self.batch_source.to_pydantic_model() - if self.batch_source - else self.batch_source, - stream_source=self.stream_source.to_pydantic_model() - if self.stream_source - else self.stream_source, - online=self.online, - description=self.description, - tags=self.tags if self.tags else None, - owner=self.owner, - ) - - @staticmethod - def featureview_from_pydantic_model(pydantic_featureview): - """ - Given a Pydantic FeatureViewModel, create and return a FeatureView. - - Returns: - A FeatureView. - """ - # Convert each of the sources if they exist - batch_source = None - if pydantic_featureview.batch_source: - class_ = getattr( - sys.modules[__name__], pydantic_featureview.batch_source.model_type - ) - batch_source = class_.datasource_from_pydantic_model( - pydantic_featureview.batch_source - ) - stream_source = None - if pydantic_featureview.stream_source: - class_ = getattr( - sys.modules[__name__], pydantic_featureview.stream_source.model_type - ) - stream_source = class_.datasource_from_pydantic_model( - pydantic_featureview.stream_source - ) - - # Mirror the stream/batch source conditions in the FeatureView - # constructor; one source is passed, either a stream source - # which contains a batch source inside it, or a batch source - # on its own. - source = stream_source if stream_source else batch_source - if stream_source: - source.batch_source = batch_source - - # Create the FeatureView - feature_view = FeatureView( - name=pydantic_featureview.name, - source=source, - schema=pydantic_featureview.original_schema, - entities=[ - Entity.entity_from_pydantic_model(entity) - for entity in pydantic_featureview.original_entities - ], - ttl=pydantic_featureview.ttl, - online=pydantic_featureview.online, - description=pydantic_featureview.description, - tags=pydantic_featureview.tags if pydantic_featureview.tags else None, - owner=pydantic_featureview.owner, - ) - - - return feature_view diff --git a/sdk/python/feast/field.py b/sdk/python/feast/field.py index e5f984dfd1..c019e4e258 100644 --- a/sdk/python/feast/field.py +++ b/sdk/python/feast/field.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Dict, Optional, Callable +from typing import Callable, Dict, Optional from pydantic import BaseModel, validator from typeguard import typechecked @@ -51,7 +51,7 @@ class Config: extra = "allow" json_encoders: Dict[object, Callable] = { ComplexFeastType: lambda v: str(v), - PrimitiveFeastType: lambda v: str(v) + PrimitiveFeastType: lambda v: str(v), } @validator("dtype", pre=True, always=True) @@ -64,7 +64,7 @@ def dtype_is_feasttype_or_string_feasttype(cls, v): TO-DO: Investigate whether FeastType can be refactored to a json compatible format. """ - if not isinstance(v, FeastType): # type: ignore + if not isinstance(v, FeastType): # type: ignore if isinstance(v, str): return from_string(v) else: diff --git a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py index 667a7bdf87..db4e744c24 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py @@ -3,15 +3,12 @@ import uuid import warnings from enum import Enum -from json import dumps from typing import Any, Callable, Dict, Iterable, Optional, Tuple -from pydantic import BaseModel -from pydantic import Field as PydanticField from pyspark.sql import SparkSession from feast import flags_helper -from feast.data_source import DataSource, DataSourceModel +from feast.data_source import DataSource from feast.errors import DataSourceNoNameException from feast.protos.feast.core.DataSource_pb2 import DataSource as DataSourceProto from feast.protos.feast.core.SavedDataset_pb2 import ( @@ -33,29 +30,6 @@ class SparkSourceFormat(Enum): avro = "avro" -class SparkSourceModel(DataSourceModel): - """ - Pydantic Model of a Feast SparkSource. - """ - - name: str - model_type: str = PydanticField("SparkSource", const=True) - table: Optional[str] = None - query: Optional[str] = None - path: Optional[str] = None - file_format: Optional[str] = None - created_timestamp_column: Optional[str] = None - field_mapping: Optional[Dict[str, str]] = None - description: Optional[str] = "" - tags: Optional[Dict[str, str]] = None - owner: Optional[str] = "" - timestamp_field: Optional[str] = None - - class Config: - arbitrary_types_allowed = True - extra = "allow" - - class SparkSource(DataSource): def __init__( self, @@ -211,65 +185,6 @@ def get_table_query_string(self) -> str: return f"`{tmp_table_name}`" - def to_pydantic_model(self) -> SparkSourceModel: - """ - Converts a SparkSource object to its pydantic model representation. - - Returns: - A SparkSourceModel. - """ - return SparkSourceModel( - name=self.name, - table=self.table, - query=self.query, - path=self.path, - file_format=self.file_format, - created_timestamp_column=self.created_timestamp_column - if self.created_timestamp_column - else "", - field_mapping=self.field_mapping if self.field_mapping else None, - description=self.description if self.description else "", - tags=self.tags if self.tags else None, - owner=self.owner if self.owner else "", - timestamp_field=self.timestamp_field if self.timestamp_field else "", - ) - - @staticmethod - def datasource_from_pydantic_model(pydantic_datasource): - """ - Given a Pydantic SparkSourceModel, create and return a SparkSource. - - Returns: - A SparkSource. 
- """ - return SparkSource( - name=pydantic_datasource.name, - table=pydantic_datasource.table - if hasattr(pydantic_datasource, "table") - else "", - query=pydantic_datasource.query - if hasattr(pydantic_datasource, "query") - else "", - path=pydantic_datasource.path - if hasattr(pydantic_datasource, "path") - else "", - file_format=pydantic_datasource.file_format - if hasattr(pydantic_datasource, "file_format") - else "", - created_timestamp_column=pydantic_datasource.created_timestamp_column - if hasattr(pydantic_datasource, "created_timestamp_column") - else "", - field_mapping=pydantic_datasource.field_mapping - if pydantic_datasource.field_mapping - else None, - description=pydantic_datasource.description or "", - tags=pydantic_datasource.tags if pydantic_datasource.tags else None, - owner=pydantic_datasource.owner or "", - timestamp_field=pydantic_datasource.timestamp_field - if hasattr(pydantic_datasource, "timestamp_field") - else "", - ) - class SparkOptions: allowed_formats = [format.value for format in SparkSourceFormat] diff --git a/sdk/python/tests/unit/test_pydantic_models.py b/sdk/python/tests/unit/test_pydantic_models.py index 5f290e86af..2b43dead9c 100644 --- a/sdk/python/tests/unit/test_pydantic_models.py +++ b/sdk/python/tests/unit/test_pydantic_models.py @@ -11,98 +11,78 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import assertpy -import pytest -from pydantic.error_wrappers import ValidationError - -from feast.data_source import ( - DataSource, - DataSourceModel, - RequestSource, +from typing import List, Union + +from pydantic import BaseModel +from pydantic import Field as PydanticField +from typing_extensions import Annotated + +from feast.data_source import RequestSource +from feast.entity import Entity +from feast.expediagroup.pydantic_models.data_source_model import ( RequestSourceModel, + SparkSourceModel, ) -from feast.entity import Entity, EntityModel -from feast.feature_view import FeatureView, FeatureViewModel +from feast.expediagroup.pydantic_models.entity_model import EntityModel +from feast.expediagroup.pydantic_models.feature_view_model import FeatureViewModel +from feast.feature_view import FeatureView from feast.field import Field from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import ( SparkSource, - SparkSourceModel, ) -from feast.types import Array, Bool, Float32, Int64 - - -def test_datasourcemodel_to_sparksource(): - spark_source_model = DataSourceModel( - name="string", - model_type="SparkSource", - table="table1", - query="", - path="", - file_format="", - timestamp_field="", - created_timestamp_column="", - description="", - owner="", - date_partition_column="", - ) - spark_source = SparkSource.datasource_from_pydantic_model(spark_source_model) - spark_source_model_b = spark_source.to_pydantic_model() - assert spark_source_model == spark_source_model_b - - with pytest.raises(ValueError): - # No file_format specified - spark_source_model = DataSourceModel( - name="string", - model_type="SparkSource", - path="path1", - timestamp_field="", - created_timestamp_column="", - description="", - owner="", - date_partition_column="", - ) - spark_source = SparkSource.datasource_from_pydantic_model(spark_source_model) - - spark_source_model = DataSourceModel( - name="string", - model_type="SparkSource", - path="path1", - file_format="json", - table="", - query="", - timestamp_field="", - 
created_timestamp_column="", - description="", - owner="", - date_partition_column="", - ) - spark_source = SparkSource.datasource_from_pydantic_model(spark_source_model) - spark_source_model_b = spark_source.to_pydantic_model() - assert spark_source_model == spark_source_model_b +from feast.types import Bool, Float32 -def test_datasourcemodel_to_requestsource(): - with pytest.raises(ValidationError): - bad_schema = [ - Field(name="f1", dtype="Array(Float323)"), - Field(name="f2", dtype="Bool"), - ] - - schema = [ - Field(name="f1", dtype="Array(Float32)"), - Field(name="f2", dtype="Bool"), +def test_datasource_child_deserialization(): + # https://blog.devgenius.io/deserialize-child-classes-with-pydantic-that-gonna-work-784230e1cf83 + # This lets us discriminate child classes of DataSourceModel with type hints. + SourceTypes = Annotated[ + Union[RequestSourceModel, SparkSourceModel], + PydanticField(discriminator="model_type"), ] - request_source_model = RequestSourceModel( - name="source", - model_type="RequestSource", - schema=schema, - description="desc", - tags=None, - owner="feast", - ) - request_source = RequestSource.datasource_from_pydantic_model(request_source_model) - request_source_model_b = request_source.to_pydantic_model() - assert request_source_model == request_source_model_b + + class DataSourcesByWire(BaseModel): + source_models: List[SourceTypes] = [] + + class Config: + arbitrary_types_allowed = True + extra = "allow" + + spark_source_model_json = { + "name": "string", + "model_type": "SparkSourceModel", + "table": "table1", + "query": "", + "path": "", + "file_format": "", + "timestamp_field": "", + "created_timestamp_column": "", + "description": "", + "owner": "", + "date_partition_column": "", + } + + spark_source_model = SparkSourceModel(**spark_source_model_json) + + request_source_model_json = { + "name": "source", + "model_type": "RequestSourceModel", + "schema": [{"name": "string", "dtype": "Int32", "description": "", "tags": {}}], + "description": "desc", + "tags": {}, + "owner": "feast", + } + + request_source_model = RequestSourceModel(**request_source_model_json) + + data_dict = {"source_models": [spark_source_model, request_source_model]} + + sources = DataSourcesByWire(**data_dict) + + assert type(sources.source_models[0]).__name__ == "SparkSourceModel" + assert sources.source_models[0] == spark_source_model + assert type(sources.source_models[1]).__name__ == "RequestSourceModel" + assert sources.source_models[1] == request_source_model def test_idempotent_entity_conversion(): @@ -111,8 +91,8 @@ def test_idempotent_entity_conversion(): description="My entity", tags={"key1": "val1", "key2": "val2"}, ) - entity_model = entity.to_pydantic_model() - entity_b = Entity.entity_from_pydantic_model(entity_model) + entity_model = EntityModel.from_entity(entity) + entity_b = entity_model.to_entity() assert entity == entity_b @@ -128,10 +108,8 @@ def test_idempotent_requestsource_conversion(): tags={}, owner="feast", ) - request_source_model = request_source.to_pydantic_model() - request_source_b = RequestSource.datasource_from_pydantic_model( - request_source_model - ) + request_source_model = RequestSourceModel.from_data_source(request_source) + request_source_b = request_source_model.to_data_source() assert request_source == request_source_b @@ -143,8 +121,8 @@ def test_idempotent_sparksource_conversion(): tags={}, owner="feast", ) - spark_source_model = spark_source.to_pydantic_model() - spark_source_b = 
SparkSource.datasource_from_pydantic_model(spark_source_model) + spark_source_model = SparkSourceModel.from_data_source(spark_source) + spark_source_b = spark_source_model.to_data_source() assert spark_source == spark_source_b @@ -170,13 +148,13 @@ def test_idempotent_featureview_conversion(): ], source=request_source, ) - feature_view_model = feature_view.to_pydantic_model() - feature_view_b = FeatureView.featureview_from_pydantic_model(feature_view_model) + feature_view_model = FeatureViewModel.from_feature_view(feature_view) + feature_view_b = feature_view_model.to_feature_view() assert feature_view == feature_view_b spark_source = SparkSource( name="sparky_sparky_boom_man", - path=f"/data/driver_hourly_stats", + path="/data/driver_hourly_stats", file_format="parquet", timestamp_field="event_timestamp", created_timestamp_column="created", @@ -190,6 +168,6 @@ def test_idempotent_featureview_conversion(): ], source=spark_source, ) - feature_view_model = feature_view.to_pydantic_model() - feature_view_b = FeatureView.featureview_from_pydantic_model(feature_view_model) + feature_view_model = FeatureViewModel.from_feature_view(feature_view) + feature_view_b = feature_view_model.to_feature_view() assert feature_view == feature_view_b From 59e1f4ec63c69c510af2171b29026e13e69e05a1 Mon Sep 17 00:00:00 2001 From: Matt Carlin Date: Thu, 6 Jul 2023 07:20:25 -0500 Subject: [PATCH 27/32] Restore github workflow publish.yml --- .github/workflows/publish.yml | 182 ++++++++++++++++++++++++++++++++++ 1 file changed, 182 insertions(+) create mode 100644 .github/workflows/publish.yml diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 0000000000..11f08bf2e5 --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,182 @@ +name: publish + +on: + push: + tags: + - 'v*.*.*' + +jobs: + get-version: + if: github.repository == 'feast-dev/feast' + runs-on: ubuntu-latest + outputs: + release_version: ${{ steps.get_release_version.outputs.release_version }} + version_without_prefix: ${{ steps.get_release_version_without_prefix.outputs.version_without_prefix }} + highest_semver_tag: ${{ steps.get_highest_semver.outputs.highest_semver_tag }} + steps: + - uses: actions/checkout@v2 + - name: Get release version + id: get_release_version + run: echo ::set-output name=release_version::${GITHUB_REF#refs/*/} + - name: Get release version without prefix + id: get_release_version_without_prefix + env: + RELEASE_VERSION: ${{ steps.get_release_version.outputs.release_version }} + run: | + echo ::set-output name=version_without_prefix::${RELEASE_VERSION:1} + - name: Get highest semver + id: get_highest_semver + env: + RELEASE_VERSION: ${{ steps.get_release_version.outputs.release_version }} + run: | + source infra/scripts/setup-common-functions.sh + SEMVER_REGEX='^v[0-9]+\.[0-9]+\.[0-9]+(-([0-9A-Za-z-]+(\.[0-9A-Za-z-]+)*))?$' + if echo "${RELEASE_VERSION}" | grep -P "$SEMVER_REGEX" &>/dev/null ; then + echo ::set-output name=highest_semver_tag::$(get_tag_release -m) + fi + - name: Check output + env: + RELEASE_VERSION: ${{ steps.get_release_version.outputs.release_version }} + VERSION_WITHOUT_PREFIX: ${{ steps.get_release_version_without_prefix.outputs.version_without_prefix }} + HIGHEST_SEMVER_TAG: ${{ steps.get_highest_semver.outputs.highest_semver_tag }} + run: | + echo $RELEASE_VERSION + echo $VERSION_WITHOUT_PREFIX + echo $HIGHEST_SEMVER_TAG + + build-publish-docker-images: + runs-on: ubuntu-latest + needs: [get-version, publish-python-sdk] + strategy: + matrix: + 
component: [feature-server, feature-server-python-aws, feature-server-java, feature-transformation-server] + env: + MAVEN_CACHE: gs://feast-templocation-kf-feast/.m2.2020-08-19.tar + REGISTRY: feastdev + steps: + - uses: actions/checkout@v2 + - name: Set up QEMU + uses: docker/setup-qemu-action@v1 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + - name: Login to DockerHub + uses: docker/login-action@v1 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + - name: Authenticate to Google Cloud + uses: 'google-github-actions/auth@v1' + with: + credentials_json: '${{ secrets.GCP_SA_KEY }}' + - name: Set up gcloud SDK + uses: google-github-actions/setup-gcloud@v1 + with: + project_id: ${{ secrets.GCP_PROJECT_ID }} + - name: Use gcloud CLI + run: gcloud info + - run: gcloud auth configure-docker --quiet + - name: Build image + run: | + make build-${{ matrix.component }}-docker REGISTRY=${REGISTRY} VERSION=${VERSION_WITHOUT_PREFIX} + env: + RELEASE_VERSION: ${{ needs.get-version.outputs.release_version }} + VERSION_WITHOUT_PREFIX: ${{ needs.get-version.outputs.version_without_prefix }} + HIGHEST_SEMVER_TAG: ${{ needs.get-version.outputs.highest_semver_tag }} + - name: Push versioned images + env: + RELEASE_VERSION: ${{ needs.get-version.outputs.release_version }} + VERSION_WITHOUT_PREFIX: ${{ needs.get-version.outputs.version_without_prefix }} + HIGHEST_SEMVER_TAG: ${{ needs.get-version.outputs.highest_semver_tag }} + run: | + make push-${{ matrix.component }}-docker REGISTRY=${REGISTRY} VERSION=${VERSION_WITHOUT_PREFIX} + + echo "Only push to latest tag if tag is the highest semver version $HIGHEST_SEMVER_TAG" + if [ "${VERSION_WITHOUT_PREFIX}" = "${HIGHEST_SEMVER_TAG:1}" ] + then + docker tag feastdev/${{ matrix.component }}:${VERSION_WITHOUT_PREFIX} feastdev/${{ matrix.component }}:latest + docker push feastdev/${{ matrix.component }}:latest + fi + + publish-helm-charts: + if: github.repository == 'feast-dev/feast' + runs-on: ubuntu-latest + needs: get-version + env: + HELM_VERSION: v3.8.0 + VERSION_WITHOUT_PREFIX: ${{ needs.get-version.outputs.version_without_prefix }} + steps: + - uses: actions/checkout@v2 + - name: Authenticate to Google Cloud + uses: 'google-github-actions/auth@v1' + with: + credentials_json: '${{ secrets.GCP_SA_KEY }}' + - name: Set up gcloud SDK + uses: google-github-actions/setup-gcloud@v1 + with: + project_id: ${{ secrets.GCP_PROJECT_ID }} + - run: gcloud auth configure-docker --quiet + - name: Remove previous Helm + run: sudo rm -rf $(which helm) + - name: Install Helm + run: ./infra/scripts/helm/install-helm.sh + - name: Validate Helm chart prior to publishing + run: ./infra/scripts/helm/validate-helm-chart-publish.sh + - name: Validate all version consistency + run: ./infra/scripts/helm/validate-helm-chart-versions.sh $VERSION_WITHOUT_PREFIX + - name: Publish Helm charts + run: ./infra/scripts/helm/push-helm-charts.sh $VERSION_WITHOUT_PREFIX + + build_wheels: + uses: ./.github/workflows/build_wheels.yml + + publish-python-sdk: + if: github.repository == 'feast-dev/feast' + runs-on: ubuntu-latest + needs: [build_wheels] + steps: + - uses: actions/download-artifact@v2 + with: + name: wheels + path: dist + - uses: pypa/gh-action-pypi-publish@v1.4.2 + with: + user: __token__ + password: ${{ secrets.PYPI_PASSWORD }} + + publish-java-sdk: + if: github.repository == 'feast-dev/feast' + container: maven:3.6-jdk-11 + runs-on: ubuntu-latest + needs: get-version + steps: + - uses: actions/checkout@v2 + with: + 
submodules: 'true' + - name: Set up JDK 11 + uses: actions/setup-java@v1 + with: + java-version: '11' + java-package: jdk + architecture: x64 + - uses: actions/setup-python@v2 + with: + python-version: '3.7' + architecture: 'x64' + - uses: actions/cache@v2 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-it-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-it-maven- + - name: Publish java sdk + env: + VERSION_WITHOUT_PREFIX: ${{ needs.get-version.outputs.version_without_prefix }} + GPG_PUBLIC_KEY: ${{ secrets.GPG_PUBLIC_KEY }} + GPG_PRIVATE_KEY: ${{ secrets.GPG_PRIVATE_KEY }} + MAVEN_SETTINGS: ${{ secrets.MAVEN_SETTINGS }} + run: | + echo -n "$GPG_PUBLIC_KEY" > /root/public-key + echo -n "$GPG_PRIVATE_KEY" > /root/private-key + mkdir -p /root/.m2/ + echo -n "$MAVEN_SETTINGS" > /root/.m2/settings.xml + infra/scripts/publish-java-sdk.sh --revision ${VERSION_WITHOUT_PREFIX} --gpg-key-import-dir /root From e3fe369b837150b426231a4b4faf3b49dcfa7695 Mon Sep 17 00:00:00 2001 From: Matt Carlin Date: Thu, 6 Jul 2023 08:15:52 -0500 Subject: [PATCH 28/32] Fix entity comparison in FeatureView --- sdk/python/feast/feature_view.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py index 13b15a3b5e..6cae903eea 100644 --- a/sdk/python/feast/feature_view.py +++ b/sdk/python/feast/feature_view.py @@ -260,6 +260,9 @@ def __eq__(self, other): if isinstance(self.original_entities, List) and isinstance( other.original_entities, List ): + if len(entity1) != len(entity2): + return False + for entity1, entity2 in zip( self.original_entities, other.original_entities ): From 6b9be1754a1bc522485b421b9a164609c60fc7f3 Mon Sep 17 00:00:00 2001 From: Matt Carlin Date: Thu, 6 Jul 2023 10:10:01 -0500 Subject: [PATCH 29/32] Fix type checking in Pydantic models --- .../pydantic_models/data_source_model.py | 11 ++++++++++- .../pydantic_models/feature_view_model.py | 16 +++++++++++++--- sdk/python/feast/feature_view.py | 3 ++- sdk/python/feast/stream_feature_view.py | 1 + sdk/python/tests/unit/test_pydantic_models.py | 14 +++----------- 5 files changed, 29 insertions(+), 16 deletions(-) diff --git a/sdk/python/feast/expediagroup/pydantic_models/data_source_model.py b/sdk/python/feast/expediagroup/pydantic_models/data_source_model.py index ff536a4714..83419af6fb 100644 --- a/sdk/python/feast/expediagroup/pydantic_models/data_source_model.py +++ b/sdk/python/feast/expediagroup/pydantic_models/data_source_model.py @@ -4,10 +4,11 @@ Copyright 2023 Expedia Group Author: matcarlin@expediagroup.com """ -from typing import Dict, List, Literal, Optional +from typing import Dict, List, Literal, Optional, Union from pydantic import BaseModel from pydantic import Field as PydanticField +from typing_extensions import Annotated from feast.data_source import RequestSource from feast.field import Field @@ -172,3 +173,11 @@ def from_data_source(cls, data_source): if data_source.timestamp_field else "", ) + + +# https://blog.devgenius.io/deserialize-child-classes-with-pydantic-that-gonna-work-784230e1cf83 +# This lets us discriminate child classes of DataSourceModel with type hints. 
+AnyDataSource = Annotated[ + Union[RequestSourceModel, SparkSourceModel], + PydanticField(discriminator="model_type"), +] diff --git a/sdk/python/feast/expediagroup/pydantic_models/feature_view_model.py b/sdk/python/feast/expediagroup/pydantic_models/feature_view_model.py index c141b18ce0..f9b99e4b3e 100644 --- a/sdk/python/feast/expediagroup/pydantic_models/feature_view_model.py +++ b/sdk/python/feast/expediagroup/pydantic_models/feature_view_model.py @@ -14,7 +14,7 @@ from feast.data_source import DataSource from feast.entity import Entity from feast.expediagroup.pydantic_models.data_source_model import ( - DataSourceModel, + AnyDataSource, RequestSourceModel, SparkSourceModel, ) @@ -23,6 +23,8 @@ from feast.field import Field from feast.types import ComplexFeastType, PrimitiveFeastType +SUPPORTED_DATA_SOURCES = [RequestSourceModel, SparkSourceModel] + class FeatureViewModel(BaseModel): """ @@ -33,8 +35,8 @@ class FeatureViewModel(BaseModel): original_entities: List[EntityModel] = [] original_schema: Optional[List[Field]] = None ttl: Optional[timedelta] - batch_source: DataSourceModel - stream_source: Optional[DataSourceModel] + batch_source: AnyDataSource + stream_source: Optional[AnyDataSource] online: bool = True description: str = "" tags: Optional[Dict[str, str]] = None @@ -101,6 +103,10 @@ def from_feature_view(cls, feature_view): sys.modules[__name__], type(feature_view.batch_source).__name__ + "Model", ) + if class_ not in SUPPORTED_DATA_SOURCES: + raise ValueError( + "Batch source type is not a supported data source type." + ) batch_source = class_.from_data_source(feature_view.batch_source) stream_source = None if feature_view.stream_source: @@ -108,6 +114,10 @@ def from_feature_view(cls, feature_view): sys.modules[__name__], type(feature_view.stream_source).__name__ + "Model", ) + if class_ not in SUPPORTED_DATA_SOURCES: + raise ValueError( + "Stream source type is not a supported data source type." + ) stream_source = class_.from_data_source(feature_view.stream_source) return cls( name=feature_view.name, diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py index 6cae903eea..6797aade8c 100644 --- a/sdk/python/feast/feature_view.py +++ b/sdk/python/feast/feature_view.py @@ -260,7 +260,7 @@ def __eq__(self, other): if isinstance(self.original_entities, List) and isinstance( other.original_entities, List ): - if len(entity1) != len(entity2): + if len(self.original_entities) != len(other.original_entities): return False for entity1, entity2 in zip( @@ -420,6 +420,7 @@ def from_proto(cls, feature_view_proto: FeatureViewProto): # This avoids the deprecation warning. feature_view.entities = list(feature_view_proto.spec.entities) + feature_view.original_entities = feature_view_proto.spec.entities # Instead of passing in a schema, we set the features and entity columns. 
feature_view.features = [ diff --git a/sdk/python/feast/stream_feature_view.py b/sdk/python/feast/stream_feature_view.py index d3a2164788..17d032ec04 100644 --- a/sdk/python/feast/stream_feature_view.py +++ b/sdk/python/feast/stream_feature_view.py @@ -251,6 +251,7 @@ def from_proto(cls, sfv_proto): stream_feature_view.stream_source = stream_source stream_feature_view.entities = list(sfv_proto.spec.entities) + stream_feature_view.original_entities = sfv_proto.spec.entities stream_feature_view.features = [ Field.from_proto(field_proto) for field_proto in sfv_proto.spec.features diff --git a/sdk/python/tests/unit/test_pydantic_models.py b/sdk/python/tests/unit/test_pydantic_models.py index 2b43dead9c..8b84dd3d5a 100644 --- a/sdk/python/tests/unit/test_pydantic_models.py +++ b/sdk/python/tests/unit/test_pydantic_models.py @@ -11,15 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import List, Union +from typing import List from pydantic import BaseModel -from pydantic import Field as PydanticField -from typing_extensions import Annotated from feast.data_source import RequestSource from feast.entity import Entity from feast.expediagroup.pydantic_models.data_source_model import ( + AnyDataSource, RequestSourceModel, SparkSourceModel, ) @@ -34,15 +33,8 @@ def test_datasource_child_deserialization(): - # https://blog.devgenius.io/deserialize-child-classes-with-pydantic-that-gonna-work-784230e1cf83 - # This lets us discriminate child classes of DataSourceModel with type hints. - SourceTypes = Annotated[ - Union[RequestSourceModel, SparkSourceModel], - PydanticField(discriminator="model_type"), - ] - class DataSourcesByWire(BaseModel): - source_models: List[SourceTypes] = [] + source_models: List[AnyDataSource] = [] class Config: arbitrary_types_allowed = True From b709ce176e3a9481db06d61c0510ce4794dec472 Mon Sep 17 00:00:00 2001 From: Matt Carlin Date: Thu, 6 Jul 2023 10:42:44 -0500 Subject: [PATCH 30/32] Update pydantic model type checking to avoid Subscripted generics error --- sdk/python/feast/field.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/sdk/python/feast/field.py b/sdk/python/feast/field.py index c019e4e258..4b1b7ba247 100644 --- a/sdk/python/feast/field.py +++ b/sdk/python/feast/field.py @@ -15,7 +15,7 @@ from typing import Callable, Dict, Optional from pydantic import BaseModel, validator -from typeguard import typechecked +from typeguard import check_type, typechecked from feast.feature import Feature from feast.protos.feast.core.Feature_pb2 import FeatureSpecV2 as FieldProto @@ -64,10 +64,13 @@ def dtype_is_feasttype_or_string_feasttype(cls, v): TO-DO: Investigate whether FeastType can be refactored to a json compatible format. 
""" - if not isinstance(v, FeastType): # type: ignore - if isinstance(v, str): + try: + check_type('v', v, FeastType) # type: ignore + except TypeError: + try: + check_type('v', v, str) return from_string(v) - else: + except TypeError: raise TypeError("dtype must be of type FeastType") return v From d0866d1b31205b4ebd06dbd95e1809d69c19f5ac Mon Sep 17 00:00:00 2001 From: Matt Carlin Date: Thu, 6 Jul 2023 10:48:21 -0500 Subject: [PATCH 31/32] Small lint fix --- sdk/python/feast/field.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/python/feast/field.py b/sdk/python/feast/field.py index 4b1b7ba247..b2f6a5d250 100644 --- a/sdk/python/feast/field.py +++ b/sdk/python/feast/field.py @@ -65,10 +65,10 @@ def dtype_is_feasttype_or_string_feasttype(cls, v): format. """ try: - check_type('v', v, FeastType) # type: ignore + check_type("v", v, FeastType) # type: ignore except TypeError: try: - check_type('v', v, str) + check_type("v", v, str) return from_string(v) except TypeError: raise TypeError("dtype must be of type FeastType") From 1d622e766c506215826c278c2261793a82bfb7ab Mon Sep 17 00:00:00 2001 From: Matt Carlin Date: Thu, 6 Jul 2023 11:09:25 -0500 Subject: [PATCH 32/32] Restory github workflow release.yml --- .github/workflows/release.yml | 157 ++++++++++++++++++++++++++++++++++ 1 file changed, 157 insertions(+) create mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000000..da16c5f8f1 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,157 @@ +name: release + +on: + workflow_dispatch: + inputs: + dry_run: + description: 'Dry Run' + required: true + default: true + type: boolean + token: + description: 'Personal Access Token' + required: true + default: "" + type: string + publish_ui: + description: 'Publish to NPM?' 
+        required: true
+        default: true
+        type: boolean
+
+jobs:
+
+  get_dry_release_versions:
+    runs-on: ubuntu-latest
+    env:
+      GITHUB_TOKEN: ${{ github.event.inputs.token }}
+    outputs:
+      current_version: ${{ steps.get_versions.outputs.current_version }}
+      next_version: ${{ steps.get_versions.outputs.next_version }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+        with:
+          persist-credentials: false
+      - name: Setup Node.js
+        uses: actions/setup-node@v2
+        with:
+          node-version: '18.x'
+          registry-url: 'https://registry.npmjs.org'
+      - name: Release (Dry Run)
+        id: get_versions
+        run: |
+          CURRENT_VERSION=$(npx -p @semantic-release/changelog -p @semantic-release/git -p @semantic-release/exec -p semantic-release semantic-release --dry-run | grep "associated with version " | sed -E 's/.* version//' | sed -E 's/ on.*//')
+          NEXT_VERSION=$(npx -p @semantic-release/changelog -p @semantic-release/git -p @semantic-release/exec -p semantic-release semantic-release --dry-run | grep 'The next release version is' | sed -E 's/.* ([[:digit:].]+)$/\1/')
+          echo ::set-output name=current_version::$CURRENT_VERSION
+          echo ::set-output name=next_version::$NEXT_VERSION
+          echo "Current version is ${CURRENT_VERSION}"
+          echo "Next version is ${NEXT_VERSION}"
+
+  validate_version_bumps:
+    if: github.repository == 'feast-dev/feast'
+    needs: get_dry_release_versions
+    runs-on: ubuntu-latest
+    env:
+      # This publish is working using an NPM automation token to bypass 2FA
+      NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
+      HELM_VERSION: v3.8.0
+      CURRENT_VERSION: ${{ needs.get_dry_release_versions.outputs.current_version }}
+      NEXT_VERSION: ${{ needs.get_dry_release_versions.outputs.next_version }}
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-node@v2
+        with:
+          node-version: '18.x'
+          registry-url: 'https://registry.npmjs.org'
+      - name: Bump file versions
+        run: python ./infra/scripts/release/bump_file_versions.py ${CURRENT_VERSION} ${NEXT_VERSION}
+      - name: Install yarn dependencies
+        working-directory: ./ui
+        run: yarn install
+      - name: Build yarn rollup
+        working-directory: ./ui
+        run: yarn build:lib
+      - name: Bundle UI in SDK
+        run: make build-ui
+      - name: Remove previous Helm
+        run: sudo rm -rf $(which helm)
+      - name: Set up Homebrew
+        uses: Homebrew/actions/setup-homebrew@master
+      - name: Setup Helm-docs
+        run: |
+          brew install norwoodj/tap/helm-docs
+      - name: Generate helm chart READMEs
+        run: make build-helm-docs
+      - name: Install Helm
+        run: ./infra/scripts/helm/install-helm.sh
+      - name: Validate Helm chart prior to publishing
+        run: ./infra/scripts/helm/validate-helm-chart-publish.sh
+      - name: Validate all version consistency
+        run: ./infra/scripts/helm/validate-helm-chart-versions.sh $NEXT_VERSION
+
+
+  publish-web-ui-npm:
+    needs: [validate_version_bumps, get_dry_release_versions]
+    runs-on: ubuntu-latest
+    env:
+      # This publish is working using an NPM automation token to bypass 2FA
+      NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
+      CURRENT_VERSION: ${{ needs.get_dry_release_versions.outputs.current_version }}
+      NEXT_VERSION: ${{ needs.get_dry_release_versions.outputs.next_version }}
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-node@v2
+        with:
+          node-version: '18.x'
+          registry-url: 'https://registry.npmjs.org'
+      - name: Bump file versions (temporarily for Web UI publish)
+        run: python ./infra/scripts/release/bump_file_versions.py ${CURRENT_VERSION} ${NEXT_VERSION}
+      - name: Install yarn dependencies
+        working-directory: ./ui
+        run: yarn install
+      - name: Build yarn rollup
+        working-directory: ./ui
+        run: yarn build:lib
+      - name: Publish UI package
+        working-directory: ./ui
+        if: github.event.inputs.dry_run == 'false' && github.event.inputs.publish_ui == 'true'
+        run: npm publish
+        env:
+          # This publish is working using an NPM automation token to bypass 2FA
+          NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
+
+  release:
+    name: release
+    runs-on: ubuntu-latest
+    needs: publish-web-ui-npm
+    env:
+      GITHUB_TOKEN: ${{ github.event.inputs.token }}
+      GIT_AUTHOR_NAME: feast-ci-bot
+      GIT_AUTHOR_EMAIL: feast-ci-bot@willem.co
+      GIT_COMMITTER_NAME: feast-ci-bot
+      GIT_COMMITTER_EMAIL: feast-ci-bot@willem.co
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+        with:
+          persist-credentials: false
+      - name: Setup Node.js
+        uses: actions/setup-node@v2
+        with:
+          node-version: '18.x'
+          registry-url: 'https://registry.npmjs.org'
+      - name: Set up Homebrew
+        id: set-up-homebrew
+        uses: Homebrew/actions/setup-homebrew@master
+      - name: Setup Helm-docs
+        run: |
+          brew install norwoodj/tap/helm-docs
+      - name: Release (Dry Run)
+        if: github.event.inputs.dry_run == 'true'
+        run: |
+          npx -p @semantic-release/changelog -p @semantic-release/git -p @semantic-release/exec -p semantic-release semantic-release --dry-run
+      - name: Release
+        if: github.event.inputs.dry_run == 'false'
+        run: |
+          npx -p @semantic-release/changelog -p @semantic-release/git -p @semantic-release/exec -p semantic-release semantic-release
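
A note for readers on the test_pydantic_models.py change above: the test now
imports AnyDataSource from feast.expediagroup.pydantic_models.data_source_model
instead of building its own discriminated union, and the deleted lines show the
pydantic pattern that the library alias presumably centralizes. A minimal
self-contained sketch of that pattern, assuming pydantic 1.9+ discriminated
unions and a model_type discriminator field as in the removed test code; the
two toy models below are illustrative stand-ins, not the real
RequestSourceModel and SparkSourceModel:

    from typing import List, Literal, Union

    from pydantic import BaseModel
    from pydantic import Field as PydanticField
    from typing_extensions import Annotated

    class RequestSourceModel(BaseModel):
        # Literal discriminator value identifies this child class on the wire.
        model_type: Literal["RequestSourceModel"] = "RequestSourceModel"
        name: str

    class SparkSourceModel(BaseModel):
        model_type: Literal["SparkSourceModel"] = "SparkSourceModel"
        name: str

    # Pydantic reads model_type at parse time and instantiates the matching
    # child class, so child models survive a serialize/deserialize round trip.
    AnyDataSource = Annotated[
        Union[RequestSourceModel, SparkSourceModel],
        PydanticField(discriminator="model_type"),
    ]

    class DataSourcesByWire(BaseModel):
        source_models: List[AnyDataSource] = []

    wire = {"source_models": [{"model_type": "SparkSourceModel", "name": "spark"}]}
    parsed = DataSourcesByWire.parse_obj(wire)
    assert isinstance(parsed.source_models[0], SparkSourceModel)

Without the discriminator annotation, pydantic would try each Union member in
order and could silently coerce a payload into the wrong child class; the
discriminator makes the dispatch explicit and the error messages precise.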
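
A note for readers on the field.py validator in patches 30 and 31: FeastType is
a Union alias, so isinstance(v, FeastType) fails at runtime with "Subscripted
generics cannot be used with class and instance checks"; typeguard's check_type
performs the same membership test but signals a mismatch by raising TypeError,
which the validator can catch. A standalone sketch of the idea, assuming
typeguard 2.x (where check_type takes the argument name as its first parameter;
later major versions changed both the signature and the exception type) and
pydantic 1.x; FakeFeastType and the from_string stub are illustrative
stand-ins, not the Feast definitions:

    from typing import Any, Union

    from pydantic import BaseModel, validator
    from typeguard import check_type

    class PrimitiveFeastType:
        pass

    class ComplexFeastType:
        pass

    # A Union alias is a subscripted generic: isinstance(v, FakeFeastType)
    # raises TypeError unconditionally, even for values that belong to it.
    FakeFeastType = Union[PrimitiveFeastType, ComplexFeastType]

    def from_string(s: str) -> PrimitiveFeastType:
        # Stub standing in for a real string-to-type lookup.
        return PrimitiveFeastType()

    class FieldSketch(BaseModel):
        name: str
        dtype: Any

        @validator("dtype")
        def dtype_is_feasttype_or_string(cls, v):
            try:
                check_type("v", v, FakeFeastType)  # raises TypeError on mismatch
            except TypeError:
                try:
                    check_type("v", v, str)
                    return from_string(v)
                except TypeError:
                    raise TypeError("dtype must be of type FeastType")
            return v

    # A string dtype is converted; a matching instance passes through unchanged.
    assert isinstance(FieldSketch(name="age", dtype="int64").dtype, PrimitiveFeastType)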